mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
Merge remote-tracking branch 'origin/main' into sid/types-and-lints
# Conflicts: # gateway/run.py # tools/delegate_tool.py
This commit is contained in:
commit
847ffca715
171 changed files with 15125 additions and 1675 deletions
|
|
@ -476,6 +476,133 @@ class TestGetTextAuxiliaryClient:
|
|||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
|
||||
class TestNousAuxiliaryRefresh:
    """Credential refresh and Portal model selection for the Nous aux client."""

    def test_try_nous_prefers_runtime_credentials(self):
        runtime_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous()

            assert client is not None
            # No Portal recommendation → falls back to the hardcoded default.
            assert model == "google/gemini-3-flash-preview"
            ctor_kwargs = mock_openai.call_args.kwargs
            assert ctor_kwargs["api_key"] == "fresh-agent-key"
            assert ctor_kwargs["base_url"] == runtime_base

    def test_try_nous_uses_portal_recommendation_for_text(self):
        """When the Portal recommends a compaction model, _try_nous honors it."""
        runtime_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous(vision=False)

            assert client is not None
            assert model == "minimax/minimax-m2.7"
            # The lookup must have been made for the text (non-vision) flavor.
            assert mock_rec.call_args.kwargs["vision"] is False

    def test_try_nous_uses_portal_recommendation_for_vision(self):
        """Vision tasks should ask for the vision-specific recommendation."""
        runtime_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous(vision=True)

            assert client is not None
            assert model == "google/gemini-3-flash-preview"
            assert mock_rec.call_args.kwargs["vision"] is True

    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
        """If the Portal lookup throws, we must still return a usable model."""
        runtime_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", runtime_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()

            assert client is not None
            assert model == "google/gemini-3-flash-preview"

    def test_call_llm_retries_nous_after_401(self):
        class _Auth401(Exception):
            status_code = 401

        # Cached client whose key has gone stale → first call raises 401.
        stale_client = MagicMock()
        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
        stale_client.chat.completions.create.side_effect = _Auth401("stale nous key")

        # Replacement client built from refreshed runtime credentials.
        fresh_client = MagicMock()
        fresh_client.base_url = "https://inference-api.nousresearch.com/v1"
        fresh_client.chat.completions.create.return_value = {"ok": True}

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client.OpenAI", return_value=fresh_client),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        # Exactly one failed attempt on the stale client, one retry on the fresh one.
        assert stale_client.chat.completions.create.call_count == 1
        assert fresh_client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_call_llm_retries_nous_after_401(self):
        class _Auth401(Exception):
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
        stale_client.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))

        fresh_async_client = MagicMock()
        fresh_async_client.base_url = "https://inference-api.nousresearch.com/v1"
        fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_async_client.chat.completions.create.await_count == 1
||||
# ── Payment / credit exhaustion fallback ─────────────────────────────────
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ class TestResolveAutoMainFirst:
|
|||
|
||||
|
||||
class TestResolveVisionMainFirst:
|
||||
"""Vision auto-detection prefers main provider + main model first."""
|
||||
"""Vision auto-detection prefers the main provider first."""
|
||||
|
||||
def test_openrouter_main_vision_uses_main_model(self, monkeypatch):
|
||||
"""OpenRouter main with vision-capable model → aux vision uses main model."""
|
||||
|
|
@ -200,28 +200,49 @@ class TestResolveVisionMainFirst:
|
|||
assert mock_resolve.call_args.args[0] == "openrouter"
|
||||
assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"
|
||||
|
||||
def test_nous_main_vision_uses_main_model(self):
|
||||
"""Nous Portal main → aux vision uses main model, not free-tier MiMo-V2-Omni."""
|
||||
def test_nous_main_vision_uses_paid_nous_vision_backend(self):
|
||||
"""Paid Nous main → aux vision uses the dedicated Nous vision backend."""
|
||||
with patch(
|
||||
"agent.auxiliary_client._read_main_provider", return_value="nous",
|
||||
), patch(
|
||||
"agent.auxiliary_client._read_main_model",
|
||||
return_value="openai/gpt-5",
|
||||
), patch(
|
||||
"agent.auxiliary_client.resolve_provider_client"
|
||||
) as mock_resolve, patch(
|
||||
"agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", None, None, None, None),
|
||||
), patch(
|
||||
"agent.auxiliary_client._resolve_strict_vision_backend",
|
||||
return_value=(MagicMock(), "google/gemini-3-flash-preview"),
|
||||
):
|
||||
mock_client = MagicMock()
|
||||
mock_resolve.return_value = (mock_client, "openai/gpt-5")
|
||||
|
||||
from agent.auxiliary_client import resolve_vision_provider_client
|
||||
|
||||
provider, client, model = resolve_vision_provider_client()
|
||||
|
||||
assert provider == "nous"
|
||||
assert model == "openai/gpt-5"
|
||||
assert client is not None
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
|
||||
def test_nous_main_vision_uses_free_tier_nous_vision_backend(self):
|
||||
"""Free-tier Nous main → aux vision uses MiMo omni, not the text main model."""
|
||||
with patch(
|
||||
"agent.auxiliary_client._read_main_provider", return_value="nous",
|
||||
), patch(
|
||||
"agent.auxiliary_client._read_main_model",
|
||||
return_value="xiaomi/mimo-v2-pro",
|
||||
), patch(
|
||||
"agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", None, None, None, None),
|
||||
), patch(
|
||||
"agent.auxiliary_client._resolve_strict_vision_backend",
|
||||
return_value=(MagicMock(), "xiaomi/mimo-v2-omni"),
|
||||
):
|
||||
from agent.auxiliary_client import resolve_vision_provider_client
|
||||
|
||||
provider, client, model = resolve_vision_provider_client()
|
||||
|
||||
assert provider == "nous"
|
||||
assert client is not None
|
||||
assert model == "xiaomi/mimo-v2-omni"
|
||||
|
||||
def test_exotic_provider_with_vision_override_preserved(self):
|
||||
"""xiaomi → mimo-v2-omni override still wins over main_model."""
|
||||
|
|
|
|||
111
tests/agent/test_image_gen_registry.py
Normal file
111
tests/agent/test_image_gen_registry.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""Tests for agent/image_gen_registry.py — provider registration & active lookup."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import image_gen_registry
|
||||
from agent.image_gen_provider import ImageGenProvider
|
||||
|
||||
|
||||
class _FakeProvider(ImageGenProvider):
    """Minimal stub provider used to exercise the registry in isolation."""

    def __init__(self, name: str, available: bool = True):
        # Backing fields for the read-only accessors below.
        self._available = available
        self._name = name

    @property
    def name(self) -> str:
        return self._name

    def is_available(self) -> bool:
        return self._available

    def generate(self, prompt, aspect_ratio="landscape", **kw):
        # Encode the provider name in the result so tests can tell who ran.
        return {"success": True, "image": f"{self._name}://{prompt}"}
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_registry():
    """Give every test a pristine provider registry.

    Autouse: the registry is module-global state, so it is cleared both
    before and after each test to prevent cross-test leakage.
    """
    image_gen_registry._reset_for_tests()
    try:
        yield
    finally:
        # try/finally guarantees the teardown reset runs even if the
        # generator is closed abnormally (e.g. interrupt), so one bad
        # test cannot poison the registry for the rest of the session.
        image_gen_registry._reset_for_tests()
|
||||
|
||||
|
||||
class TestRegisterProvider:
    """Registration, validation, and listing of image-gen providers."""

    def test_register_and_lookup(self):
        fake = _FakeProvider("fake")
        image_gen_registry.register_provider(fake)
        assert image_gen_registry.get_provider("fake") is fake

    def test_rejects_non_provider(self):
        with pytest.raises(TypeError):
            image_gen_registry.register_provider("not a provider")  # type: ignore[arg-type]

    def test_rejects_empty_name(self):
        class Empty(ImageGenProvider):
            @property
            def name(self) -> str:
                return ""

            def generate(self, prompt, aspect_ratio="landscape", **kw):
                return {}

        with pytest.raises(ValueError):
            image_gen_registry.register_provider(Empty())

    def test_reregister_overwrites(self):
        first = _FakeProvider("same")
        second = _FakeProvider("same")
        image_gen_registry.register_provider(first)
        image_gen_registry.register_provider(second)
        # Last registration under a given name wins.
        assert image_gen_registry.get_provider("same") is second

    def test_list_is_sorted(self):
        # Register out of order; listing must come back alphabetical.
        for provider_name in ("zeta", "alpha"):
            image_gen_registry.register_provider(_FakeProvider(provider_name))
        names = [p.name for p in image_gen_registry.list_providers()]
        assert names == ["alpha", "zeta"]
|
||||
|
||||
|
||||
class TestGetActiveProvider:
    """Resolution of the active provider from config + registration state."""

    def test_single_provider_autoresolves(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        image_gen_registry.register_provider(_FakeProvider("solo"))
        active = image_gen_registry.get_active_provider()
        assert active is not None
        assert active.name == "solo"

    def test_fal_preferred_on_multi_without_config(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        for provider_name in ("fal", "openai"):
            image_gen_registry.register_provider(_FakeProvider(provider_name))
        active = image_gen_registry.get_active_provider()
        assert active is not None
        assert active.name == "fal"

    def test_explicit_config_wins(self, tmp_path, monkeypatch):
        import yaml

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        config_text = yaml.safe_dump({"image_gen": {"provider": "openai"}})
        (tmp_path / "config.yaml").write_text(config_text)
        for provider_name in ("fal", "openai"):
            image_gen_registry.register_provider(_FakeProvider(provider_name))
        active = image_gen_registry.get_active_provider()
        assert active is not None
        assert active.name == "openai"

    def test_missing_configured_provider_falls_back(self, tmp_path, monkeypatch):
        import yaml

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        config_text = yaml.safe_dump({"image_gen": {"provider": "replicate"}})
        (tmp_path / "config.yaml").write_text(config_text)
        # Only FAL is registered — configured provider doesn't exist
        image_gen_registry.register_provider(_FakeProvider("fal"))
        active = image_gen_registry.get_active_provider()
        # Falls back to FAL preference (legacy default) rather than None
        assert active is not None
        assert active.name == "fal"

    def test_none_when_empty(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        assert image_gen_registry.get_active_provider() is None
|
||||
115
tests/agent/test_kimi_coding_anthropic_thinking.py
Normal file
115
tests/agent/test_kimi_coding_anthropic_thinking.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
"""Regression guard: don't send Anthropic ``thinking`` to Kimi's /coding endpoint.
|
||||
|
||||
Kimi's ``api.kimi.com/coding`` endpoint speaks the Anthropic Messages protocol
|
||||
but has its own thinking semantics. When ``thinking.enabled`` is present in
|
||||
the request, Kimi validates the message history and requires every prior
|
||||
assistant tool-call message to carry OpenAI-style ``reasoning_content``.
|
||||
|
||||
The Anthropic path never populates that field, and
|
||||
``convert_messages_to_anthropic`` strips Anthropic thinking blocks on
|
||||
third-party endpoints — so after one turn with tool calls the next request
|
||||
fails with HTTP 400::
|
||||
|
||||
thinking is enabled but reasoning_content is missing in assistant
|
||||
tool call message at index N
|
||||
|
||||
Kimi on the chat_completions route handles ``thinking`` via ``extra_body`` in
|
||||
``ChatCompletionsTransport`` (#13503). On the Anthropic route the right
|
||||
thing to do is drop the parameter entirely and let Kimi drive reasoning
|
||||
server-side.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestKimiCodingSkipsAnthropicThinking:
    """build_anthropic_kwargs must not inject ``thinking`` for Kimi /coding."""

    @staticmethod
    def _build_kwargs(model, base_url, reasoning_config):
        # Shared call shape: only model / endpoint / reasoning vary per test.
        from agent.anthropic_adapter import build_anthropic_kwargs

        return build_anthropic_kwargs(
            model=model,
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config=reasoning_config,
            base_url=base_url,
        )

    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.kimi.com/coding",
            "https://api.kimi.com/coding/v1",
            "https://api.kimi.com/coding/anthropic",
            "https://api.kimi.com/coding/",
        ],
    )
    def test_kimi_coding_endpoint_omits_thinking(self, base_url: str) -> None:
        kwargs = self._build_kwargs(
            "kimi-k2.5", base_url, {"enabled": True, "effort": "medium"}
        )
        assert "thinking" not in kwargs, (
            "Anthropic thinking must not be sent to Kimi /coding — "
            "endpoint requires reasoning_content on history we don't preserve."
        )
        assert "output_config" not in kwargs

    def test_kimi_coding_with_explicit_disabled_also_omits(self) -> None:
        kwargs = self._build_kwargs(
            "kimi-k2.5", "https://api.kimi.com/coding", {"enabled": False}
        )
        assert "thinking" not in kwargs

    def test_non_kimi_third_party_still_gets_thinking(self) -> None:
        """MiniMax and other third-party Anthropic endpoints must retain thinking."""
        kwargs = self._build_kwargs(
            "MiniMax-M2.7",
            "https://api.minimax.io/anthropic",
            {"enabled": True, "effort": "medium"},
        )
        assert "thinking" in kwargs
        assert kwargs["thinking"]["type"] == "enabled"

    def test_native_anthropic_still_gets_thinking(self) -> None:
        kwargs = self._build_kwargs(
            "claude-sonnet-4-20250514", None, {"enabled": True, "effort": "medium"}
        )
        assert "thinking" in kwargs

    def test_kimi_root_endpoint_unaffected(self) -> None:
        """Only the /coding route is special-cased — plain api.kimi.com is not.

        ``api.kimi.com`` without ``/coding`` uses the chat_completions transport
        (see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
        should never see it, but if it somehow does we should not suppress
        thinking there — that path has different semantics.
        """
        kwargs = self._build_kwargs(
            "kimi-k2.5", "https://api.kimi.com/v1", {"enabled": True, "effort": "medium"}
        )
        assert "thinking" in kwargs
||||
|
|
@ -789,6 +789,24 @@ class TestPromptBuilderConstants:
|
|||
assert "cron" in PLATFORM_HINTS
|
||||
assert "cli" in PLATFORM_HINTS
|
||||
|
||||
def test_cli_hint_does_not_suggest_media_tags(self):
    # Regression: MEDIA:/path tags are intercepted only by messaging
    # gateway platforms. On the CLI they render as literal text and
    # confuse users. The CLI hint must steer the agent away from them.
    hint = PLATFORM_HINTS["cli"]
    assert "MEDIA:" in hint, (
        "CLI hint should mention MEDIA: in order to tell the agent "
        "NOT to use it (negative guidance)."
    )
    # Must contain explicit "don't" language near the MEDIA reference.
    lowered = hint.lower()
    discouragers = ("do not emit media", "not intercepted", "do not", "don't")
    assert any(
        marker in lowered for marker in discouragers
    ), "CLI hint should explicitly discourage MEDIA: tags."
    # Messaging hints should still advertise MEDIA: positively (sanity
    # check that this test is calibrated correctly).
    assert "include MEDIA:" in PLATFORM_HINTS["telegram"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Environment hints
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ class TestBuildChildProgressCallback:
|
|||
|
||||
# task_index=0 in a batch of 3 → prefix "[1]"
|
||||
cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
|
||||
cb0("web_search", "test")
|
||||
cb0("tool.started", "web_search", "test", {})
|
||||
output = buf.getvalue()
|
||||
assert "[1]" in output
|
||||
|
||||
|
|
@ -201,7 +201,7 @@ class TestBuildChildProgressCallback:
|
|||
buf.truncate(0)
|
||||
buf.seek(0)
|
||||
cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
|
||||
cb2("web_search", "test")
|
||||
cb2("tool.started", "web_search", "test", {})
|
||||
output = buf.getvalue()
|
||||
assert "[3]" in output
|
||||
|
||||
|
|
|
|||
164
tests/agent/transports/test_bedrock_transport.py
Normal file
164
tests/agent/transports/test_bedrock_transport.py
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
"""Tests for the BedrockTransport."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.transports import get_transport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
@pytest.fixture
def transport():
    """Return the registered Bedrock transport (import triggers registration)."""
    import agent.transports.bedrock  # noqa: F401

    bedrock_transport = get_transport("bedrock_converse")
    return bedrock_transport
|
||||
|
||||
|
||||
class TestBedrockBasic:
    """Registration and identity of the Bedrock transport."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "bedrock_converse"

    def test_registered(self, transport):
        # The fixture returns None only if registration failed.
        assert transport is not None
|
||||
|
||||
|
||||
class TestBedrockBuildKwargs:
    """Request-kwargs construction for the Converse API."""

    def test_basic_kwargs(self, transport):
        messages = [{"role": "user", "content": "Hello"}]
        kwargs = transport.build_kwargs(
            model="anthropic.claude-3-5-sonnet-20241022-v2:0", messages=messages
        )
        assert kwargs["modelId"] == "anthropic.claude-3-5-sonnet-20241022-v2:0"
        assert kwargs["__bedrock_converse__"] is True
        # Region defaults to us-east-1 when the caller does not supply one.
        assert kwargs["__bedrock_region__"] == "us-east-1"
        assert "messages" in kwargs

    def test_custom_region(self, transport):
        kwargs = transport.build_kwargs(
            model="anthropic.claude-3-5-sonnet-20241022-v2:0",
            messages=[{"role": "user", "content": "Hi"}],
            region="eu-west-1",
        )
        assert kwargs["__bedrock_region__"] == "eu-west-1"

    def test_max_tokens(self, transport):
        kwargs = transport.build_kwargs(
            model="anthropic.claude-3-5-sonnet-20241022-v2:0",
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=8192,
        )
        # Converse nests the limit under inferenceConfig.maxTokens.
        assert kwargs["inferenceConfig"]["maxTokens"] == 8192
|
||||
|
||||
|
||||
class TestBedrockConvertTools:
    """OpenAI-style tool specs → Bedrock ``toolSpec`` conversion."""

    def test_convert_tools(self, transport):
        openai_tools = [{
            "type": "function",
            "function": {
                "name": "terminal",
                "description": "Run commands",
                "parameters": {"type": "object", "properties": {"command": {"type": "string"}}},
            }
        }]
        converted = transport.convert_tools(openai_tools)
        assert len(converted) == 1
        assert converted[0]["toolSpec"]["name"] == "terminal"
|
||||
|
||||
|
||||
class TestBedrockValidate:
    """``validate_response`` over raw dicts and pre-normalized objects."""

    def test_none(self, transport):
        assert transport.validate_response(None) is False

    def test_raw_dict_valid(self, transport):
        # A raw Converse payload is valid once output.message exists.
        assert transport.validate_response({"output": {"message": {}}}) is True

    def test_raw_dict_invalid(self, transport):
        assert transport.validate_response({"error": "fail"}) is False

    def test_normalized_valid(self, transport):
        message = SimpleNamespace(content="hi")
        normalized = SimpleNamespace(choices=[SimpleNamespace(message=message)])
        assert transport.validate_response(normalized) is True
|
||||
|
||||
|
||||
class TestBedrockMapFinishReason:
    """Bedrock ``stopReason`` → OpenAI ``finish_reason`` mapping."""

    def test_end_turn(self, transport):
        assert transport.map_finish_reason("end_turn") == "stop"

    def test_tool_use(self, transport):
        assert transport.map_finish_reason("tool_use") == "tool_calls"

    def test_max_tokens(self, transport):
        assert transport.map_finish_reason("max_tokens") == "length"

    def test_guardrail(self, transport):
        assert transport.map_finish_reason("guardrail_intervened") == "content_filter"

    def test_unknown(self, transport):
        # Anything unrecognized degrades to a plain stop.
        assert transport.map_finish_reason("unknown") == "stop"
|
||||
|
||||
|
||||
class TestBedrockNormalize:
|
||||
|
||||
def _make_bedrock_response(self, text="Hello", tool_calls=None, stop_reason="end_turn"):
|
||||
"""Build a raw Bedrock converse response dict."""
|
||||
content = []
|
||||
if text:
|
||||
content.append({"text": text})
|
||||
if tool_calls:
|
||||
for tc in tool_calls:
|
||||
content.append({
|
||||
"toolUse": {
|
||||
"toolUseId": tc["id"],
|
||||
"name": tc["name"],
|
||||
"input": tc["input"],
|
||||
}
|
||||
})
|
||||
return {
|
||||
"output": {"message": {"role": "assistant", "content": content}},
|
||||
"stopReason": stop_reason,
|
||||
"usage": {"inputTokens": 10, "outputTokens": 5, "totalTokens": 15},
|
||||
}
|
||||
|
||||
def test_text_response(self, transport):
|
||||
raw = self._make_bedrock_response(text="Hello world")
|
||||
nr = transport.normalize_response(raw)
|
||||
assert isinstance(nr, NormalizedResponse)
|
||||
assert nr.content == "Hello world"
|
||||
assert nr.finish_reason == "stop"
|
||||
|
||||
def test_tool_call_response(self, transport):
|
||||
raw = self._make_bedrock_response(
|
||||
text=None,
|
||||
tool_calls=[{"id": "tool_1", "name": "terminal", "input": {"command": "ls"}}],
|
||||
stop_reason="tool_use",
|
||||
)
|
||||
nr = transport.normalize_response(raw)
|
||||
assert nr.finish_reason == "tool_calls"
|
||||
assert len(nr.tool_calls) == 1
|
||||
assert nr.tool_calls[0].name == "terminal"
|
||||
|
||||
def test_already_normalized_response(self, transport):
|
||||
"""Test normalize_response handles already-normalized SimpleNamespace (from dispatch site)."""
|
||||
pre_normalized = SimpleNamespace(
|
||||
choices=[SimpleNamespace(
|
||||
message=SimpleNamespace(
|
||||
content="Hello from Bedrock",
|
||||
tool_calls=None,
|
||||
reasoning=None,
|
||||
reasoning_content=None,
|
||||
),
|
||||
finish_reason="stop",
|
||||
)],
|
||||
usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15),
|
||||
)
|
||||
nr = transport.normalize_response(pre_normalized)
|
||||
assert isinstance(nr, NormalizedResponse)
|
||||
assert nr.content == "Hello from Bedrock"
|
||||
assert nr.finish_reason == "stop"
|
||||
assert nr.usage is not None
|
||||
assert nr.usage.prompt_tokens == 10
|
||||
349
tests/agent/transports/test_chat_completions.py
Normal file
349
tests/agent/transports/test_chat_completions.py
Normal file
|
|
@ -0,0 +1,349 @@
|
|||
"""Tests for the ChatCompletionsTransport."""
|
||||
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.transports import get_transport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
@pytest.fixture
def transport():
    """Return the registered chat-completions transport (import registers it)."""
    import agent.transports.chat_completions  # noqa: F401

    chat_transport = get_transport("chat_completions")
    return chat_transport
|
||||
|
||||
|
||||
class TestChatCompletionsBasic:
    """Identity, tool pass-through, and codex-field stripping."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "chat_completions"

    def test_registered(self, transport):
        assert transport is not None

    def test_convert_tools_identity(self, transport):
        tool_specs = [{"type": "function", "function": {"name": "test", "parameters": {}}}]
        # Tools are already in OpenAI shape; conversion is a pass-through.
        assert transport.convert_tools(tool_specs) is tool_specs

    def test_convert_messages_no_codex_leaks(self, transport):
        history = [{"role": "user", "content": "hi"}]
        assert transport.convert_messages(history) is history  # no copy needed

    def test_convert_messages_strips_codex_fields(self, transport):
        history = [
            {"role": "assistant", "content": "ok", "codex_reasoning_items": [{"id": "rs_1"}],
             "tool_calls": [{"id": "call_1", "call_id": "call_1", "response_item_id": "fc_1",
                             "type": "function", "function": {"name": "t", "arguments": "{}"}}]},
        ]
        converted = transport.convert_messages(history)
        assert "codex_reasoning_items" not in converted[0]
        first_call = converted[0]["tool_calls"][0]
        assert "call_id" not in first_call
        assert "response_item_id" not in first_call
        # Original list untouched (deepcopy-on-demand)
        assert "codex_reasoning_items" in history[0]
|
||||
|
||||
|
||||
class TestChatCompletionsBuildKwargs:
|
||||
|
||||
def test_basic_kwargs(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hello"}]
|
||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, timeout=30.0)
|
||||
assert kw["model"] == "gpt-4o"
|
||||
assert kw["messages"][0]["content"] == "Hello"
|
||||
assert kw["timeout"] == 30.0
|
||||
|
||||
def test_developer_role_swap(self, transport):
|
||||
msgs = [{"role": "system", "content": "You are helpful"}, {"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(model="gpt-5.4", messages=msgs, model_lower="gpt-5.4")
|
||||
assert kw["messages"][0]["role"] == "developer"
|
||||
|
||||
def test_no_developer_swap_for_non_gpt5(self, transport):
|
||||
msgs = [{"role": "system", "content": "You are helpful"}, {"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(model="claude-sonnet-4", messages=msgs, model_lower="claude-sonnet-4")
|
||||
assert kw["messages"][0]["role"] == "system"
|
||||
|
||||
def test_tools_included(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
tools = [{"type": "function", "function": {"name": "test", "parameters": {}}}]
|
||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, tools=tools)
|
||||
assert kw["tools"] == tools
|
||||
|
||||
def test_openrouter_provider_prefs(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-4o", messages=msgs,
|
||||
is_openrouter=True,
|
||||
provider_preferences={"only": ["openai"]},
|
||||
)
|
||||
assert kw["extra_body"]["provider"] == {"only": ["openai"]}
|
||||
|
||||
def test_nous_tags(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, is_nous=True)
|
||||
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
|
||||
|
||||
def test_reasoning_default(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-4o", messages=msgs,
|
||||
supports_reasoning=True,
|
||||
)
|
||||
assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}
|
||||
|
||||
def test_nous_omits_disabled_reasoning(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-4o", messages=msgs,
|
||||
supports_reasoning=True,
|
||||
is_nous=True,
|
||||
reasoning_config={"enabled": False},
|
||||
)
|
||||
# Nous rejects enabled=false; reasoning omitted entirely
|
||||
assert "reasoning" not in kw.get("extra_body", {})
|
||||
|
||||
def test_ollama_num_ctx(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="llama3", messages=msgs,
|
||||
ollama_num_ctx=32768,
|
||||
)
|
||||
assert kw["extra_body"]["options"]["num_ctx"] == 32768
|
||||
|
||||
def test_custom_think_false(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="qwen3", messages=msgs,
|
||||
is_custom_provider=True,
|
||||
reasoning_config={"effort": "none"},
|
||||
)
|
||||
assert kw["extra_body"]["think"] is False
|
||||
|
||||
def test_max_tokens_with_fn(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-4o", messages=msgs,
|
||||
max_tokens=4096,
|
||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||
)
|
||||
assert kw["max_tokens"] == 4096
|
||||
|
||||
def test_ephemeral_overrides_max_tokens(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-4o", messages=msgs,
|
||||
max_tokens=4096,
|
||||
ephemeral_max_output_tokens=2048,
|
||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||
)
|
||||
assert kw["max_tokens"] == 2048
|
||||
|
||||
def test_nvidia_default_max_tokens(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="glm-4.7", messages=msgs,
|
||||
is_nvidia_nim=True,
|
||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||
)
|
||||
# NVIDIA default: 16384
|
||||
assert kw["max_tokens"] == 16384
|
||||
|
||||
def test_qwen_default_max_tokens(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="qwen3-coder-plus", messages=msgs,
|
||||
is_qwen_portal=True,
|
||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||
)
|
||||
# Qwen default: 65536
|
||||
assert kw["max_tokens"] == 65536
|
||||
|
||||
def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="anthropic/claude-sonnet-4.6", messages=msgs,
|
||||
is_openrouter=True,
|
||||
anthropic_max_output=64000,
|
||||
)
|
||||
# Set as plain max_tokens (not via fn) because the aggregator proxies to
|
||||
# Anthropic Messages API which requires the field.
|
||||
assert kw["max_tokens"] == 64000
|
||||
|
||||
def test_request_overrides_last(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-4o", messages=msgs,
|
||||
request_overrides={"service_tier": "priority"},
|
||||
)
|
||||
assert kw["service_tier"] == "priority"
|
||||
|
||||
def test_fixed_temperature(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, fixed_temperature=0.6)
|
||||
assert kw["temperature"] == 0.6
|
||||
|
||||
def test_omit_temperature(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, omit_temperature=True, fixed_temperature=0.5)
|
||||
# omit wins
|
||||
assert "temperature" not in kw
|
||||
|
||||
|
||||
class TestChatCompletionsKimi:
    """Regression tests for the Kimi/Moonshot quirks migrated into the transport."""

    def test_kimi_max_tokens_default(self, transport):
        """With no explicit limit, the Kimi CLI default of 32000 applies."""
        built = transport.build_kwargs(
            model="kimi-k2",
            messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert built["max_tokens"] == 32000

    def test_kimi_reasoning_effort_top_level(self, transport):
        """Kimi requires reasoning_effort as a top-level parameter."""
        built = transport.build_kwargs(
            model="kimi-k2",
            messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            reasoning_config={"effort": "high"},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert built["reasoning_effort"] == "high"

    def test_kimi_reasoning_effort_omitted_when_thinking_disabled(self, transport):
        """Mirror the Kimi CLI: drop reasoning_effort entirely when thinking is off."""
        built = transport.build_kwargs(
            model="kimi-k2",
            messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            reasoning_config={"enabled": False},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert "reasoning_effort" not in built

    def test_kimi_thinking_enabled_extra_body(self, transport):
        """Thinking defaults to enabled via extra_body."""
        built = transport.build_kwargs(
            model="kimi-k2",
            messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert built["extra_body"]["thinking"] == {"type": "enabled"}

    def test_kimi_thinking_disabled_extra_body(self, transport):
        """Disabling reasoning flips the extra_body thinking type to disabled."""
        built = transport.build_kwargs(
            model="kimi-k2",
            messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            reasoning_config={"enabled": False},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert built["extra_body"]["thinking"] == {"type": "disabled"}
|
||||
|
||||
|
||||
class TestChatCompletionsValidate:
    """validate_response rejects anything without a non-empty choices list."""

    def test_none(self, transport):
        assert transport.validate_response(None) is False

    def test_no_choices(self, transport):
        assert transport.validate_response(SimpleNamespace(choices=None)) is False

    def test_empty_choices(self, transport):
        assert transport.validate_response(SimpleNamespace(choices=[])) is False

    def test_valid(self, transport):
        ok = SimpleNamespace(
            choices=[SimpleNamespace(message=SimpleNamespace(content="hi"))]
        )
        assert transport.validate_response(ok) is True
|
||||
|
||||
|
||||
class TestChatCompletionsNormalize:
    """normalize_response maps raw Chat Completions objects to NormalizedResponse."""

    def test_text_response(self, transport):
        raw = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content="Hello", tool_calls=None, reasoning_content=None),
                finish_reason="stop",
            )],
            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15),
        )
        normalized = transport.normalize_response(raw)
        assert isinstance(normalized, NormalizedResponse)
        assert normalized.content == "Hello"
        assert normalized.finish_reason == "stop"
        assert normalized.tool_calls is None

    def test_tool_call_response(self, transport):
        call = SimpleNamespace(
            id="call_123",
            function=SimpleNamespace(name="terminal", arguments='{"command": "ls"}'),
        )
        raw = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content=None, tool_calls=[call], reasoning_content=None),
                finish_reason="tool_calls",
            )],
            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30),
        )
        normalized = transport.normalize_response(raw)
        assert len(normalized.tool_calls) == 1
        first = normalized.tool_calls[0]
        assert first.name == "terminal"
        assert first.id == "call_123"

    def test_tool_call_extra_content_preserved(self, transport):
        """Gemini 3 thinking models attach extra_content with thought_signature
        on tool_calls. Without this replay on the next turn, the API rejects
        the request with 400. The transport MUST surface extra_content so the
        agent loop can write it back into the assistant message."""
        call = SimpleNamespace(
            id="call_gem",
            function=SimpleNamespace(name="terminal", arguments='{"command": "ls"}'),
            extra_content={"google": {"thought_signature": "SIG_ABC123"}},
        )
        raw = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content=None, tool_calls=[call], reasoning_content=None),
                finish_reason="tool_calls",
            )],
            usage=None,
        )
        normalized = transport.normalize_response(raw)
        expected = {"extra_content": {"google": {"thought_signature": "SIG_ABC123"}}}
        assert normalized.tool_calls[0].provider_data == expected

    def test_reasoning_content_preserved_separately(self, transport):
        """DeepSeek/Moonshot use reasoning_content distinct from reasoning.
        Don't merge them — the thinking-prefill retry check reads each field
        separately."""
        raw = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(
                    content=None,
                    tool_calls=None,
                    reasoning="summary text",
                    reasoning_content="detailed scratchpad",
                ),
                finish_reason="stop",
            )],
            usage=None,
        )
        normalized = transport.normalize_response(raw)
        assert normalized.reasoning == "summary text"
        assert normalized.provider_data == {"reasoning_content": "detailed scratchpad"}
|
||||
|
||||
|
||||
class TestChatCompletionsCacheStats:
    """extract_cache_stats pulls prompt-cache counters out of usage details."""

    def test_no_usage(self, transport):
        assert transport.extract_cache_stats(SimpleNamespace(usage=None)) is None

    def test_no_details(self, transport):
        resp = SimpleNamespace(usage=SimpleNamespace(prompt_tokens_details=None))
        assert transport.extract_cache_stats(resp) is None

    def test_with_cache(self, transport):
        details = SimpleNamespace(cached_tokens=500, cache_write_tokens=100)
        resp = SimpleNamespace(usage=SimpleNamespace(prompt_tokens_details=details))
        stats = transport.extract_cache_stats(resp)
        assert stats == {"cached_tokens": 500, "creation_tokens": 100}
|
||||
220
tests/agent/transports/test_codex_transport.py
Normal file
220
tests/agent/transports/test_codex_transport.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Tests for the ResponsesApiTransport (Codex)."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.transports import get_transport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
@pytest.fixture
def transport():
    """Return the Codex Responses transport from the registry.

    The import is for its side effect only — presumably registering the
    transport under "codex_responses" (see test_registered_on_import);
    confirm against agent.transports.codex.
    """
    import agent.transports.codex  # noqa: F401
    return get_transport("codex_responses")
|
||||
|
||||
|
||||
class TestCodexTransportBasic:
    """Smoke tests: registration, api_mode, and tool conversion."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "codex_responses"

    def test_registered_on_import(self, transport):
        assert transport is not None

    def test_convert_tools(self, transport):
        """Chat-style tool dicts are flattened into the Responses-API shape."""
        chat_tools = [{
            "type": "function",
            "function": {
                "name": "terminal",
                "description": "Run a command",
                "parameters": {"type": "object", "properties": {"command": {"type": "string"}}},
            }
        }]
        converted = transport.convert_tools(chat_tools)
        assert len(converted) == 1
        first = converted[0]
        assert first["type"] == "function"
        assert first["name"] == "terminal"
|
||||
|
||||
|
||||
class TestCodexBuildKwargs:
    """build_kwargs shapes Responses-API payloads, including provider quirks."""

    def test_basic_kwargs(self, transport):
        convo = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello"},
        ]
        built = transport.build_kwargs(model="gpt-5.4", messages=convo, tools=[])
        assert built["model"] == "gpt-5.4"
        assert built["instructions"] == "You are helpful."
        assert "input" in built
        assert built["store"] is False

    def test_system_extracted_from_messages(self, transport):
        convo = [
            {"role": "system", "content": "Custom system prompt"},
            {"role": "user", "content": "Hi"},
        ]
        built = transport.build_kwargs(model="gpt-5.4", messages=convo, tools=[])
        assert built["instructions"] == "Custom system prompt"

    def test_no_system_uses_default(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
        )
        # Should fall back to a non-empty default instruction string.
        assert built["instructions"]

    def test_reasoning_config(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            reasoning_config={"effort": "high"},
        )
        assert built.get("reasoning", {}).get("effort") == "high"

    def test_reasoning_disabled(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            reasoning_config={"enabled": False},
        )
        assert "reasoning" not in built or built.get("include") == []

    def test_session_id_sets_cache_key(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            session_id="test-session-123",
        )
        assert built.get("prompt_cache_key") == "test-session-123"

    def test_github_responses_no_cache_key(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            session_id="test-session",
            is_github_responses=True,
        )
        assert "prompt_cache_key" not in built

    def test_max_tokens(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            max_tokens=4096,
        )
        assert built.get("max_output_tokens") == 4096

    def test_codex_backend_no_max_output_tokens(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            max_tokens=4096,
            is_codex_backend=True,
        )
        assert "max_output_tokens" not in built

    def test_xai_headers(self, transport):
        built = transport.build_kwargs(
            model="grok-3",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            session_id="conv-123",
            is_xai_responses=True,
        )
        assert built.get("extra_headers", {}).get("x-grok-conv-id") == "conv-123"

    def test_minimal_effort_clamped(self, transport):
        built = transport.build_kwargs(
            model="gpt-5.4",
            messages=[{"role": "user", "content": "Hi"}],
            tools=[],
            reasoning_config={"effort": "minimal"},
        )
        # "minimal" should be clamped to "low"
        assert built.get("reasoning", {}).get("effort") == "low"
|
||||
|
||||
|
||||
class TestCodexValidateResponse:
    """validate_response requires a non-empty structured output list."""

    def test_none_response(self, transport):
        assert transport.validate_response(None) is False

    def test_empty_output(self, transport):
        resp = SimpleNamespace(output=[], output_text=None)
        assert transport.validate_response(resp) is False

    def test_valid_output(self, transport):
        resp = SimpleNamespace(output=[{"type": "message", "content": []}])
        assert transport.validate_response(resp) is True

    def test_output_text_fallback_not_valid(self, transport):
        """validate_response is strict — output_text doesn't make it valid.
        The caller handles output_text fallback with diagnostic logging."""
        resp = SimpleNamespace(output=None, output_text="Some text")
        assert transport.validate_response(resp) is False
|
||||
|
||||
|
||||
class TestCodexMapFinishReason:
    """Codex response statuses collapse onto Chat-style finish reasons."""

    def test_completed(self, transport):
        assert transport.map_finish_reason("completed") == "stop"

    def test_incomplete(self, transport):
        assert transport.map_finish_reason("incomplete") == "length"

    def test_failed(self, transport):
        assert transport.map_finish_reason("failed") == "stop"

    def test_unknown(self, transport):
        # Anything unrecognized maps to "stop".
        assert transport.map_finish_reason("unknown_status") == "stop"
|
||||
|
||||
|
||||
class TestCodexNormalizeResponse:
|
||||
|
||||
def test_text_response(self, transport):
|
||||
"""Normalize a simple text Codex response."""
|
||||
r = SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
content=[SimpleNamespace(type="output_text", text="Hello world")],
|
||||
status="completed",
|
||||
),
|
||||
],
|
||||
status="completed",
|
||||
incomplete_details=None,
|
||||
usage=SimpleNamespace(input_tokens=10, output_tokens=5,
|
||||
input_tokens_details=None, output_tokens_details=None),
|
||||
)
|
||||
nr = transport.normalize_response(r)
|
||||
assert isinstance(nr, NormalizedResponse)
|
||||
assert nr.content == "Hello world"
|
||||
assert nr.finish_reason == "stop"
|
||||
|
||||
def test_tool_call_response(self, transport):
|
||||
"""Normalize a Codex response with tool calls."""
|
||||
r = SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="function_call",
|
||||
call_id="call_abc123",
|
||||
name="terminal",
|
||||
arguments=json.dumps({"command": "ls"}),
|
||||
id="fc_abc123",
|
||||
status="completed",
|
||||
),
|
||||
],
|
||||
status="completed",
|
||||
incomplete_details=None,
|
||||
usage=SimpleNamespace(input_tokens=10, output_tokens=20,
|
||||
input_tokens_details=None, output_tokens_details=None),
|
||||
)
|
||||
nr = transport.normalize_response(r)
|
||||
assert nr.finish_reason == "tool_calls"
|
||||
assert len(nr.tool_calls) == 1
|
||||
tc = nr.tool_calls[0]
|
||||
assert tc.name == "terminal"
|
||||
assert '"command"' in tc.arguments
|
||||
Loading…
Add table
Add a link
Reference in a new issue