hermes-agent/tests/agent/transports/test_chat_completions.py
kshitijk4poor 83d86ce344 feat: add ChatCompletionsTransport + wire all default paths
Third concrete transport — handles the default 'chat_completions' api_mode used
by ~16 OpenAI-compatible providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama,
DeepSeek, xAI, Kimi, custom, etc.). Wires build_kwargs + validate_response to
production paths.

Based on PR #13447 by @kshitijk4poor, with fixes:
- Preserve tool_call.extra_content (Gemini thought_signature) via
  ToolCall.provider_data — the original shim stripped it, causing 400 errors
  on multi-turn Gemini 3 thinking requests.
- Preserve reasoning_content distinctly from reasoning (DeepSeek/Moonshot) so
  the thinking-prefill retry check (_has_structured) still triggers.
- Port Kimi/Moonshot quirks (32000 max_tokens, top-level reasoning_effort,
  extra_body.thinking) that landed on main after the original PR was opened.
- Keep _qwen_prepare_chat_messages_inplace alive and call it through the
  transport when sanitization already deepcopied (avoids a second deepcopy).
- Skip the back-compat SimpleNamespace shim in the main normalize loop — for
  chat_completions, response.choices[0].message is already the right shape
  with .content/.tool_calls/.reasoning/.reasoning_content/.reasoning_details
  and per-tool-call .extra_content from the OpenAI SDK.

run_agent.py: -239 lines in _build_api_kwargs default branch extracted to the
transport. build_kwargs now owns: codex-field sanitization, Qwen portal prep,
developer role swap, provider preferences, max_tokens resolution (ephemeral >
user > NVIDIA 16384 > Qwen 65536 > Kimi 32000 > anthropic_max_output), Kimi
reasoning_effort + extra_body.thinking, OpenRouter/Nous/GitHub reasoning,
Nous product attribution tags, Ollama num_ctx, custom-provider think=false,
Qwen vl_high_resolution_images, request_overrides.

39 new transport tests (8 build_kwargs, 5 Kimi, 4 validate, 4 normalize
including extra_content regression, 3 cache stats, 3 basic). Tests/run_agent/
targeted suite passes (885/885 + 15 skipped; the 1 remaining failure is the
test_concurrent_interrupt flake present on origin/main).
2026-04-21 20:50:02 -07:00

349 lines
14 KiB
Python

"""Tests for the ChatCompletionsTransport."""
import pytest
from types import SimpleNamespace
from agent.transports import get_transport
from agent.transports.types import NormalizedResponse, ToolCall
@pytest.fixture
def transport():
    """Provide the registered 'chat_completions' transport instance."""
    # Imported purely for its side effect: the module registers itself
    # with the transport registry on import.
    import agent.transports.chat_completions  # noqa: F401

    registered = get_transport("chat_completions")
    return registered
class TestChatCompletionsBasic:
    """Smoke tests: registration, identity pass-throughs, codex-field stripping."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "chat_completions"

    def test_registered(self, transport):
        assert transport is not None

    def test_convert_tools_identity(self, transport):
        tool_defs = [{"type": "function", "function": {"name": "test", "parameters": {}}}]
        # Tools pass through untouched — the very same object, not a copy.
        assert transport.convert_tools(tool_defs) is tool_defs

    def test_convert_messages_no_codex_leaks(self, transport):
        clean = [{"role": "user", "content": "hi"}]
        # Nothing to strip, so the original list is returned as-is (no copy needed).
        assert transport.convert_messages(clean) is clean

    def test_convert_messages_strips_codex_fields(self, transport):
        original = [
            {
                "role": "assistant",
                "content": "ok",
                "codex_reasoning_items": [{"id": "rs_1"}],
                "tool_calls": [
                    {
                        "id": "call_1",
                        "call_id": "call_1",
                        "response_item_id": "fc_1",
                        "type": "function",
                        "function": {"name": "t", "arguments": "{}"},
                    }
                ],
            },
        ]
        converted = transport.convert_messages(original)
        assert "codex_reasoning_items" not in converted[0]
        first_call = converted[0]["tool_calls"][0]
        assert "call_id" not in first_call
        assert "response_item_id" not in first_call
        # Deepcopy-on-demand: the caller's list keeps its codex fields.
        assert "codex_reasoning_items" in original[0]
class TestChatCompletionsBuildKwargs:
    """build_kwargs: role swaps, provider flags, reasoning, max_tokens, overrides."""

    @staticmethod
    def _user(text="Hi"):
        # Minimal single-turn user conversation shared by most cases below.
        return [{"role": "user", "content": text}]

    def test_basic_kwargs(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o", messages=self._user("Hello"), timeout=30.0
        )
        assert kwargs["model"] == "gpt-4o"
        assert kwargs["messages"][0]["content"] == "Hello"
        assert kwargs["timeout"] == 30.0

    def test_developer_role_swap(self, transport):
        convo = [
            {"role": "system", "content": "You are helpful"},
            {"role": "user", "content": "Hi"},
        ]
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=convo, model_lower="gpt-5.4"
        )
        assert kwargs["messages"][0]["role"] == "developer"

    def test_no_developer_swap_for_non_gpt5(self, transport):
        convo = [
            {"role": "system", "content": "You are helpful"},
            {"role": "user", "content": "Hi"},
        ]
        kwargs = transport.build_kwargs(
            model="claude-sonnet-4", messages=convo, model_lower="claude-sonnet-4"
        )
        assert kwargs["messages"][0]["role"] == "system"

    def test_tools_included(self, transport):
        tool_defs = [{"type": "function", "function": {"name": "test", "parameters": {}}}]
        kwargs = transport.build_kwargs(
            model="gpt-4o", messages=self._user(), tools=tool_defs
        )
        assert kwargs["tools"] == tool_defs

    def test_openrouter_provider_prefs(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            is_openrouter=True,
            provider_preferences={"only": ["openai"]},
        )
        assert kwargs["extra_body"]["provider"] == {"only": ["openai"]}

    def test_nous_tags(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o", messages=self._user(), is_nous=True
        )
        assert kwargs["extra_body"]["tags"] == ["product=hermes-agent"]

    def test_reasoning_default(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            supports_reasoning=True,
        )
        assert kwargs["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}

    def test_nous_omits_disabled_reasoning(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            supports_reasoning=True,
            is_nous=True,
            reasoning_config={"enabled": False},
        )
        # Nous rejects enabled=false; reasoning omitted entirely
        assert "reasoning" not in kwargs.get("extra_body", {})

    def test_ollama_num_ctx(self, transport):
        kwargs = transport.build_kwargs(
            model="llama3",
            messages=self._user(),
            ollama_num_ctx=32768,
        )
        assert kwargs["extra_body"]["options"]["num_ctx"] == 32768

    def test_custom_think_false(self, transport):
        kwargs = transport.build_kwargs(
            model="qwen3",
            messages=self._user(),
            is_custom_provider=True,
            reasoning_config={"effort": "none"},
        )
        assert kwargs["extra_body"]["think"] is False

    def test_max_tokens_with_fn(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            max_tokens=4096,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kwargs["max_tokens"] == 4096

    def test_ephemeral_overrides_max_tokens(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            max_tokens=4096,
            ephemeral_max_output_tokens=2048,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kwargs["max_tokens"] == 2048

    def test_nvidia_default_max_tokens(self, transport):
        kwargs = transport.build_kwargs(
            model="glm-4.7",
            messages=self._user(),
            is_nvidia_nim=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # NVIDIA default: 16384
        assert kwargs["max_tokens"] == 16384

    def test_qwen_default_max_tokens(self, transport):
        kwargs = transport.build_kwargs(
            model="qwen3-coder-plus",
            messages=self._user(),
            is_qwen_portal=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # Qwen default: 65536
        assert kwargs["max_tokens"] == 65536

    def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
        kwargs = transport.build_kwargs(
            model="anthropic/claude-sonnet-4.6",
            messages=self._user(),
            is_openrouter=True,
            anthropic_max_output=64000,
        )
        # Set as plain max_tokens (not via fn) because the aggregator proxies to
        # Anthropic Messages API which requires the field.
        assert kwargs["max_tokens"] == 64000

    def test_request_overrides_last(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            request_overrides={"service_tier": "priority"},
        )
        assert kwargs["service_tier"] == "priority"

    def test_fixed_temperature(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o", messages=self._user(), fixed_temperature=0.6
        )
        assert kwargs["temperature"] == 0.6

    def test_omit_temperature(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-4o",
            messages=self._user(),
            omit_temperature=True,
            fixed_temperature=0.5,
        )
        # omit wins
        assert "temperature" not in kwargs
class TestChatCompletionsKimi:
    """Regression tests for the Kimi/Moonshot quirks migrated into the transport."""

    @staticmethod
    def _build(transport, **extra):
        # Common call shape for every case: Kimi flag plus a max_tokens mapper.
        return transport.build_kwargs(
            model="kimi-k2",
            messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
            **extra,
        )

    def test_kimi_max_tokens_default(self, transport):
        # Kimi CLI default: 32000
        assert self._build(transport)["max_tokens"] == 32000

    def test_kimi_reasoning_effort_top_level(self, transport):
        kwargs = self._build(transport, reasoning_config={"effort": "high"})
        # Kimi requires reasoning_effort as a top-level parameter
        assert kwargs["reasoning_effort"] == "high"

    def test_kimi_reasoning_effort_omitted_when_thinking_disabled(self, transport):
        kwargs = self._build(transport, reasoning_config={"enabled": False})
        # Mirror Kimi CLI: omit reasoning_effort entirely when thinking off
        assert "reasoning_effort" not in kwargs

    def test_kimi_thinking_enabled_extra_body(self, transport):
        kwargs = self._build(transport)
        assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}

    def test_kimi_thinking_disabled_extra_body(self, transport):
        kwargs = self._build(transport, reasoning_config={"enabled": False})
        assert kwargs["extra_body"]["thinking"] == {"type": "disabled"}
class TestChatCompletionsValidate:
    """validate_response: reject None/missing/empty choices, accept a real message."""

    def test_none(self, transport):
        assert transport.validate_response(None) is False

    def test_no_choices(self, transport):
        assert transport.validate_response(SimpleNamespace(choices=None)) is False

    def test_empty_choices(self, transport):
        assert transport.validate_response(SimpleNamespace(choices=[])) is False

    def test_valid(self, transport):
        message = SimpleNamespace(content="hi")
        response = SimpleNamespace(choices=[SimpleNamespace(message=message)])
        assert transport.validate_response(response) is True
class TestChatCompletionsNormalize:
    """normalize_response: plain text, tool calls, extra_content, reasoning fields."""

    @staticmethod
    def _wrap(message, finish_reason, usage=None):
        # Build a single-choice response in the OpenAI SDK shape.
        return SimpleNamespace(
            choices=[SimpleNamespace(message=message, finish_reason=finish_reason)],
            usage=usage,
        )

    def test_text_response(self, transport):
        message = SimpleNamespace(content="Hello", tool_calls=None, reasoning_content=None)
        usage = SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15)
        normalized = transport.normalize_response(self._wrap(message, "stop", usage))
        assert isinstance(normalized, NormalizedResponse)
        assert normalized.content == "Hello"
        assert normalized.finish_reason == "stop"
        assert normalized.tool_calls is None

    def test_tool_call_response(self, transport):
        call = SimpleNamespace(
            id="call_123",
            function=SimpleNamespace(name="terminal", arguments='{"command": "ls"}'),
        )
        message = SimpleNamespace(content=None, tool_calls=[call], reasoning_content=None)
        usage = SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30)
        normalized = transport.normalize_response(self._wrap(message, "tool_calls", usage))
        assert len(normalized.tool_calls) == 1
        assert normalized.tool_calls[0].name == "terminal"
        assert normalized.tool_calls[0].id == "call_123"

    def test_tool_call_extra_content_preserved(self, transport):
        """Gemini 3 thinking models attach extra_content with thought_signature
        on tool_calls. Without this replay on the next turn, the API rejects
        the request with 400. The transport MUST surface extra_content so the
        agent loop can write it back into the assistant message."""
        call = SimpleNamespace(
            id="call_gem",
            function=SimpleNamespace(name="terminal", arguments='{"command": "ls"}'),
            extra_content={"google": {"thought_signature": "SIG_ABC123"}},
        )
        message = SimpleNamespace(content=None, tool_calls=[call], reasoning_content=None)
        normalized = transport.normalize_response(self._wrap(message, "tool_calls"))
        assert normalized.tool_calls[0].provider_data == {
            "extra_content": {"google": {"thought_signature": "SIG_ABC123"}}
        }

    def test_reasoning_content_preserved_separately(self, transport):
        """DeepSeek/Moonshot use reasoning_content distinct from reasoning.
        Don't merge them — the thinking-prefill retry check reads each field
        separately."""
        message = SimpleNamespace(
            content=None,
            tool_calls=None,
            reasoning="summary text",
            reasoning_content="detailed scratchpad",
        )
        normalized = transport.normalize_response(self._wrap(message, "stop"))
        assert normalized.reasoning == "summary text"
        assert normalized.provider_data == {"reasoning_content": "detailed scratchpad"}
class TestChatCompletionsCacheStats:
    """extract_cache_stats: None without usage/details; mapped dict when present."""

    def test_no_usage(self, transport):
        assert transport.extract_cache_stats(SimpleNamespace(usage=None)) is None

    def test_no_details(self, transport):
        response = SimpleNamespace(usage=SimpleNamespace(prompt_tokens_details=None))
        assert transport.extract_cache_stats(response) is None

    def test_with_cache(self, transport):
        details = SimpleNamespace(cached_tokens=500, cache_write_tokens=100)
        response = SimpleNamespace(usage=SimpleNamespace(prompt_tokens_details=details))
        # cache_write_tokens surfaces under the "creation_tokens" key.
        assert transport.extract_cache_stats(response) == {
            "cached_tokens": 500,
            "creation_tokens": 100,
        }