hermes-agent/tests/test_mini_swe_runner.py
Teknium 3cba81ebed
fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157)
Kimi's gateway selects the correct temperature server-side based on the
active mode (thinking -> 1.0, non-thinking -> 0.6).  Sending any
temperature value — even the previously "correct" one — conflicts with
gateway-managed defaults.

Replaces the old approach of forcing specific temperature values (0.6
for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel
that tells all call sites to strip the temperature key from API kwargs
entirely.

Changes:
- agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model()
  prefix check (covers all kimi-* models), _fixed_temperature_for_model()
  returns sentinel for kimi models.  _build_call_kwargs() strips temp.
- run_agent.py: _build_api_kwargs, flush_memories, and summary generation
  paths all handle the sentinel by popping/omitting temperature.
- trajectory_compressor.py: _effective_temperature_for_model returns None
  for kimi (sentinel mapped), direct client calls use kwargs dict to
  conditionally include temperature.
- mini_swe_runner.py: same sentinel handling via wrapper function.
- 6 test files updated: all 'forces temperature X' assertions replaced
  with 'temperature not in kwargs' assertions.

Net: -76 lines (171 added, 247 removed).
Inspired by PR #13137 (@kshitijk4poor).
2026-04-20 12:23:05 -07:00

60 lines
2 KiB
Python

from types import SimpleNamespace
from unittest.mock import MagicMock, patch
def test_run_task_kimi_omits_temperature():
"""Kimi models should NOT have client-side temperature overrides.
The Kimi gateway selects the correct temperature server-side.
"""
with patch("openai.OpenAI") as mock_openai:
client = MagicMock()
client.chat.completions.create.return_value = SimpleNamespace(
choices=[SimpleNamespace(message=SimpleNamespace(content="done", tool_calls=[]))]
)
mock_openai.return_value = client
from mini_swe_runner import MiniSWERunner
runner = MiniSWERunner(
model="kimi-for-coding",
base_url="https://api.kimi.com/coding/v1",
api_key="test-key",
env_type="local",
max_iterations=1,
)
runner._create_env = MagicMock()
runner._cleanup_env = MagicMock()
result = runner.run_task("2+2")
assert result["completed"] is True
assert "temperature" not in client.chat.completions.create.call_args.kwargs
def test_run_task_public_moonshot_kimi_k2_5_omits_temperature():
"""kimi-k2.5 on the public Moonshot API should not get a forced temperature."""
with patch("openai.OpenAI") as mock_openai:
client = MagicMock()
client.base_url = "https://api.moonshot.ai/v1"
client.chat.completions.create.return_value = SimpleNamespace(
choices=[SimpleNamespace(message=SimpleNamespace(content="done", tool_calls=[]))]
)
mock_openai.return_value = client
from mini_swe_runner import MiniSWERunner
runner = MiniSWERunner(
model="kimi-k2.5",
base_url="https://api.moonshot.ai/v1",
api_key="test-key",
env_type="local",
max_iterations=1,
)
runner._create_env = MagicMock()
runner._cleanup_env = MagicMock()
result = runner.run_task("2+2")
assert result["completed"] is True
assert "temperature" not in client.chat.completions.create.call_args.kwargs