hermes-agent/tests/run_agent/test_run_agent_codex_responses.py
Siddharth Balyan f3006ebef9
refactor(tests): re-architect tests + fix CI failures (#5946)
* refactor: re-architect tests to mirror the codebase

* Update tests.yml

* fix: add missing tool_error imports after registry refactor

* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist

patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.

* fix(tests): fix update_check and telegram xdist failures

- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
  monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
  directly, it uses get_hermes_home() from hermes_constants.

- test_telegram_conflict/approval_buttons: provide real exception classes
  for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
  except clause in connect() doesn't fail with "catching classes that do
  not inherit from BaseException" when xdist pollutes sys.modules.

* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
2026-04-07 17:19:07 -07:00

1041 lines
36 KiB
Python

import sys
import types
from types import SimpleNamespace
import pytest
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
def _patch_agent_bootstrap(monkeypatch):
monkeypatch.setattr(
run_agent,
"get_tool_definitions",
lambda **kwargs: [
{
"type": "function",
"function": {
"name": "terminal",
"description": "Run shell commands.",
"parameters": {"type": "object", "properties": {}},
},
}
],
)
monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
def _build_agent(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
model="gpt-5-codex",
base_url="https://chatgpt.com/backend-api/codex",
api_key="codex-token",
quiet_mode=True,
max_iterations=4,
skip_context_files=True,
skip_memory=True,
)
agent._cleanup_task_resources = lambda task_id: None
agent._persist_session = lambda messages, history=None: None
agent._save_trajectory = lambda messages, user_message, completed: None
agent._save_session_log = lambda messages: None
return agent
def _build_copilot_agent(monkeypatch, *, model="gpt-5.4"):
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
model=model,
provider="copilot",
api_mode="codex_responses",
base_url="https://api.githubcopilot.com",
api_key="gh-token",
quiet_mode=True,
max_iterations=4,
skip_context_files=True,
skip_memory=True,
)
agent._cleanup_task_resources = lambda task_id: None
agent._persist_session = lambda messages, history=None: None
agent._save_trajectory = lambda messages, user_message, completed: None
agent._save_session_log = lambda messages: None
return agent
def _codex_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
status="completed",
model="gpt-5-codex",
)
def _codex_tool_call_response():
return SimpleNamespace(
output=[
SimpleNamespace(
type="function_call",
id="fc_1",
call_id="call_1",
name="terminal",
arguments="{}",
)
],
usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16),
status="completed",
model="gpt-5-codex",
)
def _codex_incomplete_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
status="in_progress",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="in_progress",
model="gpt-5-codex",
)
def _codex_commentary_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
phase="commentary",
status="completed",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="completed",
model="gpt-5-codex",
)
def _codex_ack_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
status="completed",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="completed",
model="gpt-5-codex",
)
class _FakeResponsesStream:
def __init__(self, *, final_response=None, final_error=None):
self._final_response = final_response
self._final_error = final_error
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def __iter__(self):
return iter(())
def get_final_response(self):
if self._final_error is not None:
raise self._final_error
return self._final_response
class _FakeCreateStream:
def __init__(self, events):
self._events = list(events)
self.closed = False
def __iter__(self):
return iter(self._events)
def close(self):
self.closed = True
def _codex_request_kwargs():
return {
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [{"role": "user", "content": "Ping"}],
"tools": None,
"store": False,
}
def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
model="gpt-5-codex",
base_url="https://openrouter.ai/api/v1",
provider="openai-codex",
api_key="codex-token",
quiet_mode=True,
max_iterations=1,
skip_context_files=True,
skip_memory=True,
)
assert agent.api_mode == "codex_responses"
assert agent.provider == "openai-codex"
def test_api_mode_normalizes_provider_case(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
model="gpt-5-codex",
base_url="https://openrouter.ai/api/v1",
provider="OpenAI-Codex",
api_key="codex-token",
quiet_mode=True,
max_iterations=1,
skip_context_files=True,
skip_memory=True,
)
assert agent.provider == "openai-codex"
assert agent.api_mode == "codex_responses"
def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
model="gpt-5-codex",
base_url="https://chatgpt.com/backend-api/codex",
provider="openrouter",
api_key="test-token",
quiet_mode=True,
max_iterations=1,
skip_context_files=True,
skip_memory=True,
)
assert agent.api_mode == "chat_completions"
assert agent.provider == "openrouter"
def test_build_api_kwargs_codex(monkeypatch):
agent = _build_agent(monkeypatch)
kwargs = agent._build_api_kwargs(
[
{"role": "system", "content": "You are Hermes."},
{"role": "user", "content": "Ping"},
]
)
assert kwargs["model"] == "gpt-5-codex"
assert kwargs["instructions"] == "You are Hermes."
assert kwargs["store"] is False
assert isinstance(kwargs["input"], list)
assert kwargs["input"][0]["role"] == "user"
assert kwargs["tools"][0]["type"] == "function"
assert kwargs["tools"][0]["name"] == "terminal"
assert kwargs["tools"][0]["strict"] is False
assert "function" not in kwargs["tools"][0]
assert kwargs["store"] is False
assert kwargs["tool_choice"] == "auto"
assert kwargs["parallel_tool_calls"] is True
assert isinstance(kwargs["prompt_cache_key"], str)
assert len(kwargs["prompt_cache_key"]) > 0
assert "timeout" not in kwargs
assert "max_tokens" not in kwargs
assert "extra_body" not in kwargs
def test_build_api_kwargs_copilot_responses_omits_openai_only_fields(monkeypatch):
agent = _build_copilot_agent(monkeypatch)
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
assert kwargs["model"] == "gpt-5.4"
assert kwargs["store"] is False
assert kwargs["tool_choice"] == "auto"
assert kwargs["parallel_tool_calls"] is True
assert kwargs["reasoning"] == {"effort": "medium"}
assert "prompt_cache_key" not in kwargs
assert "include" not in kwargs
def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_model(monkeypatch):
agent = _build_copilot_agent(monkeypatch, model="gpt-4.1")
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
assert "reasoning" not in kwargs
assert "include" not in kwargs
assert "prompt_cache_key" not in kwargs
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
agent = _build_agent(monkeypatch)
calls = {"stream": 0}
def _fake_stream(**kwargs):
calls["stream"] += 1
if calls["stream"] == 1:
return _FakeResponsesStream(
final_error=RuntimeError("Didn't receive a `response.completed` event.")
)
return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))
agent.client = SimpleNamespace(
responses=SimpleNamespace(
stream=_fake_stream,
create=lambda **kwargs: _codex_message_response("fallback"),
)
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert response.output[0].content[0].text == "stream ok"
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
agent = _build_agent(monkeypatch)
calls = {"stream": 0, "create": 0}
def _fake_stream(**kwargs):
calls["stream"] += 1
return _FakeResponsesStream(
final_error=RuntimeError("Didn't receive a `response.completed` event.")
)
def _fake_create(**kwargs):
calls["create"] += 1
return _codex_message_response("create fallback ok")
agent.client = SimpleNamespace(
responses=SimpleNamespace(
stream=_fake_stream,
create=_fake_create,
)
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert calls["create"] == 1
assert response.output[0].content[0].text == "create fallback ok"
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
agent = _build_agent(monkeypatch)
calls = {"stream": 0, "create": 0}
create_stream = _FakeCreateStream(
[
SimpleNamespace(type="response.created"),
SimpleNamespace(type="response.in_progress"),
SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")),
]
)
def _fake_stream(**kwargs):
calls["stream"] += 1
return _FakeResponsesStream(
final_error=RuntimeError("Didn't receive a `response.completed` event.")
)
def _fake_create(**kwargs):
calls["create"] += 1
assert kwargs.get("stream") is True
return create_stream
agent.client = SimpleNamespace(
responses=SimpleNamespace(
stream=_fake_stream,
create=_fake_create,
)
)
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert calls["create"] == 1
assert create_stream.closed is True
assert response.output[0].content[0].text == "streamed create ok"
def test_run_conversation_codex_plain_text(monkeypatch):
agent = _build_agent(monkeypatch)
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK"))
result = agent.run_conversation("Say OK")
assert result["completed"] is True
assert result["final_response"] == "OK"
assert result["messages"][-1]["role"] == "assistant"
assert result["messages"][-1]["content"] == "OK"
def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
agent = _build_agent(monkeypatch)
calls = {"api": 0, "refresh": 0}
class _UnauthorizedError(RuntimeError):
def __init__(self):
super().__init__("Error code: 401 - unauthorized")
self.status_code = 401
def _fake_api_call(api_kwargs):
calls["api"] += 1
if calls["api"] == 1:
raise _UnauthorizedError()
return _codex_message_response("Recovered after refresh")
def _fake_refresh(*, force=True):
calls["refresh"] += 1
assert force is True
return True
monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)
result = agent.run_conversation("Say OK")
assert calls["api"] == 2
assert calls["refresh"] == 1
assert result["completed"] is True
assert result["final_response"] == "Recovered after refresh"
def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
agent = _build_agent(monkeypatch)
closed = {"value": False}
rebuilt = {"kwargs": None}
class _ExistingClient:
def close(self):
closed["value"] = True
class _RebuiltClient:
pass
def _fake_openai(**kwargs):
rebuilt["kwargs"] = kwargs
return _RebuiltClient()
monkeypatch.setattr(
"hermes_cli.auth.resolve_codex_runtime_credentials",
lambda force_refresh=True: {
"api_key": "new-codex-token",
"base_url": "https://chatgpt.com/backend-api/codex",
},
)
monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
agent.client = _ExistingClient()
ok = agent._try_refresh_codex_client_credentials(force=True)
assert ok is True
assert closed["value"] is True
assert rebuilt["kwargs"]["api_key"] == "new-codex-token"
assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
assert isinstance(agent.client, _RebuiltClient)
def test_run_conversation_codex_tool_round_trip(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
for call in assistant_message.tool_calls:
messages.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": '{"ok":true}',
}
)
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
result = agent.run_conversation("run a command")
assert result["completed"] is True
assert result["final_response"] == "done"
assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant")
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
agent = _build_agent(monkeypatch)
items = agent._chat_messages_to_responses_input(
[
{"role": "user", "content": "Run terminal"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_abc123",
"type": "function",
"function": {"name": "terminal", "arguments": "{}"},
}
],
},
{"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'},
]
)
function_call = next(item for item in items if item.get("type") == "function_call")
function_output = next(item for item in items if item.get("type") == "function_call_output")
assert function_call["call_id"] == "call_abc123"
assert "id" not in function_call
assert function_output["call_id"] == "call_abc123"
def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
agent = _build_agent(monkeypatch)
items = agent._chat_messages_to_responses_input(
[
{"role": "user", "content": "Run terminal"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_pair123|fc_pair123",
"type": "function",
"function": {"name": "terminal", "arguments": "{}"},
}
],
},
{"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'},
]
)
function_call = next(item for item in items if item.get("type") == "function_call")
function_output = next(item for item in items if item.get("type") == "function_call_output")
assert function_call["call_id"] == "call_pair123"
assert "id" not in function_call
assert function_output["call_id"] == "call_pair123"
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
preflight = agent._preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [
{"role": "user", "content": "hi"},
{
"type": "function_call",
"id": "call_bad",
"call_id": "call_good",
"name": "terminal",
"arguments": "{}",
},
],
"tools": [],
"store": False,
}
)
fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call")
assert fn_call["call_id"] == "call_good"
assert "id" not in fn_call
def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
with pytest.raises(ValueError, match="function_call_output is missing call_id"):
agent._preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [{"type": "function_call_output", "output": "{}"}],
"tools": [],
"store": False,
}
)
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
agent = _build_agent(monkeypatch)
kwargs = _codex_request_kwargs()
kwargs["some_unknown_field"] = "value"
with pytest.raises(ValueError, match="unsupported field"):
agent._preflight_codex_api_kwargs(kwargs)
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
agent = _build_agent(monkeypatch)
kwargs = _codex_request_kwargs()
kwargs["reasoning"] = {"effort": "high", "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"]
kwargs["temperature"] = 0.7
kwargs["max_output_tokens"] = 4096
result = agent._preflight_codex_api_kwargs(kwargs)
assert result["reasoning"] == {"effort": "high", "summary": "auto"}
assert result["include"] == ["reasoning.encrypted_content"]
assert result["temperature"] == 0.7
assert result["max_output_tokens"] == 4096
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]
requests = []
def _fake_api_call(api_kwargs):
requests.append(api_kwargs)
return responses.pop(0)
monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
for call in assistant_message.tool_calls:
messages.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": '{"ok":true}',
}
)
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
result = agent.run_conversation("run a command")
assert result["completed"] is True
assert result["final_response"] == "done"
assert len(requests) >= 2
replay_input = requests[1]["input"]
function_call = next(item for item in replay_input if item.get("type") == "function_call")
function_output = next(item for item in replay_input if item.get("type") == "function_call_output")
assert function_call["call_id"] == "call_1"
assert "id" not in function_call
assert function_output["call_id"] == "call_1"
def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [
_codex_incomplete_message_response("I'll inspect the repo structure first."),
_codex_tool_call_response(),
_codex_message_response("Architecture summary complete."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
for call in assistant_message.tool_calls:
messages.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": '{"ok":true}',
}
)
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
result = agent.run_conversation("analyze repo")
assert result["completed"] is True
assert result["final_response"] == "Architecture summary complete."
assert any(
msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
and "inspect the repo structure" in (msg.get("content") or "")
for msg in result["messages"]
)
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
agent = _build_agent(monkeypatch)
assistant_message, finish_reason = agent._normalize_codex_response(
_codex_commentary_message_response("I'll inspect the repository first.")
)
assert finish_reason == "incomplete"
assert "inspect the repository" in (assistant_message.content or "")
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [
_codex_commentary_message_response("I'll inspect the repo structure first."),
_codex_tool_call_response(),
_codex_message_response("Architecture summary complete."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
for call in assistant_message.tool_calls:
messages.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": '{"ok":true}',
}
)
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
result = agent.run_conversation("analyze repo")
assert result["completed"] is True
assert result["final_response"] == "Architecture summary complete."
assert any(
msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
and "inspect the repo structure" in (msg.get("content") or "")
for msg in result["messages"]
)
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [
_codex_ack_message_response(
"Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough."
),
_codex_tool_call_response(),
_codex_message_response("Architecture summary complete."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
for call in assistant_message.tool_calls:
messages.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": '{"ok":true}',
}
)
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works")
assert result["completed"] is True
assert result["final_response"] == "Architecture summary complete."
assert any(
msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
and "inspect ~/openclaw-studio" in (msg.get("content") or "")
for msg in result["messages"]
)
assert any(
msg.get("role") == "user"
and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
for msg in result["messages"]
)
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [
_codex_ack_message_response(
"I'll check what's in the current directory and call out 3 notable items."
),
_codex_tool_call_response(),
_codex_message_response("Directory summary complete."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
for call in assistant_message.tool_calls:
messages.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": '{"ok":true}',
}
)
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
result = agent.run_conversation("look at current directory and list 3 notable things")
assert result["completed"] is True
assert result["final_response"] == "Directory summary complete."
assert any(
msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
and "current directory" in (msg.get("content") or "")
for msg in result["messages"]
)
assert any(
msg.get("role") == "user"
and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
for msg in result["messages"]
)
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_dump_api_request_debug_uses_responses_url(monkeypatch, tmp_path):
"""Debug dumps should show /responses URL when in codex_responses mode."""
import json
agent = _build_agent(monkeypatch)
agent.base_url = "http://127.0.0.1:9208/v1"
agent.logs_dir = tmp_path
dump_file = agent._dump_api_request_debug(_codex_request_kwargs(), reason="preflight")
payload = json.loads(dump_file.read_text())
assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/responses"
def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path):
"""Debug dumps should show /chat/completions URL for chat_completions mode."""
import json
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
model="gpt-4o",
base_url="http://127.0.0.1:9208/v1",
api_key="test-key",
quiet_mode=True,
max_iterations=1,
skip_context_files=True,
skip_memory=True,
)
agent.logs_dir = tmp_path
dump_file = agent._dump_api_request_debug(
{"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
reason="preflight",
)
payload = json.loads(dump_file.read_text())
assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions"
# --- Reasoning-only response tests (fix for empty content retry loop) ---
def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."):
"""Codex response containing only reasoning items — no message text, no tool calls."""
return SimpleNamespace(
output=[
SimpleNamespace(
type="reasoning",
id="rs_001",
encrypted_content=encrypted_content,
summary=[SimpleNamespace(type="summary_text", text=summary_text)],
status="completed",
)
],
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
status="completed",
model="gpt-5-codex",
)
def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch):
"""A response with only reasoning items and no content should be 'incomplete', not 'stop'.
Without this fix, reasoning-only responses get finish_reason='stop' which
sends them into the empty-content retry loop (3 retries then failure).
"""
agent = _build_agent(monkeypatch)
assistant_message, finish_reason = agent._normalize_codex_response(
_codex_reasoning_only_response()
)
assert finish_reason == "incomplete"
assert assistant_message.content == ""
assert assistant_message.codex_reasoning_items is not None
assert len(assistant_message.codex_reasoning_items) == 1
assert assistant_message.codex_reasoning_items[0]["encrypted_content"] == "enc_abc123"
def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
"""If a response has both reasoning and message content, it should still be 'stop'."""
agent = _build_agent(monkeypatch)
response = SimpleNamespace(
output=[
SimpleNamespace(
type="reasoning",
id="rs_001",
encrypted_content="enc_xyz",
summary=[SimpleNamespace(type="summary_text", text="Thinking...")],
status="completed",
),
SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="Here is the answer.")],
status="completed",
),
],
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
status="completed",
model="gpt-5-codex",
)
assistant_message, finish_reason = agent._normalize_codex_response(response)
assert finish_reason == "stop"
assert "Here is the answer" in assistant_message.content
def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch):
"""End-to-end: reasoning-only → final message should succeed, not hit retry loop."""
agent = _build_agent(monkeypatch)
responses = [
_codex_reasoning_only_response(),
_codex_message_response("The final answer is 42."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
result = agent.run_conversation("what is the answer?")
assert result["completed"] is True
assert result["final_response"] == "The final answer is 42."
# The reasoning-only turn should be in messages as an incomplete interim
assert any(
msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
and msg.get("codex_reasoning_items") is not None
for msg in result["messages"]
)
def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch):
"""Encrypted codex_reasoning_items must be preserved in interim messages
even when there is no visible reasoning text or content."""
agent = _build_agent(monkeypatch)
# Response with encrypted reasoning but no human-readable summary
reasoning_response = SimpleNamespace(
output=[
SimpleNamespace(
type="reasoning",
id="rs_002",
encrypted_content="enc_opaque_blob",
summary=[],
status="completed",
)
],
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
status="completed",
model="gpt-5-codex",
)
responses = [
reasoning_response,
_codex_message_response("Done thinking."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
result = agent.run_conversation("think hard")
assert result["completed"] is True
assert result["final_response"] == "Done thinking."
# The interim message must have codex_reasoning_items preserved
interim_msgs = [
msg for msg in result["messages"]
if msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
]
assert len(interim_msgs) >= 1
assert interim_msgs[0].get("codex_reasoning_items") is not None
assert interim_msgs[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"
def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch):
"""When converting a reasoning-only interim message to Responses API input,
the reasoning items must be followed by an assistant message (even if empty)
to satisfy the API's 'required following item' constraint."""
agent = _build_agent(monkeypatch)
messages = [
{"role": "user", "content": "think hard"},
{
"role": "assistant",
"content": "",
"reasoning": None,
"finish_reason": "incomplete",
"codex_reasoning_items": [
{"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []},
],
},
]
items = agent._chat_messages_to_responses_input(messages)
# Find the reasoning item
reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
assert len(reasoning_indices) == 1
ri_idx = reasoning_indices[0]
# There must be a following item after the reasoning
assert ri_idx < len(items) - 1, "Reasoning item must not be the last item (missing_following_item)"
following = items[ri_idx + 1]
assert following.get("role") == "assistant"
def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
"""Two consecutive reasoning-only responses with different encrypted content
must NOT be treated as duplicates."""
agent = _build_agent(monkeypatch)
responses = [
# First reasoning-only response
SimpleNamespace(
output=[
SimpleNamespace(
type="reasoning", id="rs_001",
encrypted_content="enc_first", summary=[], status="completed",
)
],
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
status="completed", model="gpt-5-codex",
),
# Second reasoning-only response (different encrypted content)
SimpleNamespace(
output=[
SimpleNamespace(
type="reasoning", id="rs_002",
encrypted_content="enc_second", summary=[], status="completed",
)
],
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
status="completed", model="gpt-5-codex",
),
_codex_message_response("Final answer after thinking."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
result = agent.run_conversation("think very hard")
assert result["completed"] is True
assert result["final_response"] == "Final answer after thinking."
# Both reasoning-only interim messages should be in history (not collapsed)
interim_msgs = [
msg for msg in result["messages"]
if msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
]
assert len(interim_msgs) == 2
encrypted_contents = [
msg["codex_reasoning_items"][0]["encrypted_content"]
for msg in interim_msgs
]
assert "enc_first" in encrypted_contents
assert "enc_second" in encrypted_contents