mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
* refactor: re-architect tests to mirror the codebase
* Update tests.yml
* fix: add missing tool_error imports after registry refactor
* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist
patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.
* fix(tests): fix update_check and telegram xdist failures
- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
directly, it uses get_hermes_home() from hermes_constants.
- test_telegram_conflict/approval_buttons: provide real exception classes
for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
except clause in connect() doesn't fail with "catching classes that do
not inherit from BaseException" when xdist pollutes sys.modules.
* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
1041 lines
36 KiB
Python
1041 lines
36 KiB
Python
import sys
|
|
import types
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
|
|
# Stub optional third-party modules BEFORE importing run_agent, so this test
# module can be collected even when those packages are not installed.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())

import run_agent
|
|
|
|
|
|
def _patch_agent_bootstrap(monkeypatch):
    """Patch run_agent's tool bootstrap so AIAgent construction is hermetic.

    Replaces get_tool_definitions with a single fake `terminal` tool and
    check_toolset_requirements with a no-op, so no real toolsets are probed.
    """
    monkeypatch.setattr(
        run_agent,
        "get_tool_definitions",
        lambda **kwargs: [
            {
                "type": "function",
                "function": {
                    "name": "terminal",
                    "description": "Run shell commands.",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ],
    )
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
|
|
|
|
|
|
def _build_agent(monkeypatch):
    """Build a quiet gpt-5-codex AIAgent with persistence side effects stubbed out."""
    _patch_agent_bootstrap(monkeypatch)

    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://chatgpt.com/backend-api/codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=4,
        skip_context_files=True,
        skip_memory=True,
    )
    # Disable all disk/session side effects so tests leave no artifacts.
    agent._cleanup_task_resources = lambda task_id: None
    agent._persist_session = lambda messages, history=None: None
    agent._save_trajectory = lambda messages, user_message, completed: None
    agent._save_session_log = lambda messages: None
    return agent
|
|
|
|
|
|
def _build_copilot_agent(monkeypatch, *, model="gpt-5.4"):
    """Build a Copilot-provider AIAgent in codex_responses mode with persistence stubbed."""
    _patch_agent_bootstrap(monkeypatch)

    agent = run_agent.AIAgent(
        model=model,
        provider="copilot",
        api_mode="codex_responses",
        base_url="https://api.githubcopilot.com",
        api_key="gh-token",
        quiet_mode=True,
        max_iterations=4,
        skip_context_files=True,
        skip_memory=True,
    )
    # Disable all disk/session side effects so tests leave no artifacts.
    agent._cleanup_task_resources = lambda task_id: None
    agent._persist_session = lambda messages, history=None: None
    agent._save_trajectory = lambda messages, user_message, completed: None
    agent._save_session_log = lambda messages: None
    return agent
|
|
|
|
|
|
def _codex_message_response(text: str):
|
|
return SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="message",
|
|
content=[SimpleNamespace(type="output_text", text=text)],
|
|
)
|
|
],
|
|
usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
|
|
status="completed",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
|
|
def _codex_tool_call_response():
|
|
return SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="function_call",
|
|
id="fc_1",
|
|
call_id="call_1",
|
|
name="terminal",
|
|
arguments="{}",
|
|
)
|
|
],
|
|
usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16),
|
|
status="completed",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
|
|
def _codex_incomplete_message_response(text: str):
|
|
return SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="message",
|
|
status="in_progress",
|
|
content=[SimpleNamespace(type="output_text", text=text)],
|
|
)
|
|
],
|
|
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
|
|
status="in_progress",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
|
|
def _codex_commentary_message_response(text: str):
|
|
return SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="message",
|
|
phase="commentary",
|
|
status="completed",
|
|
content=[SimpleNamespace(type="output_text", text=text)],
|
|
)
|
|
],
|
|
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
|
|
status="completed",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
|
|
def _codex_ack_message_response(text: str):
|
|
return SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="message",
|
|
status="completed",
|
|
content=[SimpleNamespace(type="output_text", text=text)],
|
|
)
|
|
],
|
|
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
|
|
status="completed",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
|
|
class _FakeResponsesStream:
|
|
def __init__(self, *, final_response=None, final_error=None):
|
|
self._final_response = final_response
|
|
self._final_error = final_error
|
|
|
|
def __enter__(self):
|
|
return self
|
|
|
|
def __exit__(self, exc_type, exc, tb):
|
|
return False
|
|
|
|
def __iter__(self):
|
|
return iter(())
|
|
|
|
def get_final_response(self):
|
|
if self._final_error is not None:
|
|
raise self._final_error
|
|
return self._final_response
|
|
|
|
|
|
class _FakeCreateStream:
|
|
def __init__(self, events):
|
|
self._events = list(events)
|
|
self.closed = False
|
|
|
|
def __iter__(self):
|
|
return iter(self._events)
|
|
|
|
def close(self):
|
|
self.closed = True
|
|
|
|
|
|
def _codex_request_kwargs():
|
|
return {
|
|
"model": "gpt-5-codex",
|
|
"instructions": "You are Hermes.",
|
|
"input": [{"role": "user", "content": "Ping"}],
|
|
"tools": None,
|
|
"store": False,
|
|
}
|
|
|
|
|
|
def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
    """provider='openai-codex' selects codex_responses even with a non-codex base_url."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://openrouter.ai/api/v1",
        provider="openai-codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.api_mode == "codex_responses"
    assert agent.provider == "openai-codex"
|
|
|
|
|
|
def test_api_mode_normalizes_provider_case(monkeypatch):
    """A mixed-case provider string is lowercased and still maps to codex_responses."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://openrouter.ai/api/v1",
        provider="OpenAI-Codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.provider == "openai-codex"
    assert agent.api_mode == "codex_responses"
|
|
|
|
|
|
def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
    """An explicit provider='openrouter' wins over a Codex-looking base_url."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://chatgpt.com/backend-api/codex",
        provider="openrouter",
        api_key="test-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.api_mode == "chat_completions"
    assert agent.provider == "openrouter"
|
|
|
|
|
|
def test_build_api_kwargs_codex(monkeypatch):
    """Codex mode: system message becomes `instructions`, tools are flattened,
    and chat-completions-only fields (timeout/max_tokens/extra_body) are absent."""
    agent = _build_agent(monkeypatch)
    kwargs = agent._build_api_kwargs(
        [
            {"role": "system", "content": "You are Hermes."},
            {"role": "user", "content": "Ping"},
        ]
    )

    assert kwargs["model"] == "gpt-5-codex"
    assert kwargs["instructions"] == "You are Hermes."
    assert kwargs["store"] is False
    assert isinstance(kwargs["input"], list)
    assert kwargs["input"][0]["role"] == "user"
    # Responses API tool schema: flattened, not nested under "function".
    assert kwargs["tools"][0]["type"] == "function"
    assert kwargs["tools"][0]["name"] == "terminal"
    assert kwargs["tools"][0]["strict"] is False
    assert "function" not in kwargs["tools"][0]
    # NOTE(review): duplicate of the store check above — harmless.
    assert kwargs["store"] is False
    assert kwargs["tool_choice"] == "auto"
    assert kwargs["parallel_tool_calls"] is True
    assert isinstance(kwargs["prompt_cache_key"], str)
    assert len(kwargs["prompt_cache_key"]) > 0
    assert "timeout" not in kwargs
    assert "max_tokens" not in kwargs
    assert "extra_body" not in kwargs
|
|
|
|
|
|
def test_build_api_kwargs_copilot_responses_omits_openai_only_fields(monkeypatch):
    """Copilot responses mode keeps reasoning but drops OpenAI-only request fields."""
    agent = _build_copilot_agent(monkeypatch)
    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    assert kwargs["model"] == "gpt-5.4"
    assert kwargs["store"] is False
    assert kwargs["tool_choice"] == "auto"
    assert kwargs["parallel_tool_calls"] is True
    assert kwargs["reasoning"] == {"effort": "medium"}
    # prompt_cache_key / include are OpenAI-only and must be omitted for Copilot.
    assert "prompt_cache_key" not in kwargs
    assert "include" not in kwargs
|
|
|
|
|
|
def test_build_api_kwargs_copilot_responses_omits_reasoning_for_non_reasoning_model(monkeypatch):
    """A non-reasoning model (gpt-4.1) must not get a `reasoning` block at all."""
    agent = _build_copilot_agent(monkeypatch, model="gpt-4.1")
    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    assert "reasoning" not in kwargs
    assert "include" not in kwargs
    assert "prompt_cache_key" not in kwargs
|
|
|
|
|
|
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
    """A first stream that never delivers response.completed is retried once
    via responses.stream before any create() fallback."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0}

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        if calls["stream"] == 1:
            return _FakeResponsesStream(
                final_error=RuntimeError("Didn't receive a `response.completed` event.")
            )
        return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=lambda **kwargs: _codex_message_response("fallback"),
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert response.output[0].content[0].text == "stream ok"
|
|
|
|
|
|
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
    """When both stream attempts fail to complete, the agent falls back to
    a single responses.create call."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0, "create": 0}

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        calls["create"] += 1
        return _codex_message_response("create fallback ok")

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=_fake_create,
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert response.output[0].content[0].text == "create fallback ok"
|
|
|
|
|
|
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
    """The create(stream=True) fallback must parse SSE-style events, extract
    the response from response.completed, and close the stream."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0, "create": 0}
    create_stream = _FakeCreateStream(
        [
            SimpleNamespace(type="response.created"),
            SimpleNamespace(type="response.in_progress"),
            SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")),
        ]
    )

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        calls["create"] += 1
        # The fallback must explicitly request streaming.
        assert kwargs.get("stream") is True
        return create_stream

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=_fake_create,
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert create_stream.closed is True
    assert response.output[0].content[0].text == "streamed create ok"
|
|
|
|
|
|
def test_run_conversation_codex_plain_text(monkeypatch):
    """Happy path: a single completed message response ends the conversation."""
    agent = _build_agent(monkeypatch)
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK"))

    result = agent.run_conversation("Say OK")

    assert result["completed"] is True
    assert result["final_response"] == "OK"
    assert result["messages"][-1]["role"] == "assistant"
    assert result["messages"][-1]["content"] == "OK"
|
|
|
|
|
|
def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
    """A 401 triggers a forced credential refresh, then the call is retried once."""
    agent = _build_agent(monkeypatch)
    calls = {"api": 0, "refresh": 0}

    class _UnauthorizedError(RuntimeError):
        # Mimics the OpenAI SDK error shape: message + status_code attribute.
        def __init__(self):
            super().__init__("Error code: 401 - unauthorized")
            self.status_code = 401

    def _fake_api_call(api_kwargs):
        calls["api"] += 1
        if calls["api"] == 1:
            raise _UnauthorizedError()
        return _codex_message_response("Recovered after refresh")

    def _fake_refresh(*, force=True):
        calls["refresh"] += 1
        assert force is True
        return True

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)

    result = agent.run_conversation("Say OK")

    assert calls["api"] == 2
    assert calls["refresh"] == 1
    assert result["completed"] is True
    assert result["final_response"] == "Recovered after refresh"
|
|
|
|
|
|
def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
    """Refreshing credentials closes the old client and rebuilds OpenAI(...)
    with the newly resolved api_key/base_url."""
    agent = _build_agent(monkeypatch)
    closed = {"value": False}
    rebuilt = {"kwargs": None}

    class _ExistingClient:
        def close(self):
            closed["value"] = True

    class _RebuiltClient:
        pass

    def _fake_openai(**kwargs):
        rebuilt["kwargs"] = kwargs
        return _RebuiltClient()

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_codex_runtime_credentials",
        lambda force_refresh=True: {
            "api_key": "new-codex-token",
            "base_url": "https://chatgpt.com/backend-api/codex",
        },
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)

    agent.client = _ExistingClient()
    ok = agent._try_refresh_codex_client_credentials(force=True)

    assert ok is True
    assert closed["value"] is True
    assert rebuilt["kwargs"]["api_key"] == "new-codex-token"
    assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert isinstance(agent.client, _RebuiltClient)
|
|
|
|
|
|
def test_run_conversation_codex_tool_round_trip(monkeypatch):
    """Tool-call response followed by a final message completes the loop,
    with the tool result threaded back into the message history."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        # Emit one tool-result message per requested call.
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant")
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
    """Chat-style tool_call ids map to Responses `call_id` (never `id`)."""
    agent = _build_agent(monkeypatch)
    items = agent._chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "call_abc123",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'},
        ]
    )

    function_call = next(item for item in items if item.get("type") == "function_call")
    function_output = next(item for item in items if item.get("type") == "function_call_output")

    assert function_call["call_id"] == "call_abc123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_abc123"
|
|
|
|
|
|
def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    """Composite 'call_x|fc_y' ids are split; only the call_ part survives as call_id."""
    agent = _build_agent(monkeypatch)
    items = agent._chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "call_pair123|fc_pair123",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'},
        ]
    )

    function_call = next(item for item in items if item.get("type") == "function_call")
    function_output = next(item for item in items if item.get("type") == "function_call_output")

    assert function_call["call_id"] == "call_pair123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_pair123"
|
|
|
|
|
|
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
    """Preflight drops the optional `id` from function_call items, keeping call_id."""
    agent = _build_agent(monkeypatch)
    preflight = agent._preflight_codex_api_kwargs(
        {
            "model": "gpt-5-codex",
            "instructions": "You are Hermes.",
            "input": [
                {"role": "user", "content": "hi"},
                {
                    "type": "function_call",
                    "id": "call_bad",
                    "call_id": "call_good",
                    "name": "terminal",
                    "arguments": "{}",
                },
            ],
            "tools": [],
            "store": False,
        }
    )

    fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call")
    assert fn_call["call_id"] == "call_good"
    assert "id" not in fn_call
|
|
|
|
|
|
def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
    """A function_call_output item without call_id must be rejected up front."""
    agent = _build_agent(monkeypatch)

    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
        agent._preflight_codex_api_kwargs(
            {
                "model": "gpt-5-codex",
                "instructions": "You are Hermes.",
                "input": [{"type": "function_call_output", "output": "{}"}],
                "tools": [],
                "store": False,
            }
        )
|
|
|
|
|
|
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    """Unknown top-level request fields are rejected rather than silently sent."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["some_unknown_field"] = "value"

    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(kwargs)
|
|
|
|
|
|
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    """reasoning/include/temperature/max_output_tokens are supported passthroughs."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["reasoning"] = {"effort": "high", "summary": "auto"}
    kwargs["include"] = ["reasoning.encrypted_content"]
    kwargs["temperature"] = 0.7
    kwargs["max_output_tokens"] = 4096

    result = agent._preflight_codex_api_kwargs(kwargs)
    assert result["reasoning"] == {"effort": "high", "summary": "auto"}
    assert result["include"] == ["reasoning.encrypted_content"]
    assert result["temperature"] == 0.7
    assert result["max_output_tokens"] == 4096
|
|
|
|
|
|
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    """The second request (history replay) must keep call_id pairing intact
    between function_call and function_call_output items."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    requests = []

    def _fake_api_call(api_kwargs):
        requests.append(api_kwargs)
        return responses.pop(0)

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert len(requests) >= 2

    replay_input = requests[1]["input"]
    function_call = next(item for item in replay_input if item.get("type") == "function_call")
    function_output = next(item for item in replay_input if item.get("type") == "function_call_output")
    assert function_call["call_id"] == "call_1"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_1"
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch):
    """An in_progress interim message is recorded as finish_reason='incomplete'
    and the loop continues through the tool call to the final answer."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_incomplete_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
    """A commentary-phase-only message normalizes to finish_reason='incomplete'."""
    agent = _build_agent(monkeypatch)
    assistant_message, finish_reason = agent._normalize_codex_response(
        _codex_commentary_message_response("I'll inspect the repository first.")
    )

    assert finish_reason == "incomplete"
    assert "inspect the repository" in (assistant_message.content or "")
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
    """A commentary-phase interim message does not end the conversation; the
    loop proceeds through the tool call to the real final answer."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_commentary_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch):
    """A bare "I'll do X" acknowledgement that stops without tool calls is
    treated as incomplete; a synthetic user nudge forces continuation."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_ack_message_response(
            "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough."
        ),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect ~/openclaw-studio" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    # The continuation nudge is injected as a synthetic user message.
    assert any(
        msg.get("role") == "user"
        and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch):
    """Same ack-detection path as above, for a directory-listing style prompt."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_ack_message_response(
            "I'll check what's in the current directory and call out 3 notable items."
        ),
        _codex_tool_call_response(),
        _codex_message_response("Directory summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("look at current directory and list 3 notable things")

    assert result["completed"] is True
    assert result["final_response"] == "Directory summary complete."
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "current directory" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(
        msg.get("role") == "user"
        and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
|
|
|
|
|
def test_dump_api_request_debug_uses_responses_url(monkeypatch, tmp_path):
    """Debug dumps should show /responses URL when in codex_responses mode."""
    import json
    agent = _build_agent(monkeypatch)
    agent.base_url = "http://127.0.0.1:9208/v1"
    agent.logs_dir = tmp_path

    dump_file = agent._dump_api_request_debug(_codex_request_kwargs(), reason="preflight")

    payload = json.loads(dump_file.read_text())
    assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/responses"
|
|
|
|
|
|
def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path):
    """Debug dumps should show /chat/completions URL for chat_completions mode."""
    import json
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-4o",
        base_url="http://127.0.0.1:9208/v1",
        api_key="test-key",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    agent.logs_dir = tmp_path

    dump_file = agent._dump_api_request_debug(
        {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
        reason="preflight",
    )

    payload = json.loads(dump_file.read_text())
    assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions"
|
|
|
|
|
|
# --- Reasoning-only response tests (fix for empty content retry loop) ---
|
|
|
|
|
|
def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."):
|
|
"""Codex response containing only reasoning items — no message text, no tool calls."""
|
|
return SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="reasoning",
|
|
id="rs_001",
|
|
encrypted_content=encrypted_content,
|
|
summary=[SimpleNamespace(type="summary_text", text=summary_text)],
|
|
status="completed",
|
|
)
|
|
],
|
|
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
|
|
status="completed",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
|
|
def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch):
    """A response with only reasoning items and no content should be 'incomplete', not 'stop'.

    Without this fix, reasoning-only responses get finish_reason='stop' which
    sends them into the empty-content retry loop (3 retries then failure).
    """
    agent = _build_agent(monkeypatch)
    assistant_message, finish_reason = agent._normalize_codex_response(
        _codex_reasoning_only_response()
    )

    assert finish_reason == "incomplete"
    assert assistant_message.content == ""
    assert assistant_message.codex_reasoning_items is not None
    assert len(assistant_message.codex_reasoning_items) == 1
    assert assistant_message.codex_reasoning_items[0]["encrypted_content"] == "enc_abc123"
|
|
|
|
|
|
def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
    """If a response has both reasoning and message content, it should still be 'stop'."""
    agent = _build_agent(monkeypatch)
    response = SimpleNamespace(
        output=[
            SimpleNamespace(
                type="reasoning",
                id="rs_001",
                encrypted_content="enc_xyz",
                summary=[SimpleNamespace(type="summary_text", text="Thinking...")],
                status="completed",
            ),
            SimpleNamespace(
                type="message",
                content=[SimpleNamespace(type="output_text", text="Here is the answer.")],
                status="completed",
            ),
        ],
        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
        status="completed",
        model="gpt-5-codex",
    )
    assistant_message, finish_reason = agent._normalize_codex_response(response)

    assert finish_reason == "stop"
    assert "Here is the answer" in assistant_message.content
|
|
|
|
|
|
def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch):
    """End-to-end: reasoning-only → final message should succeed, not hit retry loop."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_reasoning_only_response(),
        _codex_message_response("The final answer is 42."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    result = agent.run_conversation("what is the answer?")

    assert result["completed"] is True
    assert result["final_response"] == "The final answer is 42."
    # The reasoning-only turn should be in messages as an incomplete interim
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and msg.get("codex_reasoning_items") is not None
        for msg in result["messages"]
    )
|
|
|
|
|
|
def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch):
    """Encrypted codex_reasoning_items must be preserved in interim messages
    even when there is no visible reasoning text or content."""
    agent = _build_agent(monkeypatch)
    # Response with encrypted reasoning but no human-readable summary
    reasoning_response = SimpleNamespace(
        output=[
            SimpleNamespace(
                type="reasoning",
                id="rs_002",
                encrypted_content="enc_opaque_blob",
                summary=[],
                status="completed",
            )
        ],
        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
        status="completed",
        model="gpt-5-codex",
    )
    responses = [
        reasoning_response,
        _codex_message_response("Done thinking."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    result = agent.run_conversation("think hard")

    assert result["completed"] is True
    assert result["final_response"] == "Done thinking."
    # The interim message must have codex_reasoning_items preserved
    interim_msgs = [
        msg for msg in result["messages"]
        if msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
    ]
    assert len(interim_msgs) >= 1
    assert interim_msgs[0].get("codex_reasoning_items") is not None
    assert interim_msgs[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"
|
|
|
|
|
|
def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch):
    """When converting a reasoning-only interim message to Responses API input,
    the reasoning items must be followed by an assistant message (even if empty)
    to satisfy the API's 'required following item' constraint."""
    agent = _build_agent(monkeypatch)
    messages = [
        {"role": "user", "content": "think hard"},
        {
            "role": "assistant",
            "content": "",
            "reasoning": None,
            "finish_reason": "incomplete",
            "codex_reasoning_items": [
                {"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []},
            ],
        },
    ]
    items = agent._chat_messages_to_responses_input(messages)

    # Find the reasoning item
    reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
    assert len(reasoning_indices) == 1
    ri_idx = reasoning_indices[0]

    # There must be a following item after the reasoning
    assert ri_idx < len(items) - 1, "Reasoning item must not be the last item (missing_following_item)"
    following = items[ri_idx + 1]
    assert following.get("role") == "assistant"
|
|
|
|
|
|
def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
    """Two consecutive reasoning-only responses with different encrypted content
    must NOT be treated as duplicates."""
    agent = _build_agent(monkeypatch)
    responses = [
        # First reasoning-only response
        SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="reasoning", id="rs_001",
                    encrypted_content="enc_first", summary=[], status="completed",
                )
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
            status="completed", model="gpt-5-codex",
        ),
        # Second reasoning-only response (different encrypted content)
        SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="reasoning", id="rs_002",
                    encrypted_content="enc_second", summary=[], status="completed",
                )
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
            status="completed", model="gpt-5-codex",
        ),
        _codex_message_response("Final answer after thinking."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    result = agent.run_conversation("think very hard")

    assert result["completed"] is True
    assert result["final_response"] == "Final answer after thinking."
    # Both reasoning-only interim messages should be in history (not collapsed)
    interim_msgs = [
        msg for msg in result["messages"]
        if msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
    ]
    assert len(interim_msgs) == 2
    encrypted_contents = [
        msg["codex_reasoning_items"][0]["encrypted_content"]
        for msg in interim_msgs
    ]
    assert "enc_first" in encrypted_contents
    assert "enc_second" in encrypted_contents
|