hermes-agent/tests/tools/test_codex_bridge_tool.py

231 lines
8.7 KiB
Python

import json
import tools.codex_bridge_tool as bridge
from tools.codex_bridge_tool import CodexBridgeManager, CodexBridgeStore
class FakeCodexClient:
instances = []
def __init__(self, task_id, task, manager):
self.task_id = task_id
self.task = task
self.manager = manager
self.requests = []
self.responses = []
self.closed = False
FakeCodexClient.instances.append(self)
def start(self, *, codex_home=None):
self.codex_home = codex_home
def initialize(self):
return {"userAgent": "fake-codex", "codexHome": "/tmp/codex"}
def request(self, method, params=None, timeout=30):
self.requests.append((method, params, timeout))
if method == "thread/start":
return {"thread": {"id": "thread-1"}}
if method == "turn/start":
return {"turn": {"id": "turn-1", "status": "inProgress"}}
if method == "turn/steer":
return {"ok": True, "steered": params}
if method == "turn/interrupt":
return {"ok": True, "interrupted": params}
raise AssertionError(f"unexpected request: {method}")
def notify(self, method, params=None):
self.notifications = getattr(self, "notifications", [])
self.notifications.append((method, params))
def respond(self, request_id, result):
self.responses.append((request_id, result))
def close(self):
self.closed = True
def make_manager(tmp_path, monkeypatch):
FakeCodexClient.instances.clear()
monkeypatch.setattr(bridge, "CodexJsonRpcClient", FakeCodexClient)
store = CodexBridgeStore(tmp_path / "codex_bridge.db")
return CodexBridgeManager(store=store)
def test_start_task_uses_app_server_thread_turn_without_mailbox(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
result = manager.start_task("Investigate the failing test", cwd=str(tmp_path))
assert result["success"] is True
assert result["protocol"] == {"transport": "app-server stdio", "mailbox": False}
task = result["task"]
assert task["status"] == "working"
assert task["codex_thread_id"] == "thread-1"
assert task["codex_turn_id"] == "turn-1"
client = FakeCodexClient.instances[0]
methods = [method for method, _params, _timeout in client.requests]
assert methods == ["thread/start", "turn/start"]
thread_params = client.requests[0][1]
assert thread_params["sandbox"] == "read-only"
assert thread_params["approvalPolicy"] == "untrusted"
assert "mailbox" not in json.dumps(client.requests).lower()
assert "outbox" not in json.dumps(client.requests).lower()
assert "inbox" not in json.dumps(client.requests).lower()
def test_start_task_records_notify_target(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
result = manager.start_task("Analyze tests", cwd=str(tmp_path), notify_target="feishu:chat-1")
task_id = result["task"]["hermes_task_id"]
assert result["task"]["notify_target"] == "feishu:chat-1"
assert result["task"]["notification_status"] == "pending"
persisted = manager.status(task_id)["task"]
assert persisted["notify_target"] == "feishu:chat-1"
assert persisted["notification_status"] == "pending"
def test_server_approval_request_can_be_reported_and_resolved(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
started = manager.start_task("Run a safe command", cwd=str(tmp_path))
task_id = started["task"]["hermes_task_id"]
client = FakeCodexClient.instances[0]
manager.handle_server_request(
task_id,
client,
{
"id": "approval-1",
"method": "item/commandExecution/requestApproval",
"params": {"threadId": "thread-1", "turnId": "turn-1", "command": "pwd"},
},
)
status = manager.status(task_id)
assert status["task"]["status"] == "waiting_for_approval"
assert status["task"]["pending_requests"][0]["request_id"] == "approval-1"
response = manager.respond(task_id, "approval-1", decision="decline")
assert response["success"] is True
assert client.responses == [("approval-1", {"decision": "decline"})]
assert manager.status(task_id)["task"]["pending_requests"] == []
def test_request_user_input_response_uses_answers_payload(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
started = manager.start_task("Ask for missing context", cwd=str(tmp_path))
task_id = started["task"]["hermes_task_id"]
client = FakeCodexClient.instances[0]
manager.handle_server_request(
task_id,
client,
{
"id": "input-1",
"method": "item/tool/requestUserInput",
"params": {
"threadId": "thread-1",
"turnId": "turn-1",
"questions": [{"id": "q1", "question": "Which file?", "options": None}],
},
},
)
answers = {"q1": {"answers": ["README.md"]}}
manager.respond(task_id, "input-1", decision="decline", answers=answers)
assert client.responses == [("input-1", {"answers": answers})]
def test_steer_and_interrupt_call_codex_turn_methods(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
started = manager.start_task("Long running task", cwd=str(tmp_path))
task_id = started["task"]["hermes_task_id"]
client = FakeCodexClient.instances[0]
steer = manager.steer(task_id, "Only analyze; do not edit.")
interrupt = manager.interrupt(task_id)
assert steer["success"] is True
assert interrupt["task"]["status"] == "cancelled"
assert client.requests[-2][0] == "turn/steer"
assert client.requests[-2][1]["expectedTurnId"] == "turn-1"
assert client.requests[-1][0] == "turn/interrupt"
def test_notify_completed_sends_once_for_targeted_completed_task(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
started = manager.start_task("Summarize a bug", cwd=str(tmp_path), notify_target="feishu:chat-1")
task_id = started["task"]["hermes_task_id"]
deliveries = []
manager.record_event(
task_id,
"turn/completed",
{"turn": {"id": "turn-1", "status": "completed"}, "message": "Done fixing it."},
)
first = manager.notify_completed(notifier=lambda target, message: deliveries.append((target, message)) or {"ok": True})
second = manager.notify_completed(notifier=lambda target, message: deliveries.append((target, message)) or {"ok": True})
assert first["processed"] == 1
assert first["notifications"][0]["notification_status"] == "sent"
assert first["notifications"][0]["sent"] is True
assert second["processed"] == 0
assert len(deliveries) == 1
assert deliveries[0][0] == "feishu:chat-1"
assert task_id in deliveries[0][1]
assert manager.status(task_id)["task"]["notification_status"] == "sent"
def test_notify_completed_marks_no_target_without_sending(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
started = manager.start_task("No callback needed", cwd=str(tmp_path))
task_id = started["task"]["hermes_task_id"]
manager.record_event(
task_id,
"turn/completed",
{"turn": {"id": "turn-1", "status": "completed"}, "message": "Done."},
)
result = manager.notify_completed(notifier=lambda _target, _message: (_ for _ in ()).throw(AssertionError("sent")))
assert result["processed"] == 1
assert result["notifications"][0]["notification_status"] == "no_target"
assert result["notifications"][0]["sent"] is False
assert manager.status(task_id)["task"]["notification_status"] == "no_target"
def test_notify_completed_dry_run_does_not_send_or_mark(tmp_path, monkeypatch):
manager = make_manager(tmp_path, monkeypatch)
started = manager.start_task("Preview callback", cwd=str(tmp_path), notify_target="local")
task_id = started["task"]["hermes_task_id"]
manager.record_event(
task_id,
"turn/completed",
{"turn": {"id": "turn-1", "status": "completed"}, "message": "Done."},
)
result = manager.notify_completed(
dry_run=True,
notifier=lambda _target, _message: (_ for _ in ()).throw(AssertionError("sent")),
)
assert result["processed"] == 1
assert result["notifications"][0]["notification_status"] == "dry_run"
assert result["notifications"][0]["sent"] is False
assert manager.status(task_id)["task"]["notification_status"] == "pending"
def test_tool_schema_refuses_danger_full_access():
props = bridge.CODEX_BRIDGE_SCHEMA["parameters"]["properties"]
assert "danger-full-access" not in props["sandbox"]["enum"]
assert "never" not in props["approval_policy"]["enum"]
assert "notify_completed" in props["action"]["enum"]
assert "notify_target" in props