# hermes-agent/tests/skills/test_codex_bridge_skill.py
#
# 284 lines
# 8.9 KiB
# Python

import importlib.util
import json
import sys
from pathlib import Path
# Directory holding the codex-bridge skill's reference modules, located
# relative to this test file: <parents[2]>/skills/codex-bridge/references.
SKILL_REFS = Path(__file__).resolve().parents[2].joinpath("skills", "codex-bridge", "references")
def load_reference_module(name):
    """Load ``<name>.py`` from ``SKILL_REFS`` as a freshly executed module.

    The file is executed from its path under the synthetic module name
    ``codex_bridge_skill_<name>``; the result is not registered in
    ``sys.modules``.  ``SKILL_REFS`` sits at the front of ``sys.path`` only
    for the duration of the load (presumably so the reference module can
    import its siblings) and is removed again afterwards, even on failure.

    Args:
        name: File stem of the reference module, without the ``.py`` suffix.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for the file.
        Exception: Anything raised while the module executes, e.g.
            ``FileNotFoundError`` when the file does not exist.
    """
    module_path = SKILL_REFS / f"{name}.py"
    refs_dir = str(SKILL_REFS)
    sys.path.insert(0, refs_dir)
    try:
        spec = importlib.util.spec_from_file_location(f"codex_bridge_skill_{name}", module_path)
        # Validate before use: module_from_spec(None) would otherwise fail
        # with an opaque AttributeError instead of a clear load error.
        if spec is None or spec.loader is None:
            raise ImportError(f"cannot load reference module {name!r} from {module_path}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module
    finally:
        try:
            sys.path.remove(refs_dir)
        except ValueError:
            # The entry is already gone (e.g. the loaded module edited sys.path).
            pass
def test_validator_rejects_unsafe_start_inputs(tmp_path):
    """Check that ``validate_start_input`` rejects each bad start argument.

    Exercises two disallowed sandbox values, ``approval_policy="never"``, an
    empty prompt, and a ``cwd`` below ``tmp_path`` that was never created.
    Each call must raise ``validator.ValidationError`` whose text mentions
    the expected keyword.
    """
    validator = load_reference_module("validator")
    workdir = str(tmp_path)

    def rejection_text(prompt, cwd, sandbox, approval_policy, failure):
        # Return the ValidationError text; fail with *failure* if the input is accepted.
        try:
            validator.validate_start_input(prompt, cwd, sandbox, approval_policy)
        except validator.ValidationError as error:
            return str(error)
        raise AssertionError(failure)

    for sandbox in ("danger-full-access", "network-only"):
        text = rejection_text("hello", workdir, sandbox, "untrusted", f"expected {sandbox} to be rejected")
        assert any(keyword in text for keyword in ("sandbox", "danger-full-access"))

    text = rejection_text(
        "hello", workdir, "read-only", "never", "expected approval_policy=never to be rejected"
    )
    assert "approval_policy" in text

    text = rejection_text("", workdir, "read-only", "untrusted", "expected empty prompt to be rejected")
    assert "prompt" in text

    absent_dir = str(tmp_path / "missing")
    text = rejection_text("hello", absent_dir, "read-only", "untrusted", "expected missing cwd to be rejected")
    assert "cwd" in text
def test_validator_requires_safe_start_output_contract():
    """Check which start-output payloads ``validate_start_output`` accepts.

    A payload with ``mailbox: False`` and the ``app-server stdio`` transport
    must pass.  Swapping in a protocol with ``mailbox: True`` or with the
    ``mailbox`` transport must raise ``validator.ValidationError`` whose
    text mentions the expected keyword.
    """
    validator = load_reference_module("validator")
    accepted = {
        "success": True,
        "protocol": {"mailbox": False, "transport": "app-server stdio"},
        "task": {
            "hermes_task_id": "codex-1",
            "codex_thread_id": "thread-1",
            "codex_turn_id": "turn-1",
        },
    }
    validator.validate_start_output(accepted)

    # (replacement protocol, keyword expected in the error, failure message)
    rejected_cases = (
        (
            {"mailbox": True, "transport": "app-server stdio"},
            "mailbox",
            "expected mailbox output to be rejected",
        ),
        (
            {"mailbox": False, "transport": "mailbox"},
            "app-server",
            "expected non app-server transport to be rejected",
        ),
    )
    for protocol, keyword, failure in rejected_cases:
        # Shallow copy with only the protocol swapped; ``accepted`` stays untouched.
        candidate = {**accepted, "protocol": protocol}
        try:
            validator.validate_start_output(candidate)
        except validator.ValidationError as error:
            assert keyword in str(error)
        else:
            raise AssertionError(failure)
def test_cli_start_validates_and_emits_bridge_json(tmp_path, monkeypatch, capsys):
    """Run ``cli.main(["start", ...])`` against a stubbed ``codex_bridge``.

    Verifies the exit code is 0, the stub's JSON reaches stdout, and the
    bridge is called exactly once with the given prompt and cwd plus the
    expected values for the options not passed on the command line.
    """
    cli = load_reference_module("cli")
    recorded = []
    bridge_reply = {
        "success": True,
        "protocol": {"mailbox": False, "transport": "app-server stdio"},
        "task": {
            "hermes_task_id": "codex-abc",
            "codex_thread_id": "thread-abc",
            "codex_turn_id": "turn-abc",
        },
    }

    def fake_codex_bridge(**kwargs):
        # Capture the keyword arguments and answer with the canned payload.
        recorded.append(kwargs)
        return json.dumps(bridge_reply)

    monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge)

    argv = ["start", "--cwd", str(tmp_path), "--prompt", "Analyze tests"]
    exit_code = cli.main(argv)
    assert exit_code == 0

    printed = json.loads(capsys.readouterr().out)
    assert printed["task"]["hermes_task_id"] == "codex-abc"

    expected_call = {
        "action": "start",
        "prompt": "Analyze tests",
        "cwd": str(tmp_path),
        "model": None,
        "sandbox": "read-only",
        "approval_policy": "untrusted",
        "codex_home": None,
        "notify_target": None,
    }
    assert recorded == [expected_call]
def test_cli_start_passes_notify_target(tmp_path, monkeypatch, capsys):
    """Check that ``--notify-target`` is forwarded to the bridge on ``start``.

    The stubbed bridge echoes the ``notify_target`` it receives into the
    task payload, so the value is checked both in the recorded call and in
    the JSON printed to stdout.
    """
    cli = load_reference_module("cli")
    recorded = []

    def fake_codex_bridge(**kwargs):
        recorded.append(kwargs)
        task = {
            "hermes_task_id": "codex-abc",
            "codex_thread_id": "thread-abc",
            "codex_turn_id": "turn-abc",
            "notify_target": kwargs["notify_target"],
        }
        reply = {
            "success": True,
            "protocol": {"mailbox": False, "transport": "app-server stdio"},
            "task": task,
        }
        return json.dumps(reply)

    monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge)

    argv = ["start", "--cwd", str(tmp_path), "--notify-target", "local", "--prompt", "Analyze tests"]
    exit_code = cli.main(argv)
    assert exit_code == 0

    printed = json.loads(capsys.readouterr().out)
    assert printed["task"]["notify_target"] == "local"
    first_call = recorded[0]
    assert first_call["notify_target"] == "local"
def test_cli_respond_maps_request_id_to_bridge_instruction(monkeypatch, capsys):
    """Check how ``respond`` arguments are translated into the bridge call.

    The ``--request-id`` value must arrive as the bridge's ``instruction``
    argument, ``--decision`` must pass through unchanged, and the
    ``--answers`` JSON string must arrive as a parsed dict.
    """
    cli = load_reference_module("cli")
    recorded = []

    def fake_codex_bridge(**kwargs):
        recorded.append(kwargs)
        reply = {"success": True, "response": {"decision": kwargs["decision"]}}
        return json.dumps(reply)

    monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge)

    argv = [
        "respond",
        "codex-abc",
        "--request-id",
        "approval-1",
        "--decision",
        "decline",
        "--answers",
        '{"q1": {"answers": ["yes"]}}',
    ]
    exit_code = cli.main(argv)
    assert exit_code == 0

    printed = json.loads(capsys.readouterr().out)
    assert printed["response"] == {"decision": "decline"}

    expected_call = {
        "action": "respond",
        "task_id": "codex-abc",
        "instruction": "approval-1",
        "decision": "decline",
        "answers": {"q1": {"answers": ["yes"]}},
    }
    assert recorded == [expected_call]
def test_cli_smoke_test_polls_until_completed_with_sentinel(tmp_path, monkeypatch, capsys):
    """Run the ``smoke-test`` command against a stubbed bridge.

    The stub answers ``start`` with a task payload and every other action
    with a ``completed`` status whose recent event contains
    ``CODEX_ASYNC_OK``.  ``cli.time.sleep`` is replaced with a no-op so the
    test does not wait.  Expects exit code 0, a successful report for
    ``codex-smoke``, exactly one ``start`` followed by one ``status`` call,
    and a start prompt that mentions the sentinel with no notify target.
    """
    cli = load_reference_module("cli")
    recorded = []
    start_reply = {
        "success": True,
        "protocol": {"mailbox": False, "transport": "app-server stdio"},
        "task": {
            "hermes_task_id": "codex-smoke",
            "codex_thread_id": "thread-smoke",
            "codex_turn_id": "turn-smoke",
        },
    }
    status_reply = {
        "success": True,
        "task": {
            "hermes_task_id": "codex-smoke",
            "status": "completed",
            "recent_events": [{"payload_summary": "assistant replied CODEX_ASYNC_OK"}],
            "final_summary": None,
        },
    }

    def fake_codex_bridge(**kwargs):
        recorded.append(kwargs)
        reply = start_reply if kwargs["action"] == "start" else status_reply
        return json.dumps(reply)

    def skip_sleep(_seconds):
        # Stand-in for time.sleep so polling returns immediately.
        return None

    monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge)
    monkeypatch.setattr(cli.time, "sleep", skip_sleep)

    argv = [
        "smoke-test",
        "--cwd",
        str(tmp_path),
        "--wait",
        "3",
        "--timeout",
        "10",
        "--poll-interval",
        "0.01",
    ]
    exit_code = cli.main(argv)
    assert exit_code == 0

    report = json.loads(capsys.readouterr().out)
    assert report["success"] is True
    assert report["task_id"] == "codex-smoke"

    actions = [entry["action"] for entry in recorded]
    assert actions == ["start", "status"]
    start_call = recorded[0]
    assert "CODEX_ASYNC_OK" in start_call["prompt"]
    assert start_call["notify_target"] is None
def test_cli_notify_completed_dry_run_uses_bridge_without_real_notifier(monkeypatch, capsys):
    """Check ``notify-completed --limit 5 --dry-run`` against a stubbed bridge.

    Expects exit code 0, the stub's dry-run notification echoed on stdout,
    and a single bridge call with ``limit`` as the integer 5 and
    ``dry_run`` as ``True``.
    """
    cli = load_reference_module("cli")
    recorded = []
    preview = {
        "task_id": "codex-abc",
        "target": "local",
        "notification_status": "dry_run",
        "sent": False,
        "message": "preview",
    }
    bridge_reply = {
        "success": True,
        "dry_run": True,
        "processed": 1,
        "notifications": [preview],
    }

    def fake_codex_bridge(**kwargs):
        recorded.append(kwargs)
        return json.dumps(bridge_reply)

    monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge)

    argv = ["notify-completed", "--limit", "5", "--dry-run"]
    exit_code = cli.main(argv)
    assert exit_code == 0

    printed = json.loads(capsys.readouterr().out)
    first_notification = printed["notifications"][0]
    assert first_notification["notification_status"] == "dry_run"

    expected_call = {"action": "notify_completed", "limit": 5, "dry_run": True}
    assert recorded == [expected_call]