diff --git a/skills/codex-bridge/SKILL.md b/skills/codex-bridge/SKILL.md new file mode 100644 index 0000000000..bd56e01ad0 --- /dev/null +++ b/skills/codex-bridge/SKILL.md @@ -0,0 +1,59 @@ +--- +name: codex-bridge +description: Start and control local Codex tasks through Hermes Codex Bridge app-server integration. +version: 1.0.0 +platforms: [linux, macos] +metadata: + hermes: + tags: [codex, agent, bridge, app-server] + category: software-development +--- + +# Codex Bridge + +Use this skill when you need Hermes to start or steer a local Codex task through the Codex app-server protocol. + +## CLI + +Run the reference CLI from the repository root: + +```bash +python skills/codex-bridge/references/cli.py start --prompt "Inspect this repository and summarize the test layout." +python skills/codex-bridge/references/cli.py status TASK_ID +python skills/codex-bridge/references/cli.py list +python skills/codex-bridge/references/cli.py steer TASK_ID --instruction "Focus only on tests." +python skills/codex-bridge/references/cli.py interrupt TASK_ID +python skills/codex-bridge/references/cli.py respond TASK_ID --request-id REQUEST_ID --decision decline +python skills/codex-bridge/references/cli.py smoke-test --wait 10 --timeout 60 +``` + +The CLI is a productized wrapper around `tools.codex_bridge_tool.codex_bridge`. +It does not implement the app-server protocol itself and does not use mailbox, +inbox, or outbox files. + +## Safety Defaults + +- Sandbox is limited to `read-only` or `workspace-write`. +- `danger-full-access` is rejected. +- Approval policy is limited to `untrusted` or `on-request`. +- `approval_policy=never` is rejected. +- `start` requires a non-empty prompt and an existing `cwd`. + +## Output + +Commands print JSON to stdout. 
Validation errors return: + +```json +{"success": false, "error": "..."} +``` + +Successful `start` output is validated to ensure: + +- `success` is `true` +- `protocol.mailbox` is `false` +- `protocol.transport` includes `app-server` +- task id, Codex thread id, and Codex turn id are present + +The smoke test starts an async Codex task, polls `status`, and succeeds only +when the final task status is `completed` and `CODEX_ASYNC_OK` appears in +`recent_events` or `final_summary`. diff --git a/skills/codex-bridge/references/__init__.py b/skills/codex-bridge/references/__init__.py new file mode 100644 index 0000000000..387e2e52a1 --- /dev/null +++ b/skills/codex-bridge/references/__init__.py @@ -0,0 +1 @@ +"""Codex Bridge skill reference utilities.""" diff --git a/skills/codex-bridge/references/cli.py b/skills/codex-bridge/references/cli.py new file mode 100644 index 0000000000..4810c26cda --- /dev/null +++ b/skills/codex-bridge/references/cli.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +"""Productized CLI for Hermes Codex Bridge.""" + +from __future__ import annotations + +import argparse +import json +import sys +import time +from pathlib import Path +from typing import Any + + +REPO_ROOT = Path(__file__).resolve().parents[3] +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +try: + from .validator import ( + SMOKE_SENTINEL, + TERMINAL_STATUSES, + ValidationError, + parse_json_object, + validate_approval_policy, + validate_bridge_output, + validate_interrupt_input, + validate_respond_input, + validate_sandbox, + validate_smoke_test_result, + validate_start_input, + validate_status_input, + validate_steer_input, + ) +except ImportError: + from validator import ( # type: ignore + SMOKE_SENTINEL, + TERMINAL_STATUSES, + ValidationError, + parse_json_object, + validate_approval_policy, + validate_bridge_output, + validate_interrupt_input, + validate_respond_input, + validate_sandbox, + validate_smoke_test_result, + validate_start_input, + 
validate_status_input, + validate_steer_input, + ) + +from tools.codex_bridge_tool import DEFAULT_APPROVAL_POLICY, DEFAULT_SANDBOX, codex_bridge + + +def emit(data: dict[str, Any]) -> None: + print(json.dumps(data, ensure_ascii=False, sort_keys=True)) + + +def call_bridge(action: str, **kwargs: Any) -> dict[str, Any]: + raw = codex_bridge(action=action, **kwargs) + try: + data = json.loads(raw) + except json.JSONDecodeError as exc: + raise ValidationError(f"codex_bridge returned invalid JSON for {action}: {exc.msg}") from exc + validate_bridge_output(action, data) + return data + + +def _prompt_from_args(args: argparse.Namespace) -> str: + prompt = args.prompt + if prompt is None and args.prompt_text: + prompt = " ".join(args.prompt_text) + return prompt or "" + + +def cmd_start(args: argparse.Namespace) -> dict[str, Any]: + prompt = _prompt_from_args(args) + validate_start_input(prompt, args.cwd, args.sandbox, args.approval_policy) + return call_bridge( + "start", + prompt=prompt, + cwd=args.cwd, + model=args.model, + sandbox=args.sandbox, + approval_policy=args.approval_policy, + codex_home=args.codex_home, + ) + + +def cmd_status(args: argparse.Namespace) -> dict[str, Any]: + validate_status_input(args.task_id) + return call_bridge("status", task_id=args.task_id) + + +def cmd_list(args: argparse.Namespace) -> dict[str, Any]: + return call_bridge("list", limit=args.limit) + + +def cmd_steer(args: argparse.Namespace) -> dict[str, Any]: + validate_steer_input(args.task_id, args.instruction) + return call_bridge("steer", task_id=args.task_id, instruction=args.instruction) + + +def cmd_interrupt(args: argparse.Namespace) -> dict[str, Any]: + validate_interrupt_input(args.task_id) + return call_bridge("interrupt", task_id=args.task_id) + + +def cmd_respond(args: argparse.Namespace) -> dict[str, Any]: + answers = parse_json_object(args.answers, field_name="answers") + validate_respond_input(args.task_id, args.request_id, args.decision, answers) + return call_bridge( + 
"respond", + task_id=args.task_id, + instruction=args.request_id, + decision=args.decision, + answers=answers, + ) + + +def _smoke_prompt(wait_seconds: int) -> str: + return ( + f"Wait {wait_seconds} seconds asynchronously, then reply exactly {SMOKE_SENTINEL}. " + "Do not modify files." + ) + + +def cmd_smoke_test(args: argparse.Namespace) -> dict[str, Any]: + validate_start_input(_smoke_prompt(args.wait), args.cwd, args.sandbox, args.approval_policy) + started = call_bridge( + "start", + prompt=_smoke_prompt(args.wait), + cwd=args.cwd, + model=args.model, + sandbox=args.sandbox, + approval_policy=args.approval_policy, + codex_home=args.codex_home, + ) + task_id = started["task"]["hermes_task_id"] + deadline = time.monotonic() + args.timeout + last_status: dict[str, Any] | None = None + while time.monotonic() < deadline: + time.sleep(args.poll_interval) + last_status = call_bridge("status", task_id=task_id) + task = last_status.get("task") or {} + if task.get("status") in TERMINAL_STATUSES: + validate_smoke_test_result(last_status) + return { + "success": True, + "task_id": task_id, + "status": task.get("status"), + "start": started, + "final_status": last_status, + } + return { + "success": False, + "error": f"smoke-test timed out after {args.timeout} seconds.", + "task_id": task_id, + "start": started, + "last_status": last_status, + } + + +def add_common_start_options(parser: argparse.ArgumentParser) -> None: + parser.add_argument("--cwd", default=str(Path.cwd()), help="Working directory for Codex.") + parser.add_argument("--model", default=None, help="Optional Codex model override.") + parser.add_argument("--sandbox", default=DEFAULT_SANDBOX, type=validate_sandbox) + parser.add_argument("--approval-policy", default=DEFAULT_APPROVAL_POLICY, type=validate_approval_policy) + parser.add_argument("--codex-home", default=None, help="Optional CODEX_HOME override.") + + +def build_parser() -> argparse.ArgumentParser: + parser = 
argparse.ArgumentParser(description="Hermes Codex Bridge skill CLI") + subparsers = parser.add_subparsers(dest="command", required=True) + + start = subparsers.add_parser("start", help="Start a Codex task.") + start.add_argument("--prompt", help="Task prompt.") + start.add_argument("prompt_text", nargs="*", help="Task prompt as positional text.") + add_common_start_options(start) + start.set_defaults(func=cmd_start) + + status = subparsers.add_parser("status", help="Show task status.") + status.add_argument("task_id") + status.set_defaults(func=cmd_status) + + list_parser = subparsers.add_parser("list", help="List recent Codex Bridge tasks.") + list_parser.add_argument("--limit", type=int, default=10) + list_parser.set_defaults(func=cmd_list) + + steer = subparsers.add_parser("steer", help="Steer an active Codex turn.") + steer.add_argument("task_id") + steer.add_argument("--instruction", required=True) + steer.set_defaults(func=cmd_steer) + + interrupt = subparsers.add_parser("interrupt", help="Interrupt an active Codex turn.") + interrupt.add_argument("task_id") + interrupt.set_defaults(func=cmd_interrupt) + + respond = subparsers.add_parser("respond", help="Respond to a pending Codex request.") + respond.add_argument("task_id") + respond.add_argument("--request-id", required=True) + respond.add_argument("--decision", default="decline") + respond.add_argument("--answers", default=None, help="JSON object for user-input answers.") + respond.set_defaults(func=cmd_respond) + + smoke = subparsers.add_parser("smoke-test", help="Run an async Codex Bridge smoke test.") + smoke.add_argument("--wait", type=int, default=10) + smoke.add_argument("--timeout", type=int, default=60) + smoke.add_argument("--poll-interval", type=float, default=2.0) + add_common_start_options(smoke) + smoke.set_defaults(func=cmd_smoke_test) + + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + try: + args = parser.parse_args(argv) + result = 
args.func(args) + emit(result) + return 0 if result.get("success") is True else 1 + except ValidationError as exc: + emit({"success": False, "error": str(exc)}) + return 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/skills/codex-bridge/references/validator.py b/skills/codex-bridge/references/validator.py new file mode 100644 index 0000000000..34ed62e3a1 --- /dev/null +++ b/skills/codex-bridge/references/validator.py @@ -0,0 +1,152 @@ +"""Validation helpers for the Codex Bridge skill CLI.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Mapping + + +ALLOWED_SANDBOXES = {"read-only", "workspace-write"} +ALLOWED_APPROVAL_POLICIES = {"untrusted", "on-request"} +ALLOWED_DECISIONS = {"accept", "acceptForSession", "decline", "cancel"} +TERMINAL_STATUSES = {"completed", "failed", "cancelled"} +SMOKE_SENTINEL = "CODEX_ASYNC_OK" + + +class ValidationError(ValueError): + """Raised when a CLI input or bridge output fails validation.""" + + +def parse_json_object(value: str | None, *, field_name: str) -> dict[str, Any]: + if not value: + return {} + try: + parsed = json.loads(value) + except json.JSONDecodeError as exc: + raise ValidationError(f"{field_name} must be valid JSON: {exc.msg}") from exc + if not isinstance(parsed, dict): + raise ValidationError(f"{field_name} must be a JSON object.") + return parsed + + +def validate_sandbox(sandbox: str) -> str: + if sandbox == "danger-full-access": + raise ValidationError("danger-full-access is not allowed for Codex Bridge.") + if sandbox not in ALLOWED_SANDBOXES: + allowed = ", ".join(sorted(ALLOWED_SANDBOXES)) + raise ValidationError(f"sandbox must be one of: {allowed}.") + return sandbox + + +def validate_approval_policy(approval_policy: str) -> str: + if approval_policy not in ALLOWED_APPROVAL_POLICIES: + allowed = ", ".join(sorted(ALLOWED_APPROVAL_POLICIES)) + raise ValidationError(f"approval_policy must be one of: {allowed}.") + return 
approval_policy + + +def validate_start_input(prompt: str, cwd: str, sandbox: str, approval_policy: str) -> None: + if not prompt or not prompt.strip(): + raise ValidationError("start prompt must be non-empty.") + cwd_path = Path(cwd).expanduser() + if not cwd_path.exists() or not cwd_path.is_dir(): + raise ValidationError(f"cwd must be an existing directory: {cwd}") + validate_sandbox(sandbox) + validate_approval_policy(approval_policy) + + +def validate_task_id(action: str, task_id: str | None) -> None: + if not task_id or not str(task_id).strip(): + raise ValidationError(f"{action} requires task_id.") + + +def validate_steer_input(task_id: str | None, instruction: str | None) -> None: + validate_task_id("steer", task_id) + if not instruction or not instruction.strip(): + raise ValidationError("steer requires instruction.") + + +def validate_interrupt_input(task_id: str | None) -> None: + validate_task_id("interrupt", task_id) + + +def validate_status_input(task_id: str | None) -> None: + validate_task_id("status", task_id) + + +def validate_respond_input( + task_id: str | None, + request_id: str | None, + decision: str, + answers: Mapping[str, Any] | None, +) -> None: + validate_task_id("respond", task_id) + if not request_id or not str(request_id).strip(): + raise ValidationError("respond requires request_id.") + if decision not in ALLOWED_DECISIONS: + allowed = ", ".join(sorted(ALLOWED_DECISIONS)) + raise ValidationError(f"decision must be one of: {allowed}.") + if answers is not None and not isinstance(answers, Mapping): + raise ValidationError("answers must be a JSON object.") + + +def validate_start_output(data: Mapping[str, Any]) -> None: + if data.get("success") is not True: + raise ValidationError("start output must have success=true.") + protocol = data.get("protocol") + if not isinstance(protocol, Mapping): + raise ValidationError("start output must include protocol.") + if protocol.get("mailbox") is not False: + raise ValidationError("start output 
must have protocol.mailbox=false.") + transport = str(protocol.get("transport") or "") + if "app-server" not in transport: + raise ValidationError("start output protocol.transport must include app-server.") + task = data.get("task") + if not isinstance(task, Mapping): + raise ValidationError("start output must include task.") + required = { + "hermes_task_id": "task id", + "codex_thread_id": "thread id", + "codex_turn_id": "turn id", + } + for key, label in required.items(): + if not task.get(key): + raise ValidationError(f"start output missing {label}.") + + +def validate_bridge_output(action: str, data: Mapping[str, Any]) -> None: + if not isinstance(data, Mapping): + raise ValidationError("bridge output must be a JSON object.") + if data.get("success") is not True and data.get("error"): + raise ValidationError(str(data["error"])) + if action == "start": + validate_start_output(data) + return + if "success" in data and data.get("success") is not True: + raise ValidationError(str(data.get("error") or f"{action} failed.")) + + +def contains_text(value: Any, needle: str) -> bool: + if isinstance(value, str): + return needle in value + if isinstance(value, Mapping): + return any(contains_text(v, needle) for v in value.values()) + if isinstance(value, list): + return any(contains_text(v, needle) for v in value) + return False + + +def validate_smoke_test_result(status_data: Mapping[str, Any]) -> None: + task = status_data.get("task") + if not isinstance(task, Mapping): + raise ValidationError("smoke-test status output must include task.") + status = task.get("status") + if status != "completed": + raise ValidationError(f"smoke-test final status must be completed, got {status!r}.") + searchable = { + "recent_events": task.get("recent_events", []), + "final_summary": task.get("final_summary"), + } + if not contains_text(searchable, SMOKE_SENTINEL): + raise ValidationError(f"smoke-test output did not include {SMOKE_SENTINEL}.") diff --git 
a/tests/skills/test_codex_bridge_skill.py b/tests/skills/test_codex_bridge_skill.py new file mode 100644 index 0000000000..e2f20dadb7 --- /dev/null +++ b/tests/skills/test_codex_bridge_skill.py @@ -0,0 +1,220 @@ +import importlib.util +import json +import sys +from pathlib import Path + + +SKILL_REFS = Path(__file__).resolve().parents[2] / "skills" / "codex-bridge" / "references" + + +def load_reference_module(name): + module_path = SKILL_REFS / f"{name}.py" + sys.path.insert(0, str(SKILL_REFS)) + try: + spec = importlib.util.spec_from_file_location(f"codex_bridge_skill_{name}", module_path) + module = importlib.util.module_from_spec(spec) + assert spec and spec.loader + spec.loader.exec_module(module) + return module + finally: + try: + sys.path.remove(str(SKILL_REFS)) + except ValueError: + pass + + +def test_validator_rejects_unsafe_start_inputs(tmp_path): + validator = load_reference_module("validator") + + for sandbox in ["danger-full-access", "network-only"]: + try: + validator.validate_start_input("hello", str(tmp_path), sandbox, "untrusted") + except validator.ValidationError as exc: + assert "sandbox" in str(exc) or "danger-full-access" in str(exc) + else: + raise AssertionError(f"expected {sandbox} to be rejected") + + try: + validator.validate_start_input("hello", str(tmp_path), "read-only", "never") + except validator.ValidationError as exc: + assert "approval_policy" in str(exc) + else: + raise AssertionError("expected approval_policy=never to be rejected") + + try: + validator.validate_start_input("", str(tmp_path), "read-only", "untrusted") + except validator.ValidationError as exc: + assert "prompt" in str(exc) + else: + raise AssertionError("expected empty prompt to be rejected") + + try: + validator.validate_start_input("hello", str(tmp_path / "missing"), "read-only", "untrusted") + except validator.ValidationError as exc: + assert "cwd" in str(exc) + else: + raise AssertionError("expected missing cwd to be rejected") + + +def 
test_validator_requires_safe_start_output_contract(): + validator = load_reference_module("validator") + + valid = { + "success": True, + "protocol": {"mailbox": False, "transport": "app-server stdio"}, + "task": { + "hermes_task_id": "codex-1", + "codex_thread_id": "thread-1", + "codex_turn_id": "turn-1", + }, + } + validator.validate_start_output(valid) + + invalid = dict(valid) + invalid["protocol"] = {"mailbox": True, "transport": "app-server stdio"} + try: + validator.validate_start_output(invalid) + except validator.ValidationError as exc: + assert "mailbox" in str(exc) + else: + raise AssertionError("expected mailbox output to be rejected") + + invalid = dict(valid) + invalid["protocol"] = {"mailbox": False, "transport": "mailbox"} + try: + validator.validate_start_output(invalid) + except validator.ValidationError as exc: + assert "app-server" in str(exc) + else: + raise AssertionError("expected non app-server transport to be rejected") + + +def test_cli_start_validates_and_emits_bridge_json(tmp_path, monkeypatch, capsys): + cli = load_reference_module("cli") + calls = [] + + def fake_codex_bridge(**kwargs): + calls.append(kwargs) + return json.dumps( + { + "success": True, + "protocol": {"mailbox": False, "transport": "app-server stdio"}, + "task": { + "hermes_task_id": "codex-abc", + "codex_thread_id": "thread-abc", + "codex_turn_id": "turn-abc", + }, + } + ) + + monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge) + + exit_code = cli.main(["start", "--cwd", str(tmp_path), "--prompt", "Analyze tests"]) + + assert exit_code == 0 + output = json.loads(capsys.readouterr().out) + assert output["task"]["hermes_task_id"] == "codex-abc" + assert calls == [ + { + "action": "start", + "prompt": "Analyze tests", + "cwd": str(tmp_path), + "model": None, + "sandbox": "read-only", + "approval_policy": "untrusted", + "codex_home": None, + } + ] + + +def test_cli_respond_maps_request_id_to_bridge_instruction(monkeypatch, capsys): + cli = 
load_reference_module("cli") + calls = [] + + def fake_codex_bridge(**kwargs): + calls.append(kwargs) + return json.dumps({"success": True, "response": {"decision": kwargs["decision"]}}) + + monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge) + + exit_code = cli.main( + [ + "respond", + "codex-abc", + "--request-id", + "approval-1", + "--decision", + "decline", + "--answers", + '{"q1": {"answers": ["yes"]}}', + ] + ) + + assert exit_code == 0 + output = json.loads(capsys.readouterr().out) + assert output["response"] == {"decision": "decline"} + assert calls == [ + { + "action": "respond", + "task_id": "codex-abc", + "instruction": "approval-1", + "decision": "decline", + "answers": {"q1": {"answers": ["yes"]}}, + } + ] + + +def test_cli_smoke_test_polls_until_completed_with_sentinel(tmp_path, monkeypatch, capsys): + cli = load_reference_module("cli") + calls = [] + + def fake_codex_bridge(**kwargs): + calls.append(kwargs) + action = kwargs["action"] + if action == "start": + return json.dumps( + { + "success": True, + "protocol": {"mailbox": False, "transport": "app-server stdio"}, + "task": { + "hermes_task_id": "codex-smoke", + "codex_thread_id": "thread-smoke", + "codex_turn_id": "turn-smoke", + }, + } + ) + return json.dumps( + { + "success": True, + "task": { + "hermes_task_id": "codex-smoke", + "status": "completed", + "recent_events": [{"payload_summary": "assistant replied CODEX_ASYNC_OK"}], + "final_summary": None, + }, + } + ) + + monkeypatch.setattr(cli, "codex_bridge", fake_codex_bridge) + monkeypatch.setattr(cli.time, "sleep", lambda _seconds: None) + + exit_code = cli.main( + [ + "smoke-test", + "--cwd", + str(tmp_path), + "--wait", + "3", + "--timeout", + "10", + "--poll-interval", + "0.01", + ] + ) + + assert exit_code == 0 + output = json.loads(capsys.readouterr().out) + assert output["success"] is True + assert output["task_id"] == "codex-smoke" + assert [call["action"] for call in calls] == ["start", "status"] + assert "CODEX_ASYNC_OK" in 
calls[0]["prompt"]