diff --git a/agent/transports/codex_app_server.py b/agent/transports/codex_app_server.py
new file mode 100644
index 00000000000..b1aeaa00786
--- /dev/null
+++ b/agent/transports/codex_app_server.py
@@ -0,0 +1,368 @@
+"""Codex app-server JSON-RPC client.
+
+Speaks the protocol documented in codex-rs/app-server/README.md (codex 0.125+).
+Transport is newline-delimited JSON-RPC 2.0 over stdio: spawn `codex app-server`,
+do an `initialize` handshake, then drive `thread/start` + `turn/start` and
+consume streaming `item/*` notifications until `turn/completed`.
+
+This module is the wire-level speaker only. Higher-level concerns (event
+projection into Hermes' display, approval bridging, transcript projection into
+AIAgent.messages, plugin migration) live in sibling modules.
+
+Status: optional opt-in runtime gated behind `model.openai_runtime ==
+"codex_app_server"`. Hermes' default tool dispatch is unchanged when this
+runtime is not selected.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import queue
+import subprocess
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+# Minimum codex version this client is tested against, as (major, minor, patch).
+# check_codex_binary() compares the parsed `codex --version` to it; bump it here.
+MIN_CODEX_VERSION = (0, 125, 0)
+
+
+@dataclass(eq=False)  # eq=False: keep identity __eq__/__hash__ so errors stay hashable
+class CodexAppServerError(RuntimeError):
+    """Raised on JSON-RPC errors from the app-server (fields mirror the error object)."""
+
+    code: int  # JSON-RPC error code (request() passes -1 when the server omits it)
+    message: str  # error text from the server (request() passes "" when omitted)
+    data: Optional[Any] = None  # optional `data` member of the error object
+
+    def __str__(self) -> str:  # pragma: no cover - trivial
+        return f"codex app-server error {self.code}: {self.message}"
+
+
+@dataclass
+class _Pending:  # bookkeeping for one in-flight request, stored in _pending by id
+    queue: queue.Queue  # one-slot queue on which the reader thread puts the reply
+    method: str  # JSON-RPC method name of the outstanding request
+    sent_at: float = field(default_factory=time.time)  # wall-clock creation time
+
+
+class CodexAppServerClient:
+    """Minimal JSON-RPC 2.0 client for `codex app-server` over stdio.
+
+    Threading model:
+      - Spawning thread (caller) drives request/response pairs synchronously.
+      - One reader thread parses stdout, hands each reply to the per-request
+        queue of its pending call, and routes notifications + server-initiated
+        requests to unbounded queues the caller drains on its own cadence.
+      - One reader thread captures stderr for diagnostics; codex emits
+        tracing logs there at RUST_LOG-controlled levels.
+
+    Intentionally NOT async. AIAgent.run_conversation() is synchronous and
+    runs on the main thread; layering asyncio just to drive a stdio child
+    creates surprising interrupt semantics. We use blocking queues with
+    timeouts and rely on `turn/interrupt` for cancellation.
+    """
+
+    def __init__(
+        self,
+        codex_bin: str = "codex",
+        codex_home: Optional[str] = None,
+        extra_args: Optional[list[str]] = None,
+        env: Optional[dict[str, str]] = None,
+    ) -> None:
+        # Child environment: inherited env, then caller overrides, then CODEX_HOME.
+        child_env = dict(os.environ)
+        if env:
+            child_env.update(env)
+        if codex_home:
+            child_env["CODEX_HOME"] = codex_home
+        # Codex emits tracing to stderr; default to WARN only if RUST_LOG is unset.
+        child_env.setdefault("RUST_LOG", "warn")
+        argv = [codex_bin, "app-server", *(extra_args or [])]
+        self._codex_bin = codex_bin
+        self._proc = subprocess.Popen(
+            argv,
+            env=child_env,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            bufsize=0,
+        )
+        # Request bookkeeping plus the queues fed by the stdout reader thread.
+        self._next_id = 1
+        self._pending: dict[int, _Pending] = {}
+        self._pending_lock = threading.Lock()
+        self._notifications: queue.Queue = queue.Queue()
+        self._server_requests: queue.Queue = queue.Queue()
+        self._stderr_lines: list[str] = []
+        self._stderr_lock = threading.Lock()
+        self._closed = self._initialized = False
+        # One daemon reader per output pipe, started immediately.
+        self._reader = threading.Thread(target=self._read_stdout, daemon=True)
+        self._stderr_reader = threading.Thread(target=self._read_stderr, daemon=True)
+        self._reader.start()
+        self._stderr_reader.start()
+
+    # ---------- lifecycle ----------
+
+    def initialize(
+        self,
+        client_name: str = "hermes",
+        client_title: str = "Hermes Agent",
+        client_version: str = "0.1",
+        capabilities: Optional[dict] = None,
+        timeout: float = 10.0,
+    ) -> dict:
+        """Run the one-time handshake: `initialize` request, `initialized` notify.
+
+        Returns the server's InitializeResponse (userAgent, codexHome,
+        platformFamily, platformOs); raises RuntimeError if already initialized."""
+        if self._initialized:
+            raise RuntimeError("already initialized")
+        client_info = {
+            "name": client_name,
+            "title": client_title,
+            "version": client_version,
+        }
+        handshake = {"clientInfo": client_info, "capabilities": capabilities or {}}
+        server_info = self.request("initialize", handshake, timeout=timeout)
+        self.notify("initialized")
+        self._initialized = True
+        return server_info
+
+    def close(self, timeout: float = 3.0) -> None:
+        """Close stdin, terminate() the child, and kill() it if `timeout` elapses."""
+        if self._closed:
+            return
+        child, self._closed = self._proc, True
+        try:
+            if (pipe := child.stdin) and not pipe.closed:
+                pipe.close()
+        except Exception:
+            pass
+        try:
+            child.terminate()
+            child.wait(timeout=timeout)
+        except subprocess.TimeoutExpired:
+            try:
+                child.kill()
+                child.wait(timeout=1.0)
+            except Exception:
+                pass
+
+    def __enter__(self) -> "CodexAppServerClient":  # `with` support; no extra setup
+        return self
+
+    def __exit__(self, *exc: Any) -> None:  # always close(); exceptions propagate
+        self.close()
+
+    # ---------- send/receive ----------
+
+    def request(
+        self,
+        method: str,
+        params: Optional[dict] = None,
+        timeout: float = 30.0,
+    ) -> dict:
+        """Send a JSON-RPC request and block on the response.
+
+        Returns `result` ({} if absent). Raises CodexAppServerError on `error`,
+        TimeoutError after `timeout` seconds, RuntimeError if the write fails."""
+        rid, reply_q = self._take_id(), queue.Queue(maxsize=1)
+        with self._pending_lock:
+            self._pending[rid] = _Pending(queue=reply_q, method=method)
+        try:
+            self._send({"id": rid, "method": method, "params": params or {}})
+            msg = reply_q.get(timeout=timeout)
+        except queue.Empty:
+            raise TimeoutError(
+                f"codex app-server method {method!r} timed out after {timeout}s"
+            )
+        finally:  # drop the slot on send failure/timeout too (no-op after a reply)
+            with self._pending_lock:
+                self._pending.pop(rid, None)
+        if "error" in msg:
+            err = msg["error"]
+            raise CodexAppServerError(
+                err.get("code", -1), err.get("message", ""), err.get("data")
+            )
+        return msg.get("result", {})
+
+    def notify(self, method: str, params: Optional[dict] = None) -> None:
+        """Send a JSON-RPC notification: no `id`, so no reply; `params` defaults to {}."""
+        self._send({"method": method, "params": params or {}})
+
+    def respond(self, request_id: Any, result: dict) -> None:
+        """Reply to a server-initiated request, echoing its `id` (e.g. approvals)."""
+        self._send({"id": request_id, "result": result})
+
+    def respond_error(
+        self, request_id: Any, code: int, message: str, data: Optional[Any] = None
+    ) -> None:
+        """Answer a server-initiated request with a JSON-RPC error object."""
+        error_obj: dict[str, Any] = {"code": code, "message": message}
+        if data is not None:  # `data` is only included when the caller supplies it
+            error_obj.update(data=data)
+        self._send({"id": request_id, "error": error_obj})
+
+    def take_notification(self, timeout: float = 0.0) -> Optional[dict]:
+        """Return the next streaming notification, or None if none arrives in time.
+
+        A `timeout` of 0.0 (or less) never blocks. Small positive timeouts let the
+        AIAgent turn loop interleave reads with interrupt checks."""
+        blocking = not timeout <= 0
+        try:
+            # Non-blocking polls pass timeout=None, which Queue.get ignores.
+            return self._notifications.get(blocking, timeout if blocking else None)
+        except queue.Empty:
+            return None
+
+    def take_server_request(self, timeout: float = 0.0) -> Optional[dict]:
+        """Return the next server-initiated request (e.g. exec/applyPatch approval),
+        or None when nothing arrives within `timeout` (0.0 or less: no blocking)."""
+        blocking = not timeout <= 0
+        try:
+            return self._server_requests.get(blocking, timeout if blocking else None)
+        except queue.Empty:
+            return None
+
+    # ---------- diagnostics ----------
+
+    def stderr_tail(self, n: int = 20) -> list[str]:
+        """Return a copy of the last n captured stderr lines ([] when n <= 0)."""
+        with self._stderr_lock:
+            return self._stderr_lines[-n:] if n > 0 else []
+
+    def is_alive(self) -> bool:  # True while the child has not exited (poll() is None)
+        return self._proc.poll() is None
+
+    # ---------- internals ----------
+
+    def _take_id(self) -> int:
+        # Ids only have to be unique per connection, so a monotonically
+        # increasing counter suffices (the same scheme codex's own clients
+        # use). No lock is taken around the counter here.
+        allocated = self._next_id
+        self._next_id = allocated + 1
+        return allocated
+
+    def _send(self, obj: dict) -> None:  # serialize + write one JSON line to stdin
+        if self._closed:
+            raise RuntimeError("codex app-server client is closed")
+        if self._proc.stdin is None:
+            raise RuntimeError("codex app-server stdin not available")
+        try:
+            self._proc.stdin.write((json.dumps(obj) + "\n").encode("utf-8"))
+            self._proc.stdin.flush()
+        except (OSError, ValueError) as exc:  # OSError covers BrokenPipeError + EINVAL
+            raise RuntimeError(
+                f"codex app-server stdin closed unexpectedly: {exc}"
+            ) from exc
+
+    def _read_stdout(self) -> None:
+        """Reader-thread loop: parse stdout lines and dispatch JSON-RPC objects;
+        undecodable or non-object lines are recorded and skipped, not fatal."""
+        if self._proc.stdout is None:
+            return
+        try:
+            for raw in iter(self._proc.stdout.readline, b""):
+                line = raw.strip()
+                if not line:
+                    continue
+                try:
+                    msg = json.loads(line)
+                    if not isinstance(msg, dict):
+                        raise ValueError("not a JSON object")
+                except ValueError:  # JSONDecodeError, UnicodeDecodeError, non-object
+                    with self._stderr_lock:
+                        self._stderr_lines.append(
+                            f"<non-json on stdout> {line[:200]!r}"
+                        )
+                    continue
+                self._dispatch(msg)
+        except Exception as exc:
+            with self._stderr_lock:
+                self._stderr_lines.append(f"<stdout reader error> {exc}")
+
+    def _dispatch(self, msg: dict) -> None:
+        """Route one decoded message: reply, server request, or notification."""
+        has_id, has_method = "id" in msg, "method" in msg
+        if has_id and ("result" in msg or "error" in msg):
+            # Reply: wake the matching request(); unknown/expired ids are dropped.
+            with self._pending_lock:
+                waiter = self._pending.pop(msg["id"], None)
+            if waiter is not None:
+                try:
+                    waiter.queue.put_nowait(msg)
+                except queue.Full:  # pragma: no cover - defensive
+                    pass
+        elif has_id and has_method:
+            # Server-initiated request (has id + method).
+            self._server_requests.put(msg)
+        elif has_method:
+            # Notification (no id).
+            self._notifications.put(msg)
+
+    def _read_stderr(self) -> None:
+        """Reader-thread loop: append each decoded stderr line to the buffer and
+        trim the buffer to its last 500 entries after every append."""
+        pipe = self._proc.stderr
+        if pipe is None:
+            return
+        try:
+            for raw in iter(pipe.readline, b""):
+                text = raw.decode("utf-8", "replace").rstrip()
+                with self._stderr_lock:
+                    self._stderr_lines.append(text)
+                    # Bound memory: keep last 500 lines.
+                    if len(self._stderr_lines) > 500:
+                        self._stderr_lines = self._stderr_lines[-500:]
+        except Exception:  # pragma: no cover
+            pass
+
+
+def parse_codex_version(output: str) -> Optional[tuple[int, int, int]]:
+    """Extract the first `X.Y.Z` triple from `codex --version` output, else None."""
+    # Typical output: "codex-cli 0.130.0", optionally followed by metadata.
+    import re
+
+    found = re.search(r"(\d+)\.(\d+)\.(\d+)", output or "")
+    if found is None:
+        return None
+    return tuple(int(part) for part in found.groups())
+
+
+def check_codex_binary(
+    codex_bin: str = "codex", min_version: tuple[int, int, int] = MIN_CODEX_VERSION
+) -> tuple[bool, str]:
+    """Verify codex CLI is installed and meets minimum version.
+
+    Returns (ok, message): the version string when ok, otherwise the reason.
+    Launch failures are reported, not raised. Used by setup wizard and startup."""
+    try:
+        proc = subprocess.run(
+            [codex_bin, "--version"], capture_output=True, text=True, timeout=10
+        )
+    except FileNotFoundError:
+        return False, (
+            f"codex CLI not found at {codex_bin!r}. Install with: "
+            f"npm i -g @openai/codex"
+        )
+    except subprocess.TimeoutExpired:
+        return False, "codex --version timed out"
+    except OSError as exc:  # e.g. PermissionError: file exists but cannot be run
+        return False, f"could not run {codex_bin!r}: {exc}"
+    if proc.returncode != 0:
+        return False, f"codex --version exited {proc.returncode}: {proc.stderr.strip()}"
+    version = parse_codex_version(proc.stdout)
+    if version is None:
+        return False, f"could not parse codex version from: {proc.stdout!r}"
+    if version < min_version:
+        return False, (
+            f"codex {'.'.join(map(str, version))} is older than required "
+            f"{'.'.join(map(str, min_version))}. Run: npm i -g @openai/codex"
+        )
+    return True, ".".join(map(str, version))
diff --git a/agent/transports/codex_app_server_session.py b/agent/transports/codex_app_server_session.py
new file mode 100644
index 00000000000..619cfeabfc1
--- /dev/null
+++ b/agent/transports/codex_app_server_session.py
@@ -0,0 +1,525 @@
+"""Session adapter for codex app-server runtime.
+
+Owns one Codex thread per Hermes session. Drives `turn/start`, consumes
+streaming notifications via CodexEventProjector, handles server-initiated
+approval requests (apply_patch, exec command), translates cancellation,
+and returns a clean turn result that AIAgent.run_conversation() can splice
+into its `messages` list.
+
+Lifecycle:
+    session = CodexAppServerSession(cwd="/home/x/proj")
+    session.ensure_started()                              # spawns + handshake + thread/start
+    result = session.run_turn(user_input="hello")         # blocks until turn/completed
+    # result.final_text          → assistant text returned to caller
+    # result.projected_messages  → list of {role, content, ...} for messages list
+    # result.tool_iterations     → how many tool-shaped items completed (skill nudge counter)
+    # result.interrupted         → True if Ctrl+C / interrupt_requested fired mid-turn
+    session.close()                                       # tears down subprocess
+
+Threading model: the adapter is single-threaded from the caller's perspective.
+The underlying CodexAppServerClient owns its own reader threads but exposes
+blocking-with-timeout queues that this adapter polls in a loop, so the run_turn
+call is synchronous and behaves like AIAgent's existing chat_completions loop.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+from agent.transports.codex_app_server import (
+    CodexAppServerClient,
+    CodexAppServerError,
+)
+from agent.transports.codex_event_projector import CodexEventProjector
+
+logger = logging.getLogger(__name__)
+
+
+# Maps Hermes' tools.terminal.security_mode to a Codex permissions profile id.
+# Modes missing from this table fall back to workspace-write (see __init__).
+# NOTE(review): the resolved profile is only logged by ensure_started(), not sent.
+_HERMES_TO_CODEX_PERMISSION_PROFILE = {
+    "auto": "workspace-write",
+    "approval-required": "read-only-with-approval",
+    "unrestricted": "full-access",
+    # Backstop alias used by some skills/tests.
+    "yolo": "full-access",
+}
+
+
+@dataclass
+class TurnResult:
+    """Result of one user→assistant→tool turn through the codex app-server."""
+
+    final_text: str = ""  # last non-None final_text the projector produced
+    projected_messages: list[dict] = field(default_factory=list)  # in arrival order
+    tool_iterations: int = 0  # number of projections flagged is_tool_iteration
+    interrupted: bool = False  # True after an interrupt request or a turn timeout
+    error: Optional[str] = None  # Set if turn ended in a non-recoverable error
+    turn_id: Optional[str] = None  # id from the turn/start reply, when present
+    thread_id: Optional[str] = None  # codex thread id the turn was sent on
+
+
+@dataclass
+class _ServerRequestRouting:
+    """Default policies for codex-side approval requests when no interactive
+    callback is wired in. These are only used by tests + cron / non-interactive
+    contexts; the live CLI path passes an approval_callback that defers to
+    tools.approval.prompt_dangerous_approval()."""
+
+    auto_approve_exec: bool = False  # True → exec approvals answered "accept"
+    auto_approve_apply_patch: bool = False  # True → fileChange approvals accepted
+
+
+class CodexAppServerSession:
+    """One Codex thread per Hermes session, lifetime owned by AIAgent.
+
+    Not thread-safe — one caller drives it at a time, matching how AIAgent's
+    run_conversation() loop is structured today. The codex client itself can
+    handle interleaved reads/writes via its own threads, but the adapter's
+    state (projector, thread_id, turn counter) is owned by the caller thread.
+    """
+
+    def __init__(
+        self,
+        *,
+        cwd: Optional[str] = None,
+        codex_bin: str = "codex",
+        codex_home: Optional[str] = None,
+        permission_profile: Optional[str] = None,
+        approval_callback: Optional[Callable[..., str]] = None,
+        on_event: Optional[Callable[[dict], None]] = None,
+        request_routing: Optional[_ServerRequestRouting] = None,
+        client_factory: Optional[Callable[..., CodexAppServerClient]] = None,
+    ) -> None:
+        self._cwd = cwd or os.getcwd()
+        self._codex_bin, self._codex_home = codex_bin, codex_home
+        if not permission_profile:
+            # Derive from Hermes' security mode; unknown modes → workspace-write.
+            mode = os.environ.get("HERMES_TERMINAL_SECURITY_MODE", "auto")
+            permission_profile = _HERMES_TO_CODEX_PERMISSION_PROFILE.get(
+                mode, "workspace-write"
+            )
+        self._permission_profile = permission_profile
+        self._approval_callback = approval_callback
+        self._on_event = on_event  # Display hook (kawaii spinner ticks etc.)
+        self._routing = request_routing or _ServerRequestRouting()
+        self._client_factory = client_factory or CodexAppServerClient
+
+        self._client: Optional[CodexAppServerClient] = None
+        self._thread_id: Optional[str] = None
+        self._interrupt_event = threading.Event()
+        self._closed = False
+        # Pending file-change items, keyed by item id. Populated on
+        # item/started for fileChange items; consumed by the approval
+        # bridge when codex sends item/fileChange/requestApproval. The
+        # approval params don't carry the changeset, so we cache here
+        # to surface a real summary in the approval prompt (quirk #4).
+        self._pending_file_changes: dict[str, str] = {}
+
+    # ---------- lifecycle ----------
+
+    def ensure_started(self) -> str:
+        """Spawn the subprocess, do the initialize handshake, and start a
+        thread. Returns the codex thread id. Idempotent — repeated calls
+        return the same thread id; a retry after a failed `thread/start`
+        reuses the already-initialized client instead of re-initializing."""
+        if self._thread_id is not None:
+            return self._thread_id
+        if self._client is None:
+            client = self._client_factory(
+                codex_bin=self._codex_bin, codex_home=self._codex_home
+            )
+            try:
+                client.initialize(
+                    client_name="hermes",
+                    client_title="Hermes Agent",
+                    client_version=_get_hermes_version(),
+                )
+            except Exception:
+                client.close()  # don't leak the half-started subprocess
+                raise
+            self._client = client  # stored only after a successful handshake
+        # Permission selection is intentionally NOT sent on thread/start.
+        # Two reasons (live-tested against codex 0.130.0):
+        #   1. `thread/start.permissions` is gated behind the experimentalApi
+        #      capability — opting in means accepting the unstable surface.
+        #   2. Even with it and the correct shape (`{"type": "profile",
+        #      "id": "..."}`, not `{"profileId": ...}`), codex requires a
+        #      matching `[permissions]` table in ~/.codex/config.toml or fails
+        #      with 'default_permissions requires a [permissions] table'.
+        # Codex picks its default (`:read-only` unless the user's codex
+        # config.toml says otherwise) — the standard codex CLI workflow. Users
+        # wanting a write-capable profile configure it in ~/.codex/config.toml.
+        params: dict[str, Any] = {"cwd": self._cwd}
+        result = self._client.request("thread/start", params, timeout=15)
+        self._thread_id = result["thread"]["id"]
+        logger.info(
+            "codex app-server thread started: id=%s profile=%s cwd=%s",
+            self._thread_id[:8], self._permission_profile, self._cwd,
+        )
+        return self._thread_id
+
+    def close(self) -> None:
+        """Tear down the client (best-effort) and forget the thread; idempotent."""
+        if self._closed:
+            return
+        self._closed = True
+        client, self._client, self._thread_id = self._client, None, None
+        try:
+            if client is not None:
+                client.close()
+        except Exception:  # pragma: no cover - best-effort cleanup
+            pass
+
+    def __enter__(self) -> "CodexAppServerSession":  # nothing is started on entry
+        return self
+
+    def __exit__(self, *exc: Any) -> None:  # always close(); exceptions propagate
+        self.close()
+
+    # ---------- interrupt ----------
+
+    def request_interrupt(self) -> None:
+        """Idempotent: set the interrupt flag so the active turn loop issues
+        turn/interrupt and unwinds. Called by AIAgent's _interrupt_requested path."""
+        self._interrupt_event.set()
+
+    # ---------- per-turn ----------
+
+    def run_turn(
+        self,
+        user_input: str,
+        *,
+        turn_timeout: float = 600.0,
+        notification_poll_timeout: float = 0.25,
+    ) -> TurnResult:
+        """Send a user message and block until turn/completed, while
+        forwarding server-initiated approval requests and projecting items
+        into Hermes' messages shape.
+
+        `turn_timeout` bounds the whole turn; `notification_poll_timeout` is
+        how long each poll blocks between interrupt checks. A rejected
+        turn/start, a failed turn status, or a timeout is reported through
+        `TurnResult.error` rather than raised."""
+        self.ensure_started()
+        assert self._client is not None and self._thread_id is not None
+
+        self._interrupt_event.clear()
+        projector = CodexEventProjector()
+        result = TurnResult(thread_id=self._thread_id)
+
+        # Send turn/start with the user input. Text-only for now (codex
+        # supports rich content but Hermes' text path is the common case).
+        try:
+            ts = self._client.request(
+                "turn/start",
+                {
+                    "threadId": self._thread_id,
+                    "input": [{"type": "text", "text": user_input}],
+                },
+                timeout=10,
+            )
+        except CodexAppServerError as exc:
+            result.error = f"turn/start failed: {exc}"
+            return result
+
+        result.turn_id = (ts.get("turn") or {}).get("id")
+        deadline = time.time() + turn_timeout
+        turn_complete = False
+
+        while time.time() < deadline and not turn_complete:
+            if self._interrupt_event.is_set():
+                self._issue_interrupt(result.turn_id)
+                result.interrupted = True
+                break
+
+            # Drain any server-initiated requests (approvals) before
+            # reading notifications, so the codex side isn't blocked.
+            sreq = self._client.take_server_request(timeout=0)
+            if sreq is not None:
+                # Drain any pending notifications first so per-turn state
+                # (e.g. _pending_file_changes for fileChange approvals) is
+                # up to date when we make the approval decision. Bounded
+                # to avoid starving the server-request response.
+                for _ in range(8):
+                    pending = self._client.take_notification(timeout=0)
+                    if pending is None:
+                        break
+                    # Same handling as the main path below, so the display
+                    # hook sees these events and a queued turn/completed ends
+                    # the turn instead of being swallowed until the deadline.
+                    if self._consume_turn_notification(pending, projector, result):
+                        turn_complete = True
+                        break
+                self._handle_server_request(sreq)
+                continue
+
+            note = self._client.take_notification(
+                timeout=notification_poll_timeout
+            )
+            if note is not None:
+                turn_complete = self._consume_turn_notification(
+                    note, projector, result
+                )
+
+        if not turn_complete and not result.interrupted:
+            # Hit the deadline. Issue interrupt to stop wasted compute.
+            self._issue_interrupt(result.turn_id)
+            result.interrupted = True
+            result.error = result.error or f"turn timed out after {turn_timeout}s"
+
+        return result
+
+    def _consume_turn_notification(
+        self, note: dict, projector: CodexEventProjector, result: TurnResult
+    ) -> bool:
+        """Fold one notification into `result`; True iff it is turn/completed."""
+        if self._on_event is not None:
+            try:
+                self._on_event(note)
+            except Exception:  # pragma: no cover - display callback
+                logger.debug("on_event callback raised", exc_info=True)
+        # Track in-progress fileChange items so the approval bridge can
+        # surface a real change summary on approval requests (quirk #4 fix).
+        self._track_pending_file_change(note)
+        projection = projector.project(note)
+        if projection.messages:
+            result.projected_messages.extend(projection.messages)
+        if projection.is_tool_iteration:
+            result.tool_iterations += 1
+        if projection.final_text is not None:
+            # Codex can emit multiple agentMessage items in one turn
+            # (e.g. partial then final). Take the last one as canonical.
+            result.final_text = projection.final_text
+        if note.get("method", "") != "turn/completed":
+            return False
+        turn = (note.get("params") or {}).get("turn") or {}
+        status = turn.get("status")
+        if status and status not in ("completed", "interrupted"):
+            err_obj = turn.get("error")
+            if err_obj:
+                result.error = (
+                    f"turn ended status={status}: "
+                    f"{err_obj.get('message') or err_obj}"
+                )
+        return True
+
+    # ---------- internals ----------
+
+    def _issue_interrupt(self, turn_id: Optional[str]) -> None:
+        """Best-effort `turn/interrupt`: transport failures are logged, not raised."""
+        if self._client is None or self._thread_id is None or turn_id is None:
+            return
+        params = {"threadId": self._thread_id, "turnId": turn_id}
+        try:
+            self._client.request("turn/interrupt", params, timeout=5)
+        except CodexAppServerError as exc:
+            # "no active turn to interrupt" is fine — already done.
+            logger.debug("turn/interrupt non-fatal: %s", exc)
+        except TimeoutError:
+            logger.warning("turn/interrupt timed out")
+        except RuntimeError as exc:  # client closed / stdin gone: nothing to stop
+            logger.warning("turn/interrupt could not be sent: %s", exc)
+
+    def _handle_server_request(self, req: dict) -> None:
+        """Answer one codex server-initiated request.
+
+        Approval requests are routed through Hermes' approval flow; anything
+        unrecognised is rejected with JSON-RPC -32601 so codex never hangs.
+
+        Method names verified live against codex 0.130.0 (Apr 2026):
+          item/commandExecution/requestApproval — exec approvals
+          item/fileChange/requestApproval       — apply_patch approvals
+          item/permissions/requestApproval      — permissions changes
+                                                  (we decline; user controls
+                                                  permission profile in
+                                                  ~/.codex/config.toml).
+          mcpServer/elicitation/request         — accepted only for the
+                                                  "hermes-tools" MCP server.
+        """
+        if self._client is None:
+            return
+        rid, method = req.get("id"), req.get("method", "")
+        params = req.get("params") or {}
+
+        if method == "item/commandExecution/requestApproval":
+            verdict = self._decide_exec_approval(params)
+            self._client.respond(rid, {"decision": verdict})
+            return
+        if method == "item/fileChange/requestApproval":
+            verdict = self._decide_apply_patch_approval(params)
+            self._client.respond(rid, {"decision": verdict})
+            return
+        if method == "item/permissions/requestApproval":
+            # Codex sometimes asks to escalate permissions mid-turn. We
+            # always decline — the user already chose their permission
+            # profile in ~/.codex/config.toml and surprise escalations
+            # shouldn't be silently accepted.
+            self._client.respond(rid, {"decision": "decline"})
+            return
+        if method == "mcpServer/elicitation/request":
+            # Codex's MCP layer asks the user for structured input on
+            # behalf of an MCP server (e.g. tool-call confirmation,
+            # OAuth, form data). For our own hermes-tools callback we
+            # auto-accept — the user already approved Hermes' tools
+            # by enabling the runtime, and we never expose anything
+            # codex's built-in shell can't already do. For other MCP
+            # servers we decline so the user explicitly opts in via
+            # codex's own auth flow.
+            is_ours = (params.get("serverName") or "") == "hermes-tools"
+            action = "accept" if is_ours else "decline"
+            self._client.respond(
+                rid, {"action": action, "content": None, "_meta": None}
+            )
+            return
+        # Unknown server request — codex can extend this surface. Reject
+        # cleanly so codex doesn't hang waiting for us.
+        logger.warning("Unknown codex server request: %s", method)
+        self._client.respond_error(
+            rid, code=-32601, message=f"Unsupported method: {method}"
+        )
+
+    def _decide_exec_approval(self, params: dict) -> str:
+        """Decide an exec approval; declines without a callback or if it raises."""
+        if self._routing.auto_approve_exec:
+            return "accept"
+        if self._approval_callback is None:
+            return "decline"  # fail-closed when no callback wired
+        # Codex's CommandExecutionRequestApprovalParams has cwd as Optional —
+        # fall back to the session's cwd when codex doesn't include it so the
+        # approval prompt is never empty (quirk #10 fix).
+        where = params.get("cwd") or self._cwd or "<unknown>"
+        why = params.get("reason")
+        description = f"Codex requests exec in {where}"
+        if why:
+            description += f" — {why}"
+        try:
+            choice = self._approval_callback(
+                params.get("command") or "", description, allow_permanent=False
+            )
+            return _approval_choice_to_codex_decision(choice)
+        except Exception:
+            logger.exception("approval_callback raised on exec request")
+            return "decline"
+
+    def _decide_apply_patch_approval(self, params: dict) -> str:
+        """Return codex's decision string for a file-change approval request.
+
+        Short-circuits to "accept" when routing auto-approves patches and to
+        "decline" when no approval callback is wired. Otherwise the callback
+        is asked with a label and description built from the request, and
+        any exception it raises is logged and treated as "decline".
+        """
+        if self._routing.auto_approve_apply_patch:
+            return "accept"
+        callback = self._approval_callback
+        if callback is None:
+            return "decline"
+        # FileChangeRequestApprovalParams only carries reason + grantRoot;
+        # the changeset itself is on the matching fileChange item. Look up
+        # the summary cached for that item id so the prompt shows what is
+        # actually being changed.
+        why = params.get("reason")
+        root = params.get("grantRoot")
+        item_id = params.get("itemId") or ""
+        summary = self._lookup_pending_file_change(item_id)
+        pieces = [text for text in (why, summary) if text]
+        if root:
+            pieces.append(f"grants write to {root}")
+        if pieces:
+            description = "; ".join(pieces)
+        else:
+            description = "Codex requests to apply a patch"
+        if summary:
+            label = f"apply_patch: {summary}"
+        elif why:
+            label = f"apply_patch: {why}"
+        else:
+            label = "apply_patch"
+        try:
+            verdict = callback(label, description, allow_permanent=False)
+            return _approval_choice_to_codex_decision(verdict)
+        except Exception:
+            logger.exception("approval_callback raised on apply_patch")
+            return "decline"
+
+    def _track_pending_file_change(self, note: dict) -> None:
+        """Keep self._pending_file_changes in sync with fileChange items.
+
+        item/started stores a one-line summary under the item id and
+        item/completed drops it again, so the apply_patch approval prompt
+        can describe a change that codex's approval params do not carry."""
+        event = note.get("method", "")
+        entry = (note.get("params") or {}).get("item") or {}
+        if entry.get("type") != "fileChange":
+            return
+        key = entry.get("id") or ""
+        if not key:
+            return
+        if event == "item/completed":
+            self._pending_file_changes.pop(key, None)
+            return
+        if event != "item/started":
+            return
+        raw_changes = entry.get("changes") or []
+        if not raw_changes:
+            self._pending_file_changes[key] = "1 change pending"
+            return
+        dict_changes = [c for c in raw_changes if isinstance(c, dict)]
+        per_kind: dict[str, int] = {}
+        for change in dict_changes:
+            label = (change.get("kind") or {}).get("type") or "update"
+            per_kind[label] = per_kind.get(label, 0) + 1
+        touched = [c.get("path") for c in dict_changes if c.get("path")]
+        tally = ", ".join(f"{n} {k}" for k, n in sorted(per_kind.items()))
+        shown = ", ".join(touched[:3])
+        extra = len(touched) - 3
+        if extra > 0:
+            shown += f", +{extra} more"
+        self._pending_file_changes[key] = (
+            f"{tally}: {shown}" if shown else tally
+        )
+
+    def _lookup_pending_file_change(self, item_id: str) -> Optional[str]:
+        """Return the cached change summary for a fileChange item, if any.
+
+        Gives None for an empty id and for an id whose cache entry is missing
+        or empty — e.g. the approval request arrived before item/started was
+        seen, or that item's content is not tracked yet.
+        """
+        if not item_id:
+            return None
+        summary = self._pending_file_changes.get(item_id)
+        return summary if summary else None
+
+
+def _approval_choice_to_codex_decision(choice: str) -> str:
+    """Translate a Hermes approval choice into codex's wire decision.
+
+    Hermes yields 'once', 'session', 'always', or 'deny'; codex's
+    CommandExecutionApprovalDecision / FileChangeApprovalDecision accept
+    'accept', 'acceptForSession', 'decline', or 'cancel' (verified against
+    codex-rs/app-server-protocol/src/protocol/v2/item.rs on codex 0.130.0).
+    Anything other than the three approving choices maps to 'decline'.
+    """
+    if choice == "once":
+        return "accept"
+    if choice == "session" or choice == "always":
+        return "acceptForSession"
+    return "decline"
+
+
+def _get_hermes_version() -> str:
+    """Return the hermes-agent version for codex's userAgent, else "0.0.0"."""
+    try:
+        import importlib.metadata as _metadata
+
+        return _metadata.version("hermes-agent")
+    except Exception:  # pragma: no cover
+        return "0.0.0"
diff --git a/agent/transports/codex_event_projector.py b/agent/transports/codex_event_projector.py
new file mode 100644
index 00000000000..0a388a60cfb
--- /dev/null
+++ b/agent/transports/codex_event_projector.py
@@ -0,0 +1,312 @@
+"""Projects codex app-server events into Hermes' messages list.
+
+The translator that lets Hermes' memory/skill review keep working under the
+Codex runtime: it converts Codex `item/*` notifications into the standard
+OpenAI-shaped `{role, content, tool_calls, tool_call_id}` entries that
+`agent/curator.py` already knows how to read.
+
+Codex emits items with a discriminator field `type`:
+  - userMessage         → {role: "user", content}
+  - agentMessage        → {role: "assistant", content}
+  - reasoning           → stashed in the assistant's "reasoning" field
+  - commandExecution    → assistant tool_call(name="exec_command") + tool result
+  - fileChange          → assistant tool_call(name="apply_patch") + tool result
+  - mcpToolCall         → assistant tool_call(name=f"mcp.{server}.{tool}") + tool result
+  - dynamicToolCall     → assistant tool_call(name=tool) + tool result
+  - plan/hookPrompt/collabAgentToolCall → recorded as opaque assistant notes
+
+Each item maps to AT MOST one assistant entry + one tool entry, preserving
+Hermes' message-alternation invariants (system → user → assistant → user/tool
+→ assistant → ...). Multiple Codex tool calls within one Codex turn produce
+multiple consecutive (assistant, tool) pairs, which is the same shape Hermes
+already produces for parallel tool calls.
+
+Counters tracked alongside projection:
+  - tool_iterations: ticks once per completed tool-shaped item. Used by
+    AIAgent._iters_since_skill (skill nudge gate, default threshold 10).
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+from dataclasses import dataclass, field
+from typing import Any, Optional
+
+
+def _deterministic_call_id(item_type: str, item_id: str) -> str:
+    """Build a replay-stable id for correlating tool_call / tool messages.
+
+    A codex item id (already a uuid) is embedded directly. Without one the
+    suffix is a digest of `item_type` alone, so id-less items of one type
+    share a call id. Stable ids keep prefix caches valid across sessions;
+    see AGENTS.md Pitfall #16 (deterministic IDs in tool call history)."""
+    suffix = item_id
+    if not suffix:
+        suffix = hashlib.sha256(f"{item_type}".encode()).hexdigest()[:16]
+    return f"codex_{item_type}_{suffix}"
+
+
+def _format_tool_args(d: dict) -> str:
+    """Serialize tool arguments as key-sorted JSON, keeping non-ASCII text."""
+    return json.dumps(d, sort_keys=True, ensure_ascii=False)
+
+
+@dataclass
+class ProjectionResult:
+    """What projecting a single Codex notification produced.
+
+    An empty `messages` list means the notification was ignored (e.g. a
+    streaming `outputDelta`, which only materializes at `item/completed`)."""
+
+    # Zero, one or two entries: tool-shaped items yield a pair
+    # (assistant tool_call + tool result).
+    messages: list[dict] = field(default_factory=list)
+    is_tool_iteration: bool = False
+    final_text: Optional[str] = None  # Set when an agentMessage completes
+
+
+class CodexEventProjector:
+    """Stateful projector consuming Codex notifications in arrival order.
+
+    Only `item/completed` notifications produce messages. Reasoning items
+    are buffered (codex emits them as separate items) and attached as the
+    "reasoning" field of the next assistant message that is projected,
+    whether that message is plain text or a tool call."""
+
+    def __init__(self) -> None:
+        # Reasoning fragments buffered until the next assistant message.
+        self._pending_reasoning: list[str] = []
+
+    def project(self, notification: dict) -> ProjectionResult:
+        """Project a single notification into zero or more messages.
+
+        Args:
+            notification: decoded JSON-RPC notification; its "method" and
+                "params" keys are read.
+
+        Returns:
+            A ProjectionResult. It is empty for every method other than
+            `item/completed`, for a non-dict item, and for reasoning items
+            (which are only buffered).
+        """
+        method = notification.get("method", "")
+        params = notification.get("params") or {}
+
+        # We only materialize messages on `item/completed`. Streaming deltas
+        # (`item/<type>/outputDelta`, `item/<type>/delta`) are display-only and
+        # don't enter the messages list — same way Hermes already only writes
+        # the assistant message after the streaming completion event.
+        if method != "item/completed":
+            return ProjectionResult()
+
+        item = params.get("item") or {}
+        if not isinstance(item, dict):
+            return ProjectionResult()  # malformed payload: nothing to project
+        item_type = item.get("type") or ""
+        # tool_call ids must be unique per item. If codex omits the item id,
+        # derive one from the item's content so two id-less items of the
+        # same type cannot collide on the same tool_call_id.
+        item_id = item.get("id") or self._content_digest(item)
+
+        if item_type == "agentMessage":
+            return self._project_agent_message(item)
+        if item_type == "reasoning":
+            self._buffer_reasoning(item.get("summary"))
+            self._buffer_reasoning(item.get("content"))
+            return ProjectionResult()
+        if item_type == "commandExecution":
+            return self._project_command(item, item_id)
+        if item_type == "fileChange":
+            return self._project_file_change(item, item_id)
+        if item_type == "mcpToolCall":
+            return self._project_mcp_tool_call(item, item_id)
+        if item_type == "dynamicToolCall":
+            return self._project_dynamic_tool_call(item, item_id)
+        if item_type == "userMessage":
+            return self._project_user_message(item)
+
+        # Unknown / rare items (plan, hookPrompt, collabAgentToolCall, etc.)
+        # — record as opaque assistant note so memory review can still see
+        # *something* happened, but don't fabricate tool_call structure.
+        return self._project_opaque(item, item_type)
+
+    # ---------- shared helpers ----------
+
+    @staticmethod
+    def _content_digest(item: dict) -> str:
+        """Return a short hash of `item`'s content, used as a fallback id."""
+        try:
+            blob = json.dumps(item, sort_keys=True)
+        except (TypeError, ValueError):
+            blob = repr(item)
+        digest = hashlib.sha256(blob.encode("utf-8", "backslashreplace"))
+        return digest.hexdigest()[:16]
+
+    def _buffer_reasoning(self, fragments: Any) -> None:
+        """Append reasoning text to the pending buffer.
+
+        A list is appended element by element, a lone string or dict is
+        treated as a one-element list, and any other value is ignored.
+        Dict fragments contribute their "text" value; None is skipped and
+        other non-string fragments are stringified, so joining the buffer
+        later cannot raise."""
+        if isinstance(fragments, (str, dict)):
+            fragments = [fragments]
+        elif not isinstance(fragments, list):
+            return
+        for fragment in fragments:
+            if isinstance(fragment, dict):
+                fragment = fragment.get("text")
+            if fragment is not None:
+                self._pending_reasoning.append(str(fragment))
+
+    def _take_reasoning(self) -> Optional[str]:
+        """Drain the pending reasoning; return it joined, or None if empty."""
+        if not self._pending_reasoning:
+            return None
+        reasoning = "\n".join(self._pending_reasoning)
+        self._pending_reasoning = []
+        return reasoning
+
+    def _tool_call_pair(
+        self, call_id: str, name: str, args: dict, output: str
+    ) -> ProjectionResult:
+        """Build the (assistant tool_call, tool result) pair for one item.
+
+        Pending reasoning, if any, is moved onto the assistant message and
+        the result is flagged as a tool iteration."""
+        assistant_msg: dict[str, Any] = {
+            "role": "assistant",
+            "content": None,
+            "tool_calls": [
+                {
+                    "id": call_id,
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "arguments": _format_tool_args(args),
+                    },
+                }
+            ],
+        }
+        reasoning = self._take_reasoning()
+        if reasoning is not None:
+            assistant_msg["reasoning"] = reasoning
+        tool_msg = {"role": "tool", "tool_call_id": call_id, "content": output}
+        return ProjectionResult(
+            messages=[assistant_msg, tool_msg], is_tool_iteration=True
+        )
+
+    # ---------- per-type projections ----------
+
+    def _project_agent_message(self, item: dict) -> ProjectionResult:
+        """Project an agentMessage as an assistant message; sets final_text."""
+        text = item.get("text") or ""
+        msg: dict[str, Any] = {"role": "assistant", "content": text}
+        reasoning = self._take_reasoning()
+        if reasoning is not None:
+            msg["reasoning"] = reasoning
+        return ProjectionResult(messages=[msg], final_text=text)
+
+    def _project_user_message(self, item: dict) -> ProjectionResult:
+        """Project a userMessage as one user message of newline-joined text."""
+        # codex's userMessage content is a list of UserInput variants. For
+        # projection purposes we flatten any text fragments and ignore
+        # non-text parts (images, etc.) — Hermes' messages store text only.
+        text_parts: list[str] = []
+        for fragment in item.get("content") or []:
+            if isinstance(fragment, dict):
+                if fragment.get("type") == "text":
+                    text_parts.append(str(fragment.get("text") or ""))
+                elif "text" in fragment:
+                    text_parts.append(str(fragment["text"]))
+        return ProjectionResult(
+            messages=[{"role": "user", "content": "\n".join(text_parts)}]
+        )
+
+    def _project_command(self, item: dict, item_id: str) -> ProjectionResult:
+        """Project a commandExecution item as an exec_command tool call."""
+        args = {
+            "command": item.get("command") or "",
+            "cwd": item.get("cwd") or "",
+        }
+        output = item.get("aggregatedOutput") or ""
+        exit_code = item.get("exitCode")
+        if exit_code is not None and exit_code != 0:
+            # Prefix failures so the exit status survives in the transcript.
+            output = f"[exit {exit_code}]\n{output}"
+        call_id = _deterministic_call_id("exec", item_id)
+        return self._tool_call_pair(call_id, "exec_command", args, output)
+
+    def _project_file_change(self, item: dict, item_id: str) -> ProjectionResult:
+        """Project a fileChange item as an apply_patch tool call."""
+        # Reduce the codex changes array to a digest the agent loop will
+        # find readable. We record per-file change kinds (Add/Update/Delete)
+        # without inlining full file contents — those can be huge.
+        changes = []
+        for change in item.get("changes") or []:
+            if not isinstance(change, dict):
+                continue  # skip malformed entries instead of raising
+            kind = change.get("kind")
+            if isinstance(kind, dict):
+                kind = kind.get("type")
+            path = change.get("path") or ""
+            changes.append({"kind": kind or "update", "path": path})
+        status = item.get("status") or "unknown"
+        output = f"apply_patch status={status}, {len(changes)} change(s)"
+        call_id = _deterministic_call_id("apply_patch", item_id)
+        args = {"changes": changes}
+        return self._tool_call_pair(call_id, "apply_patch", args, output)
+
+    def _project_mcp_tool_call(self, item: dict, item_id: str) -> ProjectionResult:
+        """Project an mcpToolCall as an `mcp.<server>.<tool>` tool call."""
+        server = item.get("server") or "mcp"
+        tool = item.get("tool") or "unknown"
+        args = item.get("arguments") or {}
+        if not isinstance(args, dict):
+            args = {"arguments": args}
+        result = item.get("result")
+        error = item.get("error")
+        # Errors win over results; both are truncated to bound message size.
+        if error:
+            output = f"[error] {json.dumps(error, ensure_ascii=False)[:1000]}"
+        elif result is not None:
+            output = json.dumps(result, ensure_ascii=False)[:4000]
+        else:
+            output = ""
+        call_id = _deterministic_call_id(f"mcp_{server}_{tool}", item_id)
+        name = f"mcp.{server}.{tool}"
+        return self._tool_call_pair(call_id, name, args, output)
+
+    def _project_dynamic_tool_call(
+        self, item: dict, item_id: str
+    ) -> ProjectionResult:
+        """Project a dynamicToolCall item as a call to the tool it names."""
+        tool = item.get("tool") or "unknown"
+        args = item.get("arguments") or {}
+        if not isinstance(args, dict):
+            args = {"arguments": args}
+        content_items = item.get("contentItems") or []
+        if isinstance(content_items, list) and content_items:
+            output = json.dumps(content_items, ensure_ascii=False)[:4000]
+        else:
+            # No content items: record the success flag instead.
+            output = f"success={item.get('success')}"
+        call_id = _deterministic_call_id(f"dyn_{tool}", item_id)
+        return self._tool_call_pair(call_id, tool, args, output)
+
+    def _project_opaque(self, item: dict, item_type: str) -> ProjectionResult:
+        """Record any other item as an opaque assistant note (no tool_calls)."""
+        try:
+            payload = json.dumps(item, ensure_ascii=False)[:1500]
+        except (TypeError, ValueError):
+            payload = repr(item)[:1500]
+        return ProjectionResult(
+            messages=[
+                {
+                    "role": "assistant",
+                    "content": f"[codex {item_type}] {payload}",
+                }
+            ]
+        )
diff --git a/agent/transports/hermes_tools_mcp_server.py b/agent/transports/hermes_tools_mcp_server.py
new file mode 100644
index 00000000000..f7f8ae24887
--- /dev/null
+++ b/agent/transports/hermes_tools_mcp_server.py
@@ -0,0 +1,225 @@
+"""Hermes-tools-as-MCP server for the codex_app_server runtime.
+
+When the user runs `openai/*` turns through the codex app-server, codex
+owns the loop and builds its own tool list. By default, that means
+Hermes' richer tool surface — web search, browser automation,
+delegate_task subagents, vision analysis, persistent memory, skills,
+cross-session search, image generation, TTS — is unreachable.
+
+This module exposes a curated subset of those Hermes tools to the
+spawned codex subprocess via stdio MCP. Codex registers it as a normal
+MCP server (per `~/.codex/config.toml [mcp_servers.hermes-tools]`) and
+the user gets full Hermes capability inside a Codex turn.
+
+Scope (what we expose — see EXPOSED_TOOLS below for the exact names):
+  - web_search, web_extract              — Firecrawl, no codex equivalent
+  - browser_navigate / _click / _type /  — Camofox/Browserbase automation
+    _press / _snapshot / _scroll / _back / _get_images / _console / _vision
+  - vision_analyze                       — image inspection by vision model
+  - image_generate                       — image generation
+  - skill_view, skills_list              — Hermes' skill library
+  - text_to_speech                       — TTS
+  - kanban_*                             — kanban worker/orchestrator handoff
+
+What we DO NOT expose:
+  - terminal / shell                     — codex's own shell tool
+  - read_file / write_file / patch       — codex's apply_patch + shell
+  - search_files / process               — codex's shell
+  - clarify                              — codex's own UX
+  - delegate_task / memory /             — need the running AIAgent loop, so
+    session_search / todo                  a stateless MCP callback can't run them
+
+Run with: python -m agent.transports.hermes_tools_mcp_server
+Spawned by: CodexAppServerSession.ensure_started() when the runtime is
+            active and config opts in.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import sys
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# Tools we expose. Each name MUST match a registered Hermes tool that
+# `model_tools.handle_function_call()` can dispatch; a name the running
+# process does not report is skipped (with a debug log) at server build.
+#
+# What we deliberately DO NOT expose:
+#   - terminal / shell / read_file / write_file / patch / search_files /
+#     process — codex's built-ins cover these and approval routes through
+#     codex's own UI.
+#   - delegate_task / memory / session_search / todo — these are
+#     `_AGENT_LOOP_TOOLS` in Hermes (model_tools.py:493). They need the
+#     running AIAgent context (mid-loop state), so a stateless MCP callback
+#     can't drive them; only Hermes' default runtime keeps them working.
+EXPOSED_TOOLS: tuple[str, ...] = (
+    "web_search",
+    "web_extract",
+    "browser_navigate",
+    "browser_click",
+    "browser_type",
+    "browser_press",
+    "browser_snapshot",
+    "browser_scroll",
+    "browser_back",
+    "browser_get_images",
+    "browser_console",
+    "browser_vision",
+    "vision_analyze",
+    "image_generate",
+    "skill_view",
+    "skills_list",
+    "text_to_speech",
+    # Kanban worker handoff tools — gated on HERMES_KANBAN_TASK env var
+    # (set by the kanban dispatcher when spawning a worker). Without these
+    # in the callback, a worker spawned with openai_runtime=codex_app_server
+    # could do the work but couldn't report completion back to the kernel,
+    # making it hang until timeout. Stateless dispatch — they just read
+    # the env var and write to ~/.hermes/kanban.db.
+    "kanban_complete",
+    "kanban_block",
+    "kanban_comment",
+    "kanban_heartbeat",
+    "kanban_show",
+    "kanban_list",
+    # NOTE: kanban_create / kanban_unblock / kanban_link are orchestrator-
+    # only — the kanban tool gates them on HERMES_KANBAN_TASK being unset.
+    # They're exposed here for orchestrator agents running on the codex
+    # runtime that need to dispatch new tasks.
+    "kanban_create",
+    "kanban_unblock",
+    "kanban_link",
+)
+
+
+def _build_server() -> Any:
+    """Create the FastMCP server with Hermes tools attached.
+
+    Imports are lazy so this module can be imported without the mcp
+    package installed; the failure only surfaces when the server is built.
+
+    Returns:
+        A FastMCP instance named "hermes-tools" with one tool registered
+        for every EXPOSED_TOOLS entry that get_tool_definitions() reports.
+
+    Raises:
+        ImportError: if the mcp package cannot be imported.
+    """
+    try:
+        from mcp.server.fastmcp import FastMCP
+    except ImportError as exc:  # pragma: no cover - install hint
+        raise ImportError(
+            f"hermes-tools MCP server requires the 'mcp' package: {exc}"
+        ) from exc
+
+    # Discover Hermes tools so dispatch works.
+    from model_tools import (
+        get_tool_definitions,
+        handle_function_call,
+    )
+
+    mcp = FastMCP(
+        "hermes-tools",
+        # Only advertise what EXPOSED_TOOLS can actually register: subagent
+        # delegation, memory and session search are deliberately left out.
+        instructions=(
+            "Hermes Agent's tool surface, exposed for use inside a Codex "
+            "session. Use these for capabilities Codex's built-in toolset "
+            "doesn't cover: web search/extract, browser automation, "
+            "vision, image generation, text-to-speech, skills, and "
+            "kanban task handoff."
+        ),
+    )
+
+    # Hermes function-tool definitions keyed by name (source of descriptions).
+    all_defs = {
+        td["function"]["name"]: td["function"]
+        for td in (get_tool_definitions(quiet_mode=True) or [])
+        if isinstance(td, dict) and td.get("type") == "function"
+    }
+
+    def _make_handler(tool_name: str, doc: str):
+        # FastMCP wants a Python callable: accept the arguments as keywords,
+        # dispatch via handle_function_call, and return its result. A tool
+        # that raises is logged and answered with a JSON error payload.
+        def _dispatch(**kwargs: Any) -> str:
+            try:
+                return handle_function_call(tool_name, kwargs or {})
+            except Exception as exc:
+                logger.exception("tool %s raised", tool_name)
+                return json.dumps({"error": str(exc), "tool": tool_name})
+        _dispatch.__name__ = tool_name
+        _dispatch.__doc__ = doc
+        return _dispatch
+
+    exposed_count = 0
+    for name in EXPOSED_TOOLS:
+        spec = all_defs.get(name)
+        if spec is None:
+            logger.debug(
+                "skipping %s — not registered in this Hermes process", name
+            )
+            continue
+
+        description = spec.get("description") or f"Hermes {name} tool"
+        # NOTE(review): spec["parameters"] is not forwarded to FastMCP here,
+        # so the tool's input schema is presumably derived from _dispatch's
+        # **kwargs signature — confirm MCP clients see usable parameters.
+        handler = _make_handler(name, description)
+        try:
+            mcp.add_tool(handler, name=name, description=description)
+        except TypeError:
+            # Older mcp SDK signature — fall back to decorator-style.
+            mcp.tool(name=name, description=description)(handler)
+        exposed_count += 1
+
+    logger.info(
+        "hermes-tools MCP server registered %d/%d tools",
+        exposed_count,
+        len(EXPOSED_TOOLS),
+    )
+    return mcp
+
+
+def main(argv: Optional[list[str]] = None) -> int:
+    """Run the hermes-tools MCP server (`python -m` entry point).
+
+    `argv` falls back to sys.argv[1:] only when it is None. Returns 0 on a
+    normal exit or Ctrl-C, 1 on a crash, 2 if building raised ImportError."""
+    argv = sys.argv[1:] if argv is None else argv
+    verbose = "--verbose" in argv or "-v" in argv
+    logging.basicConfig(
+        level=logging.INFO if verbose else logging.WARNING,
+        stream=sys.stderr,  # MCP uses stdio for protocol — logs MUST go to stderr
+        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    )
+
+    # Quiet mode: keep Hermes' own banners off stdout (which is the MCP wire).
+    os.environ.setdefault("HERMES_QUIET", "1")
+    os.environ.setdefault("HERMES_REDACT_SECRETS", "true")
+
+    try:
+        server = _build_server()
+    except ImportError as exc:
+        sys.stderr.write(f"hermes-tools MCP server cannot start: {exc}\n")
+        return 2
+
+    # FastMCP's run() uses the stdio transport by default.
+    try:
+        server.run()
+    except KeyboardInterrupt:
+        return 0
+    except Exception as exc:
+        logger.exception("hermes-tools MCP server crashed")
+        sys.stderr.write(f"hermes-tools MCP server error: {exc}\n")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/cli.py b/cli.py
index 72ffd0b1708..5560846320d 100644
--- a/cli.py
+++ b/cli.py
@@ -6774,6 +6774,46 @@ class HermesCLI:
         else:
             _cprint("    (session only — add --global to persist)")
 
+    def _handle_codex_runtime(self, cmd_original: str) -> None:
+        """Handle /codex-runtime — toggle the codex app-server runtime opt-in.
+
+        Usage:
+            /codex-runtime                       — show current state
+            /codex-runtime auto                  — Hermes default (chat_completions)
+            /codex-runtime codex_app_server      — hand turns to codex subprocess
+            /codex-runtime on / off              — synonyms for the above
+        """
+        from hermes_cli import codex_runtime_switch as crs
+
+        parts = cmd_original.split(None, 1)
+        raw_args = parts[1].strip() if len(parts) > 1 else ""
+        new_value, errors = crs.parse_args(raw_args)
+        if errors:
+            for err in errors:
+                _cprint(f"❌ {err}")
+            return
+
+        # Load inside the try so a broken config is reported, not raised.
+        try:
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+        except Exception as exc:
+            _cprint(f"❌ could not load config: {exc}")
+            return
+
+        result = crs.apply(
+            cfg,
+            new_value,
+            persist_callback=(save_config if new_value is not None else None),
+        )
+
+        prefix = "✓" if result.success else "✗"
+        for line in result.message.splitlines():
+            _cprint(f"  {prefix} {line}" if line.startswith("openai_runtime")
+                    else f"    {line}")
+        if result.success and result.requires_new_session:
+            _cprint("    Tip: `/reset` starts a new session immediately.")
+
     def _should_handle_model_command_inline(self, text: str, has_images: bool = False) -> bool:
         """Return True when /model should be handled immediately on the UI thread."""
         if not text or has_images or not _looks_like_slash_command(text):
@@ -7454,6 +7494,8 @@ class HermesCLI:
             self._handle_resume_command(cmd_original)
         elif canonical == "model":
             self._handle_model_switch(cmd_original)
+        elif canonical == "codex-runtime":
+            self._handle_codex_runtime(cmd_original)
         elif canonical == "gquota":
             self._handle_gquota_command(cmd_original)
 
diff --git a/gateway/run.py b/gateway/run.py
index 4946a7e6c1e..95f1d811543 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -6128,6 +6128,12 @@ class GatewayRunner:
             if _cmd_def_inner and _cmd_def_inner.name == "model":
                 return "Agent is running — wait or /stop first, then switch models."
 
+            # /codex-runtime must not be used while the agent is running.
+            # Switching mid-turn would split a turn across two transports.
+            if _cmd_def_inner and _cmd_def_inner.name == "codex-runtime":
+                return ("Agent is running — wait or /stop first, then "
+                        "change runtime.")
+
             # /approve and /deny must bypass the running-agent interrupt path.
             # The agent thread is blocked on a threading.Event inside
             # tools/approval.py — sending an interrupt won't unblock it.
@@ -6462,6 +6468,9 @@ class GatewayRunner:
         if canonical == "model":
             return await self._handle_model_command(event)
 
+        if canonical == "codex-runtime":
+            return await self._handle_codex_runtime_command(event)
+
         if canonical == "personality":
             return await self._handle_personality_command(event)
 
@@ -9242,6 +9251,51 @@ class GatewayRunner:
 
         return "\n".join(lines)
 
+    async def _handle_codex_runtime_command(self, event: MessageEvent) -> str:
+        """Handle /codex-runtime command in the gateway.
+
+        Same surface as the CLI handler in cli.py:
+            /codex-runtime                  — show current state
+            /codex-runtime auto             — Hermes default runtime
+            /codex-runtime codex_app_server — codex subprocess runtime
+            /codex-runtime on / off         — synonyms
+
+        On change, the cached agent for this session is evicted so the next
+        message creates a fresh AIAgent with the new api_mode wired in
+        (avoids prompt-cache invalidation mid-session).
+
+        Returns:
+            The text reply for the user: "❌ ..." lines for argument or
+            config-loading errors, otherwise the status message prefixed
+            with "✓" (success) or "✗" (failure).
+        """
+        from hermes_cli import codex_runtime_switch as crs
+
+        raw_args = event.get_command_args().strip() if event else ""
+        new_value, errors = crs.parse_args(raw_args)
+        if errors:
+            return "❌ " + "\n❌ ".join(errors)
+
+        # Load + persist via the same helpers used for /model and /yolo.
+        # load_config() runs inside the try so an unreadable config is
+        # reported to the user rather than escaping as an unhandled error.
+        try:
+            from hermes_cli.config import load_config, save_config
+            cfg = load_config()
+        except Exception as exc:
+            return f"❌ Could not load config: {exc}"
+
+        # Only persist when the user asked for a change; a bare
+        # `/codex-runtime` (new_value is None) is a read-only status query.
+        result = crs.apply(
+            cfg,
+            new_value,
+            persist_callback=(save_config if new_value is not None else None),
+        )
+
+        # On a real change, evict the cached agent so the new runtime takes
+        # effect on the next message rather than waiting for cache TTL.
+        if result.success and new_value is not None and result.requires_new_session:
+            try:
+                session_key = self._session_key_for_source(event.source)
+                self._evict_cached_agent(session_key)
+            except Exception:
+                # Best-effort: the setting is already persisted, so a failed
+                # eviction only delays when the new runtime kicks in.
+                logger.debug("could not evict cached agent after codex-runtime change",
+                             exc_info=True)
+
+        prefix = "✓" if result.success else "✗"
+        return f"{prefix} {result.message}"
+
     async def _handle_personality_command(self, event: MessageEvent) -> str:
         """Handle /personality command - list or set a personality."""
         from hermes_constants import display_hermes_home
diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index 1cfb0d51f76..c4ec348ef48 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -581,6 +581,19 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
     if mcp_connected:
         summary_parts.append(f"{mcp_connected} MCP servers")
     summary_parts.append("/help for commands")
+    # Indicate when the codex_app_server runtime is active so users
+    # understand why tool counts may not match what's actually reachable
+    # (codex builds its own tool list inside the spawned subprocess).
+    try:
+        from hermes_cli.codex_runtime_switch import get_current_runtime
+        from hermes_cli.config import load_config as _load_cfg
+        if get_current_runtime(_load_cfg()) == "codex_app_server":
+            right_lines.append(
+                f"[bold {accent}]Runtime:[/] [{text}]codex app-server[/] "
+                f"[dim {dim}](terminal/file ops/MCP run inside codex)[/]"
+            )
+    except Exception:
+        pass
     # Show active profile name when not 'default'
     try:
         from hermes_cli.profiles import get_active_profile_name
diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py
new file mode 100644
index 00000000000..c00ec26bd29
--- /dev/null
+++ b/hermes_cli/codex_runtime_plugin_migration.py
@@ -0,0 +1,598 @@
+"""Migrate Hermes' MCP server config and Codex's installed curated plugins
+to the format Codex expects in ~/.codex/config.toml.
+
+When the user enables the codex_app_server runtime, the codex subprocess
+runs its own MCP client and its own plugin runtime (Linear, Atlassian,
+Asana, plus per-account ChatGPT apps via app/list). For both of those to
+be useful, the user's choices need to be visible to codex too. This
+module:
+
+  1. Reads Hermes' YAML and writes equivalent [mcp_servers.<name>]
+     entries to ~/.codex/config.toml.
+  2. Queries codex's `plugin/list` for the openai-curated marketplace
+     and writes [plugins."<name>@<marketplace>"] entries for any plugin
+     the user has installed=true on their codex CLI. (This is what
+     OpenClaw calls "migrate native codex plugins" — the YouTube-video-
+     worthy bit Pash highlighted: Canva, GitHub, Calendar, Gmail
+     pre-configured.)
+  3. Writes a top-level `default_permissions` key so users on this runtime
+     don't get an approval prompt on every write attempt.
+
+What translates (MCP servers):
+  Hermes mcp_servers.<n>.command/args/env  → codex stdio transport
+  Hermes mcp_servers.<n>.url/headers       → codex streamable_http transport
+  Hermes mcp_servers.<n>.timeout           → codex tool_timeout_sec
+  Hermes mcp_servers.<n>.connect_timeout   → codex startup_timeout_sec
+
+What does NOT translate (warned + skipped):
+  Hermes-specific keys (sampling, etc.) — codex's MCP client has no
+  equivalent. Listed in the per-server skipped[] field of the report.
+
+What's NOT migrated (intentional):
+  AGENTS.md — codex respects this file natively in its cwd. Hermes' own
+  AGENTS.md (project-level) is already in the worktree, so codex picks
+  it up without translation. No code needed.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
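+# Marker comments wrapping the managed section so re-runs can detect
+# what's ours and what's user-edited. Without the start marker, strip is a
+# no-op; if only the end marker is missing, strip falls back to a
+# section-header heuristic for blocks written by older versions.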
+MIGRATION_MARKER = (
+    "# managed by hermes-agent — `hermes codex-runtime migrate` regenerates this section"
+)
+MIGRATION_END_MARKER = (
+    "# end hermes-agent managed section"
+)
+
+
+@dataclass
+class MigrationReport:
+    """Outcome of a single migration pass, plus a human-readable summary."""
+
+    # Path of the codex config.toml the pass targeted.
+    target_path: Optional[Path] = None
+    # Names of MCP servers that were translated.
+    migrated: list[str] = field(default_factory=list)
+    # Per-server notes about keys that were not carried over.
+    skipped_keys_per_server: dict[str, list[str]] = field(default_factory=dict)
+    # "<name>@<marketplace>" ids of native codex plugins that were migrated.
+    migrated_plugins: list[str] = field(default_factory=list)
+    # Reason plugin discovery failed, if it did.
+    plugin_query_error: Optional[str] = None
+    # Permission profile name requested for `default_permissions`, if any.
+    wrote_permissions_default: Optional[str] = None
+    # Non-fatal problems, rendered with a warning sign in the summary.
+    errors: list[str] = field(default_factory=list)
+    # True once the target file has actually been replaced.
+    written: bool = False
+    # True when the pass was asked not to touch the file.
+    dry_run: bool = False
+
+    def summary(self) -> str:
+        """Render the report as newline-joined text for display."""
+        parts: list[str] = []
+
+        if self.dry_run:
+            parts.append(f"(dry run) Would write {self.target_path}")
+        elif self.written:
+            parts.append(f"Wrote {self.target_path}")
+
+        if not self.migrated:
+            parts.append("No MCP servers found in Hermes config.")
+        else:
+            parts.append(f"Migrated {len(self.migrated)} MCP server(s):")
+            for server in self.migrated:
+                dropped = self.skipped_keys_per_server.get(server, [])
+                suffix = f" (skipped: {', '.join(dropped)})" if dropped else ""
+                parts.append(f"  - {server}{suffix}")
+
+        if self.migrated_plugins:
+            parts.append(
+                f"Migrated {len(self.migrated_plugins)} native Codex plugin(s):"
+            )
+            parts.extend(f"  - {plugin_id}" for plugin_id in self.migrated_plugins)
+        elif self.plugin_query_error:
+            parts.append(f"Codex plugin discovery skipped: {self.plugin_query_error}")
+
+        if self.wrote_permissions_default:
+            parts.append(
+                f"Wrote default_permissions = {self.wrote_permissions_default!r}"
+            )
+
+        parts.extend(f"⚠ {problem}" for problem in self.errors)
+        return "\n".join(parts)
+
+
+# Hermes MCP-server keys the migration recognizes. A key that appears in
+# neither this set nor _KEYS_DROPPED_WITH_WARNING is reported in the
+# per-server skipped list as an unknown Hermes key.
+_KNOWN_HERMES_KEYS = {
+    # transport — stdio
+    "command", "args", "env", "cwd",
+    # transport — http
+    "url", "headers", "transport",
+    # timeouts
+    "timeout", "connect_timeout",
+    # general
+    "enabled", "description",
+}
+
+# Keys with no codex equivalent; reported as skipped with a warning.
+_KEYS_DROPPED_WITH_WARNING = {
+    # Hermes' sampling subsection — codex MCP has no equivalent
+    "sampling",
+}
+
+
+def _translate_one_server(
+    name: str, hermes_cfg: dict
+) -> tuple[Optional[dict], list[str]]:
+    """Convert one Hermes MCP server config into a codex server entry.
+
+    Returns ``(codex_entry, skipped_keys)``. ``codex_entry`` is a dict ready
+    for TOML serialization, or None when the input is not a dict or names
+    neither a ``command`` nor a ``url``. ``skipped_keys`` lists human-readable
+    notes about settings that were not carried over. When both ``command``
+    and ``url`` are set, the stdio transport wins and the url is noted as
+    skipped."""
+    if not isinstance(hermes_cfg, dict):
+        return None, []
+
+    command = hermes_cfg.get("command")
+    url = hermes_cfg.get("url")
+    if not command and not url:
+        return None, ["no command or url field"]
+
+    notes: list[str] = []
+    entry: dict[str, Any] = {}
+
+    if command:
+        if url:
+            notes.append("url (both command and url set; preferring stdio)")
+        # Stdio transport: command plus optional args / env / cwd.
+        entry["command"] = str(command)
+        argv = hermes_cfg.get("args") or []
+        if argv:
+            entry["args"] = [str(item) for item in argv]
+        environ = hermes_cfg.get("env") or {}
+        if environ:
+            # Codex expects string values
+            entry["env"] = {str(k): str(v) for k, v in environ.items()}
+        workdir = hermes_cfg.get("cwd")
+        if workdir:
+            entry["cwd"] = str(workdir)
+    else:
+        # streamable_http transport (codex covers both http and SSE here)
+        entry["url"] = str(url)
+        http_headers = hermes_cfg.get("headers") or {}
+        if http_headers:
+            entry["http_headers"] = {
+                str(k): str(v) for k, v in http_headers.items()
+            }
+        # Hermes' transport: sse hint is informational; codex auto-negotiates
+        if hermes_cfg.get("transport") == "sse":
+            notes.append("transport=sse (codex auto-negotiates)")
+
+    # Timeouts: Hermes key → codex key, tool timeout first.
+    timeout_map = (
+        ("timeout", "tool_timeout_sec"),
+        ("connect_timeout", "startup_timeout_sec"),
+    )
+    for hermes_key, codex_key in timeout_map:
+        if hermes_key not in hermes_cfg:
+            continue
+        try:
+            entry[codex_key] = float(hermes_cfg[hermes_key])
+        except (TypeError, ValueError):
+            notes.append(f"{hermes_key} (not numeric)")
+
+    # codex defaults to enabled, so only an explicit False is written out.
+    if hermes_cfg.get("enabled") is False:
+        entry["enabled"] = False
+
+    # Report keys that are deliberately dropped or simply not recognized.
+    for key in hermes_cfg:
+        if key in _KEYS_DROPPED_WITH_WARNING:
+            notes.append(f"{key} (no codex equivalent)")
+        elif key not in _KNOWN_HERMES_KEYS:
+            notes.append(f"{key} (unknown Hermes key)")
+
+    return entry, notes
+
+
+def _format_toml_value(value: Any) -> str:
+    """Minimal TOML value formatter for the value types we emit.
+
+    Handles booleans, ints/floats, strings, lists (TOML arrays), and dicts
+    (TOML inline tables) of those — no nested arrays of tables.
+
+    Strings are written as TOML basic strings. Backslash, double quote and
+    every control character (U+0000–U+001F and U+007F) are escaped, because
+    a literal control character inside a basic string is invalid TOML that
+    codex would refuse to load. Characters without a short escape use the
+    ``\\uXXXX`` form.
+
+    Raises:
+        ValueError: if ``value`` (or a nested element) has an unsupported
+            type, e.g. ``None``.
+    """
+    # bool must be tested before int: bool is a subclass of int in Python.
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, (int, float)):
+        return repr(value)
+    if isinstance(value, str):
+        short_escapes = {
+            "\\": "\\\\",
+            '"': '\\"',
+            "\b": "\\b",
+            "\t": "\\t",
+            "\n": "\\n",
+            "\f": "\\f",
+            "\r": "\\r",
+        }
+        # Single pass over the characters, so no escape can be re-escaped.
+        pieces: list[str] = []
+        for ch in value:
+            if ch in short_escapes:
+                pieces.append(short_escapes[ch])
+            elif ch < " " or ch == "\x7f":
+                # Remaining control characters have no short form.
+                pieces.append(f"\\u{ord(ch):04X}")
+            else:
+                pieces.append(ch)
+        return '"' + "".join(pieces) + '"'
+    if isinstance(value, list):
+        items = ", ".join(_format_toml_value(v) for v in value)
+        return f"[{items}]"
+    if isinstance(value, dict):
+        items = ", ".join(
+            f'{_quote_key(k)} = {_format_toml_value(v)}' for k, v in value.items()
+        )
+        return "{ " + items + " }" if items else "{}"
+    raise ValueError(f"Unsupported TOML value type: {type(value).__name__}")
+
+
+def _quote_key(key: str) -> str:
+    """Return ``key`` as a TOML key: bare when allowed, otherwise quoted.
+
+    TOML bare keys may contain only ASCII letters, ASCII digits, ``-`` and
+    ``_``. ``str.isalnum()`` alone also accepts non-ASCII letters and digits,
+    so the ASCII check is explicit. Every other key, including the empty
+    string, is emitted as a basic-string quoted key with backslash, double
+    quote and control characters escaped.
+    """
+    if key and all(c.isascii() and (c.isalnum() or c in "-_") for c in key):
+        return key
+    pieces: list[str] = []
+    for ch in key:
+        if ch in '\\"':
+            pieces.append("\\" + ch)
+        elif ch < " " or ch == "\x7f":
+            # Literal control characters are invalid inside a quoted key.
+            pieces.append(f"\\u{ord(ch):04X}")
+        else:
+            pieces.append(ch)
+    return '"' + "".join(pieces) + '"'
+
+def render_codex_toml_section(
+    servers: dict[str, dict],
+    plugins: Optional[list[dict]] = None,
+    default_permission_profile: Optional[str] = None,
+) -> str:
+    """Render the managed block for ~/.codex/config.toml.
+
+    The block is wrapped in MIGRATION_MARKER / MIGRATION_END_MARKER and
+    contains, in this order: an optional `default_permissions` key, one
+    [mcp_servers.<name>] table per server (sorted by name), and one
+    [plugins."<name>@<marketplace>"] table per plugin (sorted by that id).
+
+    Args:
+        servers: MCP server name → translated codex entry.
+        plugins: optional list of {name, marketplace, enabled} dicts for
+            native Codex plugins. A missing marketplace is written as
+            "openai-curated"; a missing `enabled` is written as true.
+        default_permission_profile: when set, emit
+            `default_permissions = "<profile>"`. A name without a leading
+            ":" gets one prepended, so only ":"-prefixed profile names can
+            be produced through this parameter.
+
+    NOTE(review): `default_permissions` is written as a bare key directly
+    under the start marker. TOML assigns bare keys to the most recent table
+    header, so the key is only top-level when no table header precedes this
+    block in the final file.
+    """
+    if not (servers or plugins or default_permission_profile):
+        empty_block = [
+            MIGRATION_MARKER,
+            "# (no MCP servers, plugins, or permissions configured by Hermes)",
+            MIGRATION_END_MARKER,
+        ]
+        return "\n".join(empty_block) + "\n"
+
+    lines = [MIGRATION_MARKER]
+
+    if default_permission_profile:
+        # `default_permissions` is a plain string naming a profile; the
+        # [permissions] table is for user-defined structured profiles and
+        # is deliberately not written here.
+        profile = default_permission_profile
+        if not profile.startswith(":"):
+            profile = f":{profile}"
+        lines.append("")
+        lines.append(f"default_permissions = {_format_toml_value(profile)}")
+
+    for server_name in sorted(servers or ()):
+        lines.append("")
+        lines.append(f"[mcp_servers.{_quote_key(server_name)}]")
+        lines.extend(
+            f"{_quote_key(field_name)} = {_format_toml_value(field_value)}"
+            for field_name, field_value in servers[server_name].items()
+        )
+
+    def _plugin_order(entry: dict) -> str:
+        return f"{entry.get('name','')}@{entry.get('marketplace','')}"
+
+    for entry in sorted(plugins or (), key=_plugin_order):
+        plugin_name = entry.get("name") or ""
+        market = entry.get("marketplace") or "openai-curated"
+        is_enabled = bool(entry.get("enabled", True))
+        plugin_id = f"{plugin_name}@{market}"
+        lines.append("")
+        lines.append(f'[plugins.{_quote_key(plugin_id)}]')
+        lines.append(f"enabled = {_format_toml_value(is_enabled)}")
+
+    lines.append("")
+    lines.append(MIGRATION_END_MARKER)
+    return "\n".join(lines) + "\n"
+
+
+def _strip_existing_managed_block(toml_text: str) -> str:
+    """Drop any previously written managed section from ``toml_text``.
+
+    Everything from a MIGRATION_MARKER line through the following
+    MIGRATION_END_MARKER line (both inclusive) is removed; all other lines
+    are returned unchanged, so re-runs replace the section idempotently.
+
+    Backward compatibility: blocks written by older versions have no end
+    marker. While inside a managed block, a section header that is not
+    [mcp_servers*] / [plugins*] / [permissions] / [permissions.*] is
+    treated as the start of user content: it ends the managed block and is
+    kept. Non-header lines inside the block (such as a
+    `default_permissions =` key) are dropped. Because the scan cannot know
+    whether an end marker is still to come, this rule applies to every
+    managed block, not only to old-format ones."""
+    managed_headers = (
+        "[mcp_servers",
+        "[plugins",
+        "[permissions]",
+        "[permissions.",
+    )
+    kept: list[str] = []
+    inside_block = False
+
+    for raw_line in toml_text.splitlines(keepends=True):
+        content = raw_line.rstrip("\n")
+
+        if content == MIGRATION_MARKER:
+            inside_block = True
+            continue
+
+        if not inside_block:
+            kept.append(raw_line)
+            continue
+
+        if content == MIGRATION_END_MARKER:
+            inside_block = False
+            continue
+
+        candidate = raw_line.lstrip()
+        if candidate.startswith("[") and not candidate.startswith(managed_headers):
+            # A section that is not ours: hand control back to user content.
+            inside_block = False
+            kept.append(raw_line)
+        # Any other line inside the managed block is discarded.
+
+    return "".join(kept)
+
+
+def _query_codex_plugins(
+    codex_home: Optional[Path] = None,
+    timeout: float = 8.0,
+) -> tuple[list[dict], Optional[str]]:
+    """Query codex's `plugin/list` for installed curated plugins.
+
+    Spawns `codex app-server` briefly, sends initialize + plugin/list,
+    extracts plugins where installed=true. Returns (plugins, error).
+    Plugins is a list of {name, marketplace, enabled} dicts ready for
+    render_codex_toml_section(); duplicates of the same
+    (name, marketplace) pair are dropped.
+
+    Args:
+        codex_home: forwarded to the client as a string when given.
+        timeout: seconds allowed for the `plugin/list` request.
+
+    On any failure (transport import error, codex not installed, RPC
+    error, timeout, malformed response) returns ([], error_message).
+    Migration treats this as non-fatal — MCP servers and permissions
+    still write through.
+    """
+    try:
+        from agent.transports.codex_app_server import CodexAppServerClient
+    except Exception as exc:
+        return [], f"transport unavailable: {exc}"
+
+    try:
+        with CodexAppServerClient(
+            codex_home=str(codex_home) if codex_home else None
+        ) as client:
+            client.initialize(client_name="hermes-migration")
+            resp = client.request("plugin/list", {}, timeout=timeout)
+    except Exception as exc:
+        return [], f"plugin/list query failed: {exc}"
+
+    # A null or non-object result would otherwise raise AttributeError on
+    # .get() and break the "never fatal" contract above.
+    if not isinstance(resp, dict):
+        return [], "plugin/list returned a non-object response"
+
+    marketplaces = resp.get("marketplaces") or []
+    if not isinstance(marketplaces, list):
+        return [], "plugin/list response missing 'marketplaces'"
+
+    out: list[dict] = []
+    seen: set[tuple[str, str]] = set()
+    for marketplace in marketplaces:
+        if not isinstance(marketplace, dict):
+            continue
+        market_name = str(marketplace.get("name") or "openai-curated")
+        plugins = marketplace.get("plugins") or []
+        if not isinstance(plugins, list):
+            continue
+        for plugin in plugins:
+            if not isinstance(plugin, dict):
+                continue
+            # Only plugins the user has installed are migrated.
+            if not plugin.get("installed", False):
+                continue
+            name = str(plugin.get("name") or "")
+            if not name:
+                continue
+            key = (name, market_name)
+            if key in seen:
+                continue
+            seen.add(key)
+            # Carry forward whatever 'enabled' codex reports — defaults to
+            # true for installed plugins.
+            out.append({
+                "name": name,
+                "marketplace": market_name,
+                "enabled": bool(plugin.get("enabled", True)),
+            })
+    return out, None
+
+
+def _build_hermes_tools_mcp_entry() -> dict:
+    """Build the codex stdio-transport entry that launches Hermes' own
+    tool surface as an MCP server. Codex's subprocess will call back into
+    this for browser/web/delegate_task/vision/memory/skills tools.
+
+    The command is the current ``sys.executable`` running
+    ``-m agent.transports.hermes_tools_mcp_server``, so whichever
+    interpreter is running Hermes is reused. HERMES_HOME and PYTHONPATH are
+    passed through when set so the spawned process sees the same config +
+    module layout the user is running. HERMES_REDACT_SECRETS is taken from
+    the parent environment when set and defaults to "true" otherwise.
+
+    Returns:
+        Dict with keys command, args, env, startup_timeout_sec and
+        tool_timeout_sec, in that order.
+    """
+    import sys
+
+    env: dict[str, str] = {}
+    # HERMES_HOME: same config / auth / sessions DB as the parent CLI.
+    # PYTHONPATH: lets a worktree-launched hermes find the branch's modules
+    # instead of the installed package.
+    for passthrough in ("HERMES_HOME", "PYTHONPATH"):
+        inherited = os.environ.get(passthrough)
+        if inherited:
+            env[passthrough] = inherited
+    # Quiet mode + redaction defaults so the MCP wire stays clean.
+    env["HERMES_QUIET"] = "1"
+    # Read the override from the process environment. The local `env` dict
+    # never contains this key, so looking it up there always yielded "true".
+    env["HERMES_REDACT_SECRETS"] = os.environ.get("HERMES_REDACT_SECRETS") or "true"
+
+    return {
+        "command": sys.executable,
+        "args": ["-m", "agent.transports.hermes_tools_mcp_server"],
+        "env": env,
+        # Generous timeouts — browser_navigate or delegate_task can take a
+        # while; we don't want codex's MCP client to give up too early.
+        "startup_timeout_sec": 30.0,
+        "tool_timeout_sec": 600.0,
+    }
+
+
+def _splice_managed_block(
+    user_text: str, managed_block: str, *, before_tables: bool
+) -> str:
+    """Merge the managed block into the user's (already stripped) TOML text.
+
+    With ``before_tables`` False the block is appended after the user's
+    content, separated by one blank line. With ``before_tables`` True the
+    block is inserted just before the first line that starts a table
+    (``[`` after leading whitespace), so bare keys at the top of the block
+    stay top-level instead of being absorbed by the user's last table.
+    Falls back to appending when the user text has no table header.
+
+    NOTE(review): header detection is line-based; a multi-line top-level
+    array whose continuation lines start with ``[`` would be mistaken for
+    a header.
+    """
+    if not user_text.strip():
+        return managed_block
+    if before_tables:
+        lines = user_text.splitlines(keepends=True)
+        for idx, line in enumerate(lines):
+            if not line.lstrip().startswith("["):
+                continue
+            head = "".join(lines[:idx])
+            tail = "".join(lines[idx:])
+            if not tail.endswith("\n"):
+                tail += "\n"
+            prefix = head.rstrip("\n") + "\n\n" if head.strip() else ""
+            return prefix + managed_block + "\n" + tail
+    return user_text.rstrip("\n") + "\n\n" + managed_block
+
+
+def migrate(
+    hermes_config: dict,
+    *,
+    codex_home: Optional[Path] = None,
+    dry_run: bool = False,
+    discover_plugins: bool = True,
+    default_permission_profile: Optional[str] = ":workspace",
+    expose_hermes_tools: bool = True,
+) -> MigrationReport:
+    """Translate Hermes mcp_servers config + Codex curated plugins into
+    ~/.codex/config.toml.
+
+    Args:
+        hermes_config: full ~/.hermes/config.yaml dict
+        codex_home: directory holding config.toml (defaults to ~/.codex)
+        dry_run: skip the actual write; report what would happen. Plugin
+            discovery is also skipped in a dry run.
+        discover_plugins: when True (default), query `plugin/list` against
+            the live codex CLI to migrate any installed curated plugins
+            into [plugins."<name>@<marketplace>"] entries. Set False to
+            skip the subprocess spawn (for tests or restricted environments).
+        default_permission_profile: when set (default ":workspace"), write
+            top-level `default_permissions = "<name>"` so users on this
+            runtime don't get an approval prompt on every write attempt.
+            Built-in codex profile names carry a leading ":".
+            render_codex_toml_section() prepends ":" to any name lacking
+            one, so a user-defined [permissions.<name>] profile cannot
+            currently be selected through this parameter. Set None to
+            leave permissions unset and let codex use its own default.
+        expose_hermes_tools: when True (default), register Hermes' own
+            tool surface (web_search, browser_*, delegate_task, vision,
+            memory, skills, etc.) as an MCP server named "hermes-tools" in
+            ~/.codex/config.toml so the codex subprocess can call back
+            into Hermes for tools codex doesn't have built in. This
+            replaces any user-defined server of the same name. Set False
+            to opt out.
+
+    Returns:
+        A MigrationReport. Failures (bad mcp_servers shape, unreadable or
+        unwritable target) are recorded in ``report.errors`` rather than
+        raised; ``report.written`` is True only after the file was replaced.
+    """
+    report = MigrationReport(dry_run=dry_run)
+    codex_home = codex_home or Path.home() / ".codex"
+    target = codex_home / "config.toml"
+    report.target_path = target
+
+    hermes_servers = (hermes_config or {}).get("mcp_servers") or {}
+    if not isinstance(hermes_servers, dict):
+        report.errors.append(
+            "mcp_servers in Hermes config is not a dict; cannot migrate."
+        )
+        return report
+
+    translated: dict[str, dict] = {}
+    for name, cfg in hermes_servers.items():
+        out, skipped = _translate_one_server(str(name), cfg or {})
+        if out is None:
+            report.errors.append(
+                f"server {name!r} skipped: {', '.join(skipped) or 'no transport configured'}"
+            )
+            continue
+        translated[str(name)] = out
+        if skipped:
+            report.skipped_keys_per_server[str(name)] = skipped
+        report.migrated.append(str(name))
+
+    # Discover installed Codex curated plugins. Best-effort — never blocks
+    # the migration if codex is unreachable or the RPC fails.
+    plugins: list[dict] = []
+    if discover_plugins and not dry_run:
+        plugins, plugin_err = _query_codex_plugins(codex_home=codex_home)
+        if plugin_err:
+            report.plugin_query_error = plugin_err
+        for p in plugins:
+            report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}")
+
+    # Track the requested default permission profile so the report
+    # surfaces it to the user.
+    if default_permission_profile:
+        report.wrote_permissions_default = default_permission_profile
+
+    # Inject Hermes' own tool surface as an MCP server so the spawned
+    # codex subprocess can call back into Hermes for the tools codex
+    # doesn't ship with — web_search, browser_*, delegate_task, vision,
+    # memory, skills, session_search, image_generate, text_to_speech.
+    # The server itself is agent/transports/hermes_tools_mcp_server.py
+    # and is launched on demand by codex (stdio MCP).
+    if expose_hermes_tools:
+        translated["hermes-tools"] = _build_hermes_tools_mcp_entry()
+        if "hermes-tools" not in report.migrated:
+            report.migrated.append("hermes-tools")
+
+    # Build the new managed block
+    managed_block = render_codex_toml_section(
+        translated, plugins=plugins,
+        default_permission_profile=default_permission_profile,
+    )
+
+    # Read existing codex config if any, strip the prior managed block,
+    # then merge the new one in.
+    if target.exists():
+        try:
+            existing = target.read_text(encoding="utf-8")
+        except Exception as exc:
+            report.errors.append(f"could not read {target}: {exc}")
+            return report
+        without_managed = _strip_existing_managed_block(existing)
+        # The block opens with a bare `default_permissions` key when a
+        # profile is requested. Appended after the user's tables, TOML
+        # would attribute that key to the user's last table, so in that
+        # case the block goes ahead of the first table header instead.
+        new_text = _splice_managed_block(
+            without_managed,
+            managed_block,
+            before_tables=bool(default_permission_profile),
+        )
+    else:
+        new_text = managed_block
+
+    if dry_run:
+        return report
+
+    try:
+        codex_home.mkdir(parents=True, exist_ok=True)
+        # Atomic write: write to a temp file in the same directory then
+        # rename, so a crash mid-write never leaves a half-written
+        # config.toml that codex would refuse to load.
+        import tempfile
+        tmp_fd, tmp_path_str = tempfile.mkstemp(
+            prefix=".config.toml.", dir=str(codex_home)
+        )
+        tmp_path = Path(tmp_path_str)
+        try:
+            with os.fdopen(tmp_fd, "w", encoding="utf-8") as fh:
+                fh.write(new_text)
+            tmp_path.replace(target)
+        except Exception:
+            # Clean up the temp file if the rename didn't happen.
+            try:
+                if tmp_path.exists():
+                    tmp_path.unlink()
+            except Exception:
+                pass
+            raise
+        report.written = True
+    except Exception as exc:
+        report.errors.append(f"could not write {target}: {exc}")
+    return report
diff --git a/hermes_cli/codex_runtime_switch.py b/hermes_cli/codex_runtime_switch.py
new file mode 100644
index 00000000000..b3adda12b54
--- /dev/null
+++ b/hermes_cli/codex_runtime_switch.py
@@ -0,0 +1,266 @@
+"""Shared logic for the /codex-runtime slash command.
+
+Toggles `model.openai_runtime` between "auto" (= chat_completions, Hermes'
+default) and "codex_app_server" (= hand turns to a codex subprocess).
+
+Both CLI (cli.py) and gateway (gateway/run.py) call into this module so the
+behavior stays identical across surfaces.
+
+The actual runtime resolution happens in hermes_cli.runtime_provider's
+_maybe_apply_codex_app_server_runtime() helper, which reads the persisted
+config value. This module just persists the value and reports the change.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+VALID_RUNTIMES = ("auto", "codex_app_server")
+
+
+@dataclass
+class CodexRuntimeStatus:
+    """Outcome of one /codex-runtime invocation, as returned by apply().
+    Callers render it per surface (CLI: Rich panels; gateway: text message)."""
+
+    success: bool  # False when enabling was blocked or persisting failed
+    new_value: Optional[str] = None  # requested/current runtime; None if blocked
+    old_value: Optional[str] = None  # runtime in effect before the call
+    message: str = ""  # human-readable text, may span several lines
+    requires_new_session: bool = False  # True only after an actual change
+    codex_binary_ok: bool = True  # only set by the status and blocked paths
+    codex_version: Optional[str] = None  # only set by the read-only status path
+
+
+def parse_args(arg_string: str) -> tuple[Optional[str], list[str]]:
+    """Interpret the text following `/codex-runtime`. Returns (value, errors).
+
+    Blank input → (None, []), meaning "just report the current state".
+    A runtime name or a friendly alias → (runtime, []); else (None, [error]).
+    """
+    # Human-friendly spellings mapped onto the two canonical runtime names.
+    aliases = dict.fromkeys(("on", "codex", "enable"), "codex_app_server")
+    aliases.update(dict.fromkeys(("off", "default", "disable", "hermes"), "auto"))
+    token = (arg_string or "").strip().lower()
+    if not token:
+        return None, []
+    if token in aliases:
+        return aliases[token], []
+    if token in VALID_RUNTIMES:
+        return token, []
+    # The hint lists the canonical names plus the two most common aliases.
+    return None, [
+        f"Unknown runtime {token!r}. Use one of: auto, codex_app_server, on, off"
+    ]
+
+
+def get_current_runtime(config: dict) -> str:
+    """Look up `model.openai_runtime` in `config`, normalised to lower case.
+    Falls back to 'auto' when the value is unset, empty or unrecognized."""
+    model_cfg = (config.get("model") or {}) if isinstance(config, dict) else None
+    if isinstance(model_cfg, dict):
+        # Case/whitespace-insensitive; only VALID_RUNTIMES entries pass through.
+        normalized = str(model_cfg.get("openai_runtime") or "").strip().lower()
+        if normalized in VALID_RUNTIMES:
+            return normalized
+    # Reached for a non-dict config, a non-dict `model` section, or any value
+    # outside VALID_RUNTIMES.
+    return "auto"
+
+
+def set_runtime(config: dict, new_value: str) -> str:
+    """Write `new_value` into config["model"]["openai_runtime"] in place and
+    return the previous runtime. Raises ValueError for unknown runtimes."""
+    if new_value not in VALID_RUNTIMES:
+        raise ValueError(f"invalid runtime {new_value!r}; must be one of {VALID_RUNTIMES}")
+    previous = get_current_runtime(config)
+    model_cfg = config.get("model")
+    # NOTE(review): a non-dict `model` entry is discarded here — confirm intended.
+    if not isinstance(model_cfg, dict):
+        model_cfg = config["model"] = {}
+    model_cfg["openai_runtime"] = new_value
+    return previous
+
+
+def check_codex_binary_ok() -> tuple[bool, Optional[str]]:
+    """Best-effort codex CLI check via check_codex_binary(). Returns
+    (ok, version_or_message). Never raises: failures map to (False, message)."""
+    try:
+        from agent.transports.codex_app_server import check_codex_binary
+
+        return check_codex_binary()
+    except Exception as exc:  # pragma: no cover
+        return False, f"codex check failed: {exc}"
+
+
+def apply(
+    config: dict,
+    new_value: Optional[str],
+    *,
+    persist_callback=None,
+) -> CodexRuntimeStatus:
+    """Top-level entry point used by both CLI and gateway handlers.
+
+    Args:
+        config: in-memory config dict; mutated when the runtime changes (and
+            left mutated even if persist_callback then raises).
+        new_value: desired runtime; None means "show current state only"
+        persist_callback: optional callable taking the mutated config dict
+            and persisting it to disk. Skipped when None (used by tests).
+
+    Returns: CodexRuntimeStatus describing the outcome.
+    Raises: ValueError (via set_runtime) if new_value is not a valid runtime.
+    """
+    current = get_current_runtime(config)
+
+    # Run the codex binary check at most once per apply() call; the enable
+    # path would otherwise repeat it for the gate and the success message.
+    # None = not yet checked; (bool, str) = result.
+    _binary_check: Optional[tuple[bool, Optional[str]]] = None
+
+    def _check_binary_cached() -> tuple[bool, Optional[str]]:
+        nonlocal _binary_check
+        if _binary_check is None:
+            _binary_check = check_codex_binary_ok()
+        return _binary_check
+
+    # Read-only call: just report state
+    if new_value is None:
+        ok, ver = _check_binary_cached()
+        if ok:
+            cli_state = f"OK {ver}" if ver else "OK"  # ver is Optional
+        else:
+            cli_state = "not available — " + (ver or "install with `npm i -g @openai/codex`")
+        msg = f"openai_runtime: {current}\ncodex CLI: {cli_state}"
+        return CodexRuntimeStatus(
+            success=True,
+            new_value=current,
+            old_value=current,
+            message=msg,
+            codex_binary_ok=ok,
+            codex_version=ver if ok else None,
+        )
+
+    # No change requested
+    if new_value == current:
+        return CodexRuntimeStatus(
+            success=True,
+            new_value=current,
+            old_value=current,
+            message=f"openai_runtime already set to {current}",
+        )
+
+    # If switching ON, verify codex CLI is installed before persisting — a
+    # toggle that silently fails on the first turn is the worst possible UX.
+    if new_value == "codex_app_server":
+        ok, ver_or_msg = _check_binary_cached()
+        if not ok:
+            return CodexRuntimeStatus(
+                success=False,
+                new_value=None,
+                old_value=current,
+                message=(
+                    "Cannot enable codex_app_server runtime: "
+                    f"{ver_or_msg or 'codex CLI not available'}\n"
+                    "Install with: npm i -g @openai/codex"
+                ),
+                codex_binary_ok=False,
+                codex_version=None,
+            )
+
+    set_runtime(config, new_value)
+    if persist_callback is not None:
+        try:
+            persist_callback(config)
+        except Exception as exc:
+            logger.exception("failed to persist openai_runtime change")
+            return CodexRuntimeStatus(
+                success=False,
+                new_value=new_value,
+                old_value=current,
+                message=f"updated config in memory but persist failed: {exc}",
+            )
+
+    msg_lines = [
+        f"openai_runtime: {current} → {new_value}",
+    ]
+    if new_value == "codex_app_server":
+        ok, ver = _check_binary_cached()
+        if ok and ver:
+            msg_lines.append(f"codex CLI: {ver}")
+        # Auto-migrate Hermes' MCP servers + Codex's installed curated
+        # plugins into ~/.codex/config.toml so the spawned codex subprocess
+        # sees the same tool surface and can call back into Hermes via MCP.
+        # Failures are non-fatal — the runtime change still proceeds.
+        try:
+            from hermes_cli.codex_runtime_plugin_migration import migrate
+            mig_report = migrate(config)
+            # Tools/MCP servers (excluding the hermes-tools callback,
+            # which is internal plumbing — surface separately).
+            user_servers = [
+                s for s in mig_report.migrated if s != "hermes-tools"
+            ]
+            if user_servers:
+                msg_lines.append(
+                    f"Migrated {len(user_servers)} MCP server(s): "
+                    f"{', '.join(user_servers)}"
+                )
+            # Native Codex plugin migration (Linear, GitHub, etc.)
+            if mig_report.migrated_plugins:
+                msg_lines.append(
+                    f"Migrated {len(mig_report.migrated_plugins)} native "
+                    f"Codex plugin(s): {', '.join(mig_report.migrated_plugins)}"
+                )
+            elif mig_report.plugin_query_error:
+                msg_lines.append(
+                    f"Codex plugin discovery skipped: "
+                    f"{mig_report.plugin_query_error}"
+                )
+            # Permissions + Hermes tool callback are always-on production
+            # bits the user benefits from knowing about.
+            if mig_report.wrote_permissions_default:
+                msg_lines.append(
+                    f"Default sandbox: {mig_report.wrote_permissions_default} "
+                    f"(no approval prompt on every write)"
+                )
+            if "hermes-tools" in mig_report.migrated:
+                msg_lines.append(
+                    "Hermes tool callback registered: codex can now use "
+                    "web_search, web_extract, browser_*, vision_analyze, "
+                    "image_generate, skill_view, skills_list, text_to_speech, "
+                    "kanban_* (worker + orchestrator) via MCP."
+                )
+                msg_lines.append(
+                    "  (delegate_task, memory, session_search, todo run "
+                    "only on the default Hermes runtime — they need the "
+                    "agent loop context.)"
+                )
+            msg_lines.append(f"  (config: {mig_report.target_path})")
+            for err in mig_report.errors:
+                msg_lines.append(f"⚠ MCP migration: {err}")
+        except Exception as exc:
+            msg_lines.append(f"⚠ MCP migration skipped: {exc}")
+        msg_lines.append(
+            "OpenAI/Codex turns now run through `codex app-server` "
+            "(terminal/file ops/patching inside Codex; "
+            "Hermes tools available via MCP callback)."
+        )
+        msg_lines.append(
+            "Effective on next session — current cached agent keeps "
+            "the prior runtime to preserve prompt cache."
+        )
+    else:
+        msg_lines.append("OpenAI/Codex turns will use the default Hermes runtime.")
+        msg_lines.append("Effective on next session.")
+    return CodexRuntimeStatus(
+        success=True,
+        new_value=new_value,
+        old_value=current,
+        message="\n".join(msg_lines),
+        requires_new_session=True,
+    )
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 56a62c85a0a..62790bf9c14 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -120,6 +120,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
                cli_only=True),
     CommandDef("model", "Switch model for this session", "Configuration",
                aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
+    CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
+               "Configuration", args_hint="[auto|codex_app_server]"),
     CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
                cli_only=True),
 
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 1652b72034c..4ac21ea4568 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -164,7 +164,18 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
         return "chat_completions"
 
 
-_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}
+_VALID_API_MODES = {
+    "chat_completions",
+    "codex_responses",
+    "anthropic_messages",
+    "bedrock_converse",
+    # Optional opt-in: hand the entire turn to a `codex app-server` subprocess
+    # so terminal/file-ops/patching/sandboxing run inside Codex's own runtime
+    # instead of Hermes' tool dispatch. Gated behind config key
+    # `model.openai_runtime == "codex_app_server"` AND provider in
+    # {"openai", "openai-codex"}. Default is unchanged.
+    "codex_app_server",
+}
 
 
 def _parse_api_mode(raw: Any) -> Optional[str]:
@@ -176,6 +187,32 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
     return None
 
 
+def _maybe_apply_codex_app_server_runtime(
+    *,
+    provider: str,
+    api_mode: str,
+    model_cfg: Optional[Dict[str, Any]],
+) -> str:
+    """Opt-in switch from the resolved api_mode to "codex_app_server".
+
+    The rewrite happens only when both hold:
+      * provider is "openai" or "openai-codex" (other providers such as
+        anthropic or openrouter are never rerouted through codex), and
+      * `model.openai_runtime` in config.yaml is "codex_app_server"
+        (compared case-insensitively, surrounding whitespace ignored).
+
+    In every other case — key unset, empty, "auto", or no model_cfg at all —
+    this is a no-op.
+
+    Returns the (possibly-rewritten) api_mode."""
+    eligible_providers = ("openai", "openai-codex")
+    if model_cfg and provider in eligible_providers:
+        requested = str(model_cfg.get("openai_runtime") or "").strip().lower()
+        if requested == "codex_app_server":
+            return requested
+    return api_mode
+
+
 def _resolve_runtime_from_pool_entry(
     *,
     provider: str,
@@ -293,6 +330,12 @@ def _resolve_runtime_from_pool_entry(
     if api_mode == "anthropic_messages" and provider in {"opencode-zen", "opencode-go"}:
         base_url = re.sub(r"/v1/?$", "", base_url)
 
+    # Optional opt-in: route OpenAI/Codex turns through `codex app-server`.
+    # Inert when `model.openai_runtime` is unset or "auto".
+    api_mode = _maybe_apply_codex_app_server_runtime(
+        provider=provider, api_mode=api_mode, model_cfg=model_cfg
+    )
+
     return {
         "provider": provider,
         "api_mode": api_mode,
diff --git a/run_agent.py b/run_agent.py
index f2f3379e0d7..f9eaee85af6 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1271,7 +1271,7 @@ class AIAgent:
         self.provider = provider_name or ""
         self.acp_command = acp_command or command
         self.acp_args = list(acp_args or args or [])
-        if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}:
+        if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse", "codex_app_server"}:
             self.api_mode = api_mode
         elif self.provider == "openai-codex":
             self.api_mode = "codex_responses"
@@ -4267,13 +4267,24 @@ class AIAgent:
                     # reconstruct auth from scratch -- producing the spurious
                     # "No LLM provider configured" warning at end of turn.
                     _parent_runtime = self._current_main_runtime()
+                    _parent_api_mode = _parent_runtime.get("api_mode") or None
+                    # The review fork needs to call agent-loop tools (memory,
+                    # skill_manage). Those tools require Hermes' own dispatch,
+                    # which the codex_app_server runtime bypasses entirely
+                    # (it runs the turn inside codex's subprocess). So when
+                    # the parent is on codex_app_server, downgrade the review
+                    # fork to codex_responses — same auth/credentials, but
+                    # talks to the OpenAI Responses API directly so Hermes
+                    # owns the loop and the agent-loop tools dispatch.
+                    if _parent_api_mode == "codex_app_server":
+                        _parent_api_mode = "codex_responses"
                     review_agent = AIAgent(
                         model=self.model,
                         max_iterations=16,
                         quiet_mode=True,
                         platform=self.platform,
                         provider=self.provider,
-                        api_mode=_parent_runtime.get("api_mode") or None,
+                        api_mode=_parent_api_mode,
                         base_url=_parent_runtime.get("base_url") or None,
                         api_key=_parent_runtime.get("api_key") or None,
                         credential_pool=getattr(self, "_credential_pool", None),
@@ -12115,6 +12126,20 @@ class AIAgent:
             except Exception:
                 pass
 
+        # Optional opt-in runtime: if api_mode == codex_app_server, hand the
+        # turn to the codex app-server subprocess (terminal/file ops/patching
+        # all run inside Codex). Default Hermes path is bypassed entirely.
+        # See agent/transports/codex_app_server_session.py for the adapter
+        # and references/codex-app-server-runtime.md for the rationale.
+        if self.api_mode == "codex_app_server":
+            return self._run_codex_app_server_turn(
+                user_message=user_message,
+                original_user_message=original_user_message,
+                messages=messages,
+                effective_task_id=effective_task_id,
+                should_review_memory=_should_review_memory,
+            )
+
         while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) or self._budget_grace_call:
             # Reset per-turn checkpoint dedup so each iteration can take one snapshot
             self._checkpoint_mgr.new_turn()
@@ -15554,6 +15579,130 @@ class AIAgent:
         result = self.run_conversation(message, stream_callback=stream_callback)
         return result["final_response"]
 
+    def _run_codex_app_server_turn(
+        self,
+        *,
+        user_message: str,
+        original_user_message: Any,
+        messages: List[Dict[str, Any]],
+        effective_task_id: str,
+        should_review_memory: bool = False,
+    ) -> Dict[str, Any]:
+        """Codex app-server runtime path. Hands the entire turn to a `codex
+        app-server` subprocess and projects its events back into Hermes'
+        messages list so memory/skill review keep working.
+
+        Called from run_conversation() when self.api_mode == "codex_app_server".
+        Returns the same dict shape as the chat_completions path.
+
+        Args:
+            user_message: passed to the session as the turn's user input.
+            original_user_message: forwarded to the external memory sync.
+            messages: conversation list; projected codex messages are appended
+                in place and the same list is returned.
+            effective_task_id: currently unused by this method.
+            should_review_memory: memory-review trigger decided by the caller.
+        """
+        from agent.transports.codex_app_server_session import CodexAppServerSession
+
+        # Lazy session: one CodexAppServerSession per AIAgent instance, spawned on
+        # first turn and reused; presumably closed by the _cleanup hook — confirm.
+        if not hasattr(self, "_codex_session") or self._codex_session is None:
+            cwd = getattr(self, "session_cwd", None) or os.getcwd()
+            # Approval callback: defer to Hermes' standard prompt flow if a CLI
+            # thread installed one; else None → codex-side fail-closed default.
+            try:
+                from tools.terminal_tool import _get_approval_callback
+                approval_callback = _get_approval_callback()
+            except Exception:
+                approval_callback = None
+            self._codex_session = CodexAppServerSession(
+                cwd=cwd,
+                approval_callback=approval_callback,
+            )
+
+        # NOTE: run_conversation() has ALREADY appended the user message to
+        # `messages` before dispatching here. Do NOT append again (duplicate).
+
+        try:
+            turn = self._codex_session.run_turn(user_input=user_message)
+        except Exception as exc:
+            logger.exception("codex app-server turn failed")
+            # The cached session is left in place; the next turn reuses it.
+            return {
+                "final_response": (
+                    f"Codex app-server turn failed: {exc}. "
+                    f"Fall back to default runtime with `/codex-runtime auto`."
+                ),
+                "messages": messages,
+                "api_calls": 0,
+                "completed": False,
+                "partial": True,
+                "error": str(exc),
+            }
+
+        # Splice projected messages into the conversation: standard {role, content,
+        # tool_calls, tool_call_id} entries, as curator.py / sessions DB expect.
+        if turn.projected_messages:
+            messages.extend(turn.projected_messages)
+
+        # Counter ticks for the self-improvement loop. _turns_since_memory and
+        # _user_turn_count are ALREADY incremented in run_conversation()'s
+        # pre-loop block — do NOT touch them here (double-count). Only
+        # _iters_since_skill is bumped, because the per-iteration loop is bypassed.
+        self._iters_since_skill = (
+            getattr(self, "_iters_since_skill", 0) + turn.tool_iterations
+        )
+
+        # Skill nudge is checked AFTER the bump, as on the chat_completions path.
+        should_review_skills = False
+        if (
+            self._skill_nudge_interval > 0
+            and self._iters_since_skill >= self._skill_nudge_interval
+            and "skill_manage" in self.valid_tool_names
+        ):
+            should_review_skills = True
+            self._iters_since_skill = 0
+
+        # External memory provider sync. Skipped on interrupt/error to avoid
+        # feeding partial transcripts to memory.
+        if not turn.interrupted and turn.error is None:
+            try:
+                self._sync_external_memory_for_turn(
+                    original_user_message=original_user_message,
+                    final_response=turn.final_text,
+                    interrupted=False,
+                )
+            except Exception:
+                logger.debug("external memory sync raised", exc_info=True)
+
+        # Background review fork: fires only when a trigger actually tripped AND
+        # there is a real final response from an uninterrupted turn.
+        if (
+            turn.final_text
+            and not turn.interrupted
+            and (should_review_memory or should_review_skills)
+        ):
+            try:
+                self._spawn_background_review(
+                    messages_snapshot=list(messages),
+                    review_memory=should_review_memory,
+                    review_skills=should_review_skills,
+                )
+            except Exception:
+                logger.debug("background review spawn raised", exc_info=True)
+
+        return {
+            "final_response": turn.final_text,
+            "messages": messages,
+            "api_calls": 1,  # one app-server "turn" maps to one logical API call
+            "completed": not turn.interrupted and turn.error is None,
+            "partial": turn.interrupted or turn.error is not None,
+            "error": turn.error,
+            "codex_thread_id": turn.thread_id,
+            "codex_turn_id": turn.turn_id,
+        }
+
 
 def main(
     query: str = None,
diff --git a/tests/agent/transports/test_codex_app_server_runtime.py b/tests/agent/transports/test_codex_app_server_runtime.py
new file mode 100644
index 00000000000..d12ac227254
--- /dev/null
+++ b/tests/agent/transports/test_codex_app_server_runtime.py
@@ -0,0 +1,243 @@
+"""Tests for the optional codex app-server runtime gate.
+
+These are unit tests for the api_mode rewriter and the wire-level transport
+module. They do NOT require the `codex` CLI to be installed — that's
+covered by a separate live test gated on `codex --version`.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from hermes_cli.runtime_provider import (
+    _VALID_API_MODES,
+    _maybe_apply_codex_app_server_runtime,
+)
+
+
+class TestApiModeRegistration:
+    """Guards the _VALID_API_MODES set: the new mode is present and none of
+    the pre-existing modes were dropped."""
+
+    def test_codex_app_server_is_a_valid_api_mode(self) -> None:
+        assert "codex_app_server" in _VALID_API_MODES
+
+    def test_existing_api_modes_still_present(self) -> None:
+        # Regression guard: editing the set must not lose earlier api_modes.
+        legacy_modes = {
+            "chat_completions",
+            "codex_responses",
+            "anthropic_messages",
+            "bedrock_converse",
+        }
+        assert legacy_modes <= _VALID_API_MODES
+
+
+class TestMaybeApplyCodexAppServerRuntime:
+    """The opt-in helper that rewrites api_mode → codex_app_server."""
+
+    @pytest.mark.parametrize(
+        "model_cfg",
+        [
+            None,  # no model section at all
+            {},  # empty model section
+            {"openai_runtime": ""},
+            {"openai_runtime": "auto"},
+            {"openai_runtime": "AUTO"},  # value is lower-cased before comparing
+            {"other_key": "codex_app_server"},  # wrong key
+        ],
+    )
+    def test_default_off_for_openai(self, model_cfg) -> None:
+        """Default behavior is preserved when the flag is unset/auto."""
+        got = _maybe_apply_codex_app_server_runtime(
+            provider="openai", api_mode="chat_completions", model_cfg=model_cfg
+        )
+        assert got == "chat_completions"
+
+    def test_opt_in_rewrites_openai(self) -> None:
+        got = _maybe_apply_codex_app_server_runtime(
+            provider="openai",
+            api_mode="chat_completions",
+            model_cfg={"openai_runtime": "codex_app_server"},
+        )
+        assert got == "codex_app_server"
+
+    def test_opt_in_rewrites_openai_codex(self) -> None:
+        got = _maybe_apply_codex_app_server_runtime(
+            provider="openai-codex",
+            api_mode="codex_responses",
+            model_cfg={"openai_runtime": "codex_app_server"},
+        )
+        assert got == "codex_app_server"
+
+    def test_case_insensitive(self) -> None:
+        got = _maybe_apply_codex_app_server_runtime(
+            provider="openai",
+            api_mode="chat_completions",
+            model_cfg={"openai_runtime": "Codex_App_Server"},
+        )
+        assert got == "codex_app_server"
+
+    @pytest.mark.parametrize(
+        "provider",
+        [
+            "anthropic",
+            "openrouter",
+            "xai",
+            "qwen-oauth",
+            "google-gemini-cli",
+            "opencode-zen",
+            "bedrock",
+            "",  # empty provider name is not eligible either
+        ],
+    )
+    def test_other_providers_never_rerouted(self, provider) -> None:
+        """Non-OpenAI providers MUST NOT be rerouted even with the flag set —
+        codex's app-server can only run OpenAI/Codex auth flows."""
+        got = _maybe_apply_codex_app_server_runtime(
+            provider=provider,
+            api_mode="anthropic_messages",
+            model_cfg={"openai_runtime": "codex_app_server"},
+        )
+        assert got == "anthropic_messages", (
+            f"provider={provider!r} should not be rerouted to codex_app_server"
+        )
+
+
+class TestCodexAppServerModule:
+    """Module-surface tests for the JSON-RPC speaker. Don't require codex CLI."""
+
+    def test_module_imports(self) -> None:
+        from agent.transports import codex_app_server
+
+        assert codex_app_server.MIN_CODEX_VERSION >= (0, 1, 0)  # tuple compare
+        assert callable(codex_app_server.parse_codex_version)
+        assert callable(codex_app_server.check_codex_binary)
+
+    def test_parse_codex_version_valid(self) -> None:
+        from agent.transports.codex_app_server import parse_codex_version
+
+        assert parse_codex_version("codex-cli 0.130.0") == (0, 130, 0)
+        assert parse_codex_version("codex-cli 1.2.3 (extra metadata)") == (1, 2, 3)
+        assert parse_codex_version("codex 99.0.1\n") == (99, 0, 1)
+
+    def test_parse_codex_version_invalid(self) -> None:
+        from agent.transports.codex_app_server import parse_codex_version
+
+        assert parse_codex_version("nope") is None
+        assert parse_codex_version("") is None
+        assert parse_codex_version(None) is None  # type: ignore[arg-type]
+
+    def test_check_binary_handles_missing_executable(self) -> None:
+        from agent.transports.codex_app_server import check_codex_binary
+
+        ok, msg = check_codex_binary(codex_bin="/nonexistent/codex/binary/path")
+        assert ok is False
+        assert "not found" in msg.lower() or "no such" in msg.lower()  # either wording
+
+    def test_codex_error_class_is_runtimeerror(self) -> None:
+        from agent.transports.codex_app_server import CodexAppServerError
+
+        err = CodexAppServerError(code=-32600, message="boom")
+        assert isinstance(err, RuntimeError)
+        assert "boom" in str(err)
+        assert "-32600" in str(err)
+
+
+class TestSpawnEnvIsolation:
+    """The codex spawn must NOT rewrite HOME — codex's shell tool spawns
+    subprocesses (gh, git, npm, aws, gcloud, ...) that need to find their
+    config in the real user $HOME. CODEX_HOME isolates codex's own state,
+    HOME stays unchanged.
+
+    OpenClaw hit this footgun (openclaw/openclaw#81562) — they were
+    rewriting HOME to a synthetic per-agent dir alongside CODEX_HOME,
+    and then `gh auth status` / git config / etc. all broke inside codex
+    shell calls. We avoid the same bug by only overlaying CODEX_HOME and
+    RUST_LOG on top of os.environ.copy().
+    """
+
+    def test_spawn_env_preserves_HOME(self, monkeypatch):
+        """The spawn env must contain the parent process's HOME unchanged.
+        Verifies via a subprocess-monkey-patch."""
+        import subprocess
+        from agent.transports import codex_app_server as cas
+
+        captured = {}  # filled by FakePopen with the env passed at spawn time
+
+        class FakePopen:
+            def __init__(self, cmd, *args, **kwargs):
+                captured["env"] = kwargs.get("env", {}).copy()
+                # Provide minimal Popen surface so __init__ doesn't crash
+                # on attribute access during construction.
+                self.stdin = None
+                self.stdout = None
+                self.stderr = None
+                self.pid = 1
+                self.returncode = None
+
+            def poll(self):
+                return None
+
+            def terminate(self):
+                pass
+
+            def wait(self, timeout=None):
+                return 0
+
+            def kill(self):
+                pass
+
+        monkeypatch.setattr(subprocess, "Popen", FakePopen)
+        monkeypatch.setenv("HOME", "/users/alice")
+
+        client = cas.CodexAppServerClient(codex_bin="codex")
+        client._closed = True  # so close() is a no-op
+
+        # The spawn env must have HOME=/users/alice unchanged
+        assert captured["env"].get("HOME") == "/users/alice", (
+            f"HOME got rewritten in codex spawn env: "
+            f"{captured['env'].get('HOME')!r}. Codex's shell tool's "
+            "subprocesses (gh, git, aws, npm) need the user's real HOME."
+        )
+
+    def test_spawn_env_sets_CODEX_HOME_when_provided(self, monkeypatch):
+        """CODEX_HOME isolation must still work — that's the whole point
+        of the codex_home arg."""
+        import subprocess
+        from agent.transports import codex_app_server as cas
+
+        captured = {}  # filled by FakePopen with the env passed at spawn time
+
+        class FakePopen:  # same stub as in test_spawn_env_preserves_HOME
+            def __init__(self, cmd, *args, **kwargs):
+                captured["env"] = kwargs.get("env", {}).copy()
+                self.stdin = None
+                self.stdout = None
+                self.stderr = None
+                self.pid = 1
+                self.returncode = None
+
+            def poll(self):
+                return None
+
+            def terminate(self):
+                pass
+
+            def wait(self, timeout=None):
+                return 0
+
+            def kill(self):
+                pass
+
+        monkeypatch.setattr(subprocess, "Popen", FakePopen)
+        monkeypatch.setenv("HOME", "/users/alice")
+
+        client = cas.CodexAppServerClient(
+            codex_bin="codex", codex_home="/tmp/profile/codex"
+        )
+        client._closed = True  # so close() is a no-op
+
+        assert captured["env"].get("CODEX_HOME") == "/tmp/profile/codex"
+        # And HOME still passes through unchanged
+        assert captured["env"].get("HOME") == "/users/alice"
diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py
new file mode 100644
index 00000000000..de0b2f60cb8
--- /dev/null
+++ b/tests/agent/transports/test_codex_app_server_session.py
@@ -0,0 +1,502 @@
+"""Tests for CodexAppServerSession — drive turns through a mock client.
+
+The session adapter has the most complex behavior of the three new modules:
+notification draining, server-request handling (approvals), interrupt,
+deadline timeouts. These tests pin all of that without spawning real codex.
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+from typing import Any, Optional
+
+import pytest
+
+from agent.transports.codex_app_server_session import (
+    CodexAppServerSession,
+    TurnResult,
+    _ServerRequestRouting,
+    _approval_choice_to_codex_decision,
+)
+
+
+class FakeClient:
+    """Stand-in for CodexAppServerClient that records calls and lets the test
+    drive the notification / server-request streams synchronously."""
+
+    def __init__(self, *, codex_bin: str = "codex", codex_home=None) -> None:
+        self.codex_bin = codex_bin
+        self.codex_home = codex_home
+        self.requests: list[tuple[str, dict]] = []
+        self.notifications_responses: list[dict] = []
+        self.responses: list[tuple[Any, dict]] = []
+        self.error_responses: list[tuple[Any, int, str]] = []
+        self._initialized = False
+        self._closed = False
+        self._notifications: list[dict] = []
+        self._server_requests: list[dict] = []
+        self._request_handler = None  # Optional[Callable[[str, dict], dict]]
+
+    # API matching CodexAppServerClient
+    def initialize(self, **kwargs):
+        self._initialized = True
+        return {"userAgent": "fake/0.0.0", "codexHome": "/tmp",
+                "platformOs": "linux", "platformFamily": "unix"}
+
+    def request(self, method: str, params: Optional[dict] = None, timeout: float = 30.0):
+        self.requests.append((method, params or {}))
+        if self._request_handler is not None:
+            return self._request_handler(method, params or {})
+        # Sensible defaults for protocol methods used by the session
+        if method == "thread/start":
+            return {"thread": {"id": "thread-fake-001"},
+                    "activePermissionProfile": {"id": "workspace-write"}}
+        if method == "turn/start":
+            return {"turn": {"id": "turn-fake-001"}}
+        if method == "turn/interrupt":
+            return {}
+        return {}
+
+    def notify(self, method: str, params=None):
+        pass
+
+    def respond(self, request_id, result):
+        self.responses.append((request_id, result))
+
+    def respond_error(self, request_id, code, message, data=None):
+        self.error_responses.append((request_id, code, message))
+
+    def take_notification(self, timeout: float = 0.0):
+        if self._notifications:
+            return self._notifications.pop(0)
+        # Sleep at most 1 ms so the polling loop doesn't hot-spin. The real
+        # client blocks on a queue; the fake caps the wait to keep tests quick.
+        if timeout > 0:
+            time.sleep(min(timeout, 0.001))
+        return None
+
+    def take_server_request(self, timeout: float = 0.0):
+        if self._server_requests:
+            return self._server_requests.pop(0)
+        return None
+
+    def close(self):
+        self._closed = True
+
+    # Test helpers
+    def queue_notification(self, method: str, **params):
+        self._notifications.append({"method": method, "params": params})
+
+    def queue_server_request(self, method: str, request_id: Any = "srv-1", **params):
+        self._server_requests.append({"id": request_id, "method": method, "params": params})
+
+
+def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession:
+    return CodexAppServerSession(
+        cwd="/tmp",
+        client_factory=lambda **kw: client,
+        **kwargs,
+    )
+
+
+# ---- choice mapping ----
+
+class TestApprovalChoiceMapping:
+    @pytest.mark.parametrize("choice,expected", [
+        ("once", "accept"),
+        ("session", "acceptForSession"),
+        ("always", "acceptForSession"),
+        ("deny", "decline"),
+        ("anything-else", "decline"),
+    ])
+    def test_mapping(self, choice, expected):
+        assert _approval_choice_to_codex_decision(choice) == expected
+
+
+# ---- lifecycle ----
+
+class TestLifecycle:
+    def test_ensure_started_is_idempotent(self):
+        """Repeat ensure_started() calls reuse the thread; thread/start is sent once."""
+        client = FakeClient()
+        s = make_session(client)
+        tid_a = s.ensure_started()
+        tid_b = s.ensure_started()
+        assert tid_a == tid_b == "thread-fake-001"
+        method_calls = [m for (m, _) in client.requests if m == "thread/start"]
+        assert len(method_calls) == 1
+
+    def test_thread_start_passes_cwd_only(self):
+        """thread/start carries cwd. We intentionally do NOT pass `permissions`
+        on this codex version (experimentalApi-gated + requires matching
+        config.toml [permissions] table). Letting codex use its default
+        (read-only unless user configures otherwise) is the documented path."""
+        client = FakeClient()
+        s = make_session(client, permission_profile="workspace-write")
+        s.ensure_started()
+        _, params = next(r for r in client.requests if r[0] == "thread/start")
+        assert params["cwd"] == "/tmp"
+        assert "permissions" not in params  # see session.ensure_started() comment
+
+    def test_close_idempotent(self):
+        client = FakeClient()
+        s = make_session(client)
+        s.ensure_started()
+        s.close()
+        s.close()
+        assert client._closed is True
+
+
+# ---- turn loop ----
+
+class TestRunTurn:
+    def test_simple_text_turn_returns_final_message(self):
+        client = FakeClient()
+        client.queue_notification("turn/started", threadId="t", turn={"id": "tu1"})
+        client.queue_notification(
+            "item/completed",
+            item={"type": "agentMessage", "id": "m1", "text": "hello world"},
+            threadId="t", turnId="tu1",
+        )
+        client.queue_notification(
+            "turn/completed",
+            threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        s = make_session(client)
+        r = s.run_turn("hi", turn_timeout=2.0)
+        assert r.final_text == "hello world"
+        assert r.interrupted is False
+        assert r.error is None
+        assert any(m["role"] == "assistant" and m.get("content") == "hello world"
+                   for m in r.projected_messages)
+        # turn_id propagated for downstream session-DB linkage
+        assert r.turn_id == "turn-fake-001"
+
+    def test_tool_iteration_counter_ticks(self):
+        client = FakeClient()
+        # Two completed exec items + one final agent message
+        for i, item_id in enumerate(("ex1", "ex2"), start=1):
+            client.queue_notification(
+                "item/completed",
+                item={
+                    "type": "commandExecution", "id": item_id,
+                    "command": f"cmd{i}", "cwd": "/tmp",
+                    "status": "completed", "aggregatedOutput": "ok",
+                    "exitCode": 0, "commandActions": [],
+                },
+                threadId="t", turnId="tu1",
+            )
+        client.queue_notification(
+            "item/completed",
+            item={"type": "agentMessage", "id": "m1", "text": "done"},
+            threadId="t", turnId="tu1",
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        s = make_session(client)
+        r = s.run_turn("do stuff", turn_timeout=2.0)
+        assert r.tool_iterations == 2
+        # Each tool item produces (assistant, tool) — 2*2 + final assistant = 5 msgs
+        assert len(r.projected_messages) == 5
+
+    def test_turn_start_failure_returns_error(self):
+        client = FakeClient()
+        from agent.transports.codex_app_server import CodexAppServerError
+
+        def boom(method, params):
+            if method == "turn/start":
+                raise CodexAppServerError(code=-32600, message="bad input")
+            return {"thread": {"id": "t"}, "activePermissionProfile": {"id": "x"}}
+
+        client._request_handler = boom
+        s = make_session(client)
+        r = s.run_turn("hi", turn_timeout=2.0)
+        assert r.error is not None
+        assert "bad input" in r.error
+        assert r.final_text == ""
+
+    def test_interrupt_during_turn_issues_turn_interrupt(self):
+        client = FakeClient()
+        # Don't queue turn/completed — the loop has to interrupt out
+        client.queue_notification(
+            "item/completed",
+            item={"type": "commandExecution", "id": "x", "command": "sleep 60",
+                  "cwd": "/", "status": "inProgress",
+                  "aggregatedOutput": None, "exitCode": None,
+                  "commandActions": []},
+            threadId="t", turnId="tu1",
+        )
+        s = make_session(client)
+        s.ensure_started()
+        # Trip the interrupt before run_turn even consumes the notification.
+        # The loop will see interrupt set on its first iteration and bail.
+        s.request_interrupt()
+        r = s.run_turn("loop forever", turn_timeout=2.0)
+        assert r.interrupted is True
+        # turn/interrupt was requested with the right turnId
+        assert any(
+            method == "turn/interrupt" and params.get("turnId") == "turn-fake-001"
+            for (method, params) in client.requests
+        )
+
+    def test_deadline_exceeded_records_error(self):
+        client = FakeClient()
+        # No notifications and no completion → must hit deadline
+        s = make_session(client)
+        r = s.run_turn("never finishes", turn_timeout=0.05,
+                       notification_poll_timeout=0.01)
+        assert r.interrupted is True
+        assert r.error and "timed out" in r.error
+
+    def test_failed_turn_records_error_from_turn_completed(self):
+        client = FakeClient()
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "failed",
+                  "error": {"message": "model error"}},
+        )
+        s = make_session(client)
+        r = s.run_turn("x", turn_timeout=1.0)
+        assert r.error and "model error" in r.error
+
+
+# ---- approval bridge ----
+
+class TestServerRequestRouting:
+    def test_exec_approval_with_callback_approves_once(self):
+        client = FakeClient()
+        client.queue_server_request(
+            "item/commandExecution/requestApproval", request_id="req-1",
+            command="ls /tmp", cwd="/tmp",
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+
+        captured: dict = {}
+
+        def cb(command, description, *, allow_permanent=True):
+            captured["command"] = command
+            captured["description"] = description
+            return "once"
+
+        s = make_session(client, approval_callback=cb)
+        s.run_turn("hi", turn_timeout=1.0)
+        assert captured["command"] == "ls /tmp"
+        # The session must have responded to the server request with "accept"
+        assert ("req-1", {"decision": "accept"}) in client.responses
+
+    def test_exec_approval_no_callback_denies(self):
+        client = FakeClient()
+        client.queue_server_request("item/commandExecution/requestApproval", request_id="req-1",
+                                    command="rm -rf /", cwd="/")
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        s = make_session(client)  # no approval_callback wired
+        s.run_turn("hi", turn_timeout=1.0)
+        assert ("req-1", {"decision": "decline"}) in client.responses
+
+    def test_apply_patch_approval_session_maps_to_session_decision(self):
+        client = FakeClient()
+        client.queue_server_request(
+            "item/fileChange/requestApproval", request_id="req-2",
+            itemId="fc-1",
+            turnId="t1",
+            threadId="th",
+            startedAtMs=1234567890,
+            reason="create new file with hello() function",
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+
+        def cb(command, description, *, allow_permanent=True):
+            return "session"
+
+        s = make_session(client, approval_callback=cb)
+        s.run_turn("hi", turn_timeout=1.0)
+        assert ("req-2", {"decision": "acceptForSession"}) in client.responses
+
+    def test_unknown_server_request_replied_with_error(self):
+        client = FakeClient()
+        client.queue_server_request("totally/unknown", request_id="req-3")
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        s = make_session(client)
+        s.run_turn("hi", turn_timeout=1.0)
+        assert any(
+            rid == "req-3" and code == -32601
+            for (rid, code, _msg) in client.error_responses
+        )
+
+    def test_mcp_elicitation_for_hermes_tools_auto_accepts(self):
+        """When codex elicits on behalf of hermes-tools (our own callback),
+        accept automatically — the user already opted in by enabling the
+        runtime."""
+        client = FakeClient()
+        client.queue_server_request(
+            "mcpServer/elicitation/request", request_id="elic-1",
+            threadId="t", turnId="tu1",
+            serverName="hermes-tools",
+            mode="form",
+            message="confirm",
+            requestedSchema={"type": "object", "properties": {}},
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        s = make_session(client)
+        s.run_turn("hi", turn_timeout=1.0)
+        assert ("elic-1", {"action": "accept", "content": None, "_meta": None}) in client.responses
+
+    def test_mcp_elicitation_for_other_servers_declines(self):
+        """For third-party MCP servers we decline by default so users
+        explicitly opt in through codex's own UI."""
+        client = FakeClient()
+        client.queue_server_request(
+            "mcpServer/elicitation/request", request_id="elic-2",
+            threadId="t", turnId="tu1",
+            serverName="some-third-party",
+            mode="url",
+            message="please log in",
+            url="https://example.com/oauth",
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        s = make_session(client)
+        s.run_turn("hi", turn_timeout=1.0)
+        assert ("elic-2", {"action": "decline", "content": None, "_meta": None}) in client.responses
+
+    def test_routing_auto_approve_bypass(self):
+        client = FakeClient()
+        client.queue_server_request("item/commandExecution/requestApproval", request_id="r1",
+                                    command="ls", cwd="/")
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        # No callback, but routing says auto-approve. Should approve.
+        s = make_session(client, request_routing=_ServerRequestRouting(
+            auto_approve_exec=True))
+        s.run_turn("hi", turn_timeout=1.0)
+        assert ("r1", {"decision": "accept"}) in client.responses
+
+    def test_callback_raises_falls_back_to_decline(self):
+        client = FakeClient()
+        client.queue_server_request("item/commandExecution/requestApproval", request_id="r1",
+                                    command="ls", cwd="/")
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+
+        def boom(*a, **kw):
+            raise RuntimeError("ui crashed")
+
+        s = make_session(client, approval_callback=boom)
+        s.run_turn("hi", turn_timeout=1.0)
+        # Fail-closed: deny on callback exception
+        assert ("r1", {"decision": "decline"}) in client.responses
+
+
+# ---- enriched approval prompts ----
+
+class TestApprovalPromptEnrichment:
+    """Quirk #4: apply_patch prompt should show what's changing.
+    Quirk #10: exec prompt should never show empty cwd."""
+
+    def test_exec_falls_back_to_session_cwd(self):
+        """When codex omits cwd from the approval params, the prompt shows
+        the session cwd, not an empty string."""
+        client = FakeClient()
+        client.queue_server_request(
+            "item/commandExecution/requestApproval", request_id="r1",
+            command="ls",  # no cwd
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        captured = {}
+        def cb(command, description, *, allow_permanent=True):
+            captured["description"] = description
+            return "once"
+        s = make_session(client, approval_callback=cb)
+        s.run_turn("hi", turn_timeout=1.0)
+        # Session cwd is /tmp by default in make_session()
+        assert "/tmp" in captured["description"]
+        assert "Codex requests exec in <unknown>" not in captured["description"]
+
+    def test_apply_patch_prompt_summarizes_pending_changes(self):
+        """When the projector has cached the fileChange item from item/started,
+        the approval prompt surfaces the change summary."""
+        client = FakeClient()
+        # item/started fires first (carries the changes), then approval request
+        client.queue_notification(
+            "item/started",
+            item={"type": "fileChange", "id": "fc-1",
+                  "changes": [
+                      {"kind": {"type": "add"}, "path": "/tmp/new.py"},
+                      {"kind": {"type": "update"}, "path": "/tmp/old.py"},
+                  ]},
+            threadId="t", turnId="tu1",
+        )
+        client.queue_server_request(
+            "item/fileChange/requestApproval", request_id="req-2",
+            itemId="fc-1", turnId="tu1", threadId="t",
+            startedAtMs=1234567890,
+            reason="add and update files",
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        captured = {}
+        def cb(command, description, *, allow_permanent=True):
+            captured["command"] = command
+            captured["description"] = description
+            return "once"
+        s = make_session(client, approval_callback=cb)
+        s.run_turn("hi", turn_timeout=1.0)
+        # Both add and update kinds should be in the summary
+        assert "1 add" in captured["command"] or "1 add" in captured["description"]
+        assert "1 update" in captured["command"] or "1 update" in captured["description"]
+        # And at least one of the paths
+        joined = captured["command"] + " " + captured["description"]
+        assert "/tmp/new.py" in joined or "/tmp/old.py" in joined
+
+    def test_apply_patch_prompt_works_without_cached_summary(self):
+        """When approval arrives before item/started (or without changes
+        info), prompt falls back to whatever codex provided."""
+        client = FakeClient()
+        client.queue_server_request(
+            "item/fileChange/requestApproval", request_id="req-2",
+            itemId="fc-orphan", turnId="tu1", threadId="t",
+            startedAtMs=1234567890,
+            reason="apply some changes",
+        )
+        client.queue_notification(
+            "turn/completed", threadId="t",
+            turn={"id": "tu1", "status": "completed", "error": None},
+        )
+        captured = {}
+        def cb(command, description, *, allow_permanent=True):
+            captured["command"] = command
+            return "once"
+        s = make_session(client, approval_callback=cb)
+        s.run_turn("hi", turn_timeout=1.0)
+        # Falls back to the reason
+        assert "apply some changes" in captured["command"]
diff --git a/tests/agent/transports/test_codex_event_projector.py b/tests/agent/transports/test_codex_event_projector.py
new file mode 100644
index 00000000000..04980f35c61
--- /dev/null
+++ b/tests/agent/transports/test_codex_event_projector.py
@@ -0,0 +1,303 @@
+"""Tests for CodexEventProjector — codex item/* events → Hermes messages list.
+
+Drives projection against fixture notifications captured from codex 0.130.0
+plus synthetic ones for item types we couldn't auth-test live."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from agent.transports.codex_event_projector import (
+    CodexEventProjector,
+    ProjectionResult,
+    _deterministic_call_id,
+    _format_tool_args,
+)
+
+
+# --- Fixture: real `commandExecution` notification captured from codex 0.130.0
+COMMAND_EXEC_COMPLETED = {
+    "method": "item/completed",
+    "params": {
+        "item": {
+            "type": "commandExecution",
+            "id": "f8a75c66-a89e-4fd7-8bcf-2d58e664fa9e",
+            "command": "/bin/bash -lc 'echo hello && ls /tmp | head -3'",
+            "cwd": "/tmp",
+            "processId": None,
+            "source": "userShell",
+            "status": "completed",
+            "commandActions": [
+                {"type": "listFiles", "command": "ls /tmp", "path": "tmp"}
+            ],
+            "aggregatedOutput": "hello\naa_lang.json\n",
+            "exitCode": 0,
+            "durationMs": 10,
+        },
+        "threadId": "019e1a94-352b-71e1-b214-e5c67c9ec190",
+        "turnId": "019e1a94-3553-7940-8af3-4ca57142deb7",
+        "completedAtMs": 1778562381151,
+    },
+}
+
+
+class TestProjectionInvariants:
+    """Universal invariants that must hold across all projection paths."""
+
+    def test_streaming_deltas_dont_materialize(self) -> None:
+        p = CodexEventProjector()
+        for delta_method in (
+            "item/commandExecution/outputDelta",
+            "item/agentMessage/delta",
+            "item/reasoning/delta",
+        ):
+            r = p.project({"method": delta_method, "params": {"delta": "x"}})
+            assert r.messages == [], (
+                f"{delta_method} should NOT produce messages — only "
+                f"item/completed materializes"
+            )
+            assert r.is_tool_iteration is False
+            assert r.final_text is None
+
+    def test_turn_started_and_completed_are_silent(self) -> None:
+        p = CodexEventProjector()
+        for method in ("turn/started", "turn/completed", "thread/started"):
+            r = p.project({"method": method, "params": {}})
+            assert r.messages == []
+
+    def test_unknown_method_silent(self) -> None:
+        p = CodexEventProjector()
+        r = p.project({"method": "totally/unknown", "params": {}})
+        assert r.messages == []
+
+
+class TestCommandExecutionProjection:
+    """Real captured notification → assistant tool_call + tool result."""
+
+    def test_command_completed_produces_two_messages(self) -> None:
+        p = CodexEventProjector()
+        r = p.project(COMMAND_EXEC_COMPLETED)
+        assert len(r.messages) == 2
+        assert r.is_tool_iteration is True
+
+    def test_first_message_is_assistant_tool_call(self) -> None:
+        p = CodexEventProjector()
+        msgs = p.project(COMMAND_EXEC_COMPLETED).messages
+        assistant = msgs[0]
+        assert assistant["role"] == "assistant"
+        assert assistant["content"] is None
+        assert len(assistant["tool_calls"]) == 1
+        tc = assistant["tool_calls"][0]
+        assert tc["type"] == "function"
+        assert tc["function"]["name"] == "exec_command"
+        args = json.loads(tc["function"]["arguments"])
+        assert "echo hello" in args["command"]
+        assert args["cwd"] == "/tmp"
+
+    def test_second_message_is_tool_result_correlating_by_id(self) -> None:
+        p = CodexEventProjector()
+        msgs = p.project(COMMAND_EXEC_COMPLETED).messages
+        assistant, tool = msgs
+        assert tool["role"] == "tool"
+        assert tool["tool_call_id"] == assistant["tool_calls"][0]["id"]
+        assert "hello" in tool["content"]
+
+    def test_nonzero_exit_code_annotated_in_tool_result(self) -> None:
+        item = {**COMMAND_EXEC_COMPLETED["params"]["item"], "exitCode": 2,
+                "aggregatedOutput": "boom"}
+        notif = {
+            "method": "item/completed",
+            "params": {**COMMAND_EXEC_COMPLETED["params"], "item": item},
+        }
+        p = CodexEventProjector()
+        msgs = p.project(notif).messages
+        assert "[exit 2]" in msgs[1]["content"]
+        assert "boom" in msgs[1]["content"]
+
+    def test_deterministic_call_id_across_replay(self) -> None:
+        # Same item id → same call_id (prefix cache must stay valid).
+        p1 = CodexEventProjector()
+        p2 = CodexEventProjector()
+        a = p1.project(COMMAND_EXEC_COMPLETED).messages
+        b = p2.project(COMMAND_EXEC_COMPLETED).messages
+        assert a[0]["tool_calls"][0]["id"] == b[0]["tool_calls"][0]["id"]
+
+
+class TestAgentMessageProjection:
+    """assistant text → final_text + assistant message."""
+
+    def test_agent_message_projects_to_assistant(self) -> None:
+        p = CodexEventProjector()
+        r = p.project({
+            "method": "item/completed",
+            "params": {"item": {"type": "agentMessage", "id": "x",
+                                "text": "hi there"}},
+        })
+        assert r.final_text == "hi there"
+        assert r.messages == [{"role": "assistant", "content": "hi there"}]
+        assert r.is_tool_iteration is False
+
+    def test_pending_reasoning_attaches_to_next_assistant_message(self) -> None:
+        p = CodexEventProjector()
+        # First a reasoning item lands
+        r1 = p.project({
+            "method": "item/completed",
+            "params": {"item": {"type": "reasoning", "id": "r1",
+                                "summary": ["thinking..."],
+                                "content": ["step 1", "step 2"]}},
+        })
+        assert r1.messages == []  # reasoning alone produces no message
+        # Then the assistant message
+        r2 = p.project({
+            "method": "item/completed",
+            "params": {"item": {"type": "agentMessage", "id": "a1",
+                                "text": "ok"}},
+        })
+        assistant = r2.messages[0]
+        assert "reasoning" in assistant
+        assert "thinking" in assistant["reasoning"]
+        assert "step 1" in assistant["reasoning"]
+
+    def test_reasoning_consumed_after_attaching(self) -> None:
+        p = CodexEventProjector()
+        p.project({"method": "item/completed", "params": {"item": {
+            "type": "reasoning", "id": "r1", "summary": ["once"], "content": []}}})
+        first = p.project({"method": "item/completed", "params": {"item": {
+            "type": "agentMessage", "id": "a", "text": "first"}}}).messages[0]
+        second = p.project({"method": "item/completed", "params": {"item": {
+            "type": "agentMessage", "id": "b", "text": "second"}}}).messages[0]
+        assert "reasoning" in first
+        assert "reasoning" not in second
+
+
+class TestFileChangeProjection:
+    def test_file_change_summary_no_inlined_content(self) -> None:
+        item = {
+            "type": "fileChange",
+            "id": "fc1",
+            "status": "applied",
+            "changes": [
+                {"kind": {"type": "add"}, "path": "/tmp/new.py"},
+                {"kind": {"type": "update"}, "path": "/tmp/old.py"},
+            ],
+        }
+        p = CodexEventProjector()
+        msgs = p.project({"method": "item/completed",
+                          "params": {"item": item}}).messages
+        assert len(msgs) == 2
+        tc = msgs[0]["tool_calls"][0]
+        assert tc["function"]["name"] == "apply_patch"
+        args = json.loads(tc["function"]["arguments"])
+        assert len(args["changes"]) == 2
+        assert all("kind" in c and "path" in c for c in args["changes"])
+        assert "applied" in msgs[1]["content"]
+
+
+class TestMcpToolCallProjection:
+    def test_mcp_tool_call_namespaced(self) -> None:
+        item = {
+            "type": "mcpToolCall",
+            "id": "m1",
+            "server": "obsidian",
+            "tool": "search_notes",
+            "status": "completed",
+            "arguments": {"query": "hermes"},
+            "result": {"content": [{"text": "found"}]},
+            "error": None,
+        }
+        msgs = CodexEventProjector().project(
+            {"method": "item/completed", "params": {"item": item}}
+        ).messages
+        assert msgs[0]["tool_calls"][0]["function"]["name"] == "mcp.obsidian.search_notes"
+        assert "found" in msgs[1]["content"]
+
+    def test_mcp_error_surfaced(self) -> None:
+        item = {
+            "type": "mcpToolCall", "id": "m2",
+            "server": "x", "tool": "y", "status": "failed",
+            "arguments": {}, "result": None,
+            "error": {"code": -1, "message": "no"},
+        }
+        msgs = CodexEventProjector().project(
+            {"method": "item/completed", "params": {"item": item}}
+        ).messages
+        assert "error" in msgs[1]["content"]
+
+
+class TestUserAndOpaqueProjection:
+    def test_user_message_text_fragments_only(self) -> None:
+        item = {
+            "type": "userMessage", "id": "u1",
+            "content": [
+                {"type": "text", "text": "hello"},
+                {"type": "image", "url": "http://x/y"},
+                {"type": "text", "text": "world"},
+            ],
+        }
+        msgs = CodexEventProjector().project(
+            {"method": "item/completed", "params": {"item": item}}
+        ).messages
+        assert msgs[0]["role"] == "user"
+        assert "hello" in msgs[0]["content"]
+        assert "world" in msgs[0]["content"]
+
+    def test_opaque_item_recorded_without_fabricated_tool_calls(self) -> None:
+        item = {"type": "plan", "id": "p1", "text": "do the thing"}
+        msgs = CodexEventProjector().project(
+            {"method": "item/completed", "params": {"item": item}}
+        ).messages
+        assert len(msgs) == 1
+        assert msgs[0]["role"] == "assistant"
+        assert "plan" in msgs[0]["content"].lower()
+        assert "tool_calls" not in msgs[0]
+
+
+class TestHelpers:
+    def test_deterministic_call_id_stable(self) -> None:
+        assert _deterministic_call_id("exec", "abc") == _deterministic_call_id("exec", "abc")
+        assert _deterministic_call_id("exec", "abc") != _deterministic_call_id("exec", "xyz")
+
+    def test_deterministic_call_id_handles_missing_id(self) -> None:
+        # Should not raise, should be stable for same item type
+        a = _deterministic_call_id("exec", "")
+        b = _deterministic_call_id("exec", "")
+        assert a == b
+        assert "exec" in a
+
+    def test_format_tool_args_sorted_keys(self) -> None:
+        # Sorted keys = deterministic across replays = prefix cache stays valid
+        a = _format_tool_args({"b": 1, "a": 2})
+        b = _format_tool_args({"a": 2, "b": 1})
+        assert a == b
+
+
+class TestRoleAlternationInvariant:
+    """The projector must never emit two assistant messages back-to-back from
+    one item — that breaks Hermes' message alternation invariant."""
+
+    @pytest.mark.parametrize(
+        "item",
+        [
+            {"type": "commandExecution", "id": "c1", "command": "x",
+             "cwd": "/", "status": "completed", "aggregatedOutput": "",
+             "exitCode": 0, "commandActions": []},
+            {"type": "fileChange", "id": "f1", "status": "applied",
+             "changes": []},
+            {"type": "mcpToolCall", "id": "m1", "server": "s", "tool": "t",
+             "status": "completed", "arguments": {}, "result": None,
+             "error": None},
+            {"type": "dynamicToolCall", "id": "d1", "tool": "x",
+             "arguments": {}, "status": "completed",
+             "contentItems": [], "success": True},
+        ],
+    )
+    def test_tool_items_emit_assistant_then_tool(self, item) -> None:
+        msgs = CodexEventProjector().project(
+            {"method": "item/completed", "params": {"item": item}}
+        ).messages
+        assert len(msgs) == 2
+        assert msgs[0]["role"] == "assistant"
+        assert msgs[1]["role"] == "tool"
+        assert msgs[1]["tool_call_id"] == msgs[0]["tool_calls"][0]["id"]
diff --git a/tests/agent/transports/test_hermes_tools_mcp_server.py b/tests/agent/transports/test_hermes_tools_mcp_server.py
new file mode 100644
index 00000000000..3c11cb3f81d
--- /dev/null
+++ b/tests/agent/transports/test_hermes_tools_mcp_server.py
@@ -0,0 +1,135 @@
+"""Tests for the hermes-tools-as-MCP server module surface.
+
+We don't run a live MCP session in unit tests — that requires the codex
+subprocess + client + an event loop. These tests pin the static
+contract: the module imports, the EXPOSED_TOOLS list is sane, and the
+build helper assembles a server when the SDK is present.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+
+class TestModuleSurface:
+    def test_module_imports_clean(self):
+        from agent.transports import hermes_tools_mcp_server as m
+        assert callable(m.main)
+        assert callable(m._build_server)
+        assert isinstance(m.EXPOSED_TOOLS, tuple)
+        assert len(m.EXPOSED_TOOLS) > 0
+
+    def test_exposed_tools_are_safe_subset(self):
+        """We MUST NOT expose tools codex already has, because codex'
+        own builtins are better-integrated with its sandbox + approvals.
+        Specifically: no terminal/shell, no read_file/write_file/patch,
+        no search_files, no process — codex has built-in equivalents."""
+        from agent.transports.hermes_tools_mcp_server import EXPOSED_TOOLS
+        forbidden = {
+            "terminal", "shell", "read_file", "write_file", "patch",
+            "search_files", "process",
+        }
+        leaked = forbidden & set(EXPOSED_TOOLS)  # forbidden names that slipped in
+        assert not leaked, (
+            f"these tools must NOT be exposed via the codex callback "
+            f"because codex has built-in equivalents: {leaked}"
+        )
+
+    def test_expected_hermes_specific_tools_listed(self):
+        """The Hermes-specific tools should be present so users on the
+        codex runtime keep access to them."""
+        from agent.transports.hermes_tools_mcp_server import EXPOSED_TOOLS
+        for required in (
+            "web_search",
+            "web_extract",
+            "browser_navigate",
+            "vision_analyze",
+            "image_generate",
+            "skill_view",
+        ):
+            assert required in EXPOSED_TOOLS, f"missing {required!r}"
+
+    def test_agent_loop_tools_not_exposed(self):
+        """delegate_task / memory / session_search / todo require the
+        running AIAgent context to dispatch, so a stateless MCP callback
+        can't drive them. They must NOT be in EXPOSED_TOOLS."""
+        from agent.transports.hermes_tools_mcp_server import EXPOSED_TOOLS
+        for agent_loop_tool in ("delegate_task", "memory", "session_search", "todo"):
+            assert agent_loop_tool not in EXPOSED_TOOLS, (
+                f"{agent_loop_tool!r} requires the agent loop context "
+                "and can't be reached through a stateless MCP callback"
+            )
+
+    def test_kanban_worker_tools_exposed(self):
+        """Kanban workers run as `hermes chat -q` subprocesses; if they
+        come up on the codex_app_server runtime, the worker can do the
+        actual work via codex's shell but needs the kanban tools through
+        the MCP callback to report back to the kernel. Without these
+        tools available, the worker would hang at completion time."""
+        from agent.transports.hermes_tools_mcp_server import EXPOSED_TOOLS
+        # Worker handoff tools — every dispatched worker uses at least
+        # one of {complete, block, comment} to close out its task.
+        for worker_tool in (
+            "kanban_complete",
+            "kanban_block",
+            "kanban_comment",
+            "kanban_heartbeat",
+        ):
+            assert worker_tool in EXPOSED_TOOLS, (
+                f"{worker_tool!r} missing from codex callback — kanban "
+                "workers on codex_app_server runtime would hang"
+            )
+
+    def test_kanban_orchestrator_tools_exposed(self):
+        """Orchestrator agents need to dispatch new tasks, query the
+        board, and unblock/link tasks. Exposed so an orchestrator on
+        codex_app_server can do its job."""
+        from agent.transports.hermes_tools_mcp_server import EXPOSED_TOOLS
+        for orch_tool in (
+            "kanban_create",
+            "kanban_show",
+            "kanban_list",
+            "kanban_unblock",
+            "kanban_link",
+        ):
+            assert orch_tool in EXPOSED_TOOLS, (
+                f"{orch_tool!r} missing from codex callback"
+            )
+
+
+class TestMain:
+    def test_main_returns_2_when_mcp_unavailable(self, monkeypatch):
+        """An ImportError raised while building the server (mcp package
+        missing) must turn into exit status 2, not an uncaught crash."""
+        import agent.transports.hermes_tools_mcp_server as server_mod
+
+        def missing_sdk(*args, **kwargs):
+            raise ImportError("mcp not installed")
+
+        monkeypatch.setattr(server_mod, "_build_server", missing_sdk)
+        exit_code = server_mod.main(["--verbose"])
+        assert exit_code == 2
+
+    def test_main_handles_keyboard_interrupt(self, monkeypatch):
+        import agent.transports.hermes_tools_mcp_server as server_mod
+
+        class InterruptedServer:
+            def run(self):
+                raise KeyboardInterrupt()
+
+        monkeypatch.setattr(server_mod, "_build_server", lambda: InterruptedServer())
+        exit_code = server_mod.main([])
+        assert exit_code == 0
+
+    def test_main_returns_1_on_runtime_error(self, monkeypatch):
+        import agent.transports.hermes_tools_mcp_server as server_mod
+
+        class BrokenServer:
+            def run(self):
+                raise RuntimeError("boom")
+
+        monkeypatch.setattr(server_mod, "_build_server", lambda: BrokenServer())
+        exit_code = server_mod.main([])
+        assert exit_code == 1
diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
new file mode 100644
index 00000000000..0274251327c
--- /dev/null
+++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py
@@ -0,0 +1,589 @@
+"""Tests for the codex MCP plugin migration helper."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from hermes_cli.codex_runtime_plugin_migration import (
+    MIGRATION_MARKER,
+    MigrationReport,
+    _format_toml_value,
+    _strip_existing_managed_block,
+    _translate_one_server,
+    migrate,
+    render_codex_toml_section,
+)
+
+
+# ---- per-server translation ----
+
+class TestTranslateOneServer:
+    def test_stdio_basic(self):
+        cfg, skipped = _translate_one_server("filesystem", {
+            "command": "npx",
+            "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
+            "env": {"FOO": "bar"},
+        })
+        assert cfg == {
+            "command": "npx",
+            "args": ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
+            "env": {"FOO": "bar"},
+        }
+        assert skipped == []
+
+    def test_stdio_with_cwd(self):
+        cfg, _ = _translate_one_server("custom", {
+            "command": "/usr/bin/myserver",
+            "cwd": "/var/lib/mcp",
+        })
+        assert cfg["cwd"] == "/var/lib/mcp"
+
+    def test_http_basic(self):
+        cfg, skipped = _translate_one_server("api", {
+            "url": "https://x.example/mcp",
+            "headers": {"Authorization": "Bearer abc"},
+        })
+        assert cfg == {
+            "url": "https://x.example/mcp",
+            "http_headers": {"Authorization": "Bearer abc"},  # "headers" is renamed
+        }
+        assert skipped == []
+
+    def test_sse_falls_under_streamable_http_with_warning(self):
+        cfg, skipped = _translate_one_server("sse_server", {
+            "url": "http://localhost:8000/sse",
+            "transport": "sse",
+        })
+        assert cfg["url"] == "http://localhost:8000/sse"
+        assert any("sse" in s.lower() for s in skipped)
+
+    def test_timeouts_translate(self):
+        cfg, _ = _translate_one_server("x", {
+            "command": "y",
+            "timeout": 180,
+            "connect_timeout": 30,
+        })
+        assert cfg["tool_timeout_sec"] == 180.0  # from "timeout"
+        assert cfg["startup_timeout_sec"] == 30.0  # from "connect_timeout"
+
+    def test_non_numeric_timeout_skipped(self):
+        cfg, skipped = _translate_one_server("x", {
+            "command": "y",
+            "timeout": "not-a-number",
+        })
+        assert "tool_timeout_sec" not in cfg
+        assert any("timeout" in s and "numeric" in s for s in skipped)
+
+    def test_disabled_server_emits_enabled_false(self):
+        cfg, _ = _translate_one_server("x", {
+            "command": "y",
+            "enabled": False,
+        })
+        assert cfg["enabled"] is False
+
+    def test_enabled_true_omitted(self):
+        cfg, _ = _translate_one_server("x", {"command": "y", "enabled": True})
+        assert "enabled" not in cfg  # codex defaults to true
+
+    def test_command_and_url_prefers_stdio_warns(self):
+        cfg, skipped = _translate_one_server("x", {
+            "command": "y", "url": "http://z",
+        })
+        assert "command" in cfg
+        assert "url" not in cfg
+        assert any("url" in s for s in skipped)
+
+    def test_no_transport_returns_none(self):
+        cfg, skipped = _translate_one_server("broken", {"description": "x"})
+        assert cfg is None
+        assert "no command or url" in skipped[0]
+
+    def test_sampling_dropped_with_warning(self):
+        cfg, skipped = _translate_one_server("x", {
+            "command": "y",
+            "sampling": {"enabled": True, "model": "gemini-3-flash"},
+        })
+        assert "sampling" not in cfg
+        assert any("sampling" in s for s in skipped)
+
+    def test_unknown_keys_warned(self):
+        cfg, skipped = _translate_one_server("x", {
+            "command": "y",
+            "totally_made_up_key": "value",
+        })
+        assert "totally_made_up_key" not in cfg
+        assert any("totally_made_up_key" in s for s in skipped)
+
+    def test_non_dict_input(self):
+        cfg, skipped = _translate_one_server("x", "notadict")  # type: ignore[arg-type]
+        assert cfg is None
+
+
+# ---- TOML rendering ----
+
+class TestTomlValueFormatter:
+    def test_string_quoted(self):
+        assert _format_toml_value("hello") == '"hello"'
+
+    def test_string_with_quotes_escaped(self):
+        assert _format_toml_value('a"b') == '"a\\"b"'
+
+    def test_bool(self):
+        assert _format_toml_value(True) == "true"
+        assert _format_toml_value(False) == "false"
+
+    def test_int(self):
+        assert _format_toml_value(42) == "42"
+
+    def test_float(self):
+        assert _format_toml_value(180.0) == "180.0"
+
+    def test_list_of_strings(self):
+        assert _format_toml_value(["a", "b"]) == '["a", "b"]'
+
+    def test_inline_table(self):
+        out = _format_toml_value({"FOO": "bar"})
+        assert out == '{ FOO = "bar" }'
+
+    def test_empty_inline_table(self):
+        assert _format_toml_value({}) == "{}"
+
+    def test_string_with_newline_escaped(self):
+        """TOML basic strings don't allow literal newlines — a path or
+        env var containing a newline must use \\n. Otherwise codex would
+        refuse to load the config."""
+        out = _format_toml_value("line one\nline two")
+        assert "\n" not in out  # no raw newline in output
+        assert "\\n" in out
+
+    def test_string_with_tab_escaped(self):
+        out = _format_toml_value("col1\tcol2")
+        assert "\t" not in out
+        assert "\\t" in out
+
+    def test_string_with_other_controls_escaped(self):
+        for raw, expected in [
+            ("\r", "\\r"),
+            ("\f", "\\f"),
+            ("\b", "\\b"),
+        ]:
+            out = _format_toml_value(f"x{raw}y")
+            assert raw not in out, f"{raw!r} should be escaped"
+            assert expected in out, f"{expected!r} should be in output"
+
+    def test_windows_path_escaped_correctly(self):
+        out = _format_toml_value(r"C:\Users\Alice\.codex")
+        # Each backslash should be doubled
+        assert out == r'"C:\\Users\\Alice\\.codex"'
+
+    def test_atomic_write_no_temp_leak_on_success(self, tmp_path):
+        """The atomic-write path uses tempfile.mkstemp + rename. On
+        success the temp file should not be left behind."""
+        migrate({"mcp_servers": {"x": {"command": "y"}}},
+                codex_home=tmp_path,
+                discover_plugins=False,
+                expose_hermes_tools=False,
+                default_permission_profile=None)
+        # config.toml should exist
+        assert (tmp_path / "config.toml").exists()
+        # And no .config.toml.* temp files left behind
+        leftover = [p.name for p in tmp_path.iterdir()
+                    if p.name.startswith(".config.toml.")]
+        assert leftover == [], f"temp file leaked after migration: {leftover}"
+
+    def test_atomic_write_cleanup_on_rename_failure(self, tmp_path, monkeypatch):
+        """If rename fails partway through (out of disk, permissions,
+        crash), the temp file must be cleaned up. Otherwise repeated
+        failed migrations would pile up .config.toml.* files."""
+        # No need to stash the real Path.replace: monkeypatch.setattr undoes
+        # the patch itself when the test finishes.
+
+        def failing_replace(self, target):
+            raise OSError("simulated disk full")
+
+        monkeypatch.setattr(Path, "replace", failing_replace)
+        report = migrate(
+            {"mcp_servers": {"x": {"command": "y"}}},
+            codex_home=tmp_path,
+            discover_plugins=False,
+            expose_hermes_tools=False,
+            default_permission_profile=None,
+        )
+        # Error surfaced
+        assert any("simulated disk full" in e for e in report.errors)
+        # And no leaked temp file
+        leftover = [p.name for p in tmp_path.iterdir()
+                    if p.name.startswith(".config.toml.")]
+        assert leftover == [], f"temp files leaked: {leftover}"
+
+    def test_unsupported_type_raises(self):
+        with pytest.raises(ValueError):
+            _format_toml_value(object())
+
+
+class TestRenderToml:
+    def test_starts_with_marker(self):
+        rendered = render_codex_toml_section({})
+        assert rendered.startswith(MIGRATION_MARKER)
+
+    def test_empty_servers_emits_placeholder(self):
+        rendered = render_codex_toml_section({})
+        assert "no MCP servers" in rendered
+
+    def test_servers_sorted_alphabetically(self):
+        rendered = render_codex_toml_section({
+            "zoo": {"command": "z"},
+            "alpha": {"command": "a"},
+            "middle": {"command": "m"},
+        })
+        # Header offsets must strictly increase; a find() miss (-1) fails too
+        alpha_at = rendered.find("[mcp_servers.alpha]")
+        middle_at = rendered.find("[mcp_servers.middle]")
+        zoo_at = rendered.find("[mcp_servers.zoo]")
+        assert 0 < alpha_at < middle_at < zoo_at
+
+    def test_server_with_args_and_env(self):
+        rendered = render_codex_toml_section({
+            "fs": {
+                "command": "npx",
+                "args": ["-y", "filesystem"],
+                "env": {"PATH": "/usr/bin"},
+            }
+        })
+        assert "[mcp_servers.fs]" in rendered
+        assert 'command = "npx"' in rendered
+        assert 'args = ["-y", "filesystem"]' in rendered
+        # The env mapping is expected as a single-line inline table
+        assert 'env = { PATH = "/usr/bin" }' in rendered
+
+
+# ---- existing-block stripping ----
+
+class TestStripExistingManagedBlock:
+    def test_no_managed_block_unchanged(self):
+        untouched = "[other]\nfoo = 1\n"
+        assert _strip_existing_managed_block(untouched) == untouched
+
+    def test_strips_managed_block_alone(self):
+        managed_only = (
+            f"{MIGRATION_MARKER}\n"
+            "\n"
+            "[mcp_servers.fs]\n"
+            'command = "npx"\n'
+        )
+        assert _strip_existing_managed_block(managed_only).strip() == ""
+
+    def test_preserves_user_content_above_managed_block(self):
+        user_then_managed = (
+            "[model]\n"
+            'name = "gpt-5.5"\n'
+            "\n"
+            f"{MIGRATION_MARKER}\n"
+            "[mcp_servers.fs]\n"
+            'command = "x"\n'
+        )
+        stripped = _strip_existing_managed_block(user_then_managed)
+        assert "[model]" in stripped
+        assert 'name = "gpt-5.5"' in stripped
+        assert "mcp_servers.fs" not in stripped
+
+    def test_preserves_unrelated_section_after_managed_block(self):
+        managed_then_user = (
+            f"{MIGRATION_MARKER}\n"
+            "[mcp_servers.fs]\n"
+            'command = "x"\n'
+            "\n"
+            "[providers]\n"
+            'foo = "bar"\n'
+        )
+        stripped = _strip_existing_managed_block(managed_then_user)
+        assert "mcp_servers.fs" not in stripped
+        assert "[providers]" in stripped
+        assert 'foo = "bar"' in stripped
+
+
+# ---- end-to-end migrate() ----
+
+class TestMigrate:
+    def test_no_servers_no_plugins_no_perms_writes_placeholder(self, tmp_path):
+        report = migrate({}, codex_home=tmp_path,
+                         discover_plugins=False,
+                         default_permission_profile=None, expose_hermes_tools=False)
+        assert report.written
+        text = (tmp_path / "config.toml").read_text()
+        assert MIGRATION_MARKER in text
+        assert "no MCP servers" in text  # also matches the longer ", plugins, or permissions" wording
+
+    def test_no_servers_still_writes_permissions_default(self, tmp_path):
+        """Even with zero MCP servers, enabling the runtime should write the
+        default permissions profile so users don't get prompted on every
+        write attempt. This is the fix for quirk #2."""
+        report = migrate({}, codex_home=tmp_path, discover_plugins=False, expose_hermes_tools=False)
+        assert report.written
+        text = (tmp_path / "config.toml").read_text()
+        # Codex's schema: top-level `default_permissions` keying a built-in
+        # profile name (prefixed with ":"). NOT a [permissions] section
+        # (which is for *user-defined* profiles with structured fields).
+        assert 'default_permissions = ":workspace"' in text
+        assert report.wrote_permissions_default == ":workspace"
+
+    def test_explicit_none_permissions_skips_block(self, tmp_path):
+        report = migrate({"mcp_servers": {"x": {"command": "y"}}},
+                         codex_home=tmp_path,
+                         discover_plugins=False,
+                         default_permission_profile=None, expose_hermes_tools=False)
+        text = (tmp_path / "config.toml").read_text()
+        assert "default_permissions" not in text
+        assert "[permissions]" not in text
+        assert report.wrote_permissions_default is None
+
+    def test_plugin_discovery_writes_plugin_blocks(self, tmp_path, monkeypatch):
+        """Discovered curated plugins land as [plugins."<name>@<marketplace>"]
+        blocks. This is what OpenClaw calls 'migrate native codex plugins.'"""
+        from hermes_cli import codex_runtime_plugin_migration as crpm
+
+        def fake_query(codex_home=None, timeout=8.0):
+            return [
+                {"name": "google-calendar", "marketplace": "openai-curated",
+                 "enabled": True},
+                {"name": "github", "marketplace": "openai-curated",
+                 "enabled": True},
+            ], None
+        monkeypatch.setattr(crpm, "_query_codex_plugins", fake_query)
+
+        report = migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False)
+        text = (tmp_path / "config.toml").read_text()
+        assert '[plugins."github@openai-curated"]' in text
+        assert '[plugins."google-calendar@openai-curated"]' in text
+        assert "enabled = true" in text
+        assert "google-calendar@openai-curated" in report.migrated_plugins
+        assert "github@openai-curated" in report.migrated_plugins
+
+    def test_plugin_discovery_failure_non_fatal(self, tmp_path, monkeypatch):
+        """If codex isn't installed or RPC fails, MCP migration still
+        completes. The error surfaces in the report but doesn't abort."""
+        from hermes_cli import codex_runtime_plugin_migration as crpm
+
+        def fake_query_fails(codex_home=None, timeout=8.0):
+            return [], "codex CLI not available"
+        monkeypatch.setattr(crpm, "_query_codex_plugins", fake_query_fails)
+
+        report = migrate({"mcp_servers": {"x": {"command": "y"}}},
+                         codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False)
+        assert report.written
+        assert report.migrated == ["x"]
+        assert report.plugin_query_error == "codex CLI not available"
+        assert report.migrated_plugins == []
+
+    def test_discover_plugins_false_skips_query(self, tmp_path, monkeypatch):
+        """Tests and restricted environments can opt out of the subprocess
+        spawn entirely."""
+        from hermes_cli import codex_runtime_plugin_migration as crpm
+
+        called = {"yes": False}
+        def boom(*a, **kw):
+            called["yes"] = True
+            return [], None
+        monkeypatch.setattr(crpm, "_query_codex_plugins", boom)
+
+        migrate({"mcp_servers": {"x": {"command": "y"}}},
+                codex_home=tmp_path, discover_plugins=False, expose_hermes_tools=False)
+        assert called["yes"] is False
+
+    def test_dry_run_skips_plugin_query(self, tmp_path, monkeypatch):
+        """Dry run should never spawn codex. Even with discover_plugins=True
+        the query is skipped because dry_run takes precedence."""
+        from hermes_cli import codex_runtime_plugin_migration as crpm
+
+        called = {"yes": False}
+        def boom(*a, **kw):
+            called["yes"] = True
+            return [], None
+        monkeypatch.setattr(crpm, "_query_codex_plugins", boom)
+
+        migrate({"mcp_servers": {"x": {"command": "y"}}},
+                codex_home=tmp_path, dry_run=True, discover_plugins=True, expose_hermes_tools=False)
+        assert called["yes"] is False
+
+    def test_re_run_replaces_plugin_block(self, tmp_path, monkeypatch):
+        """Plugin blocks are managed and re-runs should replace them
+        cleanly — same idempotency contract as MCP servers."""
+        from hermes_cli import codex_runtime_plugin_migration as crpm
+
+        # First run: only github
+        monkeypatch.setattr(crpm, "_query_codex_plugins",
+                            lambda codex_home=None, timeout=8.0: (
+                                [{"name": "github", "marketplace": "openai-curated", "enabled": True}],
+                                None,
+                            ))
+        migrate({}, codex_home=tmp_path, discover_plugins=True,
+                default_permission_profile=None, expose_hermes_tools=False)
+        first = (tmp_path / "config.toml").read_text()
+        assert "github@openai-curated" in first
+
+        # Second run: only canva (github went away)
+        monkeypatch.setattr(crpm, "_query_codex_plugins",
+                            lambda codex_home=None, timeout=8.0: (
+                                [{"name": "canva", "marketplace": "openai-curated", "enabled": True}],
+                                None,
+                            ))
+        migrate({}, codex_home=tmp_path, discover_plugins=True,
+                default_permission_profile=None, expose_hermes_tools=False)
+        second = (tmp_path / "config.toml").read_text()
+        assert "github@openai-curated" not in second
+        assert "canva@openai-curated" in second
+
+    def test_expose_hermes_tools_writes_callback_mcp_entry(self, tmp_path):
+        """When expose_hermes_tools=True (production default), an
+        [mcp_servers.hermes-tools] entry is written so codex calls back
+        into Hermes for its web/browser/vision/image/skill/kanban tools.
+
+        This is the fix for 'all other tools that codex doesn't provide
+        should be useable by hermes' — quirk #7."""
+        report = migrate({}, codex_home=tmp_path,
+                         discover_plugins=False,
+                         default_permission_profile=None,
+                         expose_hermes_tools=True)
+        text = (tmp_path / "config.toml").read_text()
+        assert "[mcp_servers.hermes-tools]" in text
+        assert "hermes_tools_mcp_server" in text
+        # Must include startup + tool timeouts so codex doesn't give up
+        assert "startup_timeout_sec" in text
+        assert "tool_timeout_sec" in text
+        # And the entry is reported
+        assert "hermes-tools" in report.migrated
+
+    def test_expose_hermes_tools_disabled_skips_entry(self, tmp_path):
+        """expose_hermes_tools=False suppresses the callback registration."""
+        migrate({}, codex_home=tmp_path,
+                discover_plugins=False,
+                default_permission_profile=None,
+                expose_hermes_tools=False)
+        text = (tmp_path / "config.toml").read_text()
+        assert "[mcp_servers.hermes-tools]" not in text
+        assert "hermes_tools_mcp_server" not in text
+
+    def test_dry_run_doesnt_write(self, tmp_path):
+        report = migrate({"mcp_servers": {"x": {"command": "y"}}},
+                         codex_home=tmp_path, dry_run=True, expose_hermes_tools=False)
+        assert report.dry_run is True
+        assert not (tmp_path / "config.toml").exists()
+        assert "x" in report.migrated
+
+    def test_full_migration_round_trip(self, tmp_path):
+        hermes_cfg = {
+            "mcp_servers": {
+                "filesystem": {
+                    "command": "npx",
+                    "args": ["-y", "@modelcontextprotocol/server-filesystem"],
+                },
+                "github": {
+                    "url": "https://api.github.com/mcp",
+                    "headers": {"Authorization": "Bearer x"},
+                },
+            }
+        }
+        report = migrate(hermes_cfg, codex_home=tmp_path, expose_hermes_tools=False)
+        assert report.written
+        text = (tmp_path / "config.toml").read_text()
+        assert "[mcp_servers.filesystem]" in text
+        assert "[mcp_servers.github]" in text
+        assert 'command = "npx"' in text
+        assert 'url = "https://api.github.com/mcp"' in text
+
+    def test_idempotent_re_run_replaces_managed_block(self, tmp_path):
+        # First migration
+        migrate({"mcp_servers": {"a": {"command": "x"}}}, codex_home=tmp_path, expose_hermes_tools=False)
+        first_text = (tmp_path / "config.toml").read_text()
+        assert "[mcp_servers.a]" in first_text
+        # Second migration with different servers
+        migrate({"mcp_servers": {"b": {"command": "y"}}}, codex_home=tmp_path, expose_hermes_tools=False)
+        second_text = (tmp_path / "config.toml").read_text()
+        assert "[mcp_servers.a]" not in second_text
+        assert "[mcp_servers.b]" in second_text
+
+    def test_preserves_user_codex_config_above_marker(self, tmp_path):
+        target = tmp_path / "config.toml"
+        target.write_text(
+            "[model]\n"
+            'profile = "default"\n'
+            "\n"
+            "[providers.openai]\n"
+            'api_key = "sk-test"\n'
+        )
+        migrate({"mcp_servers": {"a": {"command": "x"}}}, codex_home=tmp_path, expose_hermes_tools=False)
+        new_text = target.read_text()
+        # User's codex config preserved
+        assert "[model]" in new_text
+        assert 'profile = "default"' in new_text
+        assert "[providers.openai]" in new_text
+        # And new MCP block appended
+        assert "[mcp_servers.a]" in new_text
+        assert MIGRATION_MARKER in new_text
+
+    def test_preserves_user_mcp_server_outside_managed_block(self, tmp_path):
+        """Quirk #6: when a user adds their own MCP server entry directly
+        to ~/.codex/config.toml outside Hermes' managed block, re-running
+        migration must preserve it. Tested both above and below the
+        managed block."""
+        target = tmp_path / "config.toml"
+        target.write_text(
+            "[mcp_servers.user-above]\n"
+            'command = "/usr/bin/above-server"\n'
+            'args = ["--above"]\n'
+        )
+        # First migrate — adds managed block below user content
+        migrate({"mcp_servers": {"hermes-mcp": {"command": "npx"}}},
+                codex_home=tmp_path, discover_plugins=False,
+                expose_hermes_tools=False)
+        text = target.read_text()
+        assert "user-above" in text, "user MCP server above managed block got nuked"
+        assert 'command = "/usr/bin/above-server"' in text
+
+        # Append another user entry below the managed block
+        target.write_text(
+            text + "\n[mcp_servers.user-below]\ncommand = \"below-server\"\n"
+        )
+        # Re-migrate — both should survive
+        migrate({"mcp_servers": {"hermes-mcp": {"command": "npx"}}},
+                codex_home=tmp_path, discover_plugins=False,
+                expose_hermes_tools=False)
+        final = target.read_text()
+        assert "user-above" in final
+        assert "user-below" in final
+        # And our managed block is still there with the new content
+        assert "[mcp_servers.hermes-mcp]" in final
+
+    def test_skipped_keys_reported(self, tmp_path):
+        report = migrate({
+            "mcp_servers": {
+                "x": {
+                    "command": "y",
+                    "sampling": {"enabled": True},  # codex has no equivalent
+                }
+            }
+        }, codex_home=tmp_path, expose_hermes_tools=False)
+        assert "x" in report.skipped_keys_per_server
+        assert any("sampling" in s for s in report.skipped_keys_per_server["x"])
+
+    def test_invalid_mcp_servers_value(self, tmp_path):
+        report = migrate({"mcp_servers": "notadict"}, codex_home=tmp_path, expose_hermes_tools=False)
+        assert any("not a dict" in e for e in report.errors)
+
+    def test_server_without_transport_skipped_with_error(self, tmp_path):
+        report = migrate({
+            "mcp_servers": {"broken": {"description": "no command/url"}}
+        }, codex_home=tmp_path, expose_hermes_tools=False)
+        assert "broken" not in report.migrated
+        assert any("broken" in e for e in report.errors)
+
+    def test_summary_reports_migration_count(self, tmp_path):
+        report = migrate({
+            "mcp_servers": {"a": {"command": "x"}, "b": {"command": "y"}}
+        }, codex_home=tmp_path, expose_hermes_tools=False)
+        summary = report.summary()
+        assert "Migrated 2 MCP server(s)" in summary
+        assert "- a" in summary
+        assert "- b" in summary
diff --git a/tests/hermes_cli/test_codex_runtime_switch.py b/tests/hermes_cli/test_codex_runtime_switch.py
new file mode 100644
index 00000000000..9a01543776e
--- /dev/null
+++ b/tests/hermes_cli/test_codex_runtime_switch.py
@@ -0,0 +1,231 @@
+"""Tests for the /codex-runtime slash-command shared logic.
+
+These cover the pure-Python state machine; CLI and gateway handlers are
+tested separately because they involve config persistence and prompt
+formatting that's surface-specific."""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli import codex_runtime_switch as crs
+
+
+class TestParseArgs:
+    @pytest.mark.parametrize("arg,expected", [
+        ("", None),
+        ("   ", None),
+        ("auto", "auto"),
+        ("codex_app_server", "codex_app_server"),
+        ("on", "codex_app_server"),
+        ("off", "auto"),
+        ("codex", "codex_app_server"),
+        ("default", "auto"),
+        ("hermes", "auto"),
+        ("ENABLE", "codex_app_server"),  # case-insensitive
+        ("DiSaBlE", "auto"),
+    ])
+    def test_valid_args(self, arg, expected):
+        value, errors = crs.parse_args(arg)
+        assert errors == []
+        assert value == expected
+
+    def test_invalid_arg_returns_error(self):
+        value, errors = crs.parse_args("turbo")
+        assert value is None
+        assert errors and "Unknown runtime" in errors[0]
+
+
+class TestGetCurrentRuntime:
+    def test_default_when_unset(self):
+        assert crs.get_current_runtime({}) == "auto"
+        assert crs.get_current_runtime({"model": {}}) == "auto"
+        assert crs.get_current_runtime({"model": {"openai_runtime": ""}}) == "auto"
+
+    def test_unrecognized_falls_back_to_auto(self):
+        assert crs.get_current_runtime(
+            {"model": {"openai_runtime": "garbage"}}
+        ) == "auto"
+
+    def test_explicit_codex(self):
+        assert crs.get_current_runtime(
+            {"model": {"openai_runtime": "codex_app_server"}}
+        ) == "codex_app_server"
+
+    def test_handles_non_dict_config(self):
+        assert crs.get_current_runtime(None) == "auto"  # type: ignore[arg-type]
+        assert crs.get_current_runtime("notadict") == "auto"  # type: ignore[arg-type]
+        assert crs.get_current_runtime({"model": "notadict"}) == "auto"
+
+
+class TestSetRuntime:
+    def test_creates_model_section_if_missing(self):
+        cfg = {}
+        old = crs.set_runtime(cfg, "codex_app_server")
+        assert old == "auto"
+        assert cfg["model"]["openai_runtime"] == "codex_app_server"
+
+    def test_returns_previous_value(self):
+        cfg = {"model": {"openai_runtime": "codex_app_server"}}
+        old = crs.set_runtime(cfg, "auto")
+        assert old == "codex_app_server"
+        assert cfg["model"]["openai_runtime"] == "auto"
+
+    def test_invalid_value_raises(self):
+        with pytest.raises(ValueError):
+            crs.set_runtime({}, "garbage")
+
+
+class TestApply:
+    def test_read_only_call_reports_state(self):
+        cfg = {"model": {"openai_runtime": "codex_app_server"}}
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")):
+            r = crs.apply(cfg, None)
+        assert r.success
+        assert r.new_value == "codex_app_server"
+        assert r.old_value == "codex_app_server"
+        assert "codex_app_server" in r.message
+        assert "0.130.0" in r.message
+
+    def test_no_change_when_already_set(self):
+        cfg = {"model": {"openai_runtime": "auto"}}
+        r = crs.apply(cfg, "auto")
+        assert r.success
+        assert r.message == "openai_runtime already set to auto"
+
+    def test_enable_blocked_when_codex_missing(self):
+        cfg = {}
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(False, "codex not found")):
+            r = crs.apply(cfg, "codex_app_server")
+        assert r.success is False
+        assert "Cannot enable" in r.message
+        assert "npm i -g @openai/codex" in r.message
+        # Config NOT mutated on failure
+        assert cfg.get("model", {}).get("openai_runtime") in (None, "")
+
+    def test_enable_succeeds_when_codex_present(self):
+        cfg = {}
+        persisted = {}
+
+        def persist(c):
+            persisted.update(c)
+
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")):
+            r = crs.apply(cfg, "codex_app_server", persist_callback=persist)
+        assert r.success
+        assert r.new_value == "codex_app_server"
+        assert r.old_value == "auto"
+        assert r.requires_new_session is True
+        assert "via MCP" in r.message  # hermes-tools callback message
+        assert cfg["model"]["openai_runtime"] == "codex_app_server"
+        assert persisted["model"]["openai_runtime"] == "codex_app_server"
+
+    def test_disable_does_not_check_binary(self):
+        cfg = {"model": {"openai_runtime": "codex_app_server"}}
+        with patch.object(crs, "check_codex_binary_ok") as bin_check:
+            r = crs.apply(cfg, "auto")
+        assert r.success
+        # Binary check is irrelevant when disabling — should not be called
+        # with the codex_app_server enable-gate signature.
+        assert r.new_value == "auto"
+        assert r.old_value == "codex_app_server"
+
+    def test_persist_callback_failure_reported(self):
+        cfg = {}
+
+        def persist_boom(c):
+            raise IOError("disk full")
+
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")):
+            r = crs.apply(cfg, "codex_app_server", persist_callback=persist_boom)
+        assert r.success is False
+        assert "persist failed" in r.message
+        assert "disk full" in r.message
+
+    def test_enable_triggers_mcp_migration(self):
+        """Enabling codex_app_server should auto-migrate Hermes mcp_servers
+        to ~/.codex/config.toml so the spawned subprocess sees them."""
+        cfg = {
+            "mcp_servers": {
+                "filesystem": {"command": "npx", "args": ["-y", "fs-server"]},
+            }
+        }
+
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")), \
+             patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
+            mig.return_value.migrated = ["filesystem", "hermes-tools"]
+            mig.return_value.migrated_plugins = []
+            mig.return_value.plugin_query_error = None
+            mig.return_value.wrote_permissions_default = ":workspace"
+            mig.return_value.errors = []
+            mig.return_value.target_path = "/fake/.codex/config.toml"
+            r = crs.apply(cfg, "codex_app_server")
+        assert r.success
+        assert mig.called  # migration was triggered
+        # User MCP servers are reported (excluding internal hermes-tools)
+        assert "Migrated 1 MCP server" in r.message
+        assert "filesystem" in r.message
+        # Permissions default surfaces
+        assert "Default sandbox: :workspace" in r.message
+        # Hermes tool callback announcement
+        assert "via MCP" in r.message
+
+    def test_disable_does_not_trigger_migration(self):
+        """Switching back to auto must not write to ~/.codex/."""
+        cfg = {
+            "model": {"openai_runtime": "codex_app_server"},
+            "mcp_servers": {"x": {"command": "y"}},
+        }
+        with patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
+            r = crs.apply(cfg, "auto")
+        assert r.success
+        assert not mig.called  # disabling does not migrate
+
+    def test_migration_failure_does_not_block_enable(self):
+        """If MCP migration raises, the runtime change still proceeds —
+        users can manually re-run migration later."""
+        cfg = {"mcp_servers": {"x": {"command": "y"}}}
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")), \
+             patch("hermes_cli.codex_runtime_plugin_migration.migrate",
+                   side_effect=RuntimeError("disk full")):
+            r = crs.apply(cfg, "codex_app_server")
+        assert r.success  # change still applied
+        assert r.new_value == "codex_app_server"
+        assert "MCP migration skipped" in r.message
+        assert "disk full" in r.message
+
+    def test_binary_check_cached_within_apply(self):
+        """check_codex_binary_ok is invoked exactly once per apply() call.
+
+        The enable path has three sites that need the version (state report,
+        enable gate, success message). Without caching, a single
+        /codex-runtime invocation spawns `codex --version` three times.
+        Regression guard against a refactor that drops the cache.
+        """
+        cfg = {}
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")) as bin_check, \
+             patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
+            r = crs.apply(cfg, "codex_app_server")
+        assert r.success
+        assert bin_check.call_count == 1, (
+            f"check_codex_binary_ok was called {bin_check.call_count} time(s); "
+            "should be cached and called exactly once per apply()"
+        )
+
+    def test_binary_check_cached_on_read_only_call(self):
+        """Read-only call (new_value=None) calls the binary check exactly
+        once and reuses the result for the message."""
+        cfg = {"model": {"openai_runtime": "codex_app_server"}}
+        with patch.object(crs, "check_codex_binary_ok",
+                          return_value=(True, "0.130.0")) as bin_check:
+            crs.apply(cfg, None)
+        assert bin_check.call_count == 1
diff --git a/tests/run_agent/test_codex_app_server_integration.py b/tests/run_agent/test_codex_app_server_integration.py
new file mode 100644
index 00000000000..6fc60695d2a
--- /dev/null
+++ b/tests/run_agent/test_codex_app_server_integration.py
@@ -0,0 +1,344 @@
+"""Integration test for the codex_app_server runtime path through AIAgent.
+
+Verifies that:
+  - api_mode='codex_app_server' is accepted on AIAgent construction
+  - run_conversation() takes the early-return path and never enters the
+    chat completions loop
+  - Projected messages from a fake Codex session land in the messages list
+  - tool_iterations from the codex session tick the skill nudge counter
+  - The user-turn counter ticks once per turn (memory nudge is skipped here)
+  - The returned dict has the same shape as the chat_completions path
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+import run_agent
+from agent.transports.codex_app_server_session import CodexAppServerSession, TurnResult
+
+
+@pytest.fixture
+def fake_session(monkeypatch):
+    """Replace CodexAppServerSession with a stub that returns a fixed
+    TurnResult, so we can drive AIAgent without spawning real codex."""
+
+    def fake_run_turn(self, user_input: str, **kwargs):
+        return TurnResult(
+            final_text=f"echo: {user_input}",
+            projected_messages=[
+                {"role": "assistant", "content": None,
+                 "tool_calls": [{"id": "exec_1", "type": "function",
+                                 "function": {"name": "exec_command",
+                                              "arguments": "{}"}}]},
+                {"role": "tool", "tool_call_id": "exec_1", "content": "ok"},
+                {"role": "assistant", "content": f"echo: {user_input}"},
+            ],
+            tool_iterations=1,
+            interrupted=False,
+            error=None,
+            turn_id="turn-stub-1",
+            thread_id="thread-stub-1",
+        )
+
+    monkeypatch.setattr(CodexAppServerSession, "run_turn", fake_run_turn)
+    monkeypatch.setattr(
+        CodexAppServerSession, "ensure_started", lambda self: "thread-stub-1"
+    )
+
+
+def _make_codex_agent():
+    """Construct an AIAgent in codex_app_server mode without contacting any
+    real provider. We pass api_mode explicitly so the constructor takes the
+    fast path for direct credentials."""
+    return run_agent.AIAgent(
+        api_key="stub",
+        base_url="https://stub.invalid",
+        provider="openai",
+        api_mode="codex_app_server",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+
+class TestApiModeAccepted:
+    def test_api_mode_is_codex_app_server(self):
+        agent = _make_codex_agent()
+        assert agent.api_mode == "codex_app_server"
+
+
+class TestRunConversationCodexPath:
+    def test_run_conversation_returns_codex_shape(self, fake_session):
+        agent = _make_codex_agent()
+        # No background review fork during tests
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("hello there")
+        assert result["final_response"] == "echo: hello there"
+        assert result["completed"] is True
+        assert result["partial"] is False
+        assert result["error"] is None
+        assert result["api_calls"] == 1
+        assert result["codex_thread_id"] == "thread-stub-1"
+        assert result["codex_turn_id"] == "turn-stub-1"
+
+    def test_projected_messages_are_spliced(self, fake_session):
+        agent = _make_codex_agent()
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("hello")
+        msgs = result["messages"]
+        # User message + 3 projected (assistant tool_call + tool + assistant text)
+        assert len(msgs) >= 4
+        assert msgs[0]["role"] == "user"
+        assert msgs[0]["content"] == "hello"
+        # Last assistant message has the final text
+        final = [m for m in msgs if m.get("role") == "assistant"
+                 and m.get("content") == "echo: hello"]
+        assert final, f"expected final assistant message in {msgs}"
+
+    def test_nudge_counters_tick(self, fake_session):
+        """The skill nudge counter must accumulate tool_iterations across
+        turns. The memory nudge counter is gated on memory being configured
+        (which we skip via skip_memory=True), so we don't assert on it here;
+        no test in this module exercises that path."""
+        agent = _make_codex_agent()
+        agent._iters_since_skill = 0
+        agent._user_turn_count = 0
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            agent.run_conversation("first")
+        assert agent._iters_since_skill == 1  # one tool_iteration in fake turn
+        # _user_turn_count is incremented by run_conversation pre-loop, not
+        # by the codex helper — confirms we delegate that to the standard flow.
+        assert agent._user_turn_count == 1
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            agent.run_conversation("second")
+        assert agent._iters_since_skill == 2
+        assert agent._user_turn_count == 2
+
+    def test_user_message_not_duplicated(self, fake_session):
+        """Regression guard: the user message must appear exactly once in
+        the messages list. The standard run_conversation pre-loop appends
+        it, and the codex helper must NOT append again."""
+        agent = _make_codex_agent()
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("ping unique 12345")
+        user_count = sum(
+            1 for m in result["messages"]
+            if m.get("role") == "user" and m.get("content") == "ping unique 12345"
+        )
+        assert user_count == 1, f"user message appeared {user_count}× in {result['messages']}"
+
+    def test_background_review_NOT_invoked_below_threshold(self, fake_session):
+        """A single turn shouldn't trigger background review — counters
+        haven't reached the nudge interval (default 10)."""
+        agent = _make_codex_agent()
+        agent._memory_nudge_interval = 10
+        agent._skill_nudge_interval = 10
+        agent._iters_since_skill = 0
+        with patch.object(agent, "_spawn_background_review",
+                          return_value=None) as spawn:
+            agent.run_conversation("ping")
+        # Below threshold → review should NOT fire (was a real bug:
+        # the helper was calling _spawn_background_review() with no
+        # args after every turn, which would crash with TypeError).
+        assert not spawn.called
+
+    def test_background_review_skill_trigger_fires_above_threshold(
+        self, monkeypatch
+    ):
+        """When tool iterations cross the skill nudge interval, the
+        background review fires with review_skills=True and the right
+        messages_snapshot signature."""
+        from agent.transports.codex_app_server_session import (
+            CodexAppServerSession, TurnResult,
+        )
+        # Make the fake session report 10 tool iterations in one turn
+        # (matching the default skill threshold).
+        def fake_run_turn(self, user_input: str, **kwargs):
+            return TurnResult(
+                final_text=f"echo: {user_input}",
+                projected_messages=[
+                    {"role": "assistant", "content": f"echo: {user_input}"},
+                ],
+                tool_iterations=10,
+                turn_id="t1", thread_id="th1",
+            )
+        monkeypatch.setattr(CodexAppServerSession, "run_turn", fake_run_turn)
+        monkeypatch.setattr(
+            CodexAppServerSession, "ensure_started", lambda self: "th1"
+        )
+
+        agent = _make_codex_agent()
+        agent._skill_nudge_interval = 10
+        agent._iters_since_skill = 0
+        # Make valid_tool_names include 'skill_manage' so the gate passes
+        agent.valid_tool_names = set(getattr(agent, "valid_tool_names", set()))
+        agent.valid_tool_names.add("skill_manage")
+
+        with patch.object(agent, "_spawn_background_review",
+                          return_value=None) as spawn:
+            agent.run_conversation("do tool work")
+
+        assert spawn.called, "skill threshold tripped but review didn't fire"
+        # Verify the call signature matches what _spawn_background_review
+        # actually expects — this is the regression guard for the original
+        # bug where the codex path called it with no args at all.
+        call = spawn.call_args
+        assert "messages_snapshot" in call.kwargs
+        assert isinstance(call.kwargs["messages_snapshot"], list)
+        assert call.kwargs["review_skills"] is True
+        # Counter should be reset after the review fires
+        assert agent._iters_since_skill == 0
+
+    def test_background_review_signature_never_breaks(self, fake_session):
+        """When the skill trigger fires, the helper must never call
+        _spawn_background_review with the wrong signature. Run a single
+        turn with the skill nudge interval lowered to 1 so the fake
+        session's one tool iteration trips it, and confirm the call shape
+        is the kwargs-only form the function actually accepts."""
+        agent = _make_codex_agent()
+        agent._skill_nudge_interval = 1  # very low so any iter trips it
+        agent._iters_since_skill = 0
+        agent.valid_tool_names = set(getattr(agent, "valid_tool_names", set()))
+        agent.valid_tool_names.add("skill_manage")
+
+        with patch.object(agent, "_spawn_background_review",
+                          return_value=None) as spawn:
+            agent.run_conversation("first")
+        # The fake session reports tool_iterations=1, which trips
+        # _skill_nudge_interval=1. So review should fire.
+        assert spawn.called
+        # Critical invariant: positional args must be empty, all real
+        # args must be kwargs (matching _spawn_background_review's
+        # actual signature).
+        call = spawn.call_args
+        assert call.args == (), (
+            f"expected no positional args, got {call.args!r} — "
+            "would crash _spawn_background_review at runtime"
+        )
+        assert "messages_snapshot" in call.kwargs
+
+    def test_chat_completions_loop_is_not_entered(self, fake_session):
+        """The early-return must bypass the regular API call loop entirely.
+        We confirm by patching the SDK call and asserting it's never invoked."""
+        agent = _make_codex_agent()
+        # The chat_completions loop calls self.client.chat.completions.create(...)
+        # If our early-return works, that path is dead.
+        with patch.object(agent, "client") as client_mock, patch.object(
+            agent, "_spawn_background_review", return_value=None
+        ):
+            agent.run_conversation("hi")
+        assert not client_mock.chat.completions.create.called
+
+
+class TestReviewForkApiModeDowngrade:
+    """When the parent agent runs on codex_app_server, the background
+    review fork must downgrade to codex_responses — otherwise the fork
+    can't dispatch agent-loop tools (memory, skill_manage) which is the
+    whole point of the review."""
+
+    def test_codex_app_server_parent_downgrades_review_fork(self):
+        """Live test against the real _spawn_background_review code path:
+        verify the review_agent gets api_mode=codex_responses when the
+        parent is codex_app_server."""
+        from unittest.mock import MagicMock, patch as _patch
+        agent = _make_codex_agent()
+        # Pretend memory + skills are configured so the review fork
+        # reaches the AIAgent constructor.
+        agent._memory_store = MagicMock()
+        agent._memory_enabled = True
+        agent._user_profile_enabled = True
+        # Mock _current_main_runtime to return the parent's codex_app_server
+        # state so we can confirm the helper detects + downgrades it.
+        agent._current_main_runtime = lambda: {
+            "api_mode": "codex_app_server",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "stub-token",
+        }
+        # Capture what AIAgent gets constructed with inside the helper.
+        captured = {}
+
+        def _capture_init(self, **kwargs):
+            captured.update(kwargs)
+            # Set bare attributes the rest of the spawn function reads
+            # so it can finish without exploding.
+            self.api_mode = kwargs.get("api_mode")
+            self.provider = kwargs.get("provider")
+            self.model = kwargs.get("model")
+            self._memory_write_origin = None
+            self._memory_write_context = None
+            self._memory_store = None
+            self._memory_enabled = False
+            self._user_profile_enabled = False
+            self._memory_nudge_interval = 0
+            self._skill_nudge_interval = 0
+            self.suppress_status_output = False
+            self._session_messages = []
+
+            def _no_op_run_conv(*a, **kw):
+                return {"final_response": "", "messages": []}
+            self.run_conversation = _no_op_run_conv
+
+            def _no_op_close(*a, **kw):
+                return None
+            self.close = _no_op_close
+
+        with _patch("run_agent.AIAgent.__init__", _capture_init):
+            agent._spawn_background_review(
+                messages_snapshot=[{"role": "user", "content": "x"}],
+                review_memory=True,
+                review_skills=False,
+            )
+            # Wait for the spawned thread to actually execute
+            import time
+            for _ in range(30):
+                if "api_mode" in captured:
+                    break
+                time.sleep(0.1)
+
+        assert captured.get("api_mode") == "codex_responses", (
+            f"review fork should be downgraded to codex_responses when "
+            f"parent is codex_app_server; got {captured.get('api_mode')!r}"
+        )
+
+
+class TestErrorHandling:
+    def test_session_exception_returns_partial_with_error(self, monkeypatch):
+        def boom_run_turn(self, user_input, **kwargs):
+            raise RuntimeError("subprocess died")
+
+        monkeypatch.setattr(CodexAppServerSession, "ensure_started",
+                            lambda self: "t1")
+        monkeypatch.setattr(CodexAppServerSession, "run_turn", boom_run_turn)
+
+        agent = _make_codex_agent()
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("hi")
+        assert result["completed"] is False
+        assert result["partial"] is True
+        assert "subprocess died" in result["error"]
+        assert "codex-runtime auto" in result["final_response"]
+
+    def test_interrupted_turn_marked_partial(self, monkeypatch):
+        def interrupted_turn(self, user_input, **kwargs):
+            return TurnResult(
+                final_text="",
+                projected_messages=[],
+                tool_iterations=0,
+                interrupted=True,
+                error="user interrupted",
+                turn_id="t",
+                thread_id="th",
+            )
+        monkeypatch.setattr(CodexAppServerSession, "ensure_started",
+                            lambda self: "th")
+        monkeypatch.setattr(CodexAppServerSession, "run_turn", interrupted_turn)
+
+        agent = _make_codex_agent()
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("hi")
+        assert result["completed"] is False
+        assert result["partial"] is True
+        assert result["error"] == "user interrupted"
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index b17036ade44..409ddf8fe35 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -515,6 +515,8 @@ Advanced per-platform knobs for throttling the outbound message batcher. Most us
 | `HERMES_HUMAN_DELAY_MIN_MS` | Custom delay range minimum (ms) |
 | `HERMES_HUMAN_DELAY_MAX_MS` | Custom delay range maximum (ms) |
 | `HERMES_QUIET` | Suppress non-essential output (`true`/`false`) |
+| `CODEX_HOME` | When [Codex app-server runtime](../user-guide/features/codex-app-server-runtime) is enabled, override the directory Codex CLI reads its config + auth from (default: `~/.codex`). Hermes' migration writes the managed block to `<CODEX_HOME>/config.toml`. |
+| `HERMES_KANBAN_TASK` | Set by the kanban dispatcher when spawning a worker (task UUID). Workers and the spawned `hermes-tools` MCP subprocess inherit it so kanban tools gate correctly. Don't set manually. |
 | `HERMES_API_TIMEOUT` | LLM API call timeout in seconds (default: `1800`) |
 | `HERMES_API_CALL_STALE_TIMEOUT` | Non-streaming stale-call timeout in seconds (default: `300`). Auto-disabled for local providers when left unset. Also configurable via `providers.<id>.stale_timeout_seconds` or `providers.<id>.models.<model>.stale_timeout_seconds` in `config.yaml`. |
 | `HERMES_STREAM_READ_TIMEOUT` | Streaming socket read timeout in seconds (default: `120`). Auto-increased to `HERMES_API_TIMEOUT` for local providers. Increase if local LLMs time out during long code generation. |
diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md
index 718da1350aa..377c31c4477 100644
--- a/website/docs/reference/slash-commands.md
+++ b/website/docs/reference/slash-commands.md
@@ -50,6 +50,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
 |---------|-------------|
 | `/config` | Show current configuration |
 | `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint), and user-defined aliases (`/model fav`, `/model grok` — see [Custom model aliases](#custom-model-aliases)). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider, exit the session and run `hermes model` from your terminal. |
+| `/codex-runtime [auto\|codex_app_server\|on\|off]` | Toggle the optional [Codex app-server runtime](../user-guide/features/codex-app-server-runtime) for OpenAI/Codex models. `auto` (default) uses Hermes' standard chat completions; `codex_app_server` hands turns to a `codex app-server` subprocess for native shell, apply_patch, ChatGPT subscription auth, and migrated Codex plugins. Effective on next session. |
 | `/personality` | Set a predefined personality |
 | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
 | `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. Options: `normal`, `fast`, `status`. |
@@ -180,6 +181,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
 | `/status` | Show session info. |
 | `/stop` | Kill all running background processes and interrupt the running agent. |
 | `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), auto-detect (`/model custom`), and user-defined aliases (`/model fav`, `/model grok` — see [Custom model aliases](#custom-model-aliases)). Use `--global` to persist the change to config.yaml. **Note:** `/model` can only switch between already-configured providers. To add a new provider or set up API keys, use `hermes model` from your terminal (outside the chat session). |
+| `/codex-runtime [auto\|codex_app_server\|on\|off]` | Toggle the optional [Codex app-server runtime](../user-guide/features/codex-app-server-runtime). Persists to `model.openai_runtime` in config.yaml and evicts the cached agent so the next message picks up the new runtime. Effective on next session. |
 | `/personality [name]` | Set a personality overlay for the session. |
 | `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. |
 | `/retry` | Retry the last message. |
diff --git a/website/docs/user-guide/features/codex-app-server-runtime.md b/website/docs/user-guide/features/codex-app-server-runtime.md
new file mode 100644
index 00000000000..5d4b068088b
--- /dev/null
+++ b/website/docs/user-guide/features/codex-app-server-runtime.md
@@ -0,0 +1,443 @@
+---
+title: Codex App-Server Runtime (optional)
+sidebar_label: Codex App-Server Runtime
+---
+
+# Codex App-Server Runtime
+
+Hermes can optionally hand `openai/*` and `openai-codex/*` turns to the [Codex CLI app-server](https://github.com/openai/codex) instead of running its own tool loop. When enabled, terminal commands, file edits, sandboxing, and MCP tool calls all execute inside Codex's runtime — Hermes becomes the shell around it (sessions DB, slash commands, gateway, memory and skill review).
+
+This is **opt-in only**. Default Hermes behavior is unchanged unless you flip the flag. Hermes never auto-routes you onto this runtime.
+
+## Why
+
+- Run OpenAI agent turns against your **ChatGPT subscription** (no API key required) using the same auth flow Codex CLI uses.
+- Use **Codex's own toolset and sandbox** — `shell` for terminal/read/write/search, `apply_patch` for structured edits, `update_plan` for planning, all running inside seatbelt/landlock sandboxing.
+- **Native Codex plugins** — Linear, GitHub, Gmail, Calendar, Canva, etc. — installed via `codex plugin` are auto-migrated and active in your Hermes session.
+- **Hermes' richer tools come along** — web_search, web_extract, browser automation, vision, image generation, skills, and TTS work via an MCP callback. Codex calls back into Hermes for tools it doesn't have built in.
+- **Memory and skill nudges keep working** — Codex's events are projected into Hermes' message shape so the self-improvement loop sees a normal-looking transcript.
+
+## What tools the model actually has
+
+This is the part most users want to know up front. When this runtime is on, the model running your turn has three independent sources of tools:
+
+### 1. Codex's built-in toolset (always on)
+
+These ship with `codex app-server` itself — no Hermes involvement, no MCP, no plugins. All five are available the moment the runtime starts:
+
+- **`shell`** — runs arbitrary shell commands inside the sandbox. This is how the model reads files (`cat`, `head`, `tail`), writes them (`echo > foo`, heredocs), searches them (`find`, `rg`, `grep`), navigates directories (`ls`, `cd`), runs builds, manages processes, and anything else you'd do in bash.
+- **`apply_patch`** — applies a structured multi-file diff in Codex's patch format. The model uses this for non-trivial code edits (adding a function, refactoring across files); shell heredocs are still available for one-off writes.
+- **`update_plan`** — codex's internal todo / plan tracker. Equivalent of Hermes' `todo` tool, but managed entirely inside codex's runtime.
+- **`view_image`** — load a local image file into the conversation so the model can see it.
+- **`web_search`** — codex has its own built-in web search when configured. Hermes also exposes `web_search` (Firecrawl-backed) via the callback below; the model picks whichever it prefers.
+
+So **anything you'd do via terminal — read/write/search/find/run — codex does natively**. The sandbox profile (`:workspace` by default when you enable the runtime) controls what's writable.
+
+### 2. Native Codex plugins (auto-migrated from your `codex plugin` install)
+
+When you enable the runtime, Hermes queries codex's `plugin/list` RPC and writes a `[plugins."<name>@openai-curated"]` entry for every plugin you have installed. The plugins themselves are managed by codex and authorized once via codex's own UI.
+
+Examples (the ones the OpenClaw thread highlighted as "YouTube-video-worthy"):
+
+- **Linear** — find/update issues
+- **GitHub** — search code, view PRs, comment
+- **Gmail** — read/send mail
+- **Google Calendar** — create/find events
+- **Outlook calendar/email** — same shape via the Microsoft connector
+- **Canva** — design generation
+- ...whatever else you've installed via `codex plugin marketplace add openai-curated` + `codex plugin install ...`
+
+What's NOT migrated:
+- Plugins you haven't installed yet — install them in Codex first.
+- ChatGPT app marketplace entries (`app/list`) — these are already enabled inside codex by virtue of your account auth.
+
+### 3. Hermes tool callback (MCP server, registered in `~/.codex/config.toml`)
+
+Hermes registers itself as an MCP server so codex can call back for tools codex doesn't ship with. Available via the callback:
+
+- **`web_search`** / **`web_extract`** — Firecrawl-backed; tends to be cleaner than scraping for structured content.
+- **`browser_navigate` / `browser_click` / `browser_type` / `browser_press` / `browser_snapshot` / `browser_scroll` / `browser_back` / `browser_get_images` / `browser_console` / `browser_vision`** — full browser automation via Camofox or Browserbase.
+- **`vision_analyze`** — call a separate vision model to inspect an image (different from codex's `view_image` which loads it into the conversation).
+- **`image_generate`** — image generation through Hermes' image_gen plugin chain.
+- **`skill_view` / `skills_list`** — read from Hermes' skill library.
+- **`text_to_speech`** — TTS through Hermes' configured provider.
+
+When the model wants one of these, codex spawns the `hermes_tools_mcp_server` subprocess via stdio MCP, the call is dispatched through `model_tools.handle_function_call()` (same code path as Hermes' default runtime), and the result is returned to codex like any other MCP response.
+
+### What's NOT available on this runtime
+
+These four Hermes tools require the running AIAgent context (mid-loop state) to dispatch, and a stateless MCP callback can't drive them. Switch back to the default runtime (`/codex-runtime auto`) when you need any of them:
+
+- **`delegate_task`** — spawn subagents
+- **`memory`** — Hermes' persistent memory store
+- **`session_search`** — cross-session search
+- **`todo`** — Hermes' todo store (codex's `update_plan` is the in-runtime equivalent)
+
+## Workflow features (`/goal`, kanban, cron)
+
+### `/goal` (the Ralph loop)
+
+**Works on this runtime.** Goals persist in `state_meta` keyed by session id, the continuation prompt feeds back as a normal user message through `run_conversation()`, and codex executes the next turn natively. The goal judge runs via the auxiliary client (configured via `auxiliary.goal_judge` in config.yaml), independent of which runtime is active. The judge's "blocked, needs user input" verdict is a clean escape if codex stalls on approvals.
+
+**One thing to be aware of:** each continuation prompt is a fresh codex turn, which means codex re-evaluates command approval policy from scratch. If you're doing a long-running goal with lots of writes, expect more approval prompts than you'd see on a single in-session task. Set `default_permissions = ":workspace"` (which Hermes does automatically when you enable the runtime) so simple workspace writes don't require prompting.
+
+### Kanban (multi-agent worktree dispatch)
+
+**Works on this runtime, with one subtle dependency.** The kanban dispatcher spawns each worker as a separate `hermes chat -q` subprocess that reads the user's config — which means if `model.openai_runtime: codex_app_server` is set globally, workers also come up on the codex runtime.
+
+What works inside a codex-runtime worker:
+- Codex's full toolset (shell, apply_patch, update_plan, view_image, web_search) — the worker does its actual task work natively
+- The migrated codex plugins — Linear, GitHub, etc.
+- The Hermes tool callback for browser_*, vision, image_gen, skills, TTS
+
+What also works because the MCP callback exposes them:
+- **`kanban_complete` / `kanban_block` / `kanban_comment` / `kanban_heartbeat`** — the worker handoff tools. These read `HERMES_KANBAN_TASK` from env (set by the dispatcher), gate access correctly, and write to `~/.hermes/kanban.db`. Without these in the callback, a worker on this runtime could do its task but couldn't report back, hanging until the dispatcher's timeout.
+- **`kanban_show` / `kanban_list`** — read-only board queries for the worker to check its own context.
+- **`kanban_create` / `kanban_unblock` / `kanban_link`** — orchestrator-only operations. Available for orchestrator agents running on the codex runtime that need to dispatch new tasks.
+
+The kanban tools are gated by the `HERMES_KANBAN_TASK` env var that the dispatcher sets. That var is propagated to the codex subprocess (codex inherits the environment) and from there to the spawned `hermes-tools` MCP server subprocess, so the tools see the right task id and gate correctly.
+
+### Cron jobs
+
+**Not specifically tested.** Cron jobs run via `cronjob` → `AIAgent.run_conversation`, the same code path as the CLI. If the cron job's config has `model.openai_runtime: codex_app_server`, it'll run on the codex runtime. The same tool-availability rules apply — codex built-ins + plugins + MCP callback work, agent-loop tools (delegate_task, memory, session_search, todo) don't. If your cron job relies on those agent-loop tools, scope the cron to a profile that uses the default runtime.
+
+## Trade-offs
+
+|  | Hermes default runtime | Codex app-server (opt-in) |
+|---|---|---|
+| `delegate_task` subagents | yes | not available — needs agent loop context |
+| `memory`, `session_search`, `todo` | yes | not available — needs agent loop context |
+| `web_search`, `web_extract` | yes | yes (via MCP callback) |
+| Browser automation (Camofox/Browserbase) | yes | yes (via MCP callback) |
+| `vision_analyze`, `image_generate` | yes | yes (via MCP callback) |
+| `skill_view`, `skills_list` | yes | yes (via MCP callback) |
+| `text_to_speech` | yes | yes (via MCP callback) |
+| Codex `shell` (terminal/read/write/search/find/run) | — | yes (Codex built-in) |
+| Codex `apply_patch` (structured multi-file edits) | — | yes (Codex built-in) |
+| Codex `update_plan` (in-runtime todo) | — | yes (Codex built-in) |
+| Codex `view_image` (load image into conversation) | — | yes (Codex built-in) |
+| Codex sandbox (seatbelt/landlock, profiles) | — | yes (Codex built-in) |
+| ChatGPT subscription auth | — | yes (via `openai-codex` provider) |
+| Native Codex plugins (Linear, GitHub, etc.) | — | yes (auto-migrated) |
+| User MCP servers | yes | yes (auto-migrated to codex) |
+| Memory + skill review (background) | yes | yes (via item projection) |
+| Multi-turn conversations | yes | yes |
+| `/goal` (Ralph loop) | yes | yes |
+| Kanban worker dispatch | yes | yes (via callback) |
+| Kanban orchestrator tools | yes | yes (via callback) |
+| All gateway platforms | yes | yes |
+| Non-OpenAI providers | yes | n/a — OpenAI/Codex-scoped |
+
+## Prerequisites
+
+1. **Codex CLI installed:**
+   ```bash
+   npm i -g @openai/codex
+   codex --version   # 0.130.0 or newer
+   ```
+2. **Codex OAuth login.** The codex subprocess reads `~/.codex/auth.json`. Populate it by logging in with the Codex CLI:
+   ```bash
+   codex login                  # writes tokens to ~/.codex/auth.json
+   ```
+   Hermes' own `hermes auth login codex` writes to `~/.hermes/auth.json` — that's a separate session. **Run `codex login` separately** if you haven't.
+
+3. **(Optional) Install the Codex plugins you want.** When you enable the runtime, Hermes auto-migrates whichever curated plugins you've already installed via Codex CLI:
+   ```bash
+   codex plugin marketplace add openai-curated
+   # then via codex's TUI, install Linear / GitHub / Gmail / etc.
+   ```
+   Hermes will discover them and write `[plugins."<name>@openai-curated"]` entries to `~/.codex/config.toml` automatically.
+
+## Enabling
+
+In a Hermes session:
+
+```
+/codex-runtime codex_app_server
+```
+
+That command:
+- Verifies the `codex` CLI is installed (blocks with an install hint if not).
+- Persists `model.openai_runtime: codex_app_server` to your config.yaml.
+- Migrates user MCP servers from `~/.hermes/config.yaml` to `~/.codex/config.toml`.
+- **Discovers and migrates installed native Codex plugins** (Linear, GitHub, Gmail, Calendar, Canva, etc.) by querying Codex's `plugin/list` RPC.
+- **Registers Hermes' own tools as an MCP server** so the codex subprocess can call back for tools codex doesn't ship with.
+- **Writes `default_permissions = ":workspace"`** so the sandbox allows writes within the workspace without prompting for every operation.
+- Tells you what was migrated. Takes effect on the **next** session — the current cached agent keeps the prior runtime so prompt caches stay valid.
+
+Synonyms: `/codex-runtime on` is equivalent to `/codex-runtime codex_app_server`, and `/codex-runtime off` is equivalent to `/codex-runtime auto`.
+
+To check current state without changing anything:
+```
+/codex-runtime
+```
+
+You can also set it manually in `~/.hermes/config.yaml`:
+```yaml
+model:
+  openai_runtime: codex_app_server   # default is "auto" (= Hermes runtime)
+```
+
+## Self-improvement loop (memory + skill nudges)
+
+Hermes' background self-improvement fires on counter thresholds:
+
+- Every 10 user prompts → a forked review agent looks at the conversation and decides whether anything should be saved to memory.
+- Every 10 tool iterations within a single turn → same idea but for skills (`skill_manage` writes).
+
+**Both keep working on the codex runtime.** The codex path projects each completed `commandExecution` / `fileChange` / `mcpToolCall` / `dynamicToolCall` item into a synthetic `assistant tool_call` + `tool` result message, so by the time the review runs it sees the same shape it sees on the default Hermes runtime.
+
+How the wiring stays equivalent:
+
+| | Default runtime | Codex runtime |
+|---|---|---|
+| `_turns_since_memory` increments | per user prompt, in run_conversation pre-loop | same code path, before the early-return |
+| `_iters_since_skill` increments | per tool iteration in the chat-completions loop | by `turn.tool_iterations` after the codex turn returns |
+| Memory trigger (`_turns_since_memory >= _memory_nudge_interval`) | computed in pre-loop, fires after response | computed in pre-loop, passed through to codex helper |
+| Skill trigger (`_iters_since_skill >= _skill_nudge_interval`) | computed after the loop | computed after the codex turn |
+| `_spawn_background_review(messages_snapshot=..., review_memory=..., review_skills=...)` | called when either trigger fires | called identically when either trigger fires |
+
+One detail: the review fork itself needs to call Hermes' agent-loop tools (`memory`, `skill_manage`), which require Hermes' own dispatch. So when the parent agent is on `codex_app_server`, the review fork is **downgraded to `codex_responses`** — same OAuth credentials, same `openai-codex` provider, but talks to OpenAI's Responses API directly so Hermes owns the loop and the agent-loop tools work. This is invisible to the user.
+
+Net effect: enable the codex runtime and your memory + skill nudges keep firing exactly as they would otherwise.
+
+## How approvals work
+
+Codex requests approval before executing commands or applying patches. These get translated into Hermes' standard "Dangerous Command" prompt:
+
+```
+╭───────────────────────────────────────╮
+│ Dangerous Command                     │
+│                                       │
+│ /bin/bash -lc 'echo hello > foo.txt'  │
+│                                       │
+│ ❯ 1. Allow once                       │
+│   2. Allow for this session           │
+│   3. Deny                             │
+│                                       │
+│ Codex requests exec in /your/cwd      │
+╰───────────────────────────────────────╯
+```
+
+- **Allow once** → approve this single command.
+- **Allow for this session** → Codex won't re-prompt for similar commands.
+- **Deny** → command is rejected; Codex continues in read-only mode.
+
+For `apply_patch` (file edit) approvals, Hermes shows a summary of what changed (`1 add, 1 update: /tmp/new.py, /tmp/old.py`) when codex provides the data via the corresponding `fileChange` item.
+
+## Permission profiles
+
+Codex has three built-in permission profiles:
+- `:read-only` — no writes; every shell command requires approval
+- `:workspace` — writes within the current workspace allowed without prompts (Hermes' default when you enable the runtime)
+- `:danger-no-sandbox` — no sandbox at all (don't use this unless you understand it)
+
+You can override the default in `~/.codex/config.toml` outside Hermes' managed block:
+
+```toml
+default_permissions = ":read-only"
+```
+
+(Hermes will preserve your override on re-migration as long as it lives outside the `# managed by hermes-agent` markers.)
+
+## Auxiliary tasks and ChatGPT subscription token cost
+
+When this runtime is on with the `openai-codex` provider, **auxiliary tasks (title generation, context compression, vision auto-detect, session search summarization, the background self-improvement review fork) also flow through your ChatGPT subscription by default**, because Hermes' auxiliary client uses the main provider/model when no per-task override is set.
+
+This isn't specific to `codex_app_server` — it's true for the existing `codex_responses` path too — but it's more visible here because you're explicitly opting in for the subscription billing.
+
+To route specific aux tasks to a cheaper / different model, set explicit overrides in `~/.hermes/config.yaml`:
+
+```yaml
+auxiliary:
+  title_generation:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+  context_compression:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+  vision_detect:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+  session_search:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+  goal_judge:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+```
+
+The self-improvement review fork inherits the main runtime via `_current_main_runtime()` and Hermes downgrades it from `codex_app_server` to `codex_responses` automatically (so the fork can actually call `memory` and `skill_manage` — Hermes' own agent-loop tools). That fork still uses your subscription auth unless you've routed aux tasks elsewhere.
+
+## Editing `~/.codex/config.toml` safely
+
+Hermes wraps everything it manages between two marker comments:
+
+```toml
+# managed by hermes-agent — `hermes codex-runtime migrate` regenerates this section
+default_permissions = ":workspace"
+[mcp_servers.filesystem]
+...
+[plugins."github@openai-curated"]
+...
+# end hermes-agent managed section
+```
+
+Anything **outside** that block is yours. Re-running migration (via `/codex-runtime codex_app_server` or whenever you toggle the runtime on) replaces the managed block in place but preserves user content above and below it verbatim. This means you can:
+
+- Add your own MCP servers Hermes doesn't know about
+- Override `default_permissions` to `:read-only` if you prefer to be prompted
+- Configure codex-only options (model, providers, otel, etc.)
+- Add user-defined permission profiles in `[permissions.<name>]` tables
+
+Anything you add **inside** the managed block will get clobbered on the next migration. If you need a tweak that requires editing the managed block, file an issue and we'll add the knob.
+
+## Multi-profile / multi-tenant setups
+
+By default, Hermes points the codex subprocess at `~/.codex/` regardless of which Hermes profile is active. This means `hermes -p work` and `hermes -p personal` share the same Codex auth, plugins, and config. For most users this is the right behavior — it matches what running `codex` CLI directly would do.
+
+If you want per-profile Codex isolation (separate auth, separate installed plugins, separate config), set `CODEX_HOME` explicitly per profile. The cleanest way is to point at a directory under your `HERMES_HOME`:
+
+```bash
+# Inside the work profile, you might wrap hermes:
+CODEX_HOME=~/.hermes/profiles/work/codex hermes chat
+```
+
+You'll need to re-run `codex login` once with that `CODEX_HOME` set so the OAuth tokens land in the profile-scoped location. After that, `hermes -p work` launched with the same `CODEX_HOME` will operate on isolated Codex state.
+
+We don't auto-scope this because moving an existing user's `~/.codex/` would silently invalidate their Codex CLI auth — anyone who already ran `codex login` would have to re-authenticate. Opt-in feels safer than surprising users.
+
+## HOME environment variable passthrough
+
+Hermes does NOT rewrite `HOME` when spawning the codex app-server subprocess (we use `os.environ.copy()` and only overlay `CODEX_HOME` and `RUST_LOG`). This means:
+
+- Commands codex runs via its `shell` tool see the real user `HOME` and find `~/.gitconfig`, `~/.gh/`, `~/.aws/`, `~/.npmrc`, etc. correctly.
+- Codex's internal state stays isolated through `CODEX_HOME` (which points at `~/.codex/` by default).
+
+This matches the boundary OpenClaw arrived at after some early experimentation: isolate Codex's state, leave the user's home alone. (Cf. openclaw/openclaw#81562.)
+
+## MCP server migration
+
+Hermes' `mcp_servers` config is auto-translated to the TOML format Codex expects. The migration runs every time you enable the runtime and is idempotent — re-runs replace the managed section but preserve any user-edited Codex config.
+
+What translates:
+
+| Hermes (`config.yaml`) | Codex (`config.toml`) |
+|---|---|
+| `command` + `args` + `env` | stdio transport |
+| `url` + `headers` | streamable_http transport |
+| `timeout` | `tool_timeout_sec` |
+| `connect_timeout` | `startup_timeout_sec` |
+| `enabled: false` | `enabled = false` |
+
+What's not migrated:
+- Hermes-specific keys like `sampling` (Codex's MCP client has no equivalent — these are dropped with a per-server warning).
+
+## Native Codex plugin migration
+
+Plugins installed via `codex plugin` (Linear, GitHub, Gmail, Calendar, Canva, etc.) are discovered through Codex's `plugin/list` RPC. For each plugin where `installed: true`, Hermes writes a `[plugins."<name>@openai-curated"]` block enabling it in your Hermes session.
+
+This means that if you already have, say, Calendar and GitHub set up in your Codex CLI, enabling Hermes' codex runtime activates them automatically. No re-configuration needed.
+
+What's NOT migrated:
+- Plugins not yet installed in Codex CLI. Install them via `codex plugin` first.
+- ChatGPT app marketplace entries (the per-account `app/list` results — these are already enabled inside codex by virtue of your account auth).
+- Plugin OAuth — you authorize each plugin once in Codex itself; Hermes doesn't touch credentials.
+
+## Hermes tool callback (the new MCP server)
+
+Codex's built-in toolset covers shell/file ops/patches but doesn't have Hermes' Firecrawl-backed web search and extraction, browser automation, vision analysis, image generation, etc. To keep those usable in a codex turn, Hermes registers itself as an MCP server in `~/.codex/config.toml`:
+
+```toml
+[mcp_servers.hermes-tools]
+command = "/path/to/python"
+args = ["-m", "agent.transports.hermes_tools_mcp_server"]
+env = { HERMES_HOME = "/your/.hermes", PYTHONPATH = "...", HERMES_QUIET = "1" }
+startup_timeout_sec = 30.0
+tool_timeout_sec = 600.0
+```
+
+When the model calls `web_search` (or another exposed Hermes tool), codex spawns the `hermes_tools_mcp_server` subprocess via stdio, the request is dispatched through `model_tools.handle_function_call()`, and the result is projected back to codex like any other MCP response.
+
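+**Tools available via the callback:** `web_search`, `web_extract`, `browser_navigate`, `browser_click`, `browser_type`, `browser_press`, `browser_snapshot`, `browser_scroll`, `browser_back`, `browser_get_images`, `browser_console`, `browser_vision`, `vision_analyze`, `image_generate`, `skill_view`, `skills_list`, `text_to_speech`. The kanban tools (`kanban_complete`, `kanban_block`, `kanban_comment`, `kanban_heartbeat`, `kanban_show`, `kanban_list`, `kanban_create`, `kanban_unblock`, `kanban_link`) are also exposed through the callback; see the Kanban section above.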
+
+**Tools NOT available:** `delegate_task`, `memory`, `session_search`, `todo`. These need the running AIAgent context to dispatch (mid-loop state) and a stateless MCP callback can't drive them. Use the default Hermes runtime (`/codex-runtime auto`) when you need these.
+
+## Disabling
+
+Switch back at any time:
+
+```
+/codex-runtime auto
+```
+
+Effective on the next session. The Codex managed block stays in `~/.codex/config.toml` so you can re-enable later without losing config — or remove it manually if you prefer.
+
+## Limitations
+
+This runtime is **opt-in beta**. Working as of Hermes Agent 2026.5 + Codex CLI 0.130.0:
+
+- Multi-turn conversations
+- `commandExecution` and `fileChange` (apply_patch) approvals via Hermes UI
+- MCP tool calls (verified against `@modelcontextprotocol/server-filesystem` and the new `hermes-tools` callback)
+- Native Codex plugin migration (verified against Linear / GitHub / Calendar inventory)
+- Deny/cancel paths
+- Toggle on/off cycle
+- Memory and skill nudge counters (verified live via integration tests)
+- Hermes web_search through codex (verified live: "OpenAI Codex CLI – Getting Started" returned end-to-end)
+
+Known limitations:
+
+- **Hermes auth and codex auth are separate sessions.** You need both `codex login` AND `hermes auth login codex` for the cleanest UX (the runtime uses codex's session for the LLM call). This is a deliberate design choice in Hermes' `_import_codex_cli_tokens` — Hermes won't share OAuth state with codex CLI to avoid clobbering each other on token refresh.
+- **`delegate_task`, `memory`, `session_search`, `todo` are unavailable on this runtime.** They need the running AIAgent context which a stateless MCP callback can't provide. Use `/codex-runtime auto` when you need these.
+- **No inline patch preview in approval prompts when codex doesn't track the changeset.** Codex's `fileChange` approval params don't always carry the changeset. Hermes caches the data from the corresponding `item/started` notification when possible, but if approval arrives before the item has streamed, the prompt falls back to whatever `reason` codex provides.
+- **Sub-second cancellation isn't guaranteed.** Mid-stream interrupts (Ctrl+C while codex is responding) are sent via `turn/interrupt`, but if codex has already flushed the final message, you get the response anyway.
+
+If you find a bug, [open an issue](https://github.com/NousResearch/hermes-agent/issues) with the output of `hermes logs --since 5m`. Mention `codex-runtime` in the title so it's easy to triage.
+
+## Architecture
+
+```
+                ┌─── Hermes shell (CLI / TUI / gateway) ───┐
+                │  sessions DB · slash commands · memory   │
+                │  & skill review · cron · session pickers │
+                └──┬──────────────────────────────────────┬┘
+                   │ user_message               final     │
+                   ▼                            text +    │
+        ┌──────────────────────────────────┐   projected  │
+        │  AIAgent.run_conversation()       │   messages   │
+        │   if api_mode == codex_app_server │              │
+        │     → CodexAppServerSession       │              │
+        │   else: chat_completions / codex_responses (default)
+        └────┬─────────────────────────────┘              │
+             │ JSON-RPC over stdio                        │
+             ▼                                            │
+        ┌──────────────────────────────────┐              │
+        │  codex app-server (subprocess)    │──────────────┘
+        │   thread/start, turn/start        │
+        │   item/* notifications            │
+        │   shell + apply_patch + update_plan│
+        │   view_image + sandbox            │
+        │   ┌─────────────────────────┐     │
+        │   │  MCP client             │     │
+        │   │  ├─ user MCP servers    │     │
+        │   │  ├─ native plugins      │     │
+        │   │  │   (linear, github,   │     │
+        │   │  │    gmail, calendar,  │     │
+        │   │  │    canva, ...)       │     │
+        │   │  └─ hermes-tools ───────┼─────────────────┐
+        │   │       (callback to     │     │           │
+        │   │        Hermes' richer  │     │           │
+        │   │        tools)          │     │           │
+        │   └─────────────────────────┘     │           │
+        └──────────────────────────────────┘           │
+                                                        │
+                                                        ▼
+        ┌──────────────────────────────────────────────────────────┐
+        │  hermes_tools_mcp_server.py (subprocess on demand)        │
+        │   web_search, web_extract, browser_*, vision_analyze,    │
+        │   image_generate, skill_view, skills_list, text_to_speech│
+        └──────────────────────────────────────────────────────────┘
+```
+
+For implementation details, see [PR #24182](https://github.com/NousResearch/hermes-agent/pull/24182) and the [Codex app-server protocol README](https://github.com/openai/codex/blob/main/codex-rs/app-server/README.md).
diff --git a/website/sidebars.ts b/website/sidebars.ts
index f706d2a607d..6bdd5d296a0 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -68,6 +68,7 @@ const sidebars: SidebarsConfig = {
             'user-guide/features/cron',
             'user-guide/features/delegation',
             'user-guide/features/kanban',
+            'user-guide/features/codex-app-server-runtime',
             'user-guide/features/kanban-tutorial',
             'user-guide/features/kanban-worker-lanes',
             'user-guide/features/goals',