Harden Codex auth refresh and responses compatibility

2026-04-25 00:51:20 +00:00 · 2026-02-25 19:27:54 -08:00 · 2026-02-25 19:27:54 -08:00 · 74c662b63a
commit 74c662b63a
parent 91bdb9eb2d
9 changed files with 996 additions and 22 deletions
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -20,6 +20,7 @@ import logging
 import os
 import shutil
 import stat
+import base64
 import subprocess
 import time
 import webbrowser
@ -58,6 +59,9 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
+CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
+CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120


 # =============================================================================
@ -380,6 +384,27 @@ def _optional_base_url(value: Any) -> Optional[str]:
    return cleaned if cleaned else None


+def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
+    if not isinstance(token, str) or token.count(".") != 2:
+        return {}
+    payload = token.split(".")[1]
+    payload += "=" * ((4 - len(payload) % 4) % 4)
+    try:
+        raw = base64.urlsafe_b64decode(payload.encode("utf-8"))
+        claims = json.loads(raw.decode("utf-8"))
+    except Exception:
+        return {}
+    return claims if isinstance(claims, dict) else {}
+
+
+def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> bool:
+    claims = _decode_jwt_claims(access_token)
+    exp = claims.get("exp")
+    if not isinstance(exp, (int, float)):
+        return False
+    return float(exp) <= (time.time() + max(0, int(skew_seconds)))
+
+
 # =============================================================================
 # SSH / remote session detection
 # =============================================================================
@ -405,6 +430,39 @@ def _codex_auth_file_path() -> Path:
    return resolve_codex_home_path() / "auth.json"


+def _codex_auth_lock_path(auth_path: Path) -> Path:
+    return auth_path.with_suffix(auth_path.suffix + ".lock")
+
+
+@contextmanager
+def _codex_auth_file_lock(
+    auth_path: Path,
+    timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS,
+):
+    lock_path = _codex_auth_lock_path(auth_path)
+    lock_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with lock_path.open("a+") as lock_file:
+        if fcntl is None:
+            yield
+            return
+
+        deadline = time.time() + max(1.0, timeout_seconds)
+        while True:
+            try:
+                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+                break
+            except BlockingIOError:
+                if time.time() >= deadline:
+                    raise TimeoutError(f"Timed out waiting for Codex auth lock: {lock_path}")
+                time.sleep(0.05)
+
+        try:
+            yield
+        finally:
+            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
+
+
 def read_codex_auth_file() -> Dict[str, Any]:
    """Read and validate Codex auth.json shape."""
    codex_home = resolve_codex_home_path()
@ -469,11 +527,172 @@ def read_codex_auth_file() -> Dict[str, Any]:
    }


-def resolve_codex_runtime_credentials() -> Dict[str, Any]:
+def _persist_codex_auth_payload(
+    auth_path: Path,
+    payload: Dict[str, Any],
+    *,
+    lock_held: bool = False,
+) -> None:
+    auth_path.parent.mkdir(parents=True, exist_ok=True)
+
+    def _write() -> None:
+        serialized = json.dumps(payload, indent=2, ensure_ascii=False) + "\n"
+        tmp_path = auth_path.parent / f".{auth_path.name}.{os.getpid()}.{time.time_ns()}.tmp"
+        try:
+            with tmp_path.open("w", encoding="utf-8") as tmp_file:
+                tmp_file.write(serialized)
+                tmp_file.flush()
+                os.fsync(tmp_file.fileno())
+            os.replace(tmp_path, auth_path)
+        finally:
+            if tmp_path.exists():
+                try:
+                    tmp_path.unlink()
+                except OSError:
+                    pass
+
+        try:
+            auth_path.chmod(stat.S_IRUSR | stat.S_IWUSR)
+        except OSError:
+            pass
+
+    if lock_held:
+        _write()
+        return
+
+    with _codex_auth_file_lock(auth_path):
+        _write()
+
+
+def _refresh_codex_auth_tokens(
+    *,
+    payload: Dict[str, Any],
+    auth_path: Path,
+    timeout_seconds: float,
+    lock_held: bool = False,
+) -> Dict[str, Any]:
+    tokens = payload.get("tokens")
+    if not isinstance(tokens, dict):
+        raise AuthError(
+            "Codex auth file is missing a valid 'tokens' object.",
+            provider="openai-codex",
+            code="codex_auth_invalid_shape",
+            relogin_required=True,
+        )
+
+    refresh_token = tokens.get("refresh_token")
+    if not isinstance(refresh_token, str) or not refresh_token.strip():
+        raise AuthError(
+            "Codex auth file is missing tokens.refresh_token.",
+            provider="openai-codex",
+            code="codex_auth_missing_refresh_token",
+            relogin_required=True,
+        )
+
+    timeout = httpx.Timeout(max(5.0, float(timeout_seconds)))
+    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client:
+        response = client.post(
+            CODEX_OAUTH_TOKEN_URL,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "grant_type": "refresh_token",
+                "refresh_token": refresh_token,
+                "client_id": CODEX_OAUTH_CLIENT_ID,
+            },
+        )
+
+    if response.status_code != 200:
+        code = "codex_refresh_failed"
+        message = f"Codex token refresh failed with status {response.status_code}."
+        relogin_required = False
+        try:
+            err = response.json()
+            if isinstance(err, dict):
+                err_code = err.get("error")
+                if isinstance(err_code, str) and err_code.strip():
+                    code = err_code.strip()
+                err_desc = err.get("error_description") or err.get("message")
+                if isinstance(err_desc, str) and err_desc.strip():
+                    message = f"Codex token refresh failed: {err_desc.strip()}"
+        except Exception:
+            pass
+        if code in {"invalid_grant", "invalid_token", "invalid_request"}:
+            relogin_required = True
+        raise AuthError(
+            message,
+            provider="openai-codex",
+            code=code,
+            relogin_required=relogin_required,
+        )
+
+    try:
+        refresh_payload = response.json()
+    except Exception as exc:
+        raise AuthError(
+            "Codex token refresh returned invalid JSON.",
+            provider="openai-codex",
+            code="codex_refresh_invalid_json",
+            relogin_required=True,
+        ) from exc
+
+    access_token = refresh_payload.get("access_token")
+    if not isinstance(access_token, str) or not access_token.strip():
+        raise AuthError(
+            "Codex token refresh response was missing access_token.",
+            provider="openai-codex",
+            code="codex_refresh_missing_access_token",
+            relogin_required=True,
+        )
+
+    updated_tokens = dict(tokens)
+    updated_tokens["access_token"] = access_token.strip()
+    next_refresh = refresh_payload.get("refresh_token")
+    if isinstance(next_refresh, str) and next_refresh.strip():
+        updated_tokens["refresh_token"] = next_refresh.strip()
+    payload["tokens"] = updated_tokens
+    payload["last_refresh"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    _persist_codex_auth_payload(auth_path, payload, lock_held=lock_held)
+    return updated_tokens
+
+
+def resolve_codex_runtime_credentials(
+    *,
+    force_refresh: bool = False,
+    refresh_if_expiring: bool = True,
+    refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+) -> Dict[str, Any]:
    """Resolve runtime credentials from Codex CLI auth state."""
    data = read_codex_auth_file()
    payload = data["payload"]
-    tokens = data["tokens"]
+    tokens = dict(data["tokens"])
+    auth_path = data["auth_path"]
+    access_token = str(tokens.get("access_token", "") or "").strip()
+    refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))
+
+    should_refresh = bool(force_refresh)
+    if (not should_refresh) and refresh_if_expiring:
+        should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds)
+    if should_refresh:
+        lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0)
+        with _codex_auth_file_lock(auth_path, timeout_seconds=lock_timeout):
+            data = read_codex_auth_file()
+            payload = data["payload"]
+            tokens = dict(data["tokens"])
+            access_token = str(tokens.get("access_token", "") or "").strip()
+
+            should_refresh = bool(force_refresh)
+            if (not should_refresh) and refresh_if_expiring:
+                should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds)
+
+            if should_refresh:
+                tokens = _refresh_codex_auth_tokens(
+                    payload=payload,
+                    auth_path=auth_path,
+                    timeout_seconds=refresh_timeout_seconds,
+                    lock_held=True,
+                )
+                access_token = str(tokens.get("access_token", "") or "").strip()
+
    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
@ -482,11 +701,11 @@ def resolve_codex_runtime_credentials() -> Dict[str, Any]:
    return {
        "provider": "openai-codex",
        "base_url": base_url,
-        "api_key": tokens["access_token"],
+        "api_key": access_token,
        "source": "codex-auth-json",
        "last_refresh": payload.get("last_refresh"),
        "auth_mode": payload.get("auth_mode"),
-        "auth_file": str(data["auth_path"]),
+        "auth_file": str(auth_path),
        "codex_home": str(data["codex_home"]),
    }

--- a/hermes_cli/codex_models.py
+++ b/hermes_cli/codex_models.py
@ -0,0 +1,91 @@
+"""Codex model discovery from local Codex CLI cache/config."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import List, Optional
+
+from hermes_cli.auth import resolve_codex_home_path
+
+DEFAULT_CODEX_MODELS: List[str] = [
+    "gpt-5-codex",
+    "gpt-5.3-codex",
+    "gpt-5.2-codex",
+    "gpt-5.1-codex",
+]
+
+
+def _read_default_model(codex_home: Path) -> Optional[str]:
+    config_path = codex_home / "config.toml"
+    if not config_path.exists():
+        return None
+    try:
+        import tomllib
+    except Exception:
+        return None
+    try:
+        payload = tomllib.loads(config_path.read_text(encoding="utf-8"))
+    except Exception:
+        return None
+    model = payload.get("model") if isinstance(payload, dict) else None
+    if isinstance(model, str) and model.strip():
+        return model.strip()
+    return None
+
+
+def _read_cache_models(codex_home: Path) -> List[str]:
+    cache_path = codex_home / "models_cache.json"
+    if not cache_path.exists():
+        return []
+    try:
+        raw = json.loads(cache_path.read_text(encoding="utf-8"))
+    except Exception:
+        return []
+
+    entries = raw.get("models") if isinstance(raw, dict) else None
+    sortable = []
+    if isinstance(entries, list):
+        for item in entries:
+            if not isinstance(item, dict):
+                continue
+            slug = item.get("slug")
+            if not isinstance(slug, str) or not slug.strip():
+                continue
+            slug = slug.strip()
+            if "codex" not in slug.lower():
+                continue
+            if item.get("supported_in_api") is False:
+                continue
+            visibility = item.get("visibility")
+            if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
+                continue
+            priority = item.get("priority")
+            rank = int(priority) if isinstance(priority, (int, float)) else 10_000
+            sortable.append((rank, slug))
+
+    sortable.sort(key=lambda item: (item[0], item[1]))
+    deduped: List[str] = []
+    for _, slug in sortable:
+        if slug not in deduped:
+            deduped.append(slug)
+    return deduped
+
+
+def get_codex_model_ids() -> List[str]:
+    codex_home = resolve_codex_home_path()
+    ordered: List[str] = []
+
+    default_model = _read_default_model(codex_home)
+    if default_model:
+        ordered.append(default_model)
+
+    for model_id in _read_cache_models(codex_home):
+        if model_id not in ordered:
+            ordered.append(model_id)
+
+    for model_id in DEFAULT_CODEX_MODELS:
+        if model_id not in ordered:
+            ordered.append(model_id)
+
+    return ordered
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -385,6 +385,7 @@ def _model_flow_openai_codex(config, current_model=""):
        _update_config_for_provider, _login_openai_codex,
        PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL,
    )
+    from hermes_cli.codex_models import get_codex_model_ids
    from hermes_cli.config import get_env_value, save_env_value
    import argparse

@ -402,14 +403,7 @@ def _model_flow_openai_codex(config, current_model=""):
            print(f"Login failed: {exc}")
            return

-    # Codex models are not discoverable through /models with this auth path,
-    # so provide curated IDs with custom fallback.
-    codex_models = [
-        "gpt-5-codex",
-        "gpt-5.3-codex",
-        "gpt-5.2-codex",
-        "gpt-5.1-codex",
-    ]
+    codex_models = get_codex_model_ids()

    selected = _prompt_model_selection(codex_models, current_model=current_model)
    if selected:
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -826,12 +826,8 @@ def run_setup_wizard(args):
                    save_env_value("LLM_MODEL", custom)
            # else: keep current
        elif selected_provider == "openai-codex":
-            codex_models = [
-                "gpt-5-codex",
-                "gpt-5.3-codex",
-                "gpt-5.2-codex",
-                "gpt-5.1-codex",
-            ]
+            from hermes_cli.codex_models import get_codex_model_ids
+            codex_models = get_codex_model_ids()
            model_choices = [f"{m}" for m in codex_models]
            model_choices.append("Custom model")
            model_choices.append(f"Keep current ({current_model})")
--- a/run_agent.py
+++ b/run_agent.py
@ -1357,6 +1357,175 @@ class AIAgent:

        return items

+    def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
+        if not isinstance(raw_items, list):
+            raise ValueError("Codex Responses input must be a list of input items.")
+
+        normalized: List[Dict[str, Any]] = []
+        for idx, item in enumerate(raw_items):
+            if not isinstance(item, dict):
+                raise ValueError(f"Codex Responses input[{idx}] must be an object.")
+
+            item_type = item.get("type")
+            if item_type == "function_call":
+                call_id = item.get("call_id")
+                name = item.get("name")
+                if not isinstance(call_id, str) or not call_id.strip():
+                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
+                if not isinstance(name, str) or not name.strip():
+                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
+
+                arguments = item.get("arguments", "{}")
+                if isinstance(arguments, dict):
+                    arguments = json.dumps(arguments, ensure_ascii=False)
+                elif not isinstance(arguments, str):
+                    arguments = str(arguments)
+                arguments = arguments.strip() or "{}"
+
+                normalized.append(
+                    {
+                        "type": "function_call",
+                        "call_id": call_id.strip(),
+                        "name": name.strip(),
+                        "arguments": arguments,
+                    }
+                )
+                continue
+
+            if item_type == "function_call_output":
+                call_id = item.get("call_id")
+                if not isinstance(call_id, str) or not call_id.strip():
+                    raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
+                output = item.get("output", "")
+                if output is None:
+                    output = ""
+                if not isinstance(output, str):
+                    output = str(output)
+
+                normalized.append(
+                    {
+                        "type": "function_call_output",
+                        "call_id": call_id.strip(),
+                        "output": output,
+                    }
+                )
+                continue
+
+            role = item.get("role")
+            if role in {"user", "assistant"}:
+                content = item.get("content", "")
+                if content is None:
+                    content = ""
+                if not isinstance(content, str):
+                    content = str(content)
+
+                normalized.append({"role": role, "content": content})
+                continue
+
+            raise ValueError(
+                f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
+            )
+
+        return normalized
+
+    def _preflight_codex_api_kwargs(
+        self,
+        api_kwargs: Any,
+        *,
+        allow_stream: bool = False,
+    ) -> Dict[str, Any]:
+        if not isinstance(api_kwargs, dict):
+            raise ValueError("Codex Responses request must be a dict.")
+
+        required = {"model", "instructions", "input"}
+        missing = [key for key in required if key not in api_kwargs]
+        if missing:
+            raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
+
+        model = api_kwargs.get("model")
+        if not isinstance(model, str) or not model.strip():
+            raise ValueError("Codex Responses request 'model' must be a non-empty string.")
+        model = model.strip()
+
+        instructions = api_kwargs.get("instructions")
+        if instructions is None:
+            instructions = ""
+        if not isinstance(instructions, str):
+            instructions = str(instructions)
+        instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
+
+        normalized_input = self._preflight_codex_input_items(api_kwargs.get("input"))
+
+        tools = api_kwargs.get("tools")
+        normalized_tools = None
+        if tools is not None:
+            if not isinstance(tools, list):
+                raise ValueError("Codex Responses request 'tools' must be a list when provided.")
+            normalized_tools = []
+            for idx, tool in enumerate(tools):
+                if not isinstance(tool, dict):
+                    raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
+                if tool.get("type") != "function":
+                    raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
+
+                name = tool.get("name")
+                parameters = tool.get("parameters")
+                if not isinstance(name, str) or not name.strip():
+                    raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
+                if not isinstance(parameters, dict):
+                    raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
+
+                description = tool.get("description", "")
+                if description is None:
+                    description = ""
+                if not isinstance(description, str):
+                    description = str(description)
+
+                strict = tool.get("strict", False)
+                if not isinstance(strict, bool):
+                    strict = bool(strict)
+
+                normalized_tools.append(
+                    {
+                        "type": "function",
+                        "name": name.strip(),
+                        "description": description,
+                        "strict": strict,
+                        "parameters": parameters,
+                    }
+                )
+
+        store = api_kwargs.get("store", False)
+        if store is not False:
+            raise ValueError("Codex Responses contract requires 'store' to be false.")
+
+        allowed_keys = {"model", "instructions", "input", "tools", "store"}
+        normalized: Dict[str, Any] = {
+            "model": model,
+            "instructions": instructions,
+            "input": normalized_input,
+            "tools": normalized_tools,
+            "store": False,
+        }
+
+        if allow_stream:
+            stream = api_kwargs.get("stream")
+            if stream is not None and stream is not True:
+                raise ValueError("Codex Responses 'stream' must be true when set.")
+            if stream is True:
+                normalized["stream"] = True
+            allowed_keys.add("stream")
+        elif "stream" in api_kwargs:
+            raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
+
+        unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys)
+        if unexpected:
+            raise ValueError(
+                f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
+            )
+
+        return normalized
+
    def _extract_responses_message_text(self, item: Any) -> str:
        """Extract assistant text from a Responses message output item."""
        content = getattr(item, "content", None)
@ -1511,6 +1680,7 @@ class AIAgent:

    def _run_codex_stream(self, api_kwargs: dict):
        """Execute one streaming Responses API request and return the final response."""
+        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
        max_stream_retries = 1
        for attempt in range(max_stream_retries + 1):
            try:
@ -1539,6 +1709,7 @@ class AIAgent:
        """Fallback path for stream completion edge cases on Codex-style Responses backends."""
        fallback_kwargs = dict(api_kwargs)
        fallback_kwargs["stream"] = True
+        fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
        stream_or_response = self.client.responses.create(**fallback_kwargs)

        # Compatibility shim for mocks or providers that still return a concrete response.
@ -1573,6 +1744,43 @@ class AIAgent:
            return terminal_response
        raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")

+    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
+        if self.api_mode != "codex_responses" or self.provider != "openai-codex":
+            return False
+
+        try:
+            from hermes_cli.auth import resolve_codex_runtime_credentials
+
+            creds = resolve_codex_runtime_credentials(force_refresh=force)
+        except Exception as exc:
+            logger.debug("Codex credential refresh failed: %s", exc)
+            return False
+
+        api_key = creds.get("api_key")
+        base_url = creds.get("base_url")
+        if not isinstance(api_key, str) or not api_key.strip():
+            return False
+        if not isinstance(base_url, str) or not base_url.strip():
+            return False
+
+        self.api_key = api_key.strip()
+        self.base_url = base_url.strip().rstrip("/")
+        self._client_kwargs["api_key"] = self.api_key
+        self._client_kwargs["base_url"] = self.base_url
+
+        try:
+            self.client.close()
+        except Exception:
+            pass
+
+        try:
+            self.client = OpenAI(**self._client_kwargs)
+        except Exception as exc:
+            logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc)
+            return False
+
+        return True
+
    def _interruptible_api_call(self, api_kwargs: dict):
        """
        Run the API call in a background thread so the main conversation loop
@ -2364,12 +2572,15 @@ class AIAgent:
            api_start_time = time.time()
            retry_count = 0
            max_retries = 6  # Increased to allow longer backoff periods
+            codex_auth_retry_attempted = False

            finish_reason = "stop"

            while retry_count <= max_retries:
                try:
                    api_kwargs = self._build_api_kwargs(api_messages)
+                    if self.api_mode == "codex_responses":
+                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)

                    if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
                        self._dump_api_request_debug(api_kwargs, reason="preflight")
@ -2586,6 +2797,18 @@ class AIAgent:
                    if thinking_spinner:
                        thinking_spinner.stop(f"(╥_╥) error, retrying...")
                        thinking_spinner = None
+
+                    status_code = getattr(api_error, "status_code", None)
+                    if (
+                        self.api_mode == "codex_responses"
+                        and self.provider == "openai-codex"
+                        and status_code == 401
+                        and not codex_auth_retry_attempted
+                    ):
+                        codex_auth_retry_attempted = True
+                        if self._try_refresh_codex_client_credentials(force=True):
+                            print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
+                            continue
                    
                    retry_count += 1
                    elapsed_time = time.time() - api_start_time
@ -2614,7 +2837,6 @@ class AIAgent:
                    # Check for non-retryable client errors (4xx HTTP status codes).
                    # These indicate a problem with the request itself (bad model ID,
                    # invalid API key, forbidden, etc.) and will never succeed on retry.
-                    status_code = getattr(api_error, "status_code", None)
                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500
                    is_client_error = is_client_status_error or any(phrase in error_msg for phrase in [
                        'error code: 400', 'error code: 401', 'error code: 403',
--- a/tests/test_auth_codex_provider.py
+++ b/tests/test_auth_codex_provider.py
@ -1,4 +1,7 @@
 import json
+import time
+import base64
+from contextlib import contextmanager
 from pathlib import Path
 from types import SimpleNamespace

@ -9,6 +12,7 @@ from hermes_cli.auth import (
    AuthError,
    DEFAULT_CODEX_BASE_URL,
    PROVIDER_REGISTRY,
+    _persist_codex_auth_payload,
    _login_openai_codex,
    login_command,
    get_codex_auth_status,
@ -37,6 +41,12 @@ def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh
    return auth_file


+def _jwt_with_exp(exp_epoch: int) -> str:
+    payload = {"exp": exp_epoch}
+    encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode("utf-8")
+    return f"h.{encoded}.s"
+
+
 def test_read_codex_auth_file_success(tmp_path, monkeypatch):
    codex_home = tmp_path / "codex-home"
    auth_file = _write_codex_auth(codex_home)
@ -61,12 +71,107 @@ def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkey
    assert exc.value.relogin_required is True


+def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch):
+    codex_home = tmp_path / "codex-home"
+    expiring_token = _jwt_with_exp(int(time.time()) - 10)
+    _write_codex_auth(codex_home, access_token=expiring_token, refresh_token="refresh-old")
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    called = {"count": 0}
+
+    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
+        called["count"] += 1
+        assert auth_path == codex_home / "auth.json"
+        assert lock_held is True
+        return {"access_token": "access-new", "refresh_token": "refresh-new"}
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
+
+    resolved = resolve_codex_runtime_credentials()
+
+    assert called["count"] == 1
+    assert resolved["api_key"] == "access-new"
+
+
+def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch):
+    codex_home = tmp_path / "codex-home"
+    _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old")
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    called = {"count": 0}
+
+    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
+        called["count"] += 1
+        assert lock_held is True
+        return {"access_token": "access-forced", "refresh_token": "refresh-new"}
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
+
+    resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)
+
+    assert called["count"] == 1
+    assert resolved["api_key"] == "access-forced"
+
+
+def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch):
+    codex_home = tmp_path / "codex-home"
+    _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old")
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    lock_calls = {"enter": 0, "exit": 0}
+
+    @contextmanager
+    def _fake_lock(auth_path, timeout_seconds=15.0):
+        assert auth_path == codex_home / "auth.json"
+        lock_calls["enter"] += 1
+        try:
+            yield
+        finally:
+            lock_calls["exit"] += 1
+
+    refresh_calls = {"count": 0}
+
+    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
+        refresh_calls["count"] += 1
+        assert lock_held is True
+        return {"access_token": "access-updated", "refresh_token": "refresh-updated"}
+
+    monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock)
+    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
+
+    resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)
+
+    assert refresh_calls["count"] == 1
+    assert lock_calls["enter"] == 1
+    assert lock_calls["exit"] == 1
+    assert resolved["api_key"] == "access-updated"
+
+
 def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
    assert resolve_provider("openai-codex") == "openai-codex"


+def test_persist_codex_auth_payload_writes_atomically(tmp_path):
+    auth_path = tmp_path / "auth.json"
+    auth_path.write_text('{"stale":true}\n')
+    payload = {
+        "auth_mode": "oauth",
+        "tokens": {
+            "access_token": "next-access",
+            "refresh_token": "next-refresh",
+        },
+        "last_refresh": "2026-02-26T00:00:00Z",
+    }
+
+    _persist_codex_auth_payload(auth_path, payload)
+
+    stored = json.loads(auth_path.read_text())
+    assert stored == payload
+    assert list(tmp_path.glob(".auth.json.*.tmp")) == []
+
+
 def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch):
    monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home"))
    status = get_codex_auth_status()
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@ -0,0 +1,175 @@
+import asyncio
+import sys
+import types
+from types import SimpleNamespace
+
+
+# Register lightweight stand-ins for `fire`, `firecrawl`, and `fal_client` in
+# sys.modules unless an entry is already present. This runs before the project
+# imports below -- presumably so those imports succeed without the real
+# packages installed (TODO confirm against the imported modules).
+sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
+sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
+sys.modules.setdefault("fal_client", types.SimpleNamespace())
+
+import cron.scheduler as cron_scheduler
+import gateway.run as gateway_run
+import run_agent
+from gateway.config import Platform
+from gateway.session import SessionSource
+
+
+def _patch_agent_bootstrap(monkeypatch):
+    """Stub the tool-discovery hooks on ``run_agent`` for the duration of a test.
+
+    ``run_agent.get_tool_definitions`` is replaced with a callable that
+    ignores its keyword arguments and returns a single ``terminal`` function
+    tool; ``run_agent.check_toolset_requirements`` is replaced with a callable
+    returning an empty dict.
+    """
+    monkeypatch.setattr(
+        run_agent,
+        "get_tool_definitions",
+        lambda **kwargs: [
+            {
+                "type": "function",
+                "function": {
+                    "name": "terminal",
+                    "description": "Run shell commands.",
+                    "parameters": {"type": "object", "properties": {}},
+                },
+            }
+        ],
+    )
+    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
+
+
+def _codex_message_response(text: str):
+    """Build a fake response object carrying one ``output_text`` message.
+
+    Returns a ``SimpleNamespace`` with ``output`` (a single ``message`` item
+    whose content is one ``output_text`` part holding *text*), a fixed
+    ``usage`` record, ``status="completed"`` and ``model="gpt-5-codex"``.
+    """
+    return SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                content=[SimpleNamespace(type="output_text", text=text)],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
+        status="completed",
+        model="gpt-5-codex",
+    )
+
+
+class _UnauthorizedError(RuntimeError):
+    """Fake 401 failure: a RuntimeError with a fixed message and ``status_code = 401``."""
+
+    def __init__(self):
+        super().__init__("Error code: 401 - unauthorized")
+        self.status_code = 401
+
+
+class _FakeOpenAI:
+    """Stand-in client object that only records how it was constructed."""
+
+    def __init__(self, **kwargs):
+        # Keep the constructor keyword arguments available for inspection.
+        self.kwargs = kwargs
+
+    def close(self):
+        """Do nothing; present so callers that close the client can do so."""
+        return None
+
+
+class _Codex401ThenSuccessAgent(run_agent.AIAgent):
+    """AIAgent subclass whose first API call fails with a 401 and later calls succeed.
+
+    Refresh attempts and the constructor kwargs are recorded on the class so
+    tests can inspect them after the agent was created elsewhere.
+    """
+
+    # Number of times _try_refresh_codex_client_credentials was invoked.
+    refresh_attempts = 0
+    # Copy of the keyword arguments passed to the most recent __init__ call.
+    last_init = {}
+
+    def __init__(self, *args, **kwargs):
+        """Apply test defaults, record the kwargs, and neutralise persistence hooks."""
+        kwargs.setdefault("skip_context_files", True)
+        kwargs.setdefault("skip_memory", True)
+        kwargs.setdefault("max_iterations", 4)
+        # Rebind (rather than mutate) the class attribute with a fresh copy.
+        type(self).last_init = dict(kwargs)
+        super().__init__(*args, **kwargs)
+        # Replace these instance hooks with no-ops returning None.
+        self._cleanup_task_resources = lambda task_id: None
+        self._persist_session = lambda messages, history=None: None
+        self._save_trajectory = lambda messages, user_message, completed: None
+        self._save_session_log = lambda messages: None
+
+    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
+        """Count the refresh attempt and report success without refreshing anything."""
+        type(self).refresh_attempts += 1
+        return True
+
+    def run_conversation(self, user_message: str, conversation_history=None):
+        """Install a fake API call (401 first, success afterwards), then delegate to AIAgent."""
+        calls = {"api": 0}
+
+        def _fake_api_call(api_kwargs):
+            calls["api"] += 1
+            # Only the very first call of this conversation fails.
+            if calls["api"] == 1:
+                raise _UnauthorizedError()
+            return _codex_message_response("Recovered via refresh")
+
+        self._interruptible_api_call = _fake_api_call
+        return super().run_conversation(user_message, conversation_history=conversation_history)
+
+
+def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
+    """``cron_scheduler.run_job`` succeeds when the agent's first API call returns 401.
+
+    ``run_agent.AIAgent`` is swapped for ``_Codex401ThenSuccessAgent`` and the
+    runtime provider resolver is stubbed to return Codex settings. The test
+    checks that the job succeeds with the recovered response, that exactly
+    one refresh was attempted, and that the agent was constructed with the
+    Codex provider and API mode.
+    """
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
+    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
+    monkeypatch.setattr(
+        "hermes_cli.runtime_provider.resolve_runtime_provider",
+        lambda requested=None: {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+        },
+    )
+    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
+
+    # Reset the class-level counters so earlier tests cannot influence the asserts.
+    _Codex401ThenSuccessAgent.refresh_attempts = 0
+    _Codex401ThenSuccessAgent.last_init = {}
+
+    success, output, final_response, error = cron_scheduler.run_job(
+        {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
+    )
+
+    assert success is True
+    assert error is None
+    assert final_response == "Recovered via refresh"
+    assert "Recovered via refresh" in output
+    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
+    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
+    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
+
+
+def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
+    """``GatewayRunner._run_agent`` returns the recovered response after a 401.
+
+    ``run_agent.AIAgent`` is swapped for ``_Codex401ThenSuccessAgent`` and
+    ``gateway_run._resolve_runtime_agent_kwargs`` is stubbed to return Codex
+    settings. The test checks the final response, that exactly one refresh
+    was attempted, and that the agent was constructed with the Codex
+    provider and API mode.
+    """
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
+    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+        },
+    )
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+
+    # Reset the class-level counters so earlier tests cannot influence the asserts.
+    _Codex401ThenSuccessAgent.refresh_attempts = 0
+    _Codex401ThenSuccessAgent.last_init = {}
+
+    # __new__ creates the runner without running GatewayRunner.__init__, so the
+    # attributes are assigned by hand -- presumably the ones _run_agent reads
+    # (TODO confirm against gateway.run).
+    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._running_agents = {}
+
+    source = SessionSource(
+        platform=Platform.LOCAL,
+        chat_id="cli",
+        chat_name="CLI",
+        chat_type="dm",
+        user_id="user-1",
+    )
+
+    # _run_agent is awaited, so drive it to completion with asyncio.run.
+    result = asyncio.run(
+        runner._run_agent(
+            message="ping",
+            context_prompt="",
+            history=[],
+            source=source,
+            session_id="session-1",
+            session_key="agent:main:local:dm",
+        )
+    )
+
+    assert result["final_response"] == "Recovered via refresh"
+    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
+    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
+    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
--- a/tests/test_codex_models.py
+++ b/tests/test_codex_models.py
@ -0,0 +1,40 @@
+import json
+
+from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
+
+
+def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch):
+    """Model listing puts the configured model first and filters the cache entries.
+
+    A temporary CODEX_HOME is populated with a ``config.toml`` naming
+    ``gpt-5.2-codex`` and a ``models_cache.json`` with four entries. The test
+    checks that the configured model is first, that the two visible
+    ``*-codex`` cache entries are present, and that ``gpt-4o`` and the entry
+    marked ``"visibility": "hidden"`` are absent.
+    """
+    codex_home = tmp_path / "codex-home"
+    codex_home.mkdir(parents=True, exist_ok=True)
+    (codex_home / "config.toml").write_text('model = "gpt-5.2-codex"\n')
+    (codex_home / "models_cache.json").write_text(
+        json.dumps(
+            {
+                "models": [
+                    {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
+                    {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
+                    {"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
+                    {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
+                ]
+            }
+        )
+    )
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    models = get_codex_model_ids()
+
+    assert models[0] == "gpt-5.2-codex"
+    assert "gpt-5.1-codex" in models
+    assert "gpt-5.3-codex" in models
+    assert "gpt-4o" not in models
+    assert "gpt-5-hidden-codex" not in models
+
+
+def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch):
+    """With an empty CODEX_HOME the listing starts with ``DEFAULT_CODEX_MODELS``.
+
+    The directory is created but holds neither a config nor a models cache;
+    the leading slice of the result must equal the default list.
+    """
+    codex_home = tmp_path / "codex-home"
+    codex_home.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    models = get_codex_model_ids()
+
+    assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS
--- a/tests/test_run_agent_codex_responses.py
+++ b/tests/test_run_agent_codex_responses.py
@ -2,6 +2,8 @@ import sys
 import types
 from types import SimpleNamespace

+import pytest
+

 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@ -156,6 +158,16 @@ class _FakeCreateStream:
        self.closed = True


+def _codex_request_kwargs():
+    """Return a fresh request-kwargs dict used by the Codex stream and preflight tests.
+
+    A new dict is built on every call, so callers may mutate the result
+    without affecting other tests.
+    """
+    return {
+        "model": "gpt-5-codex",
+        "instructions": "You are Hermes.",
+        "input": [{"role": "user", "content": "Ping"}],
+        "tools": None,
+        "store": False,
+    }
+
+
 def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
@ -222,6 +234,10 @@ def test_build_api_kwargs_codex(monkeypatch):
    assert kwargs["tools"][0]["name"] == "terminal"
    assert kwargs["tools"][0]["strict"] is False
    assert "function" not in kwargs["tools"][0]
+    assert kwargs["store"] is False
+    assert "timeout" not in kwargs
+    assert "max_tokens" not in kwargs
+    assert "extra_body" not in kwargs


 def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
@ -243,7 +259,7 @@ def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
        )
    )

-    response = agent._run_codex_stream({"model": "gpt-5-codex"})
+    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert response.output[0].content[0].text == "stream ok"

@ -269,7 +285,7 @@ def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(mon
        )
    )

-    response = agent._run_codex_stream({"model": "gpt-5-codex"})
+    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert response.output[0].content[0].text == "create fallback ok"
@ -304,7 +320,7 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
        )
    )

-    response = agent._run_codex_stream({"model": "gpt-5-codex"})
+    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert create_stream.closed is True
@ -323,6 +339,72 @@ def test_run_conversation_codex_plain_text(monkeypatch):
    assert result["messages"][-1]["content"] == "OK"


+def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
+    """A 401 on the first API call triggers one forced refresh and one retry.
+
+    The agent's API call and credential-refresh hooks are replaced with
+    counting fakes. The test checks two API calls, one refresh (invoked with
+    ``force=True``), and a completed conversation carrying the retried
+    response text.
+    """
+    agent = _build_agent(monkeypatch)
+    calls = {"api": 0, "refresh": 0}
+
+    class _UnauthorizedError(RuntimeError):
+        """Fake 401 failure exposing ``status_code = 401``."""
+
+        def __init__(self):
+            super().__init__("Error code: 401 - unauthorized")
+            self.status_code = 401
+
+    def _fake_api_call(api_kwargs):
+        calls["api"] += 1
+        # Fail only the first call; every later call succeeds.
+        if calls["api"] == 1:
+            raise _UnauthorizedError()
+        return _codex_message_response("Recovered after refresh")
+
+    def _fake_refresh(*, force=True):
+        calls["refresh"] += 1
+        assert force is True
+        return True
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
+    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)
+
+    result = agent.run_conversation("Say OK")
+
+    assert calls["api"] == 2
+    assert calls["refresh"] == 1
+    assert result["completed"] is True
+    assert result["final_response"] == "Recovered after refresh"
+
+
+def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
+    """A forced refresh closes the old client and builds a new one from fresh credentials.
+
+    ``hermes_cli.auth.resolve_codex_runtime_credentials`` is stubbed to return
+    a new token and base URL, and ``run_agent.OpenAI`` is replaced by a
+    factory that records its kwargs. The test checks that the refresh
+    reports success, the previous client was closed, the factory received
+    the new credentials, and ``agent.client`` is the rebuilt instance.
+    """
+    agent = _build_agent(monkeypatch)
+    closed = {"value": False}
+    rebuilt = {"kwargs": None}
+
+    class _ExistingClient:
+        """Client in place before the refresh; records that close() was called."""
+
+        def close(self):
+            closed["value"] = True
+
+    class _RebuiltClient:
+        """Marker type for the client created during the refresh."""
+
+        pass
+
+    def _fake_openai(**kwargs):
+        # Capture the constructor kwargs so the asserts can inspect them.
+        rebuilt["kwargs"] = kwargs
+        return _RebuiltClient()
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_codex_runtime_credentials",
+        lambda force_refresh=True: {
+            "api_key": "new-codex-token",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+        },
+    )
+    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
+
+    agent.client = _ExistingClient()
+    ok = agent._try_refresh_codex_client_credentials(force=True)
+
+    assert ok is True
+    assert closed["value"] is True
+    assert rebuilt["kwargs"]["api_key"] == "new-codex-token"
+    assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
+    assert isinstance(agent.client, _RebuiltClient)
+
+
 def test_run_conversation_codex_tool_round_trip(monkeypatch):
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
@ -404,6 +486,56 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    assert function_output["call_id"] == "call_pair123"


+def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
+    """Preflight drops ``id`` from a ``function_call`` input item but keeps ``call_id``."""
+    agent = _build_agent(monkeypatch)
+    preflight = agent._preflight_codex_api_kwargs(
+        {
+            "model": "gpt-5-codex",
+            "instructions": "You are Hermes.",
+            "input": [
+                {"role": "user", "content": "hi"},
+                {
+                    "type": "function_call",
+                    "id": "call_bad",
+                    "call_id": "call_good",
+                    "name": "terminal",
+                    "arguments": "{}",
+                },
+            ],
+            "tools": [],
+            "store": False,
+        }
+    )
+
+    # Locate the function_call item in the preflighted input.
+    fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call")
+    assert fn_call["call_id"] == "call_good"
+    assert "id" not in fn_call
+
+
+def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
+    """Preflight raises ValueError when a ``function_call_output`` item has no ``call_id``."""
+    agent = _build_agent(monkeypatch)
+
+    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
+        agent._preflight_codex_api_kwargs(
+            {
+                "model": "gpt-5-codex",
+                "instructions": "You are Hermes.",
+                "input": [{"type": "function_call_output", "output": "{}"}],
+                "tools": [],
+                "store": False,
+            }
+        )
+
+
+def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
+    """Preflight raises ValueError when the request carries a ``temperature`` key."""
+    agent = _build_agent(monkeypatch)
+    kwargs = _codex_request_kwargs()
+    # Add a key that is not part of the baseline request built by the helper.
+    kwargs["temperature"] = 0
+
+    with pytest.raises(ValueError, match="unsupported field"):
+        agent._preflight_codex_api_kwargs(kwargs)
+
+
 def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]