fix: codex OAuth credential pool disconnect + expired token import (#5681)

Three bugs causing OpenAI Codex sessions to fail silently:

1. Credential pool vs legacy store disconnect: hermes auth and hermes
   model store device_code tokens in the credential pool, but
   get_codex_auth_status(), resolve_codex_runtime_credentials(), and
   _model_flow_openai_codex() only read from the legacy provider state.
   Fresh pool tokens were invisible to the auth status checks and model
   selection flow.

2. _import_codex_cli_tokens() imported expired tokens from ~/.codex/
   without checking JWT expiry. Combined with _login_openai_codex()
   saying 'Login successful!' for expired credentials, users got stuck
   in a loop of dead tokens being recycled.

3. _login_openai_codex() accepted expired tokens from
   resolve_codex_runtime_credentials() without validating expiry before
   telling the user login succeeded.

Fixes:
- get_codex_auth_status() now checks credential pool first, falls back
  to legacy provider state
- _model_flow_openai_codex() uses pool-aware auth status for token
  retrieval when fetching model lists
- _import_codex_cli_tokens() validates the JWT exp claim and rejects expired tokens
- _login_openai_codex() verifies the resolved token isn't expiring before
  accepting existing credentials
- _run_codex_stream() logs response.incomplete/failed terminal events
  with status and incomplete_details for diagnostics
- Codex empty output recovery: captures streamed text during streaming
  and synthesizes a response when get_final_response() returns empty
  output (handles chatgpt.com backend-api edge cases)
This commit is contained in:
Teknium 2026-04-06 18:10:33 -07:00 committed by GitHub
parent 8cf013ecd9
commit dc4c07ed9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 136 additions and 19 deletions

View file

@ -1099,7 +1099,8 @@ def _refresh_codex_auth_tokens(
def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
"""Try to read tokens from ~/.codex/auth.json (Codex CLI shared file).
Returns tokens dict if valid, None otherwise. Does NOT write to the shared file.
Returns tokens dict if valid and not expired, None otherwise.
Does NOT write to the shared file.
"""
codex_home = os.getenv("CODEX_HOME", "").strip()
if not codex_home:
@ -1112,7 +1113,17 @@ def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
tokens = payload.get("tokens")
if not isinstance(tokens, dict):
return None
if not tokens.get("access_token") or not tokens.get("refresh_token"):
access_token = tokens.get("access_token")
refresh_token = tokens.get("refresh_token")
if not access_token or not refresh_token:
return None
# Reject expired tokens — importing stale tokens from ~/.codex/
# that can't be refreshed leaves the user stuck with "Login successful!"
# but no working credentials.
if _codex_access_token_is_expiring(access_token, 0):
logger.debug(
"Codex CLI tokens at %s are expired — skipping import.", auth_path,
)
return None
return dict(tokens)
except Exception:
@ -1904,7 +1915,36 @@ def get_nous_auth_status() -> Dict[str, Any]:
def get_codex_auth_status() -> Dict[str, Any]:
"""Status snapshot for Codex auth."""
"""Status snapshot for Codex auth.
Checks the credential pool first (where `hermes auth` stores credentials),
then falls back to the legacy provider state.
"""
# Check credential pool first — this is where `hermes auth` and
# `hermes model` store device_code tokens.
try:
from agent.credential_pool import load_pool
pool = load_pool("openai-codex")
if pool and pool.has_credentials():
entry = pool.select()
if entry is not None:
api_key = (
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
if api_key and not _codex_access_token_is_expiring(api_key, 0):
return {
"logged_in": True,
"auth_store": str(_auth_file_path()),
"last_refresh": getattr(entry, "last_refresh", None),
"auth_mode": "chatgpt",
"source": f"pool:{getattr(entry, 'label', 'unknown')}",
"api_key": api_key,
}
except Exception:
pass
# Fall back to legacy provider state
try:
creds = resolve_codex_runtime_credentials()
return {
@ -1913,6 +1953,7 @@ def get_codex_auth_status() -> Dict[str, Any]:
"last_refresh": creds.get("last_refresh"),
"auth_mode": creds.get("auth_mode"),
"source": creds.get("source"),
"api_key": creds.get("api_key"),
}
except AuthError as exc:
return {
@ -2356,17 +2397,25 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
# Check for existing Hermes-owned credentials
try:
existing = resolve_codex_runtime_credentials()
print("Existing Codex credentials found in Hermes auth store.")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
return
# Verify the resolved token is actually usable (not expired).
# resolve_codex_runtime_credentials attempts refresh, so if we get
# here the token should be valid — but double-check before telling
# the user "Login successful!".
_resolved_key = existing.get("api_key", "")
if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60):
print("Existing Codex credentials found in Hermes auth store.")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
return
else:
print("Existing Codex credentials are expired. Starting fresh login...")
except AuthError:
pass

View file

@ -1294,12 +1294,21 @@ def _model_flow_openai_codex(config, current_model=""):
return
_codex_token = None
# Prefer credential pool (where `hermes auth` stores device_code tokens),
# fall back to legacy provider state.
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
_codex_status = get_codex_auth_status()
if _codex_status.get("logged_in"):
_codex_token = _codex_status.get("api_key")
except Exception:
pass
if not _codex_token:
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
except Exception:
pass
codex_models = get_codex_model_ids(access_token=_codex_token)

View file

@ -3868,6 +3868,10 @@ class AIAgent:
has_tool_calls = False
first_delta_fired = False
self._reasoning_deltas_fired = False
# Accumulate streamed text so we can recover if get_final_response()
# returns empty output (e.g. chatgpt.com backend-api sends
# response.incomplete instead of response.completed).
self._codex_streamed_text_parts: list = []
for attempt in range(max_stream_retries + 1):
try:
with active_client.responses.stream(**api_kwargs) as stream:
@ -3887,6 +3891,7 @@ class AIAgent:
except Exception:
pass
self._fire_stream_delta(delta_text)
self._codex_streamed_text_parts.append(delta_text)
# Track tool calls to suppress text streaming
elif "function_call" in event_type:
has_tool_calls = True
@ -3895,6 +3900,18 @@ class AIAgent:
reasoning_text = getattr(event, "delta", "")
if reasoning_text:
self._fire_reasoning_delta(reasoning_text)
# Log non-completed terminal events for diagnostics
elif event_type in ("response.incomplete", "response.failed"):
resp_obj = getattr(event, "response", None)
status = getattr(resp_obj, "status", None) if resp_obj else None
incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
logger.warning(
"Codex Responses stream received terminal event %s "
"(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
event_type, status, incomplete_details,
sum(len(p) for p in self._codex_streamed_text_parts),
self._client_log_context(),
)
return stream.get_final_response()
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
if attempt < max_stream_retries:
@ -7366,8 +7383,50 @@ class AIAgent:
response_invalid = True
error_details.append("response.output is not a list")
elif len(output_items) == 0:
response_invalid = True
error_details.append("response.output is empty")
# Log diagnostics for empty output
_resp_status = getattr(response, "status", None)
_resp_incomplete = getattr(response, "incomplete_details", None)
_streamed_parts = getattr(self, "_codex_streamed_text_parts", [])
_streamed_text = "".join(_streamed_parts).strip() if _streamed_parts else ""
logging.warning(
"Codex response.output is empty "
"(status=%s, incomplete_details=%s, streamed_chars=%d, "
"output_text=%r, model=%s). %s",
_resp_status, _resp_incomplete, len(_streamed_text),
getattr(response, "output_text", None),
getattr(response, "model", None),
f"api_mode={self.api_mode} provider={self.provider}",
)
# Recovery: if we streamed text but the final response
# lost it (e.g. response.incomplete from chatgpt backend-api),
# synthesize a minimal response so the user gets the answer
# the model already delivered.
if _streamed_text:
logging.info(
"Recovering %d chars of streamed text as response "
"(status was %s).", len(_streamed_text), _resp_status,
)
response = SimpleNamespace(
output=[SimpleNamespace(
type="message",
role="assistant",
status="completed",
content=[SimpleNamespace(
type="output_text",
text=_streamed_text,
)],
)],
status=_resp_status or "completed",
model=getattr(response, "model", self.model),
usage=getattr(response, "usage", None),
id=getattr(response, "id", None),
output_text=_streamed_text,
)
# Clear the accumulated parts so we don't double-recover
self._codex_streamed_text_parts = []
else:
response_invalid = True
error_details.append("response.output is empty")
elif self.api_mode == "anthropic_messages":
content_blocks = getattr(response, "content", None) if response is not None else None
if response is None: