fix: codex OAuth credential pool disconnect + expired token import (#5681)

Three bugs causing OpenAI Codex sessions to fail silently:

1. Credential pool vs legacy store disconnect: hermes auth and hermes
   model store device_code tokens in the credential pool, but
   get_codex_auth_status(), resolve_codex_runtime_credentials(), and
   _model_flow_openai_codex() only read from the legacy provider state.
   Fresh pool tokens were invisible to the auth status checks and model
   selection flow.

2. _import_codex_cli_tokens() imported expired tokens from ~/.codex/
   without checking JWT expiry. Combined with _login_openai_codex()
   saying 'Login successful!' for expired credentials, users got stuck
   in a loop of dead tokens being recycled.

3. _login_openai_codex() accepted expired tokens from
   resolve_codex_runtime_credentials() without validating expiry before
   telling the user login succeeded.

Fixes:
- get_codex_auth_status() now checks credential pool first, falls back
  to legacy provider state
- _model_flow_openai_codex() uses pool-aware auth status for token
  retrieval when fetching model lists
- _import_codex_cli_tokens() validates JWT exp claim, rejects expired
- _login_openai_codex() verifies resolved token isn't expiring before
  accepting existing credentials
- _run_codex_stream() logs response.incomplete/failed terminal events
  with status and incomplete_details for diagnostics
- Codex empty output recovery: captures streamed text during streaming
  and synthesizes a response when get_final_response() returns empty
  output (handles chatgpt.com backend-api edge cases)
This commit is contained in:
Teknium 2026-04-06 18:10:33 -07:00 committed by GitHub
parent 8cf013ecd9
commit dc4c07ed9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 136 additions and 19 deletions

View file

@ -1099,7 +1099,8 @@ def _refresh_codex_auth_tokens(
def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
"""Try to read tokens from ~/.codex/auth.json (Codex CLI shared file).
Returns tokens dict if valid, None otherwise. Does NOT write to the shared file.
Returns tokens dict if valid and not expired, None otherwise.
Does NOT write to the shared file.
"""
codex_home = os.getenv("CODEX_HOME", "").strip()
if not codex_home:
@ -1112,7 +1113,17 @@ def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
tokens = payload.get("tokens")
if not isinstance(tokens, dict):
return None
if not tokens.get("access_token") or not tokens.get("refresh_token"):
access_token = tokens.get("access_token")
refresh_token = tokens.get("refresh_token")
if not access_token or not refresh_token:
return None
# Reject expired tokens — importing stale tokens from ~/.codex/
# that can't be refreshed leaves the user stuck with "Login successful!"
# but no working credentials.
if _codex_access_token_is_expiring(access_token, 0):
logger.debug(
"Codex CLI tokens at %s are expired — skipping import.", auth_path,
)
return None
return dict(tokens)
except Exception:
@ -1904,7 +1915,36 @@ def get_nous_auth_status() -> Dict[str, Any]:
def get_codex_auth_status() -> Dict[str, Any]:
"""Status snapshot for Codex auth."""
"""Status snapshot for Codex auth.
Checks the credential pool first (where `hermes auth` stores credentials),
then falls back to the legacy provider state.
"""
# Check credential pool first — this is where `hermes auth` and
# `hermes model` store device_code tokens.
try:
from agent.credential_pool import load_pool
pool = load_pool("openai-codex")
if pool and pool.has_credentials():
entry = pool.select()
if entry is not None:
api_key = (
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
if api_key and not _codex_access_token_is_expiring(api_key, 0):
return {
"logged_in": True,
"auth_store": str(_auth_file_path()),
"last_refresh": getattr(entry, "last_refresh", None),
"auth_mode": "chatgpt",
"source": f"pool:{getattr(entry, 'label', 'unknown')}",
"api_key": api_key,
}
except Exception:
pass
# Fall back to legacy provider state
try:
creds = resolve_codex_runtime_credentials()
return {
@ -1913,6 +1953,7 @@ def get_codex_auth_status() -> Dict[str, Any]:
"last_refresh": creds.get("last_refresh"),
"auth_mode": creds.get("auth_mode"),
"source": creds.get("source"),
"api_key": creds.get("api_key"),
}
except AuthError as exc:
return {
@ -2356,17 +2397,25 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
# Check for existing Hermes-owned credentials
try:
existing = resolve_codex_runtime_credentials()
print("Existing Codex credentials found in Hermes auth store.")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
return
# Verify the resolved token is actually usable (not expired).
# resolve_codex_runtime_credentials attempts refresh, so if we get
# here the token should be valid — but double-check before telling
# the user "Login successful!".
_resolved_key = existing.get("api_key", "")
if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60):
print("Existing Codex credentials found in Hermes auth store.")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
return
else:
print("Existing Codex credentials are expired. Starting fresh login...")
except AuthError:
pass