fix: codex OAuth credential pool disconnect + expired token import (#5681)

Three bugs causing OpenAI Codex sessions to fail silently:

1. Credential pool vs legacy store disconnect: hermes auth and hermes
   model store device_code tokens in the credential pool, but
   get_codex_auth_status(), resolve_codex_runtime_credentials(), and
   _model_flow_openai_codex() only read from the legacy provider state.
   Fresh pool tokens were invisible to the auth status checks and model
   selection flow.

2. _import_codex_cli_tokens() imported expired tokens from ~/.codex/
   without checking JWT expiry. Combined with _login_openai_codex()
   saying 'Login successful!' for expired credentials, users got stuck
   in a loop of dead tokens being recycled.

3. _login_openai_codex() accepted expired tokens from
   resolve_codex_runtime_credentials() without validating expiry before
   telling the user login succeeded.

Fixes:
- get_codex_auth_status() now checks credential pool first, falls back
  to legacy provider state
- _model_flow_openai_codex() uses pool-aware auth status for token
  retrieval when fetching model lists
- _import_codex_cli_tokens() validates the JWT exp claim and rejects expired tokens
- _login_openai_codex() verifies the resolved token isn't expiring before
  accepting existing credentials
- _run_codex_stream() logs response.incomplete/failed terminal events
  with status and incomplete_details for diagnostics
- Codex empty output recovery: captures streamed text during streaming
  and synthesizes a response when get_final_response() returns empty
  output (handles chatgpt.com backend-api edge cases)
This commit is contained in:
Teknium 2026-04-06 18:10:33 -07:00 committed by GitHub
parent 8cf013ecd9
commit dc4c07ed9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 136 additions and 19 deletions

View file

@ -1099,7 +1099,8 @@ def _refresh_codex_auth_tokens(
def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
"""Try to read tokens from ~/.codex/auth.json (Codex CLI shared file).
Returns tokens dict if valid, None otherwise. Does NOT write to the shared file.
Returns tokens dict if valid and not expired, None otherwise.
Does NOT write to the shared file.
"""
codex_home = os.getenv("CODEX_HOME", "").strip()
if not codex_home:
@ -1112,7 +1113,17 @@ def _import_codex_cli_tokens() -> Optional[Dict[str, str]]:
tokens = payload.get("tokens")
if not isinstance(tokens, dict):
return None
if not tokens.get("access_token") or not tokens.get("refresh_token"):
access_token = tokens.get("access_token")
refresh_token = tokens.get("refresh_token")
if not access_token or not refresh_token:
return None
# Reject expired tokens — importing stale tokens from ~/.codex/
# that can't be refreshed leaves the user stuck with "Login successful!"
# but no working credentials.
if _codex_access_token_is_expiring(access_token, 0):
logger.debug(
"Codex CLI tokens at %s are expired — skipping import.", auth_path,
)
return None
return dict(tokens)
except Exception:
@ -1904,7 +1915,36 @@ def get_nous_auth_status() -> Dict[str, Any]:
def get_codex_auth_status() -> Dict[str, Any]:
"""Status snapshot for Codex auth."""
"""Status snapshot for Codex auth.
Checks the credential pool first (where `hermes auth` stores credentials),
then falls back to the legacy provider state.
"""
# Check credential pool first — this is where `hermes auth` and
# `hermes model` store device_code tokens.
try:
from agent.credential_pool import load_pool
pool = load_pool("openai-codex")
if pool and pool.has_credentials():
entry = pool.select()
if entry is not None:
api_key = (
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
if api_key and not _codex_access_token_is_expiring(api_key, 0):
return {
"logged_in": True,
"auth_store": str(_auth_file_path()),
"last_refresh": getattr(entry, "last_refresh", None),
"auth_mode": "chatgpt",
"source": f"pool:{getattr(entry, 'label', 'unknown')}",
"api_key": api_key,
}
except Exception:
pass
# Fall back to legacy provider state
try:
creds = resolve_codex_runtime_credentials()
return {
@ -1913,6 +1953,7 @@ def get_codex_auth_status() -> Dict[str, Any]:
"last_refresh": creds.get("last_refresh"),
"auth_mode": creds.get("auth_mode"),
"source": creds.get("source"),
"api_key": creds.get("api_key"),
}
except AuthError as exc:
return {
@ -2356,17 +2397,25 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
# Check for existing Hermes-owned credentials
try:
existing = resolve_codex_runtime_credentials()
print("Existing Codex credentials found in Hermes auth store.")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
return
# Verify the resolved token is actually usable (not expired).
# resolve_codex_runtime_credentials attempts refresh, so if we get
# here the token should be valid — but double-check before telling
# the user "Login successful!".
_resolved_key = existing.get("api_key", "")
if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60):
print("Existing Codex credentials found in Hermes auth store.")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
return
else:
print("Existing Codex credentials are expired. Starting fresh login...")
except AuthError:
pass

View file

@ -1294,12 +1294,21 @@ def _model_flow_openai_codex(config, current_model=""):
return
_codex_token = None
# Prefer credential pool (where `hermes auth` stores device_code tokens),
# fall back to legacy provider state.
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
_codex_status = get_codex_auth_status()
if _codex_status.get("logged_in"):
_codex_token = _codex_status.get("api_key")
except Exception:
pass
if not _codex_token:
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
except Exception:
pass
codex_models = get_codex_model_ids(access_token=_codex_token)

View file

@ -3868,6 +3868,10 @@ class AIAgent:
has_tool_calls = False
first_delta_fired = False
self._reasoning_deltas_fired = False
# Accumulate streamed text so we can recover if get_final_response()
# returns empty output (e.g. chatgpt.com backend-api sends
# response.incomplete instead of response.completed).
self._codex_streamed_text_parts: list = []
for attempt in range(max_stream_retries + 1):
try:
with active_client.responses.stream(**api_kwargs) as stream:
@ -3887,6 +3891,7 @@ class AIAgent:
except Exception:
pass
self._fire_stream_delta(delta_text)
self._codex_streamed_text_parts.append(delta_text)
# Track tool calls to suppress text streaming
elif "function_call" in event_type:
has_tool_calls = True
@ -3895,6 +3900,18 @@ class AIAgent:
reasoning_text = getattr(event, "delta", "")
if reasoning_text:
self._fire_reasoning_delta(reasoning_text)
# Log non-completed terminal events for diagnostics
elif event_type in ("response.incomplete", "response.failed"):
resp_obj = getattr(event, "response", None)
status = getattr(resp_obj, "status", None) if resp_obj else None
incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
logger.warning(
"Codex Responses stream received terminal event %s "
"(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
event_type, status, incomplete_details,
sum(len(p) for p in self._codex_streamed_text_parts),
self._client_log_context(),
)
return stream.get_final_response()
except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
if attempt < max_stream_retries:
@ -7366,8 +7383,50 @@ class AIAgent:
response_invalid = True
error_details.append("response.output is not a list")
elif len(output_items) == 0:
response_invalid = True
error_details.append("response.output is empty")
# Log diagnostics for empty output
_resp_status = getattr(response, "status", None)
_resp_incomplete = getattr(response, "incomplete_details", None)
_streamed_parts = getattr(self, "_codex_streamed_text_parts", [])
_streamed_text = "".join(_streamed_parts).strip() if _streamed_parts else ""
logging.warning(
"Codex response.output is empty "
"(status=%s, incomplete_details=%s, streamed_chars=%d, "
"output_text=%r, model=%s). %s",
_resp_status, _resp_incomplete, len(_streamed_text),
getattr(response, "output_text", None),
getattr(response, "model", None),
f"api_mode={self.api_mode} provider={self.provider}",
)
# Recovery: if we streamed text but the final response
# lost it (e.g. response.incomplete from chatgpt backend-api),
# synthesize a minimal response so the user gets the answer
# the model already delivered.
if _streamed_text:
logging.info(
"Recovering %d chars of streamed text as response "
"(status was %s).", len(_streamed_text), _resp_status,
)
response = SimpleNamespace(
output=[SimpleNamespace(
type="message",
role="assistant",
status="completed",
content=[SimpleNamespace(
type="output_text",
text=_streamed_text,
)],
)],
status=_resp_status or "completed",
model=getattr(response, "model", self.model),
usage=getattr(response, "usage", None),
id=getattr(response, "id", None),
output_text=_streamed_text,
)
# Clear the accumulated parts so we don't double-recover
self._codex_streamed_text_parts = []
else:
response_invalid = True
error_details.append("response.output is empty")
elif self.api_mode == "anthropic_messages":
content_blocks = getattr(response, "content", None) if response is not None else None
if response is None: