mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(codex): surface actionable hint when stale-call detector fires on known silent-reject pattern
The ChatGPT Codex backend (chatgpt.com/backend-api/codex) has historically silently dropped certain model requests: the connection is accepted, but no stream events are emitted and no error is raised. PR #31967 lowered the implicit stale-call default from 300s to 90s so fallbacks kick in faster, but users still see an opaque "No response from provider for 90s (non-streaming, ...)" message that gives no path forward.

This patch adds a narrow heuristic — gpt-5.5 family on the Codex backend via the codex_responses api_mode — that substitutes the generic timeout message with actionable text naming the gpt-5.4-codex workaround and pointing at #21444 for symptom history.

Changes:

- run_agent.py — new ``AIAgent._codex_silent_hang_hint(model=...)`` method. Returns ``None`` for any request that does not match all three guards: codex_responses api_mode; openai-codex provider or chatgpt.com Codex base URL; and a gpt-5.5-family model name (with word-boundary regex anchoring to avoid false positives on e.g. ``gpt-5.50``).
- agent/chat_completion_helpers.py — the non-stream stale-call site consults the hint via ``getattr(...)`` so the call site stays robust if the helper is ever removed or stubbed in tests. The hint is appended to both the ``_emit_status`` warning and the ``TimeoutError`` message, so the user sees it in their terminal AND it lands in any retry-loop diagnostics.
- tests/run_agent/test_codex_silent_hang_hint.py — 10 regression tests covering positive cases (bare gpt-5.5, vendor-prefixed openai/gpt-5.5, gpt-5.5-codex SKU, model=None fallback to self.model) and negative cases (gpt-5.4-codex workaround, gpt-5.50 false-positive guard, non-codex api_mode, non-codex provider, empty/None model, unrelated models on Codex).

This does NOT fix the backend-side issue (that is an upstream OpenAI/ChatGPT problem we cannot patch from here). It only converts an opaque timeout into text that names the workaround, so users do not have to dig through logs or wait for a forum post to learn what to do.
Closes #22046
This commit is contained in:
parent
4c64638897
commit
b1adb95038
4 changed files with 203 additions and 9 deletions
|
|
@ -278,17 +278,31 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
|||
_elapsed = time.time() - _call_start
|
||||
if _elapsed > _stale_timeout:
|
||||
_est_ctx = estimate_request_context_tokens(api_kwargs)
|
||||
_silent_hint: Optional[str] = None
|
||||
_hint_fn = getattr(agent, "_codex_silent_hang_hint", None)
|
||||
if callable(_hint_fn):
|
||||
try:
|
||||
_silent_hint = _hint_fn(model=api_kwargs.get("model"))
|
||||
except Exception:
|
||||
_silent_hint = None
|
||||
logger.warning(
|
||||
"Non-streaming API call stale for %.0fs (threshold %.0fs). "
|
||||
"model=%s context=~%s tokens. Killing connection.",
|
||||
_elapsed, _stale_timeout,
|
||||
api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
|
||||
)
|
||||
agent._emit_status(
|
||||
f"⚠️ No response from provider for {int(_elapsed)}s "
|
||||
f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
|
||||
f"Aborting call."
|
||||
)
|
||||
if _silent_hint:
|
||||
agent._emit_status(
|
||||
f"⚠️ No response from provider for {int(_elapsed)}s "
|
||||
f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
|
||||
f"{_silent_hint}"
|
||||
)
|
||||
else:
|
||||
agent._emit_status(
|
||||
f"⚠️ No response from provider for {int(_elapsed)}s "
|
||||
f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
|
||||
f"Aborting call."
|
||||
)
|
||||
try:
|
||||
if agent.api_mode == "anthropic_messages":
|
||||
agent._anthropic_client.close()
|
||||
|
|
@ -303,10 +317,17 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
|||
# Wait briefly for the thread to notice the closed connection.
|
||||
t.join(timeout=2.0)
|
||||
if result["error"] is None and result["response"] is None:
|
||||
result["error"] = TimeoutError(
|
||||
f"Non-streaming API call timed out after {int(_elapsed)}s "
|
||||
f"with no response (threshold: {int(_stale_timeout)}s)"
|
||||
)
|
||||
if _silent_hint:
|
||||
result["error"] = TimeoutError(
|
||||
f"Non-streaming API call timed out after {int(_elapsed)}s "
|
||||
f"with no response (threshold: {int(_stale_timeout)}s). "
|
||||
f"{_silent_hint}"
|
||||
)
|
||||
else:
|
||||
result["error"] = TimeoutError(
|
||||
f"Non-streaming API call timed out after {int(_elapsed)}s "
|
||||
f"with no response (threshold: {int(_stale_timeout)}s)"
|
||||
)
|
||||
break
|
||||
|
||||
if agent._interrupt_requested:
|
||||
|
|
|
|||
51
run_agent.py
51
run_agent.py
|
|
@ -927,6 +927,57 @@ class AIAgent:
|
|||
return max(stale_base, 150.0)
|
||||
return stale_base
|
||||
|
||||
def _codex_silent_hang_hint(self, model: Optional[str] = None) -> Optional[str]:
    """Build the workaround hint for a known Codex silent-reject setup.

    The ChatGPT Codex backend (``chatgpt.com/backend-api/codex``) has
    historically accepted some requests and then emitted neither stream
    events nor an error. The stale-call detector ends such a hang; this
    helper supplies text that can replace the generic "timed out" message
    so the user is told what to try next. It does not fix the backend
    behavior itself. See hermes-agent #21444 for the symptom history.

    A hint is produced only when all three conditions hold:

    1. ``self.api_mode`` is ``"codex_responses"``.
    2. ``self.provider`` is ``"openai-codex"``, or the base URL host is
       ``chatgpt.com`` and its lowercased form contains
       ``/backend-api/codex``.
    3. The effective model name belongs to the ``gpt-5.5`` family.

    Args:
        model: Model name of the request. When ``None``, ``self.model``
            is used instead.

    Returns:
        The hint text, or ``None`` when the request does not match.
    """
    # Guard 1: only the Codex Responses path is affected.
    if self.api_mode != "codex_responses":
        return None

    # Guard 2: accept either the explicit provider name or a base URL that
    # points at the ChatGPT Codex endpoint. The URL attributes are looked
    # up only when the provider check fails.
    if self.provider != "openai-codex":
        if getattr(self, "_base_url_hostname", "") != "chatgpt.com":
            return None
        base_url = getattr(self, "_base_url_lower", "") or ""
        if "/backend-api/codex" not in base_url:
            return None

    # Guard 3: ``gpt-5.5`` must be preceded by the start of the name or one
    # of ``/``, ``-``, ``_`` and followed by the end of the name or one of
    # ``-``, ``_``. That admits ``gpt-5.5``, ``gpt-5.5-codex`` and
    # ``openai/gpt-5.5`` while rejecting look-alikes such as ``gpt-5.50``.
    requested = (self.model if model is None else model) or ""
    family_match = re.search(
        r"(?:^|[/\-_])gpt-5\.5(?:$|[\-_])", requested.lower()
    )
    if family_match is None:
        return None

    # The model name is echoed as the caller supplied it (original case).
    opening = f"Codex backend appears to be silently rejecting {requested!r} "
    guidance = (
        "on chatgpt.com/backend-api/codex (no stream events, no error). "
        "This is a known backend-side pattern that has affected ChatGPT "
        "Plus accounts intermittently. "
        "Workaround: try `gpt-5.4-codex` on the same OAuth profile, "
        "or switch to a different model/provider in your fallback chain. "
        "See hermes-agent#21444 for symptom history."
    )
    return opening + guidance
|
||||
|
||||
def _is_openrouter_url(self) -> bool:
|
||||
"""Return True when the base URL targets OpenRouter."""
|
||||
return base_url_host_matches(self._base_url_lower, "openrouter.ai")
|
||||
|
|
|
|||
|
|
@ -595,6 +595,7 @@ AUTHOR_MAP = {
|
|||
"mgparkprint@gmail.com": "vlwkaos",
|
||||
"1317078257maroon@gmail.com": "Oxidane-bot",
|
||||
"tranquil_flow@protonmail.com": "Tranquil-Flow",
|
||||
"66773372+Tranquil-Flow@users.noreply.github.com": "Tranquil-Flow",
|
||||
"LyleLengyel@gmail.com": "mcndjxlefnd",
|
||||
"wangshengyang2004@163.com": "Wangshengyang2004",
|
||||
"hasan.ali13381@gmail.com": "H-Ali13381",
|
||||
|
|
|
|||
121
tests/run_agent/test_codex_silent_hang_hint.py
Normal file
121
tests/run_agent/test_codex_silent_hang_hint.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
"""Tests for the ``_codex_silent_hang_hint`` heuristic.
|
||||
|
||||
The helper substitutes an actionable hint into the stale-call timeout
|
||||
warning when the request matches a known Codex silent-reject pattern
|
||||
(gpt-5.5 family on the ChatGPT Codex backend). See issue #21444 for
|
||||
symptom history.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_agent(tmp_path: Path, **overrides):
    """Construct an ``AIAgent`` configured for the ChatGPT Codex backend.

    Any keyword in ``overrides`` replaces the matching default constructor
    argument. ``tmp_path`` is accepted but not used by this helper.
    """
    # Imported lazily so the import runs inside the test, after fixtures.
    from run_agent import AIAgent

    defaults = {
        "model": "gpt-5.5",
        "provider": "openai-codex",
        "api_key": "sk-dummy",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "platform": "cli",
    }
    return AIAgent(**{**defaults, **overrides})
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolate_hermes_home(monkeypatch, tmp_path):
    """Point ``HERMES_HOME`` at the per-test temp dir and give it an empty ``.env``."""
    env_file = tmp_path / ".env"
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file.write_text("", encoding="utf-8")
|
||||
|
||||
|
||||
# ── positive cases: hint fires ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_hint_fires_for_bare_gpt_5_5_on_codex(tmp_path):
    """Bare ``gpt-5.5`` on the Codex backend yields a hint naming the workaround."""
    codex_agent = _make_agent(tmp_path)
    codex_agent.api_mode = "codex_responses"
    message = codex_agent._codex_silent_hang_hint(model="gpt-5.5")
    assert message is not None
    for fragment in ("gpt-5.4-codex", "fallback chain"):
        assert fragment in message
|
||||
|
||||
|
||||
def test_hint_fires_for_vendor_prefixed_gpt_5_5(tmp_path):
    """A vendor-prefixed name (``openai/gpt-5.5``) still triggers the hint."""
    model_name = "openai/gpt-5.5"
    codex_agent = _make_agent(tmp_path, model=model_name)
    codex_agent.api_mode = "codex_responses"
    assert codex_agent._codex_silent_hang_hint(model=model_name) is not None
|
||||
|
||||
|
||||
def test_hint_fires_for_gpt_5_5_codex_suffix(tmp_path):
    """The ``gpt-5.5-codex`` SKU is part of the flagged family."""
    model_name = "gpt-5.5-codex"
    codex_agent = _make_agent(tmp_path, model=model_name)
    codex_agent.api_mode = "codex_responses"
    assert codex_agent._codex_silent_hang_hint(model=model_name) is not None
|
||||
|
||||
|
||||
def test_hint_fires_when_model_arg_omitted(tmp_path):
    """Without a ``model=`` argument the helper consults ``self.model``."""
    codex_agent = _make_agent(tmp_path)
    codex_agent.api_mode = "codex_responses"
    assert codex_agent._codex_silent_hang_hint() is not None
|
||||
|
||||
|
||||
# ── negative cases: hint stays None ────────────────────────────────────────
|
||||
|
||||
|
||||
def test_hint_skipped_for_gpt_5_4_codex(tmp_path):
    """The recommended workaround model, gpt-5.4-codex, must never be flagged."""
    model_name = "gpt-5.4-codex"
    codex_agent = _make_agent(tmp_path, model=model_name)
    codex_agent.api_mode = "codex_responses"
    result = codex_agent._codex_silent_hang_hint(model=model_name)
    assert result is None
|
||||
|
||||
|
||||
def test_hint_skipped_for_gpt_5_50_false_positive(tmp_path):
    """A hypothetical ``gpt-5.50`` SKU must not be mistaken for gpt-5.5."""
    model_name = "gpt-5.50"
    codex_agent = _make_agent(tmp_path, model=model_name)
    codex_agent.api_mode = "codex_responses"
    result = codex_agent._codex_silent_hang_hint(model=model_name)
    assert result is None
|
||||
|
||||
|
||||
def test_hint_skipped_for_non_codex_api_mode(tmp_path):
    """Outside the ``codex_responses`` api_mode no hint is produced."""
    chat_agent = _make_agent(tmp_path)
    chat_agent.api_mode = "chat_completions"
    result = chat_agent._codex_silent_hang_hint(model="gpt-5.5")
    assert result is None
|
||||
|
||||
|
||||
def test_hint_skipped_for_non_codex_provider(tmp_path):
    """A gpt-5.5 model served by a non-Codex provider is not flagged."""
    openrouter_settings = {
        "provider": "openrouter",
        "base_url": "https://openrouter.ai/api/v1",
        "model": "openai/gpt-5.5",
    }
    other_agent = _make_agent(tmp_path, **openrouter_settings)
    other_agent.api_mode = "codex_responses"
    result = other_agent._codex_silent_hang_hint(model="openai/gpt-5.5")
    assert result is None
|
||||
|
||||
|
||||
def test_hint_skipped_for_empty_model(tmp_path):
    """Neither an empty ``model`` nor a ``None`` fallback to a safe model matches."""
    # self.model is deliberately a non-matching name.
    codex_agent = _make_agent(tmp_path, model="gpt-5.4-codex")
    codex_agent.api_mode = "codex_responses"
    # "" is used as-is and cannot match the regex; None falls back to
    # self.model (gpt-5.4-codex), which does not match either.
    for candidate in ("", None):
        assert codex_agent._codex_silent_hang_hint(model=candidate) is None
|
||||
|
||||
|
||||
def test_hint_skipped_for_unrelated_model_on_codex(tmp_path):
    """An unrelated model on the Codex backend produces no hint."""
    model_name = "gpt-4-turbo"
    codex_agent = _make_agent(tmp_path, model=model_name)
    codex_agent.api_mode = "codex_responses"
    result = codex_agent._codex_silent_hang_hint(model=model_name)
    assert result is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue