fix(agent): classify TypeError('NoneType ... not iterable') as retryable provider shape error

Salvages the intent of #33136 (@Brixyy) onto current main. The original PR
was written against the pre-refactor monolithic run_agent.py and added a
top-level _is_nonretryable_local_validation_error() helper. Both target
functions have since been extracted to agent/conversation_loop.py:2869,
so the salvage applies the equivalent guard inline at that canonical
location rather than reintroducing the helper.

## Why

After #33042 made our own Codex consumer structurally immune to NoneType
crashes, third-party shims, mocked clients, and any future code path that
hasn't migrated could still surface TypeError: 'NoneType' object is not
iterable as a wire-shape mismatch. The agent loop's classifier currently
treats ALL TypeError as a local programming bug and aborts non-retryable
— users on stale Telegram/gateway turns saw bare "Non-retryable error
(HTTP None)" with no recovery.

This is a provider/SDK shape mismatch, not a local programming bug. The
retry/fallback path should run, not be short-circuited.

## What

agent/conversation_loop.py: extend is_local_validation_error to exclude
TypeErrors whose message matches the NoneType-not-iterable shape (case-
insensitive, both "NoneType" and "not iterable" must appear).

tests/run_agent/test_jsondecodeerror_retryable.py:
- update the mirror predicate to match the production check
- add TestNoneTypeNotIterableIsRetryable class with 3 tests (the basic
  shape, message variants, unrelated TypeErrors still abort)
- add TestAgentLoopSourceHasNoneTypeCarveOut to enforce the source-level
  invariant matches the test mirror

## Validation

tests/run_agent/test_jsondecodeerror_retryable.py +
tests/run_agent/test_31273_402_not_retried.py → 14/14 passing

Co-authored-by: Brixyy <subrtt@gmail.com>
This commit is contained in:
Brixyy 2026-05-27 11:20:41 -07:00 committed by Teknium
parent 3476509f97
commit dc9d677d59
2 changed files with 79 additions and 0 deletions

View file

@ -2879,6 +2879,21 @@ def run_conversation(
# ssl.SSLError explicitly so the error classifier's
# retryable=True mapping takes effect instead.
and not isinstance(api_error, ssl.SSLError)
# Provider/SDK "NoneType is not iterable" failures are
# shape mismatches from upstream (e.g. chatgpt.com Codex
# backend response.completed.output=null) — not local
# programming bugs. Even after #33042 made our own
# consumer immune, third-party shims and mocked clients
# can still surface this shape via TypeError. Treat
# them as retryable so the error classifier's normal
# retry/fallback path runs instead of killing the turn
# as non-retryable (which left Telegram users staring
# at a bare "Non-retryable error" with no recovery).
and not (
isinstance(api_error, TypeError)
and "nonetype" in str(api_error).lower()
and "not iterable" in str(api_error).lower()
)
)
# ``FailoverReason.billing`` (HTTP 402) is NOT in this
# exclusion set. By the time we reach this block:

View file

@ -28,9 +28,20 @@ def _mirror_agent_predicate(err: BaseException) -> bool:
or, better, refactor the check into a shared helper and have both
sites import it.
"""
import ssl
return (
isinstance(err, (ValueError, TypeError))
and not isinstance(err, (UnicodeEncodeError, json.JSONDecodeError))
and not isinstance(err, ssl.SSLError)
# NoneType-is-not-iterable shape errors come from upstream SDK /
# provider response mismatches, not local programming bugs. See
# the agent/conversation_loop.py inline comment for #33136.
and not (
isinstance(err, TypeError)
and "nonetype" in str(err).lower()
and "not iterable" in str(err).lower()
)
)
@ -90,3 +101,56 @@ class TestAgentLoopSourceStillHasCarveOut:
"agent/conversation_loop.py must carve out json.JSONDecodeError "
"from the is_local_validation_error classification — see #14782."
)
class TestNoneTypeNotIterableIsRetryable:
"""Regression for #33136 / closes lingering Telegram \"Non-retryable error (HTTP None)\".
The chatgpt.com Codex backend (and any other upstream SDK / provider shim)
can surface ``TypeError: 'NoneType' object is not iterable`` as a wire-shape
mismatch, not a local programming bug. Even after #33042 made our own
consumer immune, third-party paths and mocked clients can still produce
this shape. The classifier should treat it as retryable so the normal
retry/fallback chain runs.
"""
def test_nonetype_not_iterable_is_retryable(self):
err = TypeError("'NoneType' object is not iterable")
assert not _mirror_agent_predicate(err), (
"TypeError('NoneType ... not iterable') must be excluded from "
"is_local_validation_error — it is a provider/SDK shape mismatch, "
"not a local bug. See #33136."
)
def test_nonetype_not_iterable_uppercase_variants_still_retryable(self):
# The carve-out is case-insensitive; SDK message phrasing can vary.
for msg in [
"'NoneType' object is not iterable",
"NoneType object is not iterable",
"argument of type 'NoneType' is not iterable",
]:
err = TypeError(msg)
assert not _mirror_agent_predicate(err), (
f"Variant {msg!r} should be classified as retryable provider shape error."
)
def test_unrelated_type_error_remains_local_validation(self):
"""TypeError without the NoneType-not-iterable pattern still aborts (programming bug)."""
assert _mirror_agent_predicate(TypeError("tools must be a list"))
assert _mirror_agent_predicate(TypeError("expected str, got int"))
class TestAgentLoopSourceHasNoneTypeCarveOut:
"""Belt-and-suspenders: the production source must include the carve-out."""
def test_conversation_loop_excludes_nonetype_not_iterable_from_local_validation(self):
import inspect
from agent import conversation_loop
src = inspect.getsource(conversation_loop)
assert "is_local_validation_error" in src
# The specific check must be present.
assert "nonetype" in src.lower() and "not iterable" in src.lower(), (
"agent/conversation_loop.py must carve out 'NoneType is not iterable' "
"TypeErrors from the is_local_validation_error classification — see #33136."
)