From dc9d677d59c40f8e943a036b3ea4d264fd53218a Mon Sep 17 00:00:00 2001 From: Brixyy Date: Wed, 27 May 2026 11:20:41 -0700 Subject: [PATCH] fix(agent): classify TypeError('NoneType ... not iterable') as retryable provider shape error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Salvages the intent of #33136 (@Brixyy) onto current main. The original PR was written against the pre-refactor monolithic run_agent.py and added a top-level _is_nonretryable_local_validation_error() helper. Both target functions have since been extracted to agent/conversation_loop.py:2869, so the salvage applies the equivalent guard inline at that canonical location rather than reintroducing the helper. ## Why After #33042 made our own Codex consumer structurally immune to NoneType crashes, third-party shims, mocked clients, and any future code path that hasn't migrated could still surface TypeError: 'NoneType' object is not iterable as a wire-shape mismatch. The agent loop's classifier currently treats ALL TypeError as a local programming bug and aborts non-retryable — users on stale Telegram/gateway turns saw bare "Non-retryable error (HTTP None)" with no recovery. This is a provider/SDK shape mismatch, not a local programming bug. The retry/fallback path should run, not be short-circuited. ## What agent/conversation_loop.py: extend is_local_validation_error to exclude TypeErrors whose message matches the NoneType-not-iterable shape (case- insensitive, both "NoneType" and "not iterable" must appear). tests/run_agent/test_jsondecodeerror_retryable.py: - update the mirror predicate to match the production check - add TestNoneTypeNotIterableIsRetryable class with 3 tests (the basic shape, message variants, unrelated TypeErrors still abort) - add TestAgentLoopSourceHasNoneTypeCarveOut to enforce the source-level invariant matches the test mirror ## Validation tests/run_agent/test_jsondecodeerror_retryable.py + tests/run_agent/test_31273_402_not_retried.py → 14/14 passing Co-authored-by: Brixyy --- agent/conversation_loop.py | 15 +++++ .../test_jsondecodeerror_retryable.py | 64 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 078c62771ed..271056138b1 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2879,6 +2879,21 @@ def run_conversation( # ssl.SSLError explicitly so the error classifier's # retryable=True mapping takes effect instead. and not isinstance(api_error, ssl.SSLError) + # Provider/SDK "NoneType is not iterable" failures are + # shape mismatches from upstream (e.g. chatgpt.com Codex + # backend response.completed.output=null) — not local + # programming bugs. Even after #33042 made our own + # consumer immune, third-party shims and mocked clients + # can still surface this shape via TypeError. Treat + # them as retryable so the error classifier's normal + # retry/fallback path runs instead of killing the turn + # as non-retryable (which left Telegram users staring + # at a bare "Non-retryable error" with no recovery). + and not ( + isinstance(api_error, TypeError) + and "nonetype" in str(api_error).lower() + and "not iterable" in str(api_error).lower() + ) ) # ``FailoverReason.billing`` (HTTP 402) is NOT in this # exclusion set. By the time we reach this block: diff --git a/tests/run_agent/test_jsondecodeerror_retryable.py b/tests/run_agent/test_jsondecodeerror_retryable.py index 0bd4fc09f9f..3f2f3c84b0d 100644 --- a/tests/run_agent/test_jsondecodeerror_retryable.py +++ b/tests/run_agent/test_jsondecodeerror_retryable.py @@ -28,9 +28,20 @@ def _mirror_agent_predicate(err: BaseException) -> bool: or, better, refactor the check into a shared helper and have both sites import it. """ + import ssl + return ( isinstance(err, (ValueError, TypeError)) and not isinstance(err, (UnicodeEncodeError, json.JSONDecodeError)) + and not isinstance(err, ssl.SSLError) + # NoneType-is-not-iterable shape errors come from upstream SDK / + # provider response mismatches, not local programming bugs. See + # the agent/conversation_loop.py inline comment for #33136. + and not ( + isinstance(err, TypeError) + and "nonetype" in str(err).lower() + and "not iterable" in str(err).lower() + ) ) @@ -90,3 +101,56 @@ class TestAgentLoopSourceStillHasCarveOut: "agent/conversation_loop.py must carve out json.JSONDecodeError " "from the is_local_validation_error classification — see #14782." ) + + + +class TestNoneTypeNotIterableIsRetryable: + """Regression for #33136 / closes lingering Telegram \"Non-retryable error (HTTP None)\". + + The chatgpt.com Codex backend (and any other upstream SDK / provider shim) + can surface ``TypeError: 'NoneType' object is not iterable`` as a wire-shape + mismatch, not a local programming bug. Even after #33042 made our own + consumer immune, third-party paths and mocked clients can still produce + this shape. The classifier should treat it as retryable so the normal + retry/fallback chain runs. + """ + + def test_nonetype_not_iterable_is_retryable(self): + err = TypeError("'NoneType' object is not iterable") + assert not _mirror_agent_predicate(err), ( + "TypeError('NoneType ... not iterable') must be excluded from " + "is_local_validation_error — it is a provider/SDK shape mismatch, " + "not a local bug. See #33136." + ) + + def test_nonetype_not_iterable_uppercase_variants_still_retryable(self): + # The carve-out is case-insensitive; SDK message phrasing can vary. + for msg in [ + "'NoneType' object is not iterable", + "NoneType object is not iterable", + "argument of type 'NoneType' is not iterable", + ]: + err = TypeError(msg) + assert not _mirror_agent_predicate(err), ( + f"Variant {msg!r} should be classified as retryable provider shape error." + ) + + def test_unrelated_type_error_remains_local_validation(self): + """TypeError without the NoneType-not-iterable pattern still aborts (programming bug).""" + assert _mirror_agent_predicate(TypeError("tools must be a list")) + assert _mirror_agent_predicate(TypeError("expected str, got int")) + + +class TestAgentLoopSourceHasNoneTypeCarveOut: + """Belt-and-suspenders: the production source must include the carve-out.""" + + def test_conversation_loop_excludes_nonetype_not_iterable_from_local_validation(self): + import inspect + from agent import conversation_loop + src = inspect.getsource(conversation_loop) + assert "is_local_validation_error" in src + # The specific check must be present. + assert "nonetype" in src.lower() and "not iterable" in src.lower(), ( + "agent/conversation_loop.py must carve out 'NoneType is not iterable' " + "TypeErrors from the is_local_validation_error classification — see #33136." + )