From 524453dab57e2201bda1c5338900453152b8ae98 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 22:31:25 -0700 Subject: [PATCH] refactor(agent): consolidate inner-retry-loop recovery flags into TurnRetryState (god-file Phase 1b) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit run_conversation's inner retry loop tracked recovery state in ~15 scattered bare booleans (per-provider OAuth refresh guards, format-recovery guards, restart signals). They are now fields on a single TurnRetryState dataclass the loop mutates in place (_retry.), giving the recovery bookkeeping a named, testable home. Loop-control vars (retry_count, max_retries, max_compression_attempts) stay as plain locals — they're while-mechanics, not recovery bookkeeping. Behavior-neutral: pure local→attribute rewrite of 42 references; kwarg NAMES preserved (e.g. has_retried_429=_retry.has_retried_429). Live simple + tool turns OK. Validation: tests/run_agent/ 1615 passed / 0 failed under per-file process isolation; new test_turn_retry_state.py pins the field contract. --- agent/conversation_loop.py | 99 ++++++++++++---------------- agent/turn_retry_state.py | 68 +++++++++++++++++++ tests/agent/test_turn_retry_state.py | 64 ++++++++++++++++++ 3 files changed, 175 insertions(+), 56 deletions(-) create mode 100644 agent/turn_retry_state.py create mode 100644 tests/agent/test_turn_retry_state.py diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index c00bf81a6c8..2e4b7ed7073 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -32,6 +32,7 @@ from agent.display import KawaiiSpinner from agent.error_classifier import FailoverReason, classify_api_error from agent.iteration_budget import IterationBudget from agent.turn_context import build_turn_context +from agent.turn_retry_state import TurnRetryState from agent.memory_manager import build_memory_context_block from agent.message_sanitization import ( _repair_tool_call_arguments, @@ -798,22 +799,8 @@ def run_conversation( api_start_time = time.time() retry_count = 0 max_retries = agent._api_max_retries - primary_recovery_attempted = False + _retry = TurnRetryState() max_compression_attempts = 3 - codex_auth_retry_attempted=False - anthropic_auth_retry_attempted=False - nous_auth_retry_attempted=False - nous_paid_entitlement_refresh_attempted=False - copilot_auth_retry_attempted=False - thinking_sig_retry_attempted = False - invalid_encrypted_content_retry_attempted = False - image_shrink_retry_attempted = False - multimodal_tool_content_retry_attempted = False - oauth_1m_beta_retry_attempted = False - llama_cpp_grammar_retry_attempted = False - has_retried_429 = False - restart_with_compressed_messages = False - restart_with_length_continuation = False finish_reason = "stop" response = None # Guard against UnboundLocalError if all retries fail @@ -846,7 +833,7 @@ def run_conversation( if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 - primary_recovery_attempted = False + _retry.primary_recovery_attempted = False continue # No fallback available — surface buffered context # so user sees the rate-limit message that led here. @@ -1171,7 +1158,7 @@ def run_conversation( if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 - primary_recovery_attempted = False + _retry.primary_recovery_attempted = False continue # Check for error field in response (some providers include this) @@ -1242,7 +1229,7 @@ def run_conversation( if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 - primary_recovery_attempted = False + _retry.primary_recovery_attempted = False continue # Terminal — flush buffered retry trace so user sees what happened. agent._flush_status_buffer() @@ -1466,7 +1453,7 @@ def run_conversation( } messages.append(continue_msg) agent._session_messages = messages - restart_with_length_continuation = True + _retry.restart_with_length_continuation = True break partial_response = agent._strip_think_blocks("".join(truncated_response_parts)).strip() @@ -1715,7 +1702,7 @@ def run_conversation( f"({hit_pct:.0f}% hit, {written:,} written)" ) - has_retried_429 = False # Reset on success + _retry.has_retried_429 = False # Reset on success # Note: don't clear the retry buffer here — an "API call # success" only means we got bytes back, not that we got # usable content. Empty responses still loop through the @@ -2045,9 +2032,9 @@ def run_conversation( getattr(agent, "provider", "") or "", getattr(agent, "base_url", "") or "", ) - and not nous_paid_entitlement_refresh_attempted + and not _retry.nous_paid_entitlement_refresh_attempted ): - nous_paid_entitlement_refresh_attempted = True + _retry.nous_paid_entitlement_refresh_attempted = True if _try_refresh_nous_paid_entitlement_credentials(agent): agent._vprint( f"{agent.log_prefix}🔐 Nous paid access verified — " @@ -2056,9 +2043,9 @@ def run_conversation( ) continue - recovered_with_pool, has_retried_429 = agent._recover_with_credential_pool( + recovered_with_pool, _retry.has_retried_429 = agent._recover_with_credential_pool( status_code=status_code, - has_retried_429=has_retried_429, + has_retried_429=_retry.has_retried_429, classified_reason=classified.reason, error_context=error_context, ) @@ -2073,9 +2060,9 @@ def run_conversation( # fails, fall through to normal error handling. if ( classified.reason == FailoverReason.image_too_large - and not image_shrink_retry_attempted + and not _retry.image_shrink_retry_attempted ): - image_shrink_retry_attempted = True + _retry.image_shrink_retry_attempted = True if agent._try_shrink_image_parts_in_messages(api_messages): agent._vprint( f"{agent.log_prefix}📐 Image(s) exceeded provider size limit — " @@ -2098,9 +2085,9 @@ def run_conversation( # downgrade, and retry once. See issue #27344. if ( classified.reason == FailoverReason.multimodal_tool_content_unsupported - and not multimodal_tool_content_retry_attempted + and not _retry.multimodal_tool_content_retry_attempted ): - multimodal_tool_content_retry_attempted = True + _retry.multimodal_tool_content_retry_attempted = True if agent._try_strip_image_parts_from_tool_messages(api_messages): agent._vprint( f"{agent.log_prefix}📐 Provider rejected list-type tool content — " @@ -2127,9 +2114,9 @@ def run_conversation( classified.reason == FailoverReason.oauth_long_context_beta_forbidden and agent.api_mode == "anthropic_messages" and agent._is_anthropic_oauth - and not oauth_1m_beta_retry_attempted + and not _retry.oauth_1m_beta_retry_attempted ): - oauth_1m_beta_retry_attempted = True + _retry.oauth_1m_beta_retry_attempted = True if not getattr(agent, "_oauth_1m_beta_disabled", False): agent._oauth_1m_beta_disabled = True try: @@ -2148,9 +2135,9 @@ def run_conversation( agent.api_mode == "codex_responses" and agent.provider in {"openai-codex", "xai-oauth"} and status_code == 401 - and not codex_auth_retry_attempted + and not _retry.codex_auth_retry_attempted ): - codex_auth_retry_attempted = True + _retry.codex_auth_retry_attempted = True if agent._try_refresh_codex_client_credentials(force=True): _label = "xAI OAuth" if agent.provider == "xai-oauth" else "Codex" agent._buffer_vprint(f"🔐 {_label} auth refreshed after 401. Retrying request...") @@ -2159,9 +2146,9 @@ def run_conversation( agent.api_mode == "chat_completions" and agent.provider == "nous" and status_code == 401 - and not nous_auth_retry_attempted + and not _retry.nous_auth_retry_attempted ): - nous_auth_retry_attempted = True + _retry.nous_auth_retry_attempted = True if agent._try_refresh_nous_client_credentials(force=True): print(f"{agent.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...") continue @@ -2190,9 +2177,9 @@ def run_conversation( if ( agent.provider == "copilot" and status_code == 401 - and not copilot_auth_retry_attempted + and not _retry.copilot_auth_retry_attempted ): - copilot_auth_retry_attempted = True + _retry.copilot_auth_retry_attempted = True if agent._try_refresh_copilot_client_credentials(): agent._buffer_vprint(f"🔐 Copilot credentials refreshed after 401. Retrying request...") continue @@ -2200,9 +2187,9 @@ def run_conversation( agent.api_mode == "anthropic_messages" and status_code == 401 and hasattr(agent, '_anthropic_api_key') - and not anthropic_auth_retry_attempted + and not _retry.anthropic_auth_retry_attempted ): - anthropic_auth_retry_attempted = True + _retry.anthropic_auth_retry_attempted = True from agent.anthropic_adapter import _is_oauth_token from agent.azure_identity_adapter import is_token_provider if agent._try_refresh_anthropic_client_credentials(): @@ -2243,9 +2230,9 @@ def run_conversation( # blocks at all. One-shot — don't retry infinitely. if ( classified.reason == FailoverReason.thinking_signature - and not thinking_sig_retry_attempted + and not _retry.thinking_sig_retry_attempted ): - thinking_sig_retry_attempted = True + _retry.thinking_sig_retry_attempted = True for _m in messages: if isinstance(_m, dict): _m.pop("reasoning_details", None) @@ -2277,7 +2264,7 @@ def run_conversation( # handles it (the provider is rejecting something else). if ( classified.reason == FailoverReason.invalid_encrypted_content - and not invalid_encrypted_content_retry_attempted + and not _retry.invalid_encrypted_content_retry_attempted and agent.api_mode == "codex_responses" and bool(getattr(agent, "_codex_reasoning_replay_enabled", True)) and any( @@ -2288,7 +2275,7 @@ def run_conversation( for _m in messages ) ): - invalid_encrypted_content_retry_attempted = True + _retry.invalid_encrypted_content_retry_attempted = True replay_stats = agent._disable_codex_reasoning_replay(messages) agent._vprint( f"{agent.log_prefix}⚠️ Encrypted reasoning replay was rejected by the provider — " @@ -2315,9 +2302,9 @@ def run_conversation( # fires only for users on llama.cpp's OAI server. if ( classified.reason == FailoverReason.llama_cpp_grammar_pattern - and not llama_cpp_grammar_retry_attempted + and not _retry.llama_cpp_grammar_retry_attempted ): - llama_cpp_grammar_retry_attempted = True + _retry.llama_cpp_grammar_retry_attempted = True try: from tools.schema_sanitizer import strip_pattern_and_format _, _stripped = strip_pattern_and_format(agent.tools) @@ -2528,7 +2515,7 @@ def run_conversation( f"(was {old_ctx:,}), retrying..." ) time.sleep(2) - restart_with_compressed_messages = True + _retry.restart_with_compressed_messages = True break # Fall through to normal error handling if compression # is exhausted or didn't help. @@ -2561,7 +2548,7 @@ def run_conversation( if agent._try_activate_fallback(reason=classified.reason): retry_count = 0 compression_attempts = 0 - primary_recovery_attempted = False + _retry.primary_recovery_attempted = False continue # ── Nous Portal: record rate limit & skip retries ───── @@ -2699,7 +2686,7 @@ def run_conversation( if len(messages) < original_len: agent._buffer_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") time.sleep(2) # Brief pause between compression retries - restart_with_compressed_messages = True + _retry.restart_with_compressed_messages = True break else: # Terminal — surface buffered context so the user @@ -2771,7 +2758,7 @@ def run_conversation( "failed": True, "compression_exhausted": True, } - restart_with_compressed_messages = True + _retry.restart_with_compressed_messages = True break # Error is about the INPUT being too large. Only reduce @@ -2856,7 +2843,7 @@ def run_conversation( if len(messages) < original_len: agent._buffer_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") time.sleep(2) # Brief pause between compression retries - restart_with_compressed_messages = True + _retry.restart_with_compressed_messages = True break else: # Can't compress further and already at minimum tier @@ -2961,7 +2948,7 @@ def run_conversation( if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 - primary_recovery_attempted = False + _retry.primary_recovery_attempted = False continue if api_kwargs is not None: agent._dump_api_request_debug( @@ -3093,10 +3080,10 @@ def run_conversation( # client once for transient transport errors (stale # connection pool, TCP reset). Only attempted once # per API call block. - if not primary_recovery_attempted and agent._try_recover_primary_transport( + if not _retry.primary_recovery_attempted and agent._try_recover_primary_transport( api_error, retry_count=retry_count, max_retries=max_retries, ): - primary_recovery_attempted = True + _retry.primary_recovery_attempted = True retry_count = 0 continue # Try fallback before giving up entirely @@ -3105,7 +3092,7 @@ def run_conversation( if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 - primary_recovery_attempted = False + _retry.primary_recovery_attempted = False continue # Terminal — flush buffered retry/fallback trace. agent._flush_status_buffer() @@ -3256,17 +3243,17 @@ def run_conversation( _turn_exit_reason = "interrupted_during_api_call" break - if restart_with_compressed_messages: + if _retry.restart_with_compressed_messages: api_call_count -= 1 agent.iteration_budget.refund() # Count compression restarts toward the retry limit to prevent # infinite loops when compression reduces messages but not enough # to fit the context window. retry_count += 1 - restart_with_compressed_messages = False + _retry.restart_with_compressed_messages = False continue - if restart_with_length_continuation: + if _retry.restart_with_length_continuation: # Progressively boost the output token budget on each retry. # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768. # Applies to all providers via _ephemeral_max_output_tokens. diff --git a/agent/turn_retry_state.py b/agent/turn_retry_state.py new file mode 100644 index 00000000000..188fe3f1c16 --- /dev/null +++ b/agent/turn_retry_state.py @@ -0,0 +1,68 @@ +"""Per-attempt recovery bookkeeping for the conversation turn loop. + +The inner retry loop in ``run_conversation`` (``while retry_count < +max_retries``) makes several distinct recovery attempts on a single model API +call: a credential-pool 429 retry, a per-provider OAuth refresh (codex, +anthropic, nous, copilot), a long-context compression restart, a length- +continuation restart, and a handful of format-recovery branches (thinking- +signature stripping, multimodal-tool-content stripping, llama.cpp grammar +fallback, image shrink, invalid-encrypted-content, 1M-beta header). + +Each of those branches is guarded by a one-shot boolean so it fires at most +once per attempt. They used to be ~16 bare ``*_attempted`` / ``has_retried_*`` +/ ``restart_with_*`` locals declared inline before the loop and threaded +through its 2,400-line body. ``TurnRetryState`` collapses them into one object +the loop mutates in place (``state.codex_auth_retry_attempted = True``), giving +the recovery bookkeeping a single named, testable home. + +Loop-control variables (``retry_count``, ``max_retries``, +``max_compression_attempts``) intentionally stay as plain locals — they are the +``while`` mechanics, not recovery bookkeeping, and putting them on the object +would add indirection without clarifying anything. + +This module is dependency-free so it can be unit-tested in isolation and +imported by the turn loop without an import cycle. +""" + +from __future__ import annotations + +from dataclasses import dataclass, fields + + +@dataclass +class TurnRetryState: + """One-shot recovery guards + restart signals for a single API-call attempt. + + A fresh instance is created for each iteration of the outer turn loop + (once per ``api_call_count``). Each guard fires its recovery branch at most + once; the ``restart_with_*`` signals are read by the loop after the attempt + to decide whether to rebuild the request and retry. + """ + + # ── Per-provider OAuth / credential refresh guards ─────────────────── + codex_auth_retry_attempted: bool = False + anthropic_auth_retry_attempted: bool = False + nous_auth_retry_attempted: bool = False + nous_paid_entitlement_refresh_attempted: bool = False + copilot_auth_retry_attempted: bool = False + + # ── Format / payload recovery guards ───────────────────────────────── + thinking_sig_retry_attempted: bool = False + invalid_encrypted_content_retry_attempted: bool = False + image_shrink_retry_attempted: bool = False + multimodal_tool_content_retry_attempted: bool = False + oauth_1m_beta_retry_attempted: bool = False + llama_cpp_grammar_retry_attempted: bool = False + + # ── Transport / rate-limit recovery ────────────────────────────────── + primary_recovery_attempted: bool = False + has_retried_429: bool = False + + # ── Restart signals (read by the outer loop after the attempt) ─────── + restart_with_compressed_messages: bool = False + restart_with_length_continuation: bool = False + + def __iter__(self): + # Convenience for debugging / tests: iterate (name, value) pairs. + for f in fields(self): + yield f.name, getattr(self, f.name) diff --git a/tests/agent/test_turn_retry_state.py b/tests/agent/test_turn_retry_state.py new file mode 100644 index 00000000000..138cca12a64 --- /dev/null +++ b/tests/agent/test_turn_retry_state.py @@ -0,0 +1,64 @@ +"""Unit tests for TurnRetryState (god-file Phase 1b). + +The dataclass holds the inner-retry-loop's one-shot recovery guards + restart +signals. These tests pin its shape and default semantics — the behavioral +guarantee for the loop itself is the existing recovery-branch tests in +tests/run_agent/ which now exercise these fields via `_retry.`. +""" + +from __future__ import annotations + +from dataclasses import fields + +from agent.turn_retry_state import TurnRetryState + + +EXPECTED_FIELDS = { + "codex_auth_retry_attempted", + "anthropic_auth_retry_attempted", + "nous_auth_retry_attempted", + "nous_paid_entitlement_refresh_attempted", + "copilot_auth_retry_attempted", + "thinking_sig_retry_attempted", + "invalid_encrypted_content_retry_attempted", + "image_shrink_retry_attempted", + "multimodal_tool_content_retry_attempted", + "oauth_1m_beta_retry_attempted", + "llama_cpp_grammar_retry_attempted", + "primary_recovery_attempted", + "has_retried_429", + "restart_with_compressed_messages", + "restart_with_length_continuation", +} + + +def test_all_guards_default_false(): + s = TurnRetryState() + for name, value in s: + assert value is False, f"{name} should default to False" + + +def test_field_set_matches_contract(): + names = {f.name for f in fields(TurnRetryState)} + assert names == EXPECTED_FIELDS, ( + f"unexpected drift: missing={EXPECTED_FIELDS - names} extra={names - EXPECTED_FIELDS}" + ) + + +def test_loop_control_vars_are_not_on_state(): + # retry_count / max_retries / max_compression_attempts stay as loop locals, + # NOT on the state object (they are while-mechanics, not recovery bookkeeping). + names = {f.name for f in fields(TurnRetryState)} + for loop_local in ("retry_count", "max_retries", "max_compression_attempts"): + assert loop_local not in names + + +def test_guards_are_independently_mutable(): + s = TurnRetryState() + s.codex_auth_retry_attempted = True + s.restart_with_compressed_messages = True + assert s.codex_auth_retry_attempted is True + assert s.restart_with_compressed_messages is True + # untouched guards stay False + assert s.has_retried_429 is False + assert s.anthropic_auth_retry_attempted is False