From 93b6f4522479a7c92ef8dc6a75d71c8c83b7e7f1 Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 15 Apr 2026 14:56:55 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20always=20retry=20on=20ASCII=20codec=20Un?= =?UTF-8?q?icodeEncodeError=20=E2=80=94=20don't=20gate=20on=20per-componen?= =?UTF-8?q?t=20sanitization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recovery block previously only retried (continue) when one of the per-component sanitization checks (messages, tools, system prompt, headers, credentials) found and stripped non-ASCII content. When the non-ASCII lived only in api_messages' reasoning_content field (which is built from messages['reasoning'] and not checked by the original _sanitize_messages_non_ascii), all checks returned False and the recovery fell through to the normal error path — burning a retry attempt despite _force_ascii_payload being set. Now the recovery always continues (retries) when _is_ascii_codec is detected. The _force_ascii_payload flag guarantees the next iteration runs _sanitize_structure_non_ascii(api_kwargs) on the full API payload, catching any remaining non-ASCII regardless of where it lives. Also adds test for the 'reasoning' field on canonical messages. Fixes #6843 --- run_agent.py | 24 +++++++++++++++------ tests/run_agent/test_unicode_ascii_codec.py | 21 +++++++++++++++--- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/run_agent.py b/run_agent.py index a181b11a4..b01107814 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9186,22 +9186,34 @@ class AIAgent: force=True, ) - if ( + # Always retry on ASCII codec detection — + # _force_ascii_payload guarantees the full + # api_kwargs payload is sanitized on the + # next iteration (line ~8475). Even when + # per-component checks above find nothing + # (e.g. non-ASCII only in api_messages' + # reasoning_content), the flag catches it. + # Bounded by _unicode_sanitization_passes < 2. + self._unicode_sanitization_passes += 1 + _any_sanitized = ( _messages_sanitized or _prefill_sanitized or _tools_sanitized or _system_sanitized or _headers_sanitized or _credential_sanitized - ): - self._unicode_sanitization_passes += 1 + ) + if _any_sanitized: self._vprint( f"{self.log_prefix}⚠️ System encoding is ASCII — stripped non-ASCII characters from request payload. Retrying...", force=True, ) - continue - # Nothing to sanitize in any payload component. - # Fall through to normal error path. + else: + self._vprint( + f"{self.log_prefix}⚠️ System encoding is ASCII — enabling full-payload sanitization for retry...", + force=True, + ) + continue status_code = getattr(api_error, "status_code", None) error_context = self._extract_api_error_context(api_error) diff --git a/tests/run_agent/test_unicode_ascii_codec.py b/tests/run_agent/test_unicode_ascii_codec.py index 714429b30..04b5e4043 100644 --- a/tests/run_agent/test_unicode_ascii_codec.py +++ b/tests/run_agent/test_unicode_ascii_codec.py @@ -268,9 +268,9 @@ class TestApiKeyClientSync: agent.client.api_key = _clean_key # All three locations should now hold the clean key - assert agent.api_key == "***" - assert agent._client_kwargs["api_key"] == "***" - assert agent.client.api_key == "***" + assert agent.api_key == "sk-proj-abcdef" + assert agent._client_kwargs["api_key"] == "sk-proj-abcdef" + assert agent.client.api_key == "sk-proj-abcdef" # The bad char should be gone from all of them assert "\u028b" not in agent.api_key assert "\u028b" not in agent._client_kwargs["api_key"] @@ -355,3 +355,18 @@ class TestApiMessagesAndApiKwargsSanitized: # api_messages sanitize must catch the dirty reasoning_content assert _sanitize_messages_non_ascii(api_messages) is True assert "\xab" not in api_messages[1]["reasoning_content"] + + def test_reasoning_field_in_canonical_messages_is_sanitized(self): + """The canonical messages list stores reasoning as 'reasoning', not + 'reasoning_content'. The extra-fields loop must catch it.""" + messages = [ + {"role": "user", "content": "hello"}, + { + "role": "assistant", + "content": "ok", + "reasoning": "Let me think \xab carefully \xbb", + }, + ] + assert _sanitize_messages_non_ascii(messages) is True + assert "\xab" not in messages[1]["reasoning"] + assert "\xbb" not in messages[1]["reasoning"]