diff --git a/run_agent.py b/run_agent.py index a181b11a4b..b011078142 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9186,22 +9186,34 @@ class AIAgent: force=True, ) - if ( + # Always retry on ASCII codec detection — + # _force_ascii_payload guarantees the full + # api_kwargs payload is sanitized on the + # next iteration (line ~8475). Even when + # per-component checks above find nothing + # (e.g. non-ASCII only in api_messages' + # reasoning_content), the flag catches it. + # Bounded by _unicode_sanitization_passes < 2. + self._unicode_sanitization_passes += 1 + _any_sanitized = ( _messages_sanitized or _prefill_sanitized or _tools_sanitized or _system_sanitized or _headers_sanitized or _credential_sanitized - ): - self._unicode_sanitization_passes += 1 + ) + if _any_sanitized: self._vprint( f"{self.log_prefix}⚠️ System encoding is ASCII — stripped non-ASCII characters from request payload. Retrying...", force=True, ) - continue - # Nothing to sanitize in any payload component. - # Fall through to normal error path. + else: + self._vprint( + f"{self.log_prefix}⚠️ System encoding is ASCII — enabling full-payload sanitization for retry...", + force=True, + ) + continue status_code = getattr(api_error, "status_code", None) error_context = self._extract_api_error_context(api_error) diff --git a/tests/run_agent/test_unicode_ascii_codec.py b/tests/run_agent/test_unicode_ascii_codec.py index 714429b30d..04b5e4043c 100644 --- a/tests/run_agent/test_unicode_ascii_codec.py +++ b/tests/run_agent/test_unicode_ascii_codec.py @@ -268,9 +268,9 @@ class TestApiKeyClientSync: agent.client.api_key = _clean_key # All three locations should now hold the clean key - assert agent.api_key == "***" - assert agent._client_kwargs["api_key"] == "***" - assert agent.client.api_key == "***" + assert agent.api_key == "sk-proj-abcdef" + assert agent._client_kwargs["api_key"] == "sk-proj-abcdef" + assert agent.client.api_key == "sk-proj-abcdef" # The bad char should be gone from all of them assert "\u028b" not in agent.api_key assert "\u028b" not in agent._client_kwargs["api_key"] @@ -355,3 +355,18 @@ class TestApiMessagesAndApiKwargsSanitized: # api_messages sanitize must catch the dirty reasoning_content assert _sanitize_messages_non_ascii(api_messages) is True assert "\xab" not in api_messages[1]["reasoning_content"] + + def test_reasoning_field_in_canonical_messages_is_sanitized(self): + """The canonical messages list stores reasoning as 'reasoning', not + 'reasoning_content'. The extra-fields loop must catch it.""" + messages = [ + {"role": "user", "content": "hello"}, + { + "role": "assistant", + "content": "ok", + "reasoning": "Let me think \xab carefully \xbb", + }, + ] + assert _sanitize_messages_non_ascii(messages) is True + assert "\xab" not in messages[1]["reasoning"] + assert "\xbb" not in messages[1]["reasoning"]