fix: always retry on ASCII codec UnicodeEncodeError — don't gate on per-component sanitization

The recovery block previously retried (via `continue`) only when one of the per-component sanitization checks (messages, tools, system prompt, headers, credentials) found and stripped non-ASCII content. When the non-ASCII content lived only in the reasoning_content field of api_messages (which is built from messages['reasoning'] and was not checked by the original _sanitize_messages_non_ascii), all checks returned False and the recovery fell through to the normal error path — burning a retry attempt despite _force_ascii_payload being set. Now the recovery always continues (retries) when _is_ascii_codec is detected. The _force_ascii_payload flag guarantees that the next iteration runs _sanitize_structure_non_ascii(api_kwargs) on the full API payload, catching any remaining non-ASCII content regardless of where it lives. Also adds a test for the 'reasoning' field on canonical messages. Fixes #6843
2026-04-25 00:51:20 +00:00 · 2026-04-15 14:56:55 -07:00 · 2026-04-15 14:56:55 -07:00 · 93b6f45224
commit 93b6f45224
parent 902f1e6ede
2 changed files with 36 additions and 9 deletions
--- a/tests/run_agent/test_unicode_ascii_codec.py
+++ b/tests/run_agent/test_unicode_ascii_codec.py
@@ -268,9 +268,9 @@ class TestApiKeyClientSync:
            agent.client.api_key = _clean_key

        # All three locations should now hold the clean key
-        assert agent.api_key == "***"
-        assert agent._client_kwargs["api_key"] == "***"
-        assert agent.client.api_key == "***"
+        assert agent.api_key == "sk-proj-abcdef"
+        assert agent._client_kwargs["api_key"] == "sk-proj-abcdef"
+        assert agent.client.api_key == "sk-proj-abcdef"
        # The bad char should be gone from all of them
        assert "\u028b" not in agent.api_key
        assert "\u028b" not in agent._client_kwargs["api_key"]
@@ -355,3 +355,18 @@ class TestApiMessagesAndApiKwargsSanitized:
        # api_messages sanitize must catch the dirty reasoning_content
        assert _sanitize_messages_non_ascii(api_messages) is True
        assert "\xab" not in api_messages[1]["reasoning_content"]
+
+    def test_reasoning_field_in_canonical_messages_is_sanitized(self):
+        """The canonical messages list stores reasoning as 'reasoning', not
+        'reasoning_content'.  The extra-fields loop must catch it."""
+        messages = [
+            {"role": "user", "content": "hello"},
+            {
+                "role": "assistant",
+                "content": "ok",
+                "reasoning": "Let me think \xab carefully \xbb",
+            },
+        ]
+        assert _sanitize_messages_non_ascii(messages) is True
+        assert "\xab" not in messages[1]["reasoning"]
+        assert "\xbb" not in messages[1]["reasoning"]