mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: always retry on ASCII codec UnicodeEncodeError — don't gate on per-component sanitization
The recovery block previously retried (continue) only when one of the per-component sanitization checks (messages, tools, system prompt, headers, credentials) found and stripped non-ASCII content. When the non-ASCII content lived only in the reasoning_content field of api_messages (which is built from messages['reasoning'] and was not checked by the original _sanitize_messages_non_ascii), all checks returned False and the recovery fell through to the normal error path — burning a retry attempt despite _force_ascii_payload being set.

Now the recovery always continues (retries) when _is_ascii_codec is detected. The _force_ascii_payload flag guarantees that the next iteration runs _sanitize_structure_non_ascii(api_kwargs) on the full API payload, catching any remaining non-ASCII content regardless of where it lives.

Also adds a test for the 'reasoning' field on canonical messages.

Fixes #6843
This commit is contained in:
parent
902f1e6ede
commit
93b6f45224
2 changed files with 36 additions and 9 deletions
|
|
@@ -268,9 +268,9 @@ class TestApiKeyClientSync:
|
|||
agent.client.api_key = _clean_key
|
||||
|
||||
# All three locations should now hold the clean key
|
||||
assert agent.api_key == "***"
|
||||
assert agent._client_kwargs["api_key"] == "***"
|
||||
assert agent.client.api_key == "***"
|
||||
assert agent.api_key == "sk-proj-abcdef"
|
||||
assert agent._client_kwargs["api_key"] == "sk-proj-abcdef"
|
||||
assert agent.client.api_key == "sk-proj-abcdef"
|
||||
# The bad char should be gone from all of them
|
||||
assert "\u028b" not in agent.api_key
|
||||
assert "\u028b" not in agent._client_kwargs["api_key"]
|
||||
|
|
@@ -355,3 +355,18 @@ class TestApiMessagesAndApiKwargsSanitized:
|
|||
# api_messages sanitize must catch the dirty reasoning_content
|
||||
assert _sanitize_messages_non_ascii(api_messages) is True
|
||||
assert "\xab" not in api_messages[1]["reasoning_content"]
|
||||
|
||||
def test_reasoning_field_in_canonical_messages_is_sanitized(self):
|
||||
"""The canonical messages list stores reasoning as 'reasoning', not
|
||||
'reasoning_content'. The extra-fields loop must catch it."""
|
||||
messages = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "ok",
|
||||
"reasoning": "Let me think \xab carefully \xbb",
|
||||
},
|
||||
]
|
||||
assert _sanitize_messages_non_ascii(messages) is True
|
||||
assert "\xab" not in messages[1]["reasoning"]
|
||||
assert "\xbb" not in messages[1]["reasoning"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue