mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(unicode): sanitize surrogate metadata and allow two-pass retry
This commit is contained in:
parent
71036a7a75
commit
2c99b4e79b
2 changed files with 55 additions and 8 deletions
|
|
@ -107,6 +107,26 @@ class TestSurrogateVsAsciiSanitization:
|
|||
assert "\ud800" not in messages[0]["content"]
|
||||
assert "\ufffd" in messages[0]["content"]
|
||||
|
||||
def test_surrogates_in_name_and_tool_calls_are_sanitized(self):
|
||||
messages = [{
|
||||
"role": "assistant",
|
||||
"name": "bad\ud800name",
|
||||
"content": None,
|
||||
"tool_calls": [{
|
||||
"id": "call_\ud800",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read\ud800_file",
|
||||
"arguments": '{"path": "bad\ud800.txt"}'
|
||||
}
|
||||
}],
|
||||
}]
|
||||
assert _sanitize_messages_surrogates(messages) is True
|
||||
assert "\ud800" not in messages[0]["name"]
|
||||
assert "\ud800" not in messages[0]["tool_calls"][0]["id"]
|
||||
assert "\ud800" not in messages[0]["tool_calls"][0]["function"]["name"]
|
||||
assert "\ud800" not in messages[0]["tool_calls"][0]["function"]["arguments"]
|
||||
|
||||
def test_ascii_codec_strips_all_non_ascii(self):
|
||||
"""ASCII codec case: all non-ASCII is stripped, not replaced."""
|
||||
messages = [{"role": "user", "content": "test ⚕🤖你好 end"}]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue