fix(agent): sanitize surrogate characters from API responses and before API calls

This commit is contained in:
ygd58 2026-04-04 18:59:12 +02:00 committed by Teknium
parent 3522a7aa13
commit 8798b069d3

View file

@@ -6822,9 +6822,16 @@ class AIAgent:
except Exception:
pass
# Sanitize surrogates from API response — some models (e.g. Kimi/GLM via Ollama)
# can return invalid surrogate code points that crash json.dumps() on persist.
_raw_content = assistant_message.content or ""
_san_content = _sanitize_surrogates(_raw_content)
if reasoning_text:
reasoning_text = _sanitize_surrogates(reasoning_text)
msg = {
"role": "assistant",
"content": assistant_message.content or "",
"content": _san_content,
"reasoning": reasoning_text,
"finish_reason": finish_reason,
}
@@ -8705,6 +8712,12 @@ class AIAgent:
new_tcs.append(tc)
am["tool_calls"] = new_tcs
# Proactively strip any surrogate characters before the API call.
# Models served via Ollama (Kimi K2.5, GLM-5, Qwen) can return
# lone surrogates (U+D800-U+DFFF) that crash json.dumps() inside
# the OpenAI SDK. Sanitizing here prevents the 3-retry cycle.
_sanitize_messages_surrogates(api_messages)
# Calculate approximate request size for logging
total_chars = sum(len(str(msg)) for msg in api_messages)
approx_tokens = estimate_messages_tokens_rough(api_messages)