fix(agent): sanitize surrogate characters from API responses and before API calls

2026-04-25 00:51:20 +00:00 · 2026-04-04 18:59:12 +02:00 · 2026-04-04 18:59:12 +02:00 · 8798b069d3
commit 8798b069d3
parent 3522a7aa13
1 changed file with 14 additions and 1 deletion
--- a/run_agent.py
+++ b/run_agent.py
@@ -6822,9 +6822,16 @@ class AIAgent:
                except Exception:
                    pass

+        # Sanitize surrogates from API response — some models (e.g. Kimi/GLM via Ollama)
+        # can return invalid surrogate code points that crash json.dumps() on persist.
+        _raw_content = assistant_message.content or ""
+        _san_content = _sanitize_surrogates(_raw_content)
+        if reasoning_text:
+            reasoning_text = _sanitize_surrogates(reasoning_text)
+
        msg = {
            "role": "assistant",
-            "content": assistant_message.content or "",
+            "content": _san_content,
            "reasoning": reasoning_text,
            "finish_reason": finish_reason,
        }
@@ -8705,6 +8712,12 @@ class AIAgent:
                    new_tcs.append(tc)
                am["tool_calls"] = new_tcs

+            # Proactively strip any surrogate characters before the API call.
+            # Models served via Ollama (Kimi K2.5, GLM-5, Qwen) can return
+            # lone surrogates (U+D800-U+DFFF) that crash json.dumps() inside
+            # the OpenAI SDK. Sanitizing here prevents the 3-retry cycle.
+            _sanitize_messages_surrogates(api_messages)
+
            # Calculate approximate request size for logging
            total_chars = sum(len(str(msg)) for msg in api_messages)
            approx_tokens = estimate_messages_tokens_rough(api_messages)