mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
commit
bd09e42eac
14 changed files with 1072 additions and 100 deletions
109
run_agent.py
109
run_agent.py
|
|
@ -353,12 +353,50 @@ def _sanitize_surrogates(text: str) -> str:
|
|||
return text
|
||||
|
||||
|
||||
def _sanitize_structure_surrogates(payload: Any) -> bool:
|
||||
"""Replace surrogate code points in nested dict/list payloads in-place.
|
||||
|
||||
Mirror of ``_sanitize_structure_non_ascii`` but for surrogate recovery.
|
||||
Used to scrub nested structured fields (e.g. ``reasoning_details`` — an
|
||||
array of dicts with ``summary``/``text`` strings) that flat per-field
|
||||
checks don't reach. Returns True if any surrogates were replaced.
|
||||
"""
|
||||
found = False
|
||||
|
||||
def _walk(node):
|
||||
nonlocal found
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
node[key] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
elif isinstance(node, list):
|
||||
for idx, value in enumerate(node):
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
node[idx] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
|
||||
_walk(payload)
|
||||
return found
|
||||
|
||||
|
||||
def _sanitize_messages_surrogates(messages: list) -> bool:
|
||||
"""Sanitize surrogate characters from all string content in a messages list.
|
||||
|
||||
Walks message dicts in-place. Returns True if any surrogates were found
|
||||
and replaced, False otherwise. Covers content/text, name, and tool call
|
||||
metadata/arguments so retries don't fail on a non-content field.
|
||||
and replaced, False otherwise. Covers content/text, name, tool call
|
||||
metadata/arguments, AND any additional string or nested structured fields
|
||||
(``reasoning``, ``reasoning_content``, ``reasoning_details``, etc.) so
|
||||
retries don't fail on a non-content field. Byte-level reasoning models
|
||||
(xiaomi/mimo, kimi, glm) can emit lone surrogates in reasoning output
|
||||
that flow through to ``api_messages["reasoning_content"]`` on the next
|
||||
turn and crash json.dumps inside the OpenAI SDK.
|
||||
"""
|
||||
found = False
|
||||
for msg in messages:
|
||||
|
|
@ -398,6 +436,21 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
|
|||
if isinstance(fn_args, str) and _SURROGATE_RE.search(fn_args):
|
||||
fn["arguments"] = _SURROGATE_RE.sub('\ufffd', fn_args)
|
||||
found = True
|
||||
# Walk any additional string / nested fields (reasoning,
|
||||
# reasoning_content, reasoning_details, etc.) — surrogates from
|
||||
# byte-level reasoning models (xiaomi/mimo, kimi, glm) can lurk
|
||||
# in these fields and aren't covered by the per-field checks above.
|
||||
# Matches _sanitize_messages_non_ascii's coverage (PR #10537).
|
||||
for key, value in msg.items():
|
||||
if key in {"content", "name", "tool_calls", "role"}:
|
||||
continue
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
msg[key] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
if _sanitize_structure_surrogates(value):
|
||||
found = True
|
||||
return found
|
||||
|
||||
|
||||
|
|
@ -8689,6 +8742,7 @@ class AIAgent:
|
|||
{
|
||||
"name": tc["function"]["name"],
|
||||
"result": _results_by_id.get(tc.get("id")),
|
||||
"arguments": tc["function"].get("arguments"),
|
||||
}
|
||||
for tc in _m["tool_calls"]
|
||||
if isinstance(tc, dict)
|
||||
|
|
@ -9303,8 +9357,7 @@ class AIAgent:
|
|||
"and had none left for the actual response.\n\n"
|
||||
"To fix this:\n"
|
||||
"→ Lower reasoning effort: `/thinkon low` or `/thinkon minimal`\n"
|
||||
"→ Increase the output token limit: "
|
||||
"set `model.max_tokens` in config.yaml"
|
||||
"→ Or switch to a larger/non-reasoning model with `/model`"
|
||||
)
|
||||
self._cleanup_task_resources(effective_task_id)
|
||||
self._persist_session(messages, conversation_history)
|
||||
|
|
@ -9571,13 +9624,51 @@ class AIAgent:
|
|||
if isinstance(api_error, UnicodeEncodeError) and getattr(self, '_unicode_sanitization_passes', 0) < 2:
|
||||
_err_str = str(api_error).lower()
|
||||
_is_ascii_codec = "'ascii'" in _err_str or "ascii" in _err_str
|
||||
# Detect surrogate errors — utf-8 codec refusing to
|
||||
# encode U+D800..U+DFFF. The error text is:
|
||||
# "'utf-8' codec can't encode characters in position
|
||||
# N-M: surrogates not allowed"
|
||||
_is_surrogate_error = (
|
||||
"surrogate" in _err_str
|
||||
or ("'utf-8'" in _err_str and not _is_ascii_codec)
|
||||
)
|
||||
# Sanitize surrogates from both the canonical `messages`
|
||||
# list AND `api_messages` (the API-copy, which may carry
|
||||
# `reasoning_content`/`reasoning_details` transformed
|
||||
# from `reasoning` — fields the canonical list doesn't
|
||||
# have directly). Also clean `api_kwargs` if built and
|
||||
# `prefill_messages` if present. Mirrors the ASCII
|
||||
# codec recovery below.
|
||||
_surrogates_found = _sanitize_messages_surrogates(messages)
|
||||
if _surrogates_found:
|
||||
if isinstance(api_messages, list):
|
||||
if _sanitize_messages_surrogates(api_messages):
|
||||
_surrogates_found = True
|
||||
if isinstance(api_kwargs, dict):
|
||||
if _sanitize_structure_surrogates(api_kwargs):
|
||||
_surrogates_found = True
|
||||
if isinstance(getattr(self, "prefill_messages", None), list):
|
||||
if _sanitize_messages_surrogates(self.prefill_messages):
|
||||
_surrogates_found = True
|
||||
# Gate the retry on the error type, not on whether we
|
||||
# found anything — _force_ascii_payload / the extended
|
||||
# surrogate walker above cover all known paths, but a
|
||||
# new transformed field could still slip through. If
|
||||
# the error was a surrogate encode failure, always let
|
||||
# the retry run; the proactive sanitizer at line ~8781
|
||||
# runs again on the next iteration. Bounded by
|
||||
# _unicode_sanitization_passes < 2 (outer guard).
|
||||
if _surrogates_found or _is_surrogate_error:
|
||||
self._unicode_sanitization_passes += 1
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Stripped invalid surrogate characters from messages. Retrying...",
|
||||
force=True,
|
||||
)
|
||||
if _surrogates_found:
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Stripped invalid surrogate characters from messages. Retrying...",
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Surrogate encoding error — retrying after full-payload sanitization...",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
if _is_ascii_codec:
|
||||
self._force_ascii_payload = True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue