fix(transport): strip Hermes-internal scaffolding keys before chat.completions

The empty-response recovery path in run_agent.py appends synthetic
messages tagged with _empty_recovery_synthetic (and the agent loop uses
_thinking_prefill / _empty_terminal_sentinel similarly). These are
internal bookkeeping markers — they must never reach the wire.

chat_completions' convert_messages only stripped leaked Codex Responses
fields (codex_reasoning_items, call_id, etc.), not these _-prefixed
markers. Permissive providers (real OpenAI, Anthropic) silently ignore
unknown message keys, so the bug stayed hidden, but strict
OpenAI-compatible gateways reject them outright. Observed against
codex.nekos.me:

  502: [ObjectParam] [input[617]._empty_recovery_synthetic]
       [unknown_parameter] Unknown parameter:
       '_empty_recovery_synthetic'

Because the synthetic messages persist in the session, every
subsequent request in that session carries the poisoned key and
fails identically — a deterministic 502 the retry loop mistakes for
a transient server error.

Fix: convert_messages now drops any top-level message key starting
with '_'. OpenAI's message schema has no '_'-prefixed fields, so this
is safe and future-proofs against new internal markers.

Origin: local-author
Upstream-PR: none
Patch-State: local-only
This commit is contained in:
Soju 2026-05-15 11:26:47 +09:00 committed by Teknium
parent 7ab1677362
commit 775a17284f
2 changed files with 52 additions and 3 deletions

View file

@ -113,9 +113,8 @@ class ChatCompletionsTransport(ProviderTransport):
self, messages: list[dict[str, Any]], **kwargs
) -> list[dict[str, Any]]:
"""Messages are already in OpenAI format — strip internal fields
that strict chat-completions providers reject with HTTP 400/422.
Strips:
that strict chat-completions providers reject with HTTP 400/422
(or, in the case of some OpenAI-compatible gateways, 5xx):
- Codex Responses API fields: ``codex_reasoning_items`` /
``codex_message_items`` on the message, ``call_id`` /
@ -127,6 +126,16 @@ class ChatCompletionsTransport(ProviderTransport):
``Extra inputs are not permitted, field: 'messages[N].tool_name'``.
Permissive providers (OpenRouter, MiniMax) silently ignore the
field, which masked the bug for months.
- Hermes-internal scaffolding markers: any top-level message key
starting with ``_`` (e.g. ``_empty_recovery_synthetic``,
``_empty_terminal_sentinel``, ``_thinking_prefill``). These are
bookkeeping flags the agent loop attaches to messages so the
persistence layer can later strip its own scaffolding; they must
never reach the wire. Permissive providers (real OpenAI,
Anthropic) silently drop unknown message keys, but strict
gateways (e.g. opencode-go, codex.nekos.me) reject with
``Extra inputs are not permitted, field: 'messages[N]._empty_recovery_synthetic'``,
which then poisons every subsequent request in the session.
"""
needs_sanitize = False
for msg in messages:
@ -139,6 +148,9 @@ class ChatCompletionsTransport(ProviderTransport):
):
needs_sanitize = True
break
if any(isinstance(k, str) and k.startswith("_") for k in msg):
needs_sanitize = True
break
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
@ -160,6 +172,11 @@ class ChatCompletionsTransport(ProviderTransport):
msg.pop("codex_reasoning_items", None)
msg.pop("codex_message_items", None)
msg.pop("tool_name", None)
# Drop all Hermes-internal scaffolding markers (``_``-prefixed).
# OpenAI's message schema has no ``_``-prefixed fields, so this
# is safe and future-proofs against new markers being added.
for key in [k for k in msg if isinstance(k, str) and k.startswith("_")]:
msg.pop(key, None)
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:

View file

@ -66,6 +66,38 @@ class TestChatCompletionsBasic:
# Original list untouched (deepcopy-on-demand)
assert msgs[2]["tool_name"] == "execute_code"
def test_convert_messages_strips_internal_scaffolding_markers(self, transport):
    """Underscore-prefixed Hermes bookkeeping keys are removed from the output.

    Synthetic messages added by the empty-response recovery path carry
    ``_empty_recovery_synthetic``. Permissive providers tolerate the stray
    key, whereas strict gateways (opencode-go, codex.nekos.me) reject the
    request, which breaks every later turn of the session.
    """
    opening_turn = {"role": "user", "content": "run the task"}
    synthetic_reply = {
        "role": "assistant",
        "content": "(empty)",
        "_empty_recovery_synthetic": True,
    }
    synthetic_nudge = {
        "role": "user",
        "content": "continue",
        "_empty_recovery_synthetic": True,
    }
    final_reply = {
        "role": "assistant",
        "content": "done",
        "_thinking_prefill": True,
        "_empty_terminal_sentinel": True,
    }
    msgs = [opening_turn, synthetic_reply, synthetic_nudge, final_reply]

    result = transport.convert_messages(msgs)

    # No converted message may still carry a ``_``-prefixed key.
    for converted in result:
        leaked_keys = [key for key in converted if key.startswith("_")]
        assert not leaked_keys, converted

    # The user-visible content survives the stripping.
    assert result[1]["content"] == "(empty)"
    assert result[2]["content"] == "continue"

    # The caller's list is not mutated (deepcopy-on-demand).
    assert msgs[1]["_empty_recovery_synthetic"] is True
def test_convert_messages_clean_list_is_identity(self, transport):
    """Messages without internal/codex keys come back as the same list object."""
    clean_msgs = [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello"},
    ]

    returned = transport.convert_messages(clean_msgs)

    # Identity, not just equality: nothing to strip means no copy is made.
    assert returned is clean_msgs
class TestChatCompletionsBuildKwargs: