fix(state): JSON-encode multimodal message content for sqlite

sqlite3 can only bind str/bytes/int/float/None to query parameters.
Multimodal message content is a list of parts (text + image_url), which
raised 'Error binding parameter 3: type list is not supported' in
append_message and replace_messages.

In the CLI/TUI this surfaced as a visible crash when users pasted
screenshots. In the gateway it was silently swallowed by a bare except
in append_to_transcript, causing multimodal turns to be lost from the
session transcript.

Fix at the DB layer: _encode_content wraps lists/dicts as
'\\x00json:' + json.dumps(...) on write, _decode_content unwraps on
read. Plain strings are untouched, so existing FTS search, previews,
and JSONL compat are unaffected. Paired decode in get_messages,
get_messages_as_conversation, and search_messages context previews.

Regression test covers: list content round-trip, dict content
round-trip, string content stored unchanged, replace_messages with
multimodal content.

Also included: aligned fix #17522 for TUI image attachment with
paths containing spaces (see previous commit).
This commit is contained in:
Teknium 2026-04-30 20:22:40 -07:00
parent cc340c4a4d
commit 531ac20408
3 changed files with 162 additions and 12 deletions

View file

@ -1154,6 +1154,48 @@ class SessionDB:
# Message storage # Message storage
# ========================================================================= # =========================================================================
# Sentinel prefix used to distinguish JSON-encoded structured content
# (multimodal messages: lists of parts like text + image_url) from plain
# string content. The NUL byte is not legal in normal text, so this
# cannot collide with real user content.
_CONTENT_JSON_PREFIX = "\x00json:"
@classmethod
def _encode_content(cls, content: Any) -> Any:
"""Serialize structured (list/dict) message content for sqlite.
sqlite3 can only bind ``str``, ``bytes``, ``int``, ``float``, and ``None``
to query parameters. Multimodal messages have ``content`` as a list of
parts (``[{"type": "text", ...}, {"type": "image_url", ...}]``), which
raises ``ProgrammingError: Error binding parameter N: type 'list' is
not supported`` when bound directly.
Returns the value unchanged when it's already a safe scalar, or a
sentinel-prefixed JSON string for lists/dicts. Paired with
:meth:`_decode_content` on read.
"""
if content is None or isinstance(content, (str, bytes, int, float)):
return content
try:
return cls._CONTENT_JSON_PREFIX + json.dumps(content)
except (TypeError, ValueError):
# Last-resort fallback: stringify so persistence never fails.
return str(content)
@classmethod
def _decode_content(cls, content: Any) -> Any:
"""Reverse :meth:`_encode_content`; returns scalars unchanged."""
if isinstance(content, str) and content.startswith(cls._CONTENT_JSON_PREFIX):
try:
return json.loads(content[len(cls._CONTENT_JSON_PREFIX):])
except (json.JSONDecodeError, TypeError):
logger.warning(
"Failed to decode JSON-encoded message content; "
"returning raw string"
)
return content
return content
def append_message( def append_message(
self, self,
session_id: str, session_id: str,
@ -1190,6 +1232,9 @@ class SessionDB:
if codex_message_items else None if codex_message_items else None
) )
tool_calls_json = json.dumps(tool_calls) if tool_calls else None tool_calls_json = json.dumps(tool_calls) if tool_calls else None
# Multimodal content (list of parts) must be JSON-encoded: sqlite3
# cannot bind list/dict parameters directly.
stored_content = self._encode_content(content)
# Pre-compute tool call count # Pre-compute tool call count
num_tool_calls = 0 num_tool_calls = 0
@ -1206,7 +1251,7 @@ class SessionDB:
( (
session_id, session_id,
role, role,
content, stored_content,
tool_call_id, tool_call_id,
tool_calls_json, tool_calls_json,
tool_name, tool_name,
@ -1289,7 +1334,7 @@ class SessionDB:
( (
session_id, session_id,
role, role,
msg.get("content"), self._encode_content(msg.get("content")),
msg.get("tool_call_id"), msg.get("tool_call_id"),
tool_calls_json, tool_calls_json,
msg.get("tool_name"), msg.get("tool_name"),
@ -1328,6 +1373,8 @@ class SessionDB:
result = [] result = []
for row in rows: for row in rows:
msg = dict(row) msg = dict(row)
if "content" in msg:
msg["content"] = self._decode_content(msg["content"])
if msg.get("tool_calls"): if msg.get("tool_calls"):
try: try:
msg["tool_calls"] = json.loads(msg["tool_calls"]) msg["tool_calls"] = json.loads(msg["tool_calls"])
@ -1425,7 +1472,7 @@ class SessionDB:
messages = [] messages = []
for row in rows: for row in rows:
content = row["content"] content = self._decode_content(row["content"])
if row["role"] in {"user", "assistant"} and isinstance(content, str): if row["role"] in {"user", "assistant"} and isinstance(content, str):
content = sanitize_context(content).strip() content = sanitize_context(content).strip()
msg = {"role": row["role"], "content": content} msg = {"role": row["role"], "content": content}
@ -1810,10 +1857,26 @@ class SessionDB:
)""", )""",
(match["id"], match["id"]), (match["id"], match["id"]),
) )
context_msgs = [ context_msgs = []
{"role": r["role"], "content": (r["content"] or "")[:200]} for r in ctx_cursor.fetchall():
for r in ctx_cursor.fetchall() raw = r["content"]
] decoded = self._decode_content(raw)
# Multimodal context: render a compact text-only
# summary for search previews.
if isinstance(decoded, list):
text_parts = [
p.get("text", "") for p in decoded
if isinstance(p, dict) and p.get("type") == "text"
]
text = " ".join(t for t in text_parts if t).strip()
preview = text or "[multimodal content]"
elif isinstance(decoded, str):
preview = decoded
else:
preview = ""
context_msgs.append(
{"role": r["role"], "content": preview[:200]}
)
match["context"] = context_msgs match["context"] = context_msgs
except Exception: except Exception:
match["context"] = [] match["context"] = []

View file

@ -1243,7 +1243,7 @@ class TestRewriteTranscriptPreservesReasoning:
assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}] assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}]
assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}] assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}]
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path): def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path, monkeypatch):
from hermes_state import SessionDB from hermes_state import SessionDB
db = SessionDB(db_path=tmp_path / "test.db") db = SessionDB(db_path=tmp_path / "test.db")
@ -1258,16 +1258,27 @@ class TestRewriteTranscriptPreservesReasoning:
store._db = db store._db = db
store._loaded = True store._loaded = True
# Force the second insert inside replace_messages to fail, simulating
# any storage-layer error that might abort a multi-row rewrite.
real_encode = SessionDB._encode_content
calls = {"n": 0}
def flaky_encode(cls, content):
calls["n"] += 1
if calls["n"] == 2:
raise RuntimeError("simulated storage failure")
return real_encode.__func__(cls, content)
monkeypatch.setattr(SessionDB, "_encode_content", classmethod(flaky_encode))
replacement = [ replacement = [
{"role": "user", "content": "after user"}, {"role": "user", "content": "after user"},
{ {"role": "assistant", "content": "after assistant"},
"role": "assistant",
"content": {"not": "sqlite-bindable but JSONL-safe"},
},
] ]
store.rewrite_transcript(session_id, replacement) store.rewrite_transcript(session_id, replacement)
# The rewrite must roll back atomically — original messages preserved.
after = db.get_messages_as_conversation(session_id) after = db.get_messages_as_conversation(session_id)
assert [msg["content"] for msg in after] == [ assert [msg["content"] for msg in after] == [
"before user", "before user",

View file

@ -212,6 +212,82 @@ class TestMessageStorage:
messages = db.get_messages("s1") messages = db.get_messages("s1")
assert messages[0]["tool_calls"] == tool_calls assert messages[0]["tool_calls"] == tool_calls
def test_multimodal_list_content_round_trip(self, db):
"""Multimodal ``content`` (list of parts) must survive the SQLite
round-trip. sqlite3 cannot bind Python lists directly, so the DB
layer JSON-encodes structured content on write and decodes on read.
Regression test for the "Error binding parameter 3: type 'list' is
not supported" crash users hit when pasting screenshots into the
TUI (issue #17522).
"""
db.create_session(session_id="s1", source="cli")
content = [
{"type": "text", "text": "describe this screenshot"},
{
"type": "image_url",
"image_url": {"url": "data:image/png;base64,iVBORw0KG..."},
},
]
# Write must not raise
db.append_message("s1", role="user", content=content)
# get_messages decodes back to the original list
msgs = db.get_messages("s1")
assert len(msgs) == 1
assert msgs[0]["content"] == content
# get_messages_as_conversation decodes back to the original list
conv = db.get_messages_as_conversation("s1")
assert len(conv) == 1
assert conv[0] == {"role": "user", "content": content}
def test_dict_content_round_trip(self, db):
"""Dict-shaped content (e.g. provider wrappers) also round-trips."""
db.create_session(session_id="s1", source="cli")
content = {"parts": [{"text": "hi"}]}
db.append_message("s1", role="user", content=content)
msgs = db.get_messages("s1")
assert msgs[0]["content"] == content
def test_string_content_unchanged_by_encoding(self, db):
"""Plain strings must not be wrapped — FTS search and legacy
consumers depend on raw-string storage for text content.
"""
db.create_session(session_id="s1", source="cli")
db.append_message("s1", role="user", content="plain text")
# Peek at the raw column to confirm no encoding was applied
with db._lock:
row = db._conn.execute(
"SELECT content FROM messages WHERE session_id = ?", ("s1",)
).fetchone()
assert row["content"] == "plain text"
def test_replace_messages_handles_multimodal_content(self, db):
"""`replace_messages` (used by /retry, /undo, /compress) must also
handle list content without crashing."""
db.create_session(session_id="s1", source="cli")
content = [
{"type": "text", "text": "look at this"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
]
db.replace_messages(
"s1",
[
{"role": "user", "content": content},
{"role": "assistant", "content": "I see a screenshot."},
],
)
msgs = db.get_messages("s1")
assert len(msgs) == 2
assert msgs[0]["content"] == content
assert msgs[1]["content"] == "I see a screenshot."
def test_get_messages_as_conversation(self, db): def test_get_messages_as_conversation(self, db):
db.create_session(session_id="s1", source="cli") db.create_session(session_id="s1", source="cli")
db.append_message("s1", role="user", content="Hello") db.append_message("s1", role="user", content="Hello")