From 531ac204081f8a925f547df0f3415bcbd7321817 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 30 Apr 2026 20:22:40 -0700 Subject: [PATCH] fix(state): JSON-encode multimodal message content for sqlite sqlite3 can only bind str/bytes/int/float/None to query parameters. Multimodal message content is a list of parts (text + image_url), which raised 'Error binding parameter 3: type list is not supported' in append_message and replace_messages. In the CLI/TUI this surfaced as a visible crash when users pasted screenshots. In the gateway it was silently swallowed by a bare except in append_to_transcript, causing multimodal turns to be lost from the session transcript. Fix at the DB layer: _encode_content wraps lists/dicts as '\\x00json:' + json.dumps(...) on write, _decode_content unwraps on read. Plain strings are untouched, so existing FTS search, previews, and JSONL compat are unaffected. Paired decode in get_messages, get_messages_as_conversation, and search_messages context previews. Regression test covers: list content round-trip, dict content round-trip, string content stored unchanged, replace_messages with multimodal content. Also included: aligned fix #17522 for TUI image attachment with paths containing spaces (see previous commit). --- hermes_state.py | 77 +++++++++++++++++++++++++++++++---- tests/gateway/test_session.py | 21 +++++++--- tests/test_hermes_state.py | 76 ++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 12 deletions(-) diff --git a/hermes_state.py b/hermes_state.py index 7ca67d5ceec..a808b684c74 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -1154,6 +1154,48 @@ class SessionDB: # Message storage # ========================================================================= + # Sentinel prefix used to distinguish JSON-encoded structured content + # (multimodal messages: lists of parts like text + image_url) from plain + # string content. The NUL byte is not legal in normal text, so this + # cannot collide with real user content. + _CONTENT_JSON_PREFIX = "\x00json:" + + @classmethod + def _encode_content(cls, content: Any) -> Any: + """Serialize structured (list/dict) message content for sqlite. + + sqlite3 can only bind ``str``, ``bytes``, ``int``, ``float``, and ``None`` + to query parameters. Multimodal messages have ``content`` as a list of + parts (``[{"type": "text", ...}, {"type": "image_url", ...}]``), which + raises ``ProgrammingError: Error binding parameter N: type 'list' is + not supported`` when bound directly. + + Returns the value unchanged when it's already a safe scalar, or a + sentinel-prefixed JSON string for lists/dicts. Paired with + :meth:`_decode_content` on read. + """ + if content is None or isinstance(content, (str, bytes, int, float)): + return content + try: + return cls._CONTENT_JSON_PREFIX + json.dumps(content) + except (TypeError, ValueError): + # Last-resort fallback: stringify so persistence never fails. + return str(content) + + @classmethod + def _decode_content(cls, content: Any) -> Any: + """Reverse :meth:`_encode_content`; returns scalars unchanged.""" + if isinstance(content, str) and content.startswith(cls._CONTENT_JSON_PREFIX): + try: + return json.loads(content[len(cls._CONTENT_JSON_PREFIX):]) + except (json.JSONDecodeError, TypeError): + logger.warning( + "Failed to decode JSON-encoded message content; " + "returning raw string" + ) + return content + return content + def append_message( self, session_id: str, @@ -1190,6 +1232,9 @@ class SessionDB: if codex_message_items else None ) tool_calls_json = json.dumps(tool_calls) if tool_calls else None + # Multimodal content (list of parts) must be JSON-encoded: sqlite3 + # cannot bind list/dict parameters directly. + stored_content = self._encode_content(content) # Pre-compute tool call count num_tool_calls = 0 @@ -1206,7 +1251,7 @@ class SessionDB: ( session_id, role, - content, + stored_content, tool_call_id, tool_calls_json, tool_name, @@ -1289,7 +1334,7 @@ class SessionDB: ( session_id, role, - msg.get("content"), + self._encode_content(msg.get("content")), msg.get("tool_call_id"), tool_calls_json, msg.get("tool_name"), @@ -1328,6 +1373,8 @@ class SessionDB: result = [] for row in rows: msg = dict(row) + if "content" in msg: + msg["content"] = self._decode_content(msg["content"]) if msg.get("tool_calls"): try: msg["tool_calls"] = json.loads(msg["tool_calls"]) @@ -1425,7 +1472,7 @@ class SessionDB: messages = [] for row in rows: - content = row["content"] + content = self._decode_content(row["content"]) if row["role"] in {"user", "assistant"} and isinstance(content, str): content = sanitize_context(content).strip() msg = {"role": row["role"], "content": content} @@ -1810,10 +1857,26 @@ class SessionDB: )""", (match["id"], match["id"]), ) - context_msgs = [ - {"role": r["role"], "content": (r["content"] or "")[:200]} - for r in ctx_cursor.fetchall() - ] + context_msgs = [] + for r in ctx_cursor.fetchall(): + raw = r["content"] + decoded = self._decode_content(raw) + # Multimodal context: render a compact text-only + # summary for search previews. + if isinstance(decoded, list): + text_parts = [ + p.get("text", "") for p in decoded + if isinstance(p, dict) and p.get("type") == "text" + ] + text = " ".join(t for t in text_parts if t).strip() + preview = text or "[multimodal content]" + elif isinstance(decoded, str): + preview = decoded + else: + preview = "" + context_msgs.append( + {"role": r["role"], "content": preview[:200]} + ) match["context"] = context_msgs except Exception: match["context"] = [] diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 5e8af49e3e1..57a8aefa5e8 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -1243,7 +1243,7 @@ class TestRewriteTranscriptPreservesReasoning: assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}] assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}] - def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path): + def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path, monkeypatch): from hermes_state import SessionDB db = SessionDB(db_path=tmp_path / "test.db") @@ -1258,16 +1258,27 @@ class TestRewriteTranscriptPreservesReasoning: store._db = db store._loaded = True + # Force the second insert inside replace_messages to fail, simulating + # any storage-layer error that might abort a multi-row rewrite. + real_encode = SessionDB._encode_content + calls = {"n": 0} + + def flaky_encode(cls, content): + calls["n"] += 1 + if calls["n"] == 2: + raise RuntimeError("simulated storage failure") + return real_encode.__func__(cls, content) + + monkeypatch.setattr(SessionDB, "_encode_content", classmethod(flaky_encode)) + replacement = [ {"role": "user", "content": "after user"}, - { - "role": "assistant", - "content": {"not": "sqlite-bindable but JSONL-safe"}, - }, + {"role": "assistant", "content": "after assistant"}, ] store.rewrite_transcript(session_id, replacement) + # The rewrite must roll back atomically — original messages preserved. after = db.get_messages_as_conversation(session_id) assert [msg["content"] for msg in after] == [ "before user", diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index d66828a6410..a2c48366ded 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -212,6 +212,82 @@ class TestMessageStorage: messages = db.get_messages("s1") assert messages[0]["tool_calls"] == tool_calls + def test_multimodal_list_content_round_trip(self, db): + """Multimodal ``content`` (list of parts) must survive the SQLite + round-trip. sqlite3 cannot bind Python lists directly, so the DB + layer JSON-encodes structured content on write and decodes on read. + + Regression test for the "Error binding parameter 3: type 'list' is + not supported" crash users hit when pasting screenshots into the + TUI (issue #17522). + """ + db.create_session(session_id="s1", source="cli") + content = [ + {"type": "text", "text": "describe this screenshot"}, + { + "type": "image_url", + "image_url": {"url": "data:image/png;base64,iVBORw0KG..."}, + }, + ] + + # Write must not raise + db.append_message("s1", role="user", content=content) + + # get_messages decodes back to the original list + msgs = db.get_messages("s1") + assert len(msgs) == 1 + assert msgs[0]["content"] == content + + # get_messages_as_conversation decodes back to the original list + conv = db.get_messages_as_conversation("s1") + assert len(conv) == 1 + assert conv[0] == {"role": "user", "content": content} + + def test_dict_content_round_trip(self, db): + """Dict-shaped content (e.g. provider wrappers) also round-trips.""" + db.create_session(session_id="s1", source="cli") + content = {"parts": [{"text": "hi"}]} + + db.append_message("s1", role="user", content=content) + msgs = db.get_messages("s1") + assert msgs[0]["content"] == content + + def test_string_content_unchanged_by_encoding(self, db): + """Plain strings must not be wrapped — FTS search and legacy + consumers depend on raw-string storage for text content. + """ + db.create_session(session_id="s1", source="cli") + db.append_message("s1", role="user", content="plain text") + + # Peek at the raw column to confirm no encoding was applied + with db._lock: + row = db._conn.execute( + "SELECT content FROM messages WHERE session_id = ?", ("s1",) + ).fetchone() + assert row["content"] == "plain text" + + def test_replace_messages_handles_multimodal_content(self, db): + """`replace_messages` (used by /retry, /undo, /compress) must also + handle list content without crashing.""" + db.create_session(session_id="s1", source="cli") + content = [ + {"type": "text", "text": "look at this"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}}, + ] + + db.replace_messages( + "s1", + [ + {"role": "user", "content": content}, + {"role": "assistant", "content": "I see a screenshot."}, + ], + ) + + msgs = db.get_messages("s1") + assert len(msgs) == 2 + assert msgs[0]["content"] == content + assert msgs[1]["content"] == "I see a screenshot." + def test_get_messages_as_conversation(self, db): db.create_session(session_id="s1", source="cli") db.append_message("s1", role="user", content="Hello")