mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(state): JSON-encode multimodal message content for sqlite
sqlite3 can only bind str/bytes/int/float/None to query parameters. Multimodal message content is a list of parts (text + image_url), which raised 'Error binding parameter 3: type list is not supported' in append_message and replace_messages. In the CLI/TUI this surfaced as a visible crash when users pasted screenshots. In the gateway it was silently swallowed by a bare except in append_to_transcript, causing multimodal turns to be lost from the session transcript. Fix at the DB layer: _encode_content wraps lists/dicts as '\\x00json:' + json.dumps(...) on write, _decode_content unwraps on read. Plain strings are untouched, so existing FTS search, previews, and JSONL compat are unaffected. Paired decode in get_messages, get_messages_as_conversation, and search_messages context previews. Regression test covers: list content round-trip, dict content round-trip, string content stored unchanged, replace_messages with multimodal content. Also included: aligned fix #17522 for TUI image attachment with paths containing spaces (see previous commit).
This commit is contained in:
parent
cc340c4a4d
commit
531ac20408
3 changed files with 162 additions and 12 deletions
|
|
@ -1154,6 +1154,48 @@ class SessionDB:
|
|||
# Message storage
|
||||
# =========================================================================
|
||||
|
||||
# Sentinel prefix used to distinguish JSON-encoded structured content
|
||||
# (multimodal messages: lists of parts like text + image_url) from plain
|
||||
# string content. The NUL byte is not legal in normal text, so this
|
||||
# cannot collide with real user content.
|
||||
_CONTENT_JSON_PREFIX = "\x00json:"
|
||||
|
||||
@classmethod
|
||||
def _encode_content(cls, content: Any) -> Any:
|
||||
"""Serialize structured (list/dict) message content for sqlite.
|
||||
|
||||
sqlite3 can only bind ``str``, ``bytes``, ``int``, ``float``, and ``None``
|
||||
to query parameters. Multimodal messages have ``content`` as a list of
|
||||
parts (``[{"type": "text", ...}, {"type": "image_url", ...}]``), which
|
||||
raises ``ProgrammingError: Error binding parameter N: type 'list' is
|
||||
not supported`` when bound directly.
|
||||
|
||||
Returns the value unchanged when it's already a safe scalar, or a
|
||||
sentinel-prefixed JSON string for lists/dicts. Paired with
|
||||
:meth:`_decode_content` on read.
|
||||
"""
|
||||
if content is None or isinstance(content, (str, bytes, int, float)):
|
||||
return content
|
||||
try:
|
||||
return cls._CONTENT_JSON_PREFIX + json.dumps(content)
|
||||
except (TypeError, ValueError):
|
||||
# Last-resort fallback: stringify so persistence never fails.
|
||||
return str(content)
|
||||
|
||||
@classmethod
|
||||
def _decode_content(cls, content: Any) -> Any:
|
||||
"""Reverse :meth:`_encode_content`; returns scalars unchanged."""
|
||||
if isinstance(content, str) and content.startswith(cls._CONTENT_JSON_PREFIX):
|
||||
try:
|
||||
return json.loads(content[len(cls._CONTENT_JSON_PREFIX):])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning(
|
||||
"Failed to decode JSON-encoded message content; "
|
||||
"returning raw string"
|
||||
)
|
||||
return content
|
||||
return content
|
||||
|
||||
def append_message(
|
||||
self,
|
||||
session_id: str,
|
||||
|
|
@ -1190,6 +1232,9 @@ class SessionDB:
|
|||
if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
# Multimodal content (list of parts) must be JSON-encoded: sqlite3
|
||||
# cannot bind list/dict parameters directly.
|
||||
stored_content = self._encode_content(content)
|
||||
|
||||
# Pre-compute tool call count
|
||||
num_tool_calls = 0
|
||||
|
|
@ -1206,7 +1251,7 @@ class SessionDB:
|
|||
(
|
||||
session_id,
|
||||
role,
|
||||
content,
|
||||
stored_content,
|
||||
tool_call_id,
|
||||
tool_calls_json,
|
||||
tool_name,
|
||||
|
|
@ -1289,7 +1334,7 @@ class SessionDB:
|
|||
(
|
||||
session_id,
|
||||
role,
|
||||
msg.get("content"),
|
||||
self._encode_content(msg.get("content")),
|
||||
msg.get("tool_call_id"),
|
||||
tool_calls_json,
|
||||
msg.get("tool_name"),
|
||||
|
|
@ -1328,6 +1373,8 @@ class SessionDB:
|
|||
result = []
|
||||
for row in rows:
|
||||
msg = dict(row)
|
||||
if "content" in msg:
|
||||
msg["content"] = self._decode_content(msg["content"])
|
||||
if msg.get("tool_calls"):
|
||||
try:
|
||||
msg["tool_calls"] = json.loads(msg["tool_calls"])
|
||||
|
|
@ -1425,7 +1472,7 @@ class SessionDB:
|
|||
|
||||
messages = []
|
||||
for row in rows:
|
||||
content = row["content"]
|
||||
content = self._decode_content(row["content"])
|
||||
if row["role"] in {"user", "assistant"} and isinstance(content, str):
|
||||
content = sanitize_context(content).strip()
|
||||
msg = {"role": row["role"], "content": content}
|
||||
|
|
@ -1810,10 +1857,26 @@ class SessionDB:
|
|||
)""",
|
||||
(match["id"], match["id"]),
|
||||
)
|
||||
context_msgs = [
|
||||
{"role": r["role"], "content": (r["content"] or "")[:200]}
|
||||
for r in ctx_cursor.fetchall()
|
||||
context_msgs = []
|
||||
for r in ctx_cursor.fetchall():
|
||||
raw = r["content"]
|
||||
decoded = self._decode_content(raw)
|
||||
# Multimodal context: render a compact text-only
|
||||
# summary for search previews.
|
||||
if isinstance(decoded, list):
|
||||
text_parts = [
|
||||
p.get("text", "") for p in decoded
|
||||
if isinstance(p, dict) and p.get("type") == "text"
|
||||
]
|
||||
text = " ".join(t for t in text_parts if t).strip()
|
||||
preview = text or "[multimodal content]"
|
||||
elif isinstance(decoded, str):
|
||||
preview = decoded
|
||||
else:
|
||||
preview = ""
|
||||
context_msgs.append(
|
||||
{"role": r["role"], "content": preview[:200]}
|
||||
)
|
||||
match["context"] = context_msgs
|
||||
except Exception:
|
||||
match["context"] = []
|
||||
|
|
|
|||
|
|
@ -1243,7 +1243,7 @@ class TestRewriteTranscriptPreservesReasoning:
|
|||
assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}]
|
||||
assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}]
|
||||
|
||||
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path):
|
||||
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path, monkeypatch):
|
||||
from hermes_state import SessionDB
|
||||
|
||||
db = SessionDB(db_path=tmp_path / "test.db")
|
||||
|
|
@ -1258,16 +1258,27 @@ class TestRewriteTranscriptPreservesReasoning:
|
|||
store._db = db
|
||||
store._loaded = True
|
||||
|
||||
# Force the second insert inside replace_messages to fail, simulating
|
||||
# any storage-layer error that might abort a multi-row rewrite.
|
||||
real_encode = SessionDB._encode_content
|
||||
calls = {"n": 0}
|
||||
|
||||
def flaky_encode(cls, content):
|
||||
calls["n"] += 1
|
||||
if calls["n"] == 2:
|
||||
raise RuntimeError("simulated storage failure")
|
||||
return real_encode.__func__(cls, content)
|
||||
|
||||
monkeypatch.setattr(SessionDB, "_encode_content", classmethod(flaky_encode))
|
||||
|
||||
replacement = [
|
||||
{"role": "user", "content": "after user"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": {"not": "sqlite-bindable but JSONL-safe"},
|
||||
},
|
||||
{"role": "assistant", "content": "after assistant"},
|
||||
]
|
||||
|
||||
store.rewrite_transcript(session_id, replacement)
|
||||
|
||||
# The rewrite must roll back atomically — original messages preserved.
|
||||
after = db.get_messages_as_conversation(session_id)
|
||||
assert [msg["content"] for msg in after] == [
|
||||
"before user",
|
||||
|
|
|
|||
|
|
@ -212,6 +212,82 @@ class TestMessageStorage:
|
|||
messages = db.get_messages("s1")
|
||||
assert messages[0]["tool_calls"] == tool_calls
|
||||
|
||||
def test_multimodal_list_content_round_trip(self, db):
|
||||
"""Multimodal ``content`` (list of parts) must survive the SQLite
|
||||
round-trip. sqlite3 cannot bind Python lists directly, so the DB
|
||||
layer JSON-encodes structured content on write and decodes on read.
|
||||
|
||||
Regression test for the "Error binding parameter 3: type 'list' is
|
||||
not supported" crash users hit when pasting screenshots into the
|
||||
TUI (issue #17522).
|
||||
"""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
content = [
|
||||
{"type": "text", "text": "describe this screenshot"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "data:image/png;base64,iVBORw0KG..."},
|
||||
},
|
||||
]
|
||||
|
||||
# Write must not raise
|
||||
db.append_message("s1", role="user", content=content)
|
||||
|
||||
# get_messages decodes back to the original list
|
||||
msgs = db.get_messages("s1")
|
||||
assert len(msgs) == 1
|
||||
assert msgs[0]["content"] == content
|
||||
|
||||
# get_messages_as_conversation decodes back to the original list
|
||||
conv = db.get_messages_as_conversation("s1")
|
||||
assert len(conv) == 1
|
||||
assert conv[0] == {"role": "user", "content": content}
|
||||
|
||||
def test_dict_content_round_trip(self, db):
|
||||
"""Dict-shaped content (e.g. provider wrappers) also round-trips."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
content = {"parts": [{"text": "hi"}]}
|
||||
|
||||
db.append_message("s1", role="user", content=content)
|
||||
msgs = db.get_messages("s1")
|
||||
assert msgs[0]["content"] == content
|
||||
|
||||
def test_string_content_unchanged_by_encoding(self, db):
|
||||
"""Plain strings must not be wrapped — FTS search and legacy
|
||||
consumers depend on raw-string storage for text content.
|
||||
"""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="user", content="plain text")
|
||||
|
||||
# Peek at the raw column to confirm no encoding was applied
|
||||
with db._lock:
|
||||
row = db._conn.execute(
|
||||
"SELECT content FROM messages WHERE session_id = ?", ("s1",)
|
||||
).fetchone()
|
||||
assert row["content"] == "plain text"
|
||||
|
||||
def test_replace_messages_handles_multimodal_content(self, db):
|
||||
"""`replace_messages` (used by /retry, /undo, /compress) must also
|
||||
handle list content without crashing."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
content = [
|
||||
{"type": "text", "text": "look at this"},
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
|
||||
]
|
||||
|
||||
db.replace_messages(
|
||||
"s1",
|
||||
[
|
||||
{"role": "user", "content": content},
|
||||
{"role": "assistant", "content": "I see a screenshot."},
|
||||
],
|
||||
)
|
||||
|
||||
msgs = db.get_messages("s1")
|
||||
assert len(msgs) == 2
|
||||
assert msgs[0]["content"] == content
|
||||
assert msgs[1]["content"] == "I see a screenshot."
|
||||
|
||||
def test_get_messages_as_conversation(self, db):
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="user", content="Hello")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue