mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(state): JSON-encode multimodal message content for sqlite
sqlite3 can only bind str/bytes/int/float/None to query parameters. Multimodal message content is a list of parts (text + image_url), which raised 'Error binding parameter 3: type list is not supported' in append_message and replace_messages. In the CLI/TUI this surfaced as a visible crash when users pasted screenshots. In the gateway it was silently swallowed by a bare except in append_to_transcript, causing multimodal turns to be lost from the session transcript. Fix at the DB layer: _encode_content wraps lists/dicts as '\\x00json:' + json.dumps(...) on write, _decode_content unwraps on read. Plain strings are untouched, so existing FTS search, previews, and JSONL compat are unaffected. Paired decode in get_messages, get_messages_as_conversation, and search_messages context previews. Regression test covers: list content round-trip, dict content round-trip, string content stored unchanged, replace_messages with multimodal content. Also included: aligned fix #17522 for TUI image attachment with paths containing spaces (see previous commit).
This commit is contained in:
parent
cc340c4a4d
commit
531ac20408
3 changed files with 162 additions and 12 deletions
|
|
@ -1154,6 +1154,48 @@ class SessionDB:
|
||||||
# Message storage
|
# Message storage
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
|
# Sentinel prefix used to distinguish JSON-encoded structured content
|
||||||
|
# (multimodal messages: lists of parts like text + image_url) from plain
|
||||||
|
# string content. The NUL byte is not legal in normal text, so this
|
||||||
|
# cannot collide with real user content.
|
||||||
|
_CONTENT_JSON_PREFIX = "\x00json:"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _encode_content(cls, content: Any) -> Any:
|
||||||
|
"""Serialize structured (list/dict) message content for sqlite.
|
||||||
|
|
||||||
|
sqlite3 can only bind ``str``, ``bytes``, ``int``, ``float``, and ``None``
|
||||||
|
to query parameters. Multimodal messages have ``content`` as a list of
|
||||||
|
parts (``[{"type": "text", ...}, {"type": "image_url", ...}]``), which
|
||||||
|
raises ``ProgrammingError: Error binding parameter N: type 'list' is
|
||||||
|
not supported`` when bound directly.
|
||||||
|
|
||||||
|
Returns the value unchanged when it's already a safe scalar, or a
|
||||||
|
sentinel-prefixed JSON string for lists/dicts. Paired with
|
||||||
|
:meth:`_decode_content` on read.
|
||||||
|
"""
|
||||||
|
if content is None or isinstance(content, (str, bytes, int, float)):
|
||||||
|
return content
|
||||||
|
try:
|
||||||
|
return cls._CONTENT_JSON_PREFIX + json.dumps(content)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
# Last-resort fallback: stringify so persistence never fails.
|
||||||
|
return str(content)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _decode_content(cls, content: Any) -> Any:
|
||||||
|
"""Reverse :meth:`_encode_content`; returns scalars unchanged."""
|
||||||
|
if isinstance(content, str) and content.startswith(cls._CONTENT_JSON_PREFIX):
|
||||||
|
try:
|
||||||
|
return json.loads(content[len(cls._CONTENT_JSON_PREFIX):])
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
logger.warning(
|
||||||
|
"Failed to decode JSON-encoded message content; "
|
||||||
|
"returning raw string"
|
||||||
|
)
|
||||||
|
return content
|
||||||
|
return content
|
||||||
|
|
||||||
def append_message(
|
def append_message(
|
||||||
self,
|
self,
|
||||||
session_id: str,
|
session_id: str,
|
||||||
|
|
@ -1190,6 +1232,9 @@ class SessionDB:
|
||||||
if codex_message_items else None
|
if codex_message_items else None
|
||||||
)
|
)
|
||||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||||
|
# Multimodal content (list of parts) must be JSON-encoded: sqlite3
|
||||||
|
# cannot bind list/dict parameters directly.
|
||||||
|
stored_content = self._encode_content(content)
|
||||||
|
|
||||||
# Pre-compute tool call count
|
# Pre-compute tool call count
|
||||||
num_tool_calls = 0
|
num_tool_calls = 0
|
||||||
|
|
@ -1206,7 +1251,7 @@ class SessionDB:
|
||||||
(
|
(
|
||||||
session_id,
|
session_id,
|
||||||
role,
|
role,
|
||||||
content,
|
stored_content,
|
||||||
tool_call_id,
|
tool_call_id,
|
||||||
tool_calls_json,
|
tool_calls_json,
|
||||||
tool_name,
|
tool_name,
|
||||||
|
|
@ -1289,7 +1334,7 @@ class SessionDB:
|
||||||
(
|
(
|
||||||
session_id,
|
session_id,
|
||||||
role,
|
role,
|
||||||
msg.get("content"),
|
self._encode_content(msg.get("content")),
|
||||||
msg.get("tool_call_id"),
|
msg.get("tool_call_id"),
|
||||||
tool_calls_json,
|
tool_calls_json,
|
||||||
msg.get("tool_name"),
|
msg.get("tool_name"),
|
||||||
|
|
@ -1328,6 +1373,8 @@ class SessionDB:
|
||||||
result = []
|
result = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
msg = dict(row)
|
msg = dict(row)
|
||||||
|
if "content" in msg:
|
||||||
|
msg["content"] = self._decode_content(msg["content"])
|
||||||
if msg.get("tool_calls"):
|
if msg.get("tool_calls"):
|
||||||
try:
|
try:
|
||||||
msg["tool_calls"] = json.loads(msg["tool_calls"])
|
msg["tool_calls"] = json.loads(msg["tool_calls"])
|
||||||
|
|
@ -1425,7 +1472,7 @@ class SessionDB:
|
||||||
|
|
||||||
messages = []
|
messages = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
content = row["content"]
|
content = self._decode_content(row["content"])
|
||||||
if row["role"] in {"user", "assistant"} and isinstance(content, str):
|
if row["role"] in {"user", "assistant"} and isinstance(content, str):
|
||||||
content = sanitize_context(content).strip()
|
content = sanitize_context(content).strip()
|
||||||
msg = {"role": row["role"], "content": content}
|
msg = {"role": row["role"], "content": content}
|
||||||
|
|
@ -1810,10 +1857,26 @@ class SessionDB:
|
||||||
)""",
|
)""",
|
||||||
(match["id"], match["id"]),
|
(match["id"], match["id"]),
|
||||||
)
|
)
|
||||||
context_msgs = [
|
context_msgs = []
|
||||||
{"role": r["role"], "content": (r["content"] or "")[:200]}
|
for r in ctx_cursor.fetchall():
|
||||||
for r in ctx_cursor.fetchall()
|
raw = r["content"]
|
||||||
]
|
decoded = self._decode_content(raw)
|
||||||
|
# Multimodal context: render a compact text-only
|
||||||
|
# summary for search previews.
|
||||||
|
if isinstance(decoded, list):
|
||||||
|
text_parts = [
|
||||||
|
p.get("text", "") for p in decoded
|
||||||
|
if isinstance(p, dict) and p.get("type") == "text"
|
||||||
|
]
|
||||||
|
text = " ".join(t for t in text_parts if t).strip()
|
||||||
|
preview = text or "[multimodal content]"
|
||||||
|
elif isinstance(decoded, str):
|
||||||
|
preview = decoded
|
||||||
|
else:
|
||||||
|
preview = ""
|
||||||
|
context_msgs.append(
|
||||||
|
{"role": r["role"], "content": preview[:200]}
|
||||||
|
)
|
||||||
match["context"] = context_msgs
|
match["context"] = context_msgs
|
||||||
except Exception:
|
except Exception:
|
||||||
match["context"] = []
|
match["context"] = []
|
||||||
|
|
|
||||||
|
|
@ -1243,7 +1243,7 @@ class TestRewriteTranscriptPreservesReasoning:
|
||||||
assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}]
|
assert after[0].get("reasoning_details") == [{"type": "summary", "text": "step by step"}]
|
||||||
assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}]
|
assert after[0].get("codex_reasoning_items") == [{"id": "r1", "type": "reasoning"}]
|
||||||
|
|
||||||
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path):
|
def test_db_rewrite_is_atomic_on_insert_failure(self, tmp_path, monkeypatch):
|
||||||
from hermes_state import SessionDB
|
from hermes_state import SessionDB
|
||||||
|
|
||||||
db = SessionDB(db_path=tmp_path / "test.db")
|
db = SessionDB(db_path=tmp_path / "test.db")
|
||||||
|
|
@ -1258,16 +1258,27 @@ class TestRewriteTranscriptPreservesReasoning:
|
||||||
store._db = db
|
store._db = db
|
||||||
store._loaded = True
|
store._loaded = True
|
||||||
|
|
||||||
|
# Force the second insert inside replace_messages to fail, simulating
|
||||||
|
# any storage-layer error that might abort a multi-row rewrite.
|
||||||
|
real_encode = SessionDB._encode_content
|
||||||
|
calls = {"n": 0}
|
||||||
|
|
||||||
|
def flaky_encode(cls, content):
|
||||||
|
calls["n"] += 1
|
||||||
|
if calls["n"] == 2:
|
||||||
|
raise RuntimeError("simulated storage failure")
|
||||||
|
return real_encode.__func__(cls, content)
|
||||||
|
|
||||||
|
monkeypatch.setattr(SessionDB, "_encode_content", classmethod(flaky_encode))
|
||||||
|
|
||||||
replacement = [
|
replacement = [
|
||||||
{"role": "user", "content": "after user"},
|
{"role": "user", "content": "after user"},
|
||||||
{
|
{"role": "assistant", "content": "after assistant"},
|
||||||
"role": "assistant",
|
|
||||||
"content": {"not": "sqlite-bindable but JSONL-safe"},
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
store.rewrite_transcript(session_id, replacement)
|
store.rewrite_transcript(session_id, replacement)
|
||||||
|
|
||||||
|
# The rewrite must roll back atomically — original messages preserved.
|
||||||
after = db.get_messages_as_conversation(session_id)
|
after = db.get_messages_as_conversation(session_id)
|
||||||
assert [msg["content"] for msg in after] == [
|
assert [msg["content"] for msg in after] == [
|
||||||
"before user",
|
"before user",
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,82 @@ class TestMessageStorage:
|
||||||
messages = db.get_messages("s1")
|
messages = db.get_messages("s1")
|
||||||
assert messages[0]["tool_calls"] == tool_calls
|
assert messages[0]["tool_calls"] == tool_calls
|
||||||
|
|
||||||
|
def test_multimodal_list_content_round_trip(self, db):
|
||||||
|
"""Multimodal ``content`` (list of parts) must survive the SQLite
|
||||||
|
round-trip. sqlite3 cannot bind Python lists directly, so the DB
|
||||||
|
layer JSON-encodes structured content on write and decodes on read.
|
||||||
|
|
||||||
|
Regression test for the "Error binding parameter 3: type 'list' is
|
||||||
|
not supported" crash users hit when pasting screenshots into the
|
||||||
|
TUI (issue #17522).
|
||||||
|
"""
|
||||||
|
db.create_session(session_id="s1", source="cli")
|
||||||
|
content = [
|
||||||
|
{"type": "text", "text": "describe this screenshot"},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": "data:image/png;base64,iVBORw0KG..."},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Write must not raise
|
||||||
|
db.append_message("s1", role="user", content=content)
|
||||||
|
|
||||||
|
# get_messages decodes back to the original list
|
||||||
|
msgs = db.get_messages("s1")
|
||||||
|
assert len(msgs) == 1
|
||||||
|
assert msgs[0]["content"] == content
|
||||||
|
|
||||||
|
# get_messages_as_conversation decodes back to the original list
|
||||||
|
conv = db.get_messages_as_conversation("s1")
|
||||||
|
assert len(conv) == 1
|
||||||
|
assert conv[0] == {"role": "user", "content": content}
|
||||||
|
|
||||||
|
def test_dict_content_round_trip(self, db):
|
||||||
|
"""Dict-shaped content (e.g. provider wrappers) also round-trips."""
|
||||||
|
db.create_session(session_id="s1", source="cli")
|
||||||
|
content = {"parts": [{"text": "hi"}]}
|
||||||
|
|
||||||
|
db.append_message("s1", role="user", content=content)
|
||||||
|
msgs = db.get_messages("s1")
|
||||||
|
assert msgs[0]["content"] == content
|
||||||
|
|
||||||
|
def test_string_content_unchanged_by_encoding(self, db):
|
||||||
|
"""Plain strings must not be wrapped — FTS search and legacy
|
||||||
|
consumers depend on raw-string storage for text content.
|
||||||
|
"""
|
||||||
|
db.create_session(session_id="s1", source="cli")
|
||||||
|
db.append_message("s1", role="user", content="plain text")
|
||||||
|
|
||||||
|
# Peek at the raw column to confirm no encoding was applied
|
||||||
|
with db._lock:
|
||||||
|
row = db._conn.execute(
|
||||||
|
"SELECT content FROM messages WHERE session_id = ?", ("s1",)
|
||||||
|
).fetchone()
|
||||||
|
assert row["content"] == "plain text"
|
||||||
|
|
||||||
|
def test_replace_messages_handles_multimodal_content(self, db):
|
||||||
|
"""`replace_messages` (used by /retry, /undo, /compress) must also
|
||||||
|
handle list content without crashing."""
|
||||||
|
db.create_session(session_id="s1", source="cli")
|
||||||
|
content = [
|
||||||
|
{"type": "text", "text": "look at this"},
|
||||||
|
{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
|
||||||
|
]
|
||||||
|
|
||||||
|
db.replace_messages(
|
||||||
|
"s1",
|
||||||
|
[
|
||||||
|
{"role": "user", "content": content},
|
||||||
|
{"role": "assistant", "content": "I see a screenshot."},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
msgs = db.get_messages("s1")
|
||||||
|
assert len(msgs) == 2
|
||||||
|
assert msgs[0]["content"] == content
|
||||||
|
assert msgs[1]["content"] == "I see a screenshot."
|
||||||
|
|
||||||
def test_get_messages_as_conversation(self, db):
|
def test_get_messages_as_conversation(self, db):
|
||||||
db.create_session(session_id="s1", source="cli")
|
db.create_session(session_id="s1", source="cli")
|
||||||
db.append_message("s1", role="user", content="Hello")
|
db.append_message("s1", role="user", content="Hello")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue