mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 08:32:09 +00:00
Merge pull request #43322 from kshitijk4poor/fix/langfuse-redact-base64-data-uri
fix(langfuse): redact base64 data URIs instead of truncating into invalid base64
This commit is contained in:
commit
f1b8519670
2 changed files with 58 additions and 1 deletions
|
|
@ -227,7 +227,30 @@ def _trace_key(task_id: str, session_id: str) -> str:
|
|||
return f"thread:{threading.get_ident()}"
|
||||
|
||||
|
||||
def _truncate_text(value: str, max_chars: int) -> str:
|
||||
def _is_base64_data_uri(value: str) -> bool:
|
||||
prefix = value[:200].lower()
|
||||
return prefix.startswith("data:") and ";base64," in prefix
|
||||
|
||||
|
||||
def _redact_data_uri(value: str) -> dict[str, Any]:
|
||||
header = value.split(",", 1)[0] if "," in value else "data:"
|
||||
media_type = header[5:].split(";", 1)[0] if header.startswith("data:") else ""
|
||||
return {
|
||||
"type": "data_uri",
|
||||
"media_type": media_type or None,
|
||||
"omitted": True,
|
||||
"length": len(value),
|
||||
}
|
||||
|
||||
|
||||
def _truncate_text(value: str, max_chars: int) -> Any:
    """Shorten *value* to at most *max_chars* characters, redacting data URIs.

    Returns the redaction dict for a base64 data URI, *value* unchanged when
    it already fits, and otherwise the first *max_chars* characters followed
    by a marker stating how many characters were dropped.
    """
    # The Langfuse SDK treats data:*;base64 strings as media and tries to
    # decode them. Cutting such a string short leaves invalid base64 and
    # produces noisy "Error parsing base64 data URI" logs. Observability only
    # needs metadata, not raw image/audio payloads, so the whole data URI is
    # redacted before it reaches the SDK.
    if _is_base64_data_uri(value):
        return _redact_data_uri(value)

    overflow = len(value) - max_chars
    if overflow <= 0:
        return value
    return f"{value[:max_chars]}... [truncated {overflow} chars]"
|
||||
|
|
|
|||
|
|
@ -171,6 +171,40 @@ class TestHooksInert:
|
|||
mod.on_post_tool_call(tool_name="read_file", args={}, result="ok", task_id="t", session_id="s")
|
||||
|
||||
|
||||
class TestPayloadSanitization:
    """Checks that base64 data URIs are redacted rather than truncated."""

    @staticmethod
    def _fresh_plugin():
        """Drop any cached copy of the plugin module and import it again."""
        sys.modules.pop("plugins.observability.langfuse", None)
        import importlib

        return importlib.import_module("plugins.observability.langfuse")

    @staticmethod
    def _placeholder(media_type, payload):
        """Return the redaction dict expected for *payload*."""
        return {
            "type": "data_uri",
            "media_type": media_type,
            "omitted": True,
            "length": len(payload),
        }

    def test_safe_value_redacts_base64_data_uri_instead_of_truncating(self):
        plugin = self._fresh_plugin()
        payload = "data:image/png;base64," + ("a" * 20000)

        outcome = plugin._safe_value(payload)

        assert outcome == self._placeholder("image/png", payload)

    def test_serialize_messages_redacts_data_uri_parts(self):
        plugin = self._fresh_plugin()
        payload = "data:image/jpeg;base64," + ("b" * 20000)
        image_part = {"type": "image_url", "image_url": {"url": payload}}

        serialized = plugin._serialize_messages(
            [{"role": "user", "content": [image_part]}]
        )

        redacted_url = serialized[0]["content"][0]["image_url"]["url"]
        assert redacted_url == self._placeholder("image/jpeg", payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Placeholder-credential guard (#23823).
|
||||
#
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue