Merge pull request #43322 from kshitijk4poor/fix/langfuse-redact-base64-data-uri

fix(langfuse): redact base64 data URIs instead of truncating into invalid base64
This commit is contained in:
kshitij 2026-06-09 22:41:41 -07:00 committed by GitHub
commit f1b8519670
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 58 additions and 1 deletions

View file

@ -227,7 +227,30 @@ def _trace_key(task_id: str, session_id: str) -> str:
return f"thread:{threading.get_ident()}"
def _truncate_text(value: str, max_chars: int) -> str:
def _is_base64_data_uri(value: str) -> bool:
prefix = value[:200].lower()
return prefix.startswith("data:") and ";base64," in prefix
def _redact_data_uri(value: str) -> dict[str, Any]:
header = value.split(",", 1)[0] if "," in value else "data:"
media_type = header[5:].split(";", 1)[0] if header.startswith("data:") else ""
return {
"type": "data_uri",
"media_type": media_type or None,
"omitted": True,
"length": len(value),
}
def _truncate_text(value: str, max_chars: int) -> Any:
    """Shorten *value* to at most *max_chars* characters, or redact it.

    Returns the redaction dict from ``_redact_data_uri`` when *value* is a
    base64 data URI, *value* unchanged when it already fits, and otherwise
    the first *max_chars* characters followed by a marker stating how many
    characters were dropped.
    """
    # The Langfuse SDK treats data:*;base64 strings as media and tries to
    # decode them. Cutting such a string leaves invalid base64 and produces
    # noisy "Error parsing base64 data URI" logs. Observability only needs
    # metadata, not the raw image/audio payload, so the whole data URI is
    # redacted before it reaches the SDK.
    if _is_base64_data_uri(value):
        return _redact_data_uri(value)

    overflow = len(value) - max_chars
    if overflow <= 0:
        return value
    return value[:max_chars] + f"... [truncated {overflow} chars]"

View file

@ -171,6 +171,40 @@ class TestHooksInert:
mod.on_post_tool_call(tool_name="read_file", args={}, result="ok", task_id="t", session_id="s")
class TestPayloadSanitization:
    """Checks that base64 data URIs are replaced by metadata, not truncated."""

    @staticmethod
    def _fresh_plugin():
        """Drop any cached copy of the Langfuse plugin module and import it anew."""
        import importlib

        sys.modules.pop("plugins.observability.langfuse", None)
        return importlib.import_module("plugins.observability.langfuse")

    def test_safe_value_redacts_base64_data_uri_instead_of_truncating(self):
        """``_safe_value`` returns the redaction dict for a long PNG data URI."""
        plugin = self._fresh_plugin()
        data_uri = "data:image/png;base64," + ("a" * 20000)
        expected = {
            "type": "data_uri",
            "media_type": "image/png",
            "omitted": True,
            "length": len(data_uri),
        }

        assert plugin._safe_value(data_uri) == expected

    def test_serialize_messages_redacts_data_uri_parts(self):
        """``_serialize_messages`` redacts a data URI nested in an image_url part."""
        plugin = self._fresh_plugin()
        data_uri = "data:image/jpeg;base64," + ("b" * 20000)
        image_part = {"type": "image_url", "image_url": {"url": data_uri}}
        expected = {
            "type": "data_uri",
            "media_type": "image/jpeg",
            "omitted": True,
            "length": len(data_uri),
        }

        serialized = plugin._serialize_messages(
            [{"role": "user", "content": [image_part]}]
        )

        assert serialized[0]["content"][0]["image_url"]["url"] == expected
# ---------------------------------------------------------------------------
# Placeholder-credential guard (#23823).
#