fix(langfuse): restore usage/cost when post_api_request sends a sanitized response

on_post_llm_call gated usage extraction on `if response is not None:`, which
took the response-object path whenever a response was present. But
post_api_request delivers `response` as a sanitized dict (no `.usage`
attribute) alongside a separate `usage` summary dict, so
`getattr(response, "usage", None)` was always None and token/cost data was
dropped for every gateway turn (traces showed usage 0 / cost 0).

Gate on a real `.usage` attribute so the existing usage-dict fallback is
reached. Real response objects (post_llm_call / legacy) still take the
response-object path. Adds regression tests for both paths.
This commit is contained in:
kamonspecial 2026-06-07 00:06:39 +09:00
parent 56236b16e3
commit 9f1c16a7fb
2 changed files with 83 additions and 2 deletions

View file

@ -837,8 +837,16 @@ def on_post_llm_call(*, task_id: str = "", session_id: str = "", provider: str =
if output.get("tool_calls"):
state.turn_tool_calls.extend(output["tool_calls"])
# Extract usage: prefer response object, fall back to usage dict from post_api_request
if response is not None:
# Extract usage: prefer a real response object that carries usage, else
# fall back to the usage summary dict from post_api_request.
#
# post_api_request passes `response` as a SANITIZED dict (no ``.usage``
# attribute) alongside a separate `usage` summary dict. Gating on
# ``response is not None`` here took the response-object path on that dict,
# where ``getattr(response, "usage", None)`` is always None — so usage and
# cost were silently dropped for every gateway turn. Gate on a real
# ``.usage`` attribute instead so the usage-dict fallback below is reached.
if getattr(response, "usage", None) is not None:
usage_details, cost_details = _usage_and_cost(
response,
provider=provider,

View file

@ -704,3 +704,76 @@ class TestToolObservationKeying:
assert ended["output"] == {"status": "done"}
assert not state.tools
class TestUsageFromSanitizedResponse:
    """Regression coverage for usage extraction in ``on_post_llm_call``.

    ``post_api_request`` hands the handler a sanitized ``response`` dict
    (which has no ``.usage`` attribute) together with a separate ``usage``
    summary dict. The handler has to read that ``usage`` dict rather than
    treat the dict response as a usage-bearing object, which would drop all
    token/cost data.
    """

    @staticmethod
    def _reload_plugin():
        """Drop any cached copy of the plugin module and import it afresh."""
        sys.modules.pop("plugins.observability.langfuse", None)
        return importlib.import_module("plugins.observability.langfuse")

    def _setup(self, mod, monkeypatch):
        """Register an open generation on *mod* and return the capture dict.

        The returned dict receives ``usage_details`` each time the patched
        ``_end_observation`` is invoked.
        """
        recorded = {}

        def fake_end_observation(obs, *, output=None, metadata=None, usage_details=None, cost_details=None):
            recorded["usage_details"] = usage_details

        # Stand-in client so on_post_llm_call does not early-return.
        monkeypatch.setattr(mod, "_get_langfuse", lambda: object())

        generation = object()
        trace_state = mod.TraceState(trace_id="trace-1", root_ctx=None, root_span=None)
        trace_state.generations[mod._request_key(1)] = generation
        monkeypatch.setitem(
            mod._TRACE_STATE,
            mod._trace_key("task-1", "session-1"),
            trace_state,
        )

        monkeypatch.setattr(mod, "_end_observation", fake_end_observation)
        return recorded

    def test_sanitized_dict_response_uses_usage_dict(self, monkeypatch):
        """A dict ``response`` must not shadow the separate ``usage`` dict."""
        mod = self._reload_plugin()
        recorded = self._setup(mod, monkeypatch)

        usage_summary = {"input_tokens": 100, "output_tokens": 20}
        # A plain dict exposes "usage" as a key, not as an attribute, which
        # mirrors what post_api_request sends.
        sanitized_response = {
            "model": "gemini-3-flash-preview",
            "usage": dict(usage_summary),
        }

        mod.on_post_llm_call(
            task_id="task-1",
            session_id="session-1",
            api_call_count=1,
            model="gemini-3-flash-preview",
            response=sanitized_response,
            usage=usage_summary,
            assistant_content_chars=42,
        )

        # Prior to the fix the dict response took precedence over the usage
        # dict and the tokens vanished (usage_details == {}).
        expected = {"input": 100, "output": 20}
        assert recorded["usage_details"] == expected

    def test_real_response_object_with_usage_still_used(self, monkeypatch):
        """An object with a real ``.usage`` keeps the response-object path."""
        mod = self._reload_plugin()
        recorded = self._setup(mod, monkeypatch)

        # Record which response reaches _usage_and_cost so the test can prove
        # the response-object path (post_llm_call / legacy behavior) was taken.
        observed = {}

        def fake_usage_and_cost(response_obj, **_ignored):
            observed["resp"] = response_obj
            return {"input": 7, "output": 3}, {}

        monkeypatch.setattr(mod, "_usage_and_cost", fake_usage_and_cost)

        class _Resp:
            usage = {"prompt_tokens": 7, "completion_tokens": 3}

        real_response = _Resp()
        mod.on_post_llm_call(
            task_id="task-1",
            session_id="session-1",
            api_call_count=1,
            model="gemini-3-flash-preview",
            response=real_response,
            usage={"input_tokens": 999, "output_tokens": 999},
            assistant_content_chars=42,
        )

        assert observed["resp"] is real_response
        assert recorded["usage_details"] == {"input": 7, "output": 3}