mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Merge pull request #40560 from kamonspecial/fix/langfuse-usage-sanitized-response
fix(langfuse): restore usage/cost when post_api_request sends a sanitized response
This commit is contained in:
commit
1e3b3dfabb
2 changed files with 83 additions and 2 deletions
|
|
@ -837,8 +837,16 @@ def on_post_llm_call(*, task_id: str = "", session_id: str = "", provider: str =
|
|||
if output.get("tool_calls"):
|
||||
state.turn_tool_calls.extend(output["tool_calls"])
|
||||
|
||||
# Extract usage: prefer response object, fall back to usage dict from post_api_request
|
||||
if response is not None:
|
||||
# Extract usage: prefer a real response object that carries usage, else
|
||||
# fall back to the usage summary dict from post_api_request.
|
||||
#
|
||||
# post_api_request passes `response` as a SANITIZED dict (no ``.usage``
|
||||
# attribute) alongside a separate `usage` summary dict. Gating on
|
||||
# ``response is not None`` here took the response-object path on that dict,
|
||||
# where ``getattr(response, "usage", None)`` is always None — so usage and
|
||||
# cost were silently dropped for every gateway turn. Gate on a real
|
||||
# ``.usage`` attribute instead so the usage-dict fallback below is reached.
|
||||
if getattr(response, "usage", None) is not None:
|
||||
usage_details, cost_details = _usage_and_cost(
|
||||
response,
|
||||
provider=provider,
|
||||
|
|
|
|||
|
|
@ -704,3 +704,76 @@ class TestToolObservationKeying:
|
|||
assert ended["output"] == {"status": "done"}
|
||||
assert not state.tools
|
||||
|
||||
|
||||
class TestUsageFromSanitizedResponse:
    """Regression tests for usage extraction in ``on_post_llm_call``.

    ``post_api_request`` passes ``response`` as a sanitized dict (which has no
    ``.usage`` attribute) together with a separate ``usage`` summary dict. The
    post-call handler has to read that ``usage`` dict rather than treat the
    dict response as a usage-bearing object, which would drop all token/cost
    data.
    """

    _MODULE_NAME = "plugins.observability.langfuse"

    @classmethod
    def _fresh_module(cls):
        """Evict any cached copy of the plugin module and import it again."""
        sys.modules.pop(cls._MODULE_NAME, None)
        return importlib.import_module(cls._MODULE_NAME)

    def _setup(self, mod, monkeypatch):
        """Prime ``mod`` with a trace holding one open generation.

        Returns the dict into which the patched ``_end_observation`` records
        the ``usage_details`` it is called with.
        """
        recorded = {}

        # Without an active client on_post_llm_call returns early.
        monkeypatch.setattr(mod, "_get_langfuse", lambda: object())

        # Register a placeholder observation under request key 1 for the
        # ("task-1", "session-1") trace.
        trace_state = mod.TraceState(trace_id="trace-1", root_ctx=None, root_span=None)
        trace_state.generations[mod._request_key(1)] = object()
        monkeypatch.setitem(
            mod._TRACE_STATE,
            mod._trace_key("task-1", "session-1"),
            trace_state,
        )

        def fake_end_observation(obs, *, output=None, metadata=None, usage_details=None, cost_details=None):
            # Only the usage payload is of interest to these tests.
            recorded["usage_details"] = usage_details

        monkeypatch.setattr(mod, "_end_observation", fake_end_observation)
        return recorded

    def test_sanitized_dict_response_uses_usage_dict(self, monkeypatch):
        """A dict ``response`` must not shadow the separate ``usage`` dict."""
        mod = self._fresh_module()
        recorded = self._setup(mod, monkeypatch)

        # A plain dict exposes no ``.usage`` attribute, mirroring what
        # post_api_request sends.
        sanitized_response = {
            "model": "gemini-3-flash-preview",
            "usage": {"input_tokens": 100, "output_tokens": 20},
        }
        usage_summary = {"input_tokens": 100, "output_tokens": 20}

        mod.on_post_llm_call(
            task_id="task-1",
            session_id="session-1",
            api_call_count=1,
            model="gemini-3-flash-preview",
            response=sanitized_response,
            usage=usage_summary,
            assistant_content_chars=42,
        )

        # Prior to the fix the dict response took precedence over the usage
        # dict and the tokens vanished (usage_details == {}).
        assert recorded["usage_details"] == {"input": 100, "output": 20}

    def test_real_response_object_with_usage_still_used(self, monkeypatch):
        """A response object carrying ``.usage`` keeps the response-object path."""
        mod = self._fresh_module()
        recorded = self._setup(mod, monkeypatch)

        # Record which response reaches _usage_and_cost (post_llm_call /
        # legacy behavior) and hand back a fixed usage payload.
        observed = {}

        def fake_usage_and_cost(resp, **_):
            observed["resp"] = resp
            return {"input": 7, "output": 3}, {}

        monkeypatch.setattr(mod, "_usage_and_cost", fake_usage_and_cost)

        class _UsageBearingResponse:
            usage = {"prompt_tokens": 7, "completion_tokens": 3}

        real_response = _UsageBearingResponse()
        call_kwargs = dict(
            task_id="task-1",
            session_id="session-1",
            api_call_count=1,
            model="gemini-3-flash-preview",
            response=real_response,
            # Deliberately different numbers: they must be ignored here.
            usage={"input_tokens": 999, "output_tokens": 999},
            assistant_content_chars=42,
        )
        mod.on_post_llm_call(**call_kwargs)

        assert observed["resp"] is real_response
        assert recorded["usage_details"] == {"input": 7, "output": 3}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue