mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: gateway token double-counting with cached agents (#3306)
The cached agent accumulates session_input_tokens across messages, so run_conversation() returns cumulative totals. However, update_session() applied those totals with += (increment), double-counting tokens on every message after the first.

- session.py: change in-memory entry updates from += to = (direct assignment, because the values are already cumulative)
- hermes_state.py: add an absolute=True flag to update_token_counts() that uses SET column = ? instead of SET column = column + ?
- session.py: pass absolute=True to the DB call

The CLI path is unchanged — it passes per-API-call deltas directly to update_token_counts() with the default absolute=False (increment).

Reported by @zaycruz in #3222. Closes #3222.
This commit is contained in:
parent
1519c4d477
commit
a8df7f9964
3 changed files with 45 additions and 10 deletions
|
|
@ -762,14 +762,16 @@ class SessionStore:
|
||||||
if session_key in self._entries:
|
if session_key in self._entries:
|
||||||
entry = self._entries[session_key]
|
entry = self._entries[session_key]
|
||||||
entry.updated_at = _now()
|
entry.updated_at = _now()
|
||||||
entry.input_tokens += input_tokens
|
# Direct assignment — the gateway receives cumulative totals
|
||||||
entry.output_tokens += output_tokens
|
# from the cached agent, not per-call deltas.
|
||||||
entry.cache_read_tokens += cache_read_tokens
|
entry.input_tokens = input_tokens
|
||||||
entry.cache_write_tokens += cache_write_tokens
|
entry.output_tokens = output_tokens
|
||||||
|
entry.cache_read_tokens = cache_read_tokens
|
||||||
|
entry.cache_write_tokens = cache_write_tokens
|
||||||
if last_prompt_tokens is not None:
|
if last_prompt_tokens is not None:
|
||||||
entry.last_prompt_tokens = last_prompt_tokens
|
entry.last_prompt_tokens = last_prompt_tokens
|
||||||
if estimated_cost_usd is not None:
|
if estimated_cost_usd is not None:
|
||||||
entry.estimated_cost_usd += estimated_cost_usd
|
entry.estimated_cost_usd = estimated_cost_usd
|
||||||
if cost_status:
|
if cost_status:
|
||||||
entry.cost_status = cost_status
|
entry.cost_status = cost_status
|
||||||
entry.total_tokens = (
|
entry.total_tokens = (
|
||||||
|
|
@ -795,6 +797,7 @@ class SessionStore:
|
||||||
billing_provider=provider,
|
billing_provider=provider,
|
||||||
billing_base_url=base_url,
|
billing_base_url=base_url,
|
||||||
model=model,
|
model=model,
|
||||||
|
absolute=True,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Session DB operation failed: %s", e)
|
logger.debug("Session DB operation failed: %s", e)
|
||||||
|
|
|
||||||
|
|
@ -319,11 +319,39 @@ class SessionDB:
|
||||||
billing_provider: Optional[str] = None,
|
billing_provider: Optional[str] = None,
|
||||||
billing_base_url: Optional[str] = None,
|
billing_base_url: Optional[str] = None,
|
||||||
billing_mode: Optional[str] = None,
|
billing_mode: Optional[str] = None,
|
||||||
|
absolute: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Increment token counters and backfill model if not already set."""
|
"""Update token counters and backfill model if not already set.
|
||||||
with self._lock:
|
|
||||||
self._conn.execute(
|
When *absolute* is False (default), values are **incremented** — use
|
||||||
"""UPDATE sessions SET
|
this for per-API-call deltas (CLI path).
|
||||||
|
|
||||||
|
When *absolute* is True, values are **set directly** — use this when
|
||||||
|
the caller already holds cumulative totals (gateway path, where the
|
||||||
|
cached agent accumulates across messages).
|
||||||
|
"""
|
||||||
|
if absolute:
|
||||||
|
sql = """UPDATE sessions SET
|
||||||
|
input_tokens = ?,
|
||||||
|
output_tokens = ?,
|
||||||
|
cache_read_tokens = ?,
|
||||||
|
cache_write_tokens = ?,
|
||||||
|
reasoning_tokens = ?,
|
||||||
|
estimated_cost_usd = COALESCE(?, 0),
|
||||||
|
actual_cost_usd = CASE
|
||||||
|
WHEN ? IS NULL THEN actual_cost_usd
|
||||||
|
ELSE ?
|
||||||
|
END,
|
||||||
|
cost_status = COALESCE(?, cost_status),
|
||||||
|
cost_source = COALESCE(?, cost_source),
|
||||||
|
pricing_version = COALESCE(?, pricing_version),
|
||||||
|
billing_provider = COALESCE(billing_provider, ?),
|
||||||
|
billing_base_url = COALESCE(billing_base_url, ?),
|
||||||
|
billing_mode = COALESCE(billing_mode, ?),
|
||||||
|
model = COALESCE(model, ?)
|
||||||
|
WHERE id = ?"""
|
||||||
|
else:
|
||||||
|
sql = """UPDATE sessions SET
|
||||||
input_tokens = input_tokens + ?,
|
input_tokens = input_tokens + ?,
|
||||||
output_tokens = output_tokens + ?,
|
output_tokens = output_tokens + ?,
|
||||||
cache_read_tokens = cache_read_tokens + ?,
|
cache_read_tokens = cache_read_tokens + ?,
|
||||||
|
|
@ -341,7 +369,10 @@ class SessionDB:
|
||||||
billing_base_url = COALESCE(billing_base_url, ?),
|
billing_base_url = COALESCE(billing_base_url, ?),
|
||||||
billing_mode = COALESCE(billing_mode, ?),
|
billing_mode = COALESCE(billing_mode, ?),
|
||||||
model = COALESCE(model, ?)
|
model = COALESCE(model, ?)
|
||||||
WHERE id = ?""",
|
WHERE id = ?"""
|
||||||
|
with self._lock:
|
||||||
|
self._conn.execute(
|
||||||
|
sql,
|
||||||
(
|
(
|
||||||
input_tokens,
|
input_tokens,
|
||||||
output_tokens,
|
output_tokens,
|
||||||
|
|
|
||||||
|
|
@ -858,6 +858,7 @@ class TestLastPromptTokens:
|
||||||
billing_provider=None,
|
billing_provider=None,
|
||||||
billing_base_url=None,
|
billing_base_url=None,
|
||||||
model="openai/gpt-5.4",
|
model="openai/gpt-5.4",
|
||||||
|
absolute=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue