mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: gateway token double-counting — use absolute set instead of increment (#3317)
The gateway's update_session() used += for token counts, but the cached agent's session_prompt_tokens / session_completion_tokens are cumulative totals that grow across messages. Each update_session call re-added the running total, inflating usage stats with every message (1.7x after 3 messages, worse over longer conversations). Fix: change += to = for in-memory entry fields, add set_token_counts() to SessionDB that uses direct assignment instead of SQL increment, and switch the gateway to call it. CLI mode continues using update_token_counts() (increment) since it tracks per-API-call deltas — that path is unchanged. Based on analysis from PR #3222 by @zaycruz (closed). Co-authored-by: zaycruz <zay@users.noreply.github.com>
This commit is contained in:
parent
867eefdd9f
commit
22cfad157b
3 changed files with 68 additions and 2 deletions
|
|
@ -785,7 +785,7 @@ class SessionStore:
|
|||
|
||||
if self._db and db_session_id:
|
||||
try:
|
||||
self._db.update_token_counts(
|
||||
self._db.set_token_counts(
|
||||
db_session_id,
|
||||
input_tokens=input_tokens,
|
||||
output_tokens=output_tokens,
|
||||
|
|
|
|||
|
|
@ -415,6 +415,72 @@ class SessionDB:
|
|||
)
|
||||
self._conn.commit()
|
||||
|
||||
def set_token_counts(
|
||||
self,
|
||||
session_id: str,
|
||||
input_tokens: int = 0,
|
||||
output_tokens: int = 0,
|
||||
model: str = None,
|
||||
cache_read_tokens: int = 0,
|
||||
cache_write_tokens: int = 0,
|
||||
reasoning_tokens: int = 0,
|
||||
estimated_cost_usd: Optional[float] = None,
|
||||
actual_cost_usd: Optional[float] = None,
|
||||
cost_status: Optional[str] = None,
|
||||
cost_source: Optional[str] = None,
|
||||
pricing_version: Optional[str] = None,
|
||||
billing_provider: Optional[str] = None,
|
||||
billing_base_url: Optional[str] = None,
|
||||
billing_mode: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Set token counters to absolute values (not increment).
|
||||
|
||||
Use this when the caller provides cumulative totals from a completed
|
||||
conversation run (e.g. the gateway, where the cached agent's
|
||||
session_prompt_tokens already reflects the running total).
|
||||
"""
|
||||
with self._lock:
|
||||
self._conn.execute(
|
||||
"""UPDATE sessions SET
|
||||
input_tokens = ?,
|
||||
output_tokens = ?,
|
||||
cache_read_tokens = ?,
|
||||
cache_write_tokens = ?,
|
||||
reasoning_tokens = ?,
|
||||
estimated_cost_usd = ?,
|
||||
actual_cost_usd = CASE
|
||||
WHEN ? IS NULL THEN actual_cost_usd
|
||||
ELSE ?
|
||||
END,
|
||||
cost_status = COALESCE(?, cost_status),
|
||||
cost_source = COALESCE(?, cost_source),
|
||||
pricing_version = COALESCE(?, pricing_version),
|
||||
billing_provider = COALESCE(billing_provider, ?),
|
||||
billing_base_url = COALESCE(billing_base_url, ?),
|
||||
billing_mode = COALESCE(billing_mode, ?),
|
||||
model = COALESCE(model, ?)
|
||||
WHERE id = ?""",
|
||||
(
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
reasoning_tokens,
|
||||
estimated_cost_usd,
|
||||
actual_cost_usd,
|
||||
actual_cost_usd,
|
||||
cost_status,
|
||||
cost_source,
|
||||
pricing_version,
|
||||
billing_provider,
|
||||
billing_base_url,
|
||||
billing_mode,
|
||||
model,
|
||||
session_id,
|
||||
),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a session by ID."""
|
||||
with self._lock:
|
||||
|
|
|
|||
|
|
@ -846,7 +846,7 @@ class TestLastPromptTokens:
|
|||
|
||||
store.update_session("k1", model="openai/gpt-5.4")
|
||||
|
||||
store._db.update_token_counts.assert_called_once_with(
|
||||
store._db.set_token_counts.assert_called_once_with(
|
||||
"s1",
|
||||
input_tokens=0,
|
||||
output_tokens=0,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue