diff --git a/gateway/run.py b/gateway/run.py index 2eb745f92..d9b884ac8 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4068,6 +4068,7 @@ class GatewayRunner: self.session_store.update_session( session_entry.session_key, last_prompt_tokens=agent_result.get("last_prompt_tokens", 0), + total_tokens=agent_result.get("session_total_tokens", 0), ) # Auto voice reply: send TTS audio before the text response @@ -8712,11 +8713,14 @@ class GatewayRunner: _last_prompt_toks = 0 _input_toks = 0 _output_toks = 0 + _session_total_toks = 0 _agent = agent_holder[0] if _agent and hasattr(_agent, "context_compressor"): _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0) _input_toks = getattr(_agent, "session_prompt_tokens", 0) _output_toks = getattr(_agent, "session_completion_tokens", 0) + if _agent: + _session_total_toks = getattr(_agent, "session_total_tokens", 0) or 0 _resolved_model = getattr(_agent, "model", None) if _agent else None if not final_response: @@ -8732,6 +8736,7 @@ class GatewayRunner: "last_prompt_tokens": _last_prompt_toks, "input_tokens": _input_toks, "output_tokens": _output_toks, + "session_total_tokens": _session_total_toks, "model": _resolved_model, } @@ -8821,6 +8826,7 @@ class GatewayRunner: "last_prompt_tokens": _last_prompt_toks, "input_tokens": _input_toks, "output_tokens": _output_toks, + "session_total_tokens": _session_total_toks, "model": _resolved_model, "session_id": effective_session_id, "response_previewed": result.get("response_previewed", False), diff --git a/gateway/session.py b/gateway/session.py index 33165dcd9..ff7c4d3a7 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -770,6 +770,7 @@ class SessionStore: self, session_key: str, last_prompt_tokens: int = None, + total_tokens: int = None, ) -> None: """Update lightweight session metadata after an interaction.""" with self._lock: @@ -780,6 +781,8 @@ class SessionStore: entry.updated_at = _now() if last_prompt_tokens is not None: entry.last_prompt_tokens = last_prompt_tokens + if total_tokens is not None: + entry.total_tokens = total_tokens self._save() def suspend_session(self, session_key: str) -> bool: diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py index 0dbd5980b..5a03f9978 100644 --- a/tests/gateway/test_status_command.py +++ b/tests/gateway/test_status_command.py @@ -150,6 +150,7 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch): runner.session_store.update_session.assert_called_once_with( session_entry.session_key, last_prompt_tokens=80, + total_tokens=0, )