From 2046a4c08cb24323444c4f161371a8e24b5df8b3 Mon Sep 17 00:00:00 2001
From: "ac (sourcetree)" <alchang@Pobox.com>
Date: Wed, 11 Mar 2026 17:44:37 -0700
Subject: [PATCH] fix: backfill model on gateway sessions after agent runs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gateway sessions end up with model=NULL because the session row is
created before AIAgent is constructed.  After the agent responds,
update_session() writes token counts but never fills in the model.

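Thread agent.model through _run_agent()'s return dict into
update_session() → update_token_counts().  The SQL uses
COALESCE(model, ?) so it only fills NULL rows — never overwrites
a model already set at creation time (e.g. CLI sessions).  When no
model is passed (the new parameter defaults to None), COALESCE(model,
NULL) leaves the column as it was, so existing callers are unaffected.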

If the agent falls back to a different provider, agent.model is
updated in-place by _try_activate_fallback(), so the recorded value
reflects whichever model actually produced the response.

Fixes #987
---
 gateway/run.py     |  5 ++++-
 gateway/session.py |  4 +++-
 hermes_state.py    | 10 ++++++----
 3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/gateway/run.py b/gateway/run.py
index 221f8f916..bc16b224d 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1578,10 +1578,11 @@ class GatewayRunner:
                         skip_db=agent_persisted,
                     )
             
-            # Update session with actual prompt token count from the agent
+            # Update session with actual prompt token count and model from the agent
             self.session_store.update_session(
                 session_entry.session_key,
                 last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
+                model=agent_result.get("model"),
             )
             
             return response
@@ -3586,6 +3587,7 @@ class GatewayRunner:
                     "tools": tools_holder[0] or [],
                     "history_offset": len(agent_history),
                     "last_prompt_tokens": _last_prompt_toks,
+                    "model": agent_holder[0].model if agent_holder[0] else None,
                 }
             
             # Scan tool results for MEDIA:<path> tags that need to be delivered
@@ -3648,6 +3650,7 @@ class GatewayRunner:
                 "tools": tools_holder[0] or [],
                 "history_offset": len(agent_history),
                 "last_prompt_tokens": _last_prompt_toks,
+                "model": agent_holder[0].model if agent_holder[0] else None,
                 "session_id": effective_session_id,
             }
         
diff --git a/gateway/session.py b/gateway/session.py
index 3e42db4fe..965f60793 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -590,6 +590,7 @@ class SessionStore:
         input_tokens: int = 0,
         output_tokens: int = 0,
         last_prompt_tokens: int = None,
+        model: str = None,
     ) -> None:
         """Update a session's metadata after an interaction."""
         self._ensure_loaded()
@@ -607,7 +608,8 @@ class SessionStore:
             if self._db:
                 try:
                     self._db.update_token_counts(
-                        entry.session_id, input_tokens, output_tokens
+                        entry.session_id, input_tokens, output_tokens,
+                        model=model,
                     )
                 except Exception as e:
                     logger.debug("Session DB operation failed: %s", e)
diff --git a/hermes_state.py b/hermes_state.py
index 5e29321ec..8945e195d 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -227,15 +227,17 @@ class SessionDB:
         self._conn.commit()
 
     def update_token_counts(
-        self, session_id: str, input_tokens: int = 0, output_tokens: int = 0
+        self, session_id: str, input_tokens: int = 0, output_tokens: int = 0,
+        model: str = None,
     ) -> None:
-        """Increment token counters on a session."""
+        """Increment token counters and backfill model if not already set."""
         self._conn.execute(
             """UPDATE sessions SET
                input_tokens = input_tokens + ?,
-               output_tokens = output_tokens + ?
+               output_tokens = output_tokens + ?,
+               model = COALESCE(model, ?)
                WHERE id = ?""",
-            (input_tokens, output_tokens, session_id),
+            (input_tokens, output_tokens, model, session_id),
         )
         self._conn.commit()