feat(status): restore model and context in gateway status

PROBLEM: The old public /status PR drifted out of the current Amy patch stack, leaving /status without the model/provider line, the context-window usage, and the explicit cumulative-token label that Wolfram uses to monitor context pressure from chat. SOLUTION: Re-port the feature onto the current gateway status handler: prefer live or cached agent runtime metadata, fall back to SessionDB + SessionStore state between turns, add model/context status strings to every locale file (translated in de.yaml; the other locale files carry the English text), and keep token totals explicitly labeled as cumulative. VERIFICATION: tests/gateway/test_status_command.py, tests/hermes_cli/test_commands.py
2026-07-31 19:16:29 +00:00 · 2026-05-31 17:14:47 +02:00 · 2026-05-31 17:14:47 +02:00 · ead38107a2
commit ead38107a2
parent 5035fa9029
19 changed files with 257 additions and 10 deletions
--- a/gateway/slash_commands.py
+++ b/gateway/slash_commands.py
@ -394,20 +394,35 @@ class GatewaySlashCommandsMixin:

    async def _handle_status_command(self, event: MessageEvent) -> str:
        """Handle /status command."""
+        from gateway.run import _AGENT_PENDING_SENTINEL, _load_gateway_config, _resolve_gateway_model
+
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)

        connected_platforms = [p.value for p in self.adapters.keys()]

-        # Check if there's an active agent
+        # Check if there's an active agent. Keep the sentinel distinct: a
+        # starting/pending run should not be treated as a fully usable agent for
+        # model/context display, but it still occupies the session slot.
        session_key = session_entry.session_key
-        is_running = session_key in self._running_agents
+        agent = self._running_agents.get(session_key)
+        is_running = agent is not None and agent is not _AGENT_PENDING_SENTINEL

        # Count pending /queue follow-ups (slot + overflow).
        adapter = self.adapters.get(source.platform) if source else None
        queue_depth = self._queue_depth(session_key, adapter=adapter)

+        def _clean_str(value: Any) -> str:
+            return value.strip() if isinstance(value, str) and value.strip() else ""
+
+        def _int_value(value: Any) -> int:
+            try:
+                return int(value)
+            except (TypeError, ValueError):
+                return 0
+
        title = None
+        session_row: dict[str, Any] = {}
        # Pull token totals from the SQLite session DB rather than the
        # in-memory SessionStore.  The agent's per-turn token deltas are
        # persisted into sessions_db (run_agent.py), not into SessionEntry,
@ -422,17 +437,106 @@ class GatewaySlashCommandsMixin:
                title = None
            try:
                row = self._session_db.get_session(session_entry.session_id)
-                if row:
+                if isinstance(row, dict):
+                    session_row = row
                    db_total_tokens = (
-                        (row.get("input_tokens") or 0)
-                        + (row.get("output_tokens") or 0)
-                        + (row.get("cache_read_tokens") or 0)
-                        + (row.get("cache_write_tokens") or 0)
-                        + (row.get("reasoning_tokens") or 0)
+                        _int_value(row.get("input_tokens"))
+                        + _int_value(row.get("output_tokens"))
+                        + _int_value(row.get("cache_read_tokens"))
+                        + _int_value(row.get("cache_write_tokens"))
+                        + _int_value(row.get("reasoning_tokens"))
                    )
            except Exception:
                db_total_tokens = 0

+        # Resolve model/context for cockpit-style status. Prefer the live or
+        # cached agent because it carries the actual runtime route and context
+        # compressor. Fall back to persisted SessionDB metadata plus the
+        # SessionStore's last_prompt_tokens so /status remains useful between
+        # turns without making billing/account calls.
+        status_agent = agent if is_running else None
+        if status_agent is None:
+            cache_lock = getattr(self, "_agent_cache_lock", None)
+            cache = getattr(self, "_agent_cache", None)
+            if cache_lock is not None and cache is not None:
+                try:
+                    with cache_lock:
+                        cached = cache.get(session_key)
+                    if cached:
+                        status_agent = cached[0]
+                except Exception:
+                    status_agent = None
+
+        model_name = ""
+        provider_name = ""
+        base_url = ""
+        context_used = 0
+        context_total = 0
+        if status_agent is not None and status_agent is not _AGENT_PENDING_SENTINEL:
+            model_name = _clean_str(getattr(status_agent, "model", ""))
+            provider_name = _clean_str(getattr(status_agent, "provider", ""))
+            base_url = _clean_str(getattr(status_agent, "base_url", ""))
+            ctx = getattr(status_agent, "context_compressor", None)
+            if ctx is not None:
+                context_used = _int_value(getattr(ctx, "last_prompt_tokens", 0))
+                context_total = _int_value(getattr(ctx, "context_length", 0))
+
+        model_name = model_name or _clean_str(session_row.get("model"))
+        provider_name = provider_name or _clean_str(session_row.get("billing_provider"))
+        base_url = base_url or _clean_str(session_row.get("billing_base_url"))
+        context_used = context_used or _int_value(getattr(session_entry, "last_prompt_tokens", 0))
+
+        user_config: dict[str, Any] = {}
+        if not model_name or not provider_name or not context_total:
+            try:
+                user_config = _load_gateway_config()
+            except Exception:
+                user_config = {}
+        if not model_name:
+            model_name = _resolve_gateway_model(user_config)
+        if not provider_name:
+            model_cfg = user_config.get("model", {}) if isinstance(user_config, dict) else {}
+            if isinstance(model_cfg, dict):
+                provider_name = _clean_str(model_cfg.get("provider"))
+        if not context_total and model_name:
+            try:
+                from agent.model_metadata import get_model_context_length
+
+                model_cfg = user_config.get("model", {}) if isinstance(user_config, dict) else {}
+                configured_context = None
+                if isinstance(model_cfg, dict):
+                    configured_context = model_cfg.get("context_length")
+                custom_providers = user_config.get("custom_providers") if isinstance(user_config, dict) else None
+                context_total = get_model_context_length(
+                    model_name,
+                    base_url=base_url,
+                    api_key="",
+                    config_context_length=configured_context if isinstance(configured_context, int) else None,
+                    provider=provider_name,
+                    custom_providers=custom_providers if isinstance(custom_providers, list) else None,
+                )
+            except Exception:
+                context_total = 0
+
+        model_line = ""
+        if model_name:
+            if provider_name:
+                model_line = t("gateway.status.model_provider", model=model_name, provider=provider_name)
+            else:
+                model_line = t("gateway.status.model", model=model_name)
+
+        context_line = ""
+        if context_total:
+            pct = min(100, round((context_used / context_total) * 100)) if context_total else 0
+            context_line = t(
+                "gateway.status.context",
+                used=f"{context_used:,}",
+                total=f"{context_total:,}",
+                pct=f"{pct}",
+            )
+        elif context_used:
+            context_line = t("gateway.status.context_used", used=f"{context_used:,}")
+
        lines = [
            t("gateway.status.header"),
            "",
@ -443,7 +547,13 @@ class GatewaySlashCommandsMixin:
        lines.extend([
            t("gateway.status.created", timestamp=session_entry.created_at.strftime('%Y-%m-%d %H:%M')),
            t("gateway.status.last_activity", timestamp=session_entry.updated_at.strftime('%Y-%m-%d %H:%M')),
-            t("gateway.status.tokens", tokens=f"{db_total_tokens:,}"),
+        ])
+        if model_line:
+            lines.append(model_line)
+        if context_line:
+            lines.append(context_line)
+        lines.extend([
+            t("gateway.status.tokens", tokens=f"{db_total_tokens:,} (cumulative)"),
            t("gateway.status.agent_running", state=t("gateway.status.state_yes") if is_running else t("gateway.status.state_no")),
        ])
        if queue_depth:
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -109,7 +109,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               args_hint="[text | pause | resume | clear | status]"),
    CommandDef("subgoal", "Add or manage extra criteria on the active goal", "Session",
               args_hint="[text | remove N | clear]"),
-    CommandDef("status", "Show session info", "Session"),
+    CommandDef("status", "Show session, model, token, and context info", "Session"),
    CommandDef("whoami", "Show your slash command access (admin / user)", "Info"),
    CommandDef("profile", "Show active profile name and home directory", "Info"),
    CommandDef("sethome", "Set this chat as the home channel", "Session",
--- a/locales/af.yaml
+++ b/locales/af.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Titel:** {title}"
    created:               "**Geskep:** {timestamp}"
    last_activity:         "**Laaste aktiwiteit:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Kumulatiewe API-tokens (elke oproep weer gestuur):** {tokens}"
    agent_running:         "**Agent loop:** {state}"
    state_yes:             "Ja ⚡"
--- a/locales/de.yaml
+++ b/locales/de.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Titel:** {title}"
    created:               "**Erstellt:** {timestamp}"
    last_activity:         "**Letzte Aktivität:** {timestamp}"
+    model:                 "**Modell:** `{model}`"
+    model_provider:        "**Modell:** `{model}` ({provider})"
+    context:               "**Kontext:** {used} / {total} ({pct}%)"
+    context_used:          "**Kontext:** ~{used} Tokens"
    tokens:                "**Kumulierte API-Tokens (bei jedem Aufruf erneut gesendet):** {tokens}"
    agent_running:         "**Agent läuft:** {state}"
    state_yes:             "Ja ⚡"
--- a/locales/en.yaml
+++ b/locales/en.yaml
@ -281,6 +281,10 @@ gateway:
    title:                 "**Title:** {title}"
    created:               "**Created:** {timestamp}"
    last_activity:         "**Last Activity:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Cumulative API tokens (re-sent each call):** {tokens}"
    agent_running:         "**Agent Running:** {state}"
    state_yes:             "Yes ⚡"
--- a/locales/es.yaml
+++ b/locales/es.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Título:** {title}"
    created:               "**Creado:** {timestamp}"
    last_activity:         "**Última actividad:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Tokens de API acumulados (reenviados en cada llamada):** {tokens}"
    agent_running:         "**Agente activo:** {state}"
    state_yes:             "Sí ⚡"
--- a/locales/fr.yaml
+++ b/locales/fr.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Titre :** {title}"
    created:               "**Créé :** {timestamp}"
    last_activity:         "**Dernière activité :** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Jetons :** {tokens}"
    agent_running:         "**Agent en cours :** {state}"
    state_yes:             "Oui ⚡"
--- a/locales/ga.yaml
+++ b/locales/ga.yaml
@ -273,6 +273,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Teideal:** {title}"
    created:               "**Cruthaithe:** {timestamp}"
    last_activity:         "**Gníomhaíocht is déanaí:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Comharthaí:** {tokens}"
    agent_running:         "**Gníomhaire ag rith:** {state}"
    state_yes:             "Tá ⚡"
--- a/locales/hu.yaml
+++ b/locales/hu.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Cím:** {title}"
    created:               "**Létrehozva:** {timestamp}"
    last_activity:         "**Utolsó tevékenység:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Tokenek:** {tokens}"
    agent_running:         "**Ügynök fut:** {state}"
    state_yes:             "Igen ⚡"
--- a/locales/it.yaml
+++ b/locales/it.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Titolo:** {title}"
    created:               "**Creata:** {timestamp}"
    last_activity:         "**Ultima attività:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Token:** {tokens}"
    agent_running:         "**Agente in esecuzione:** {state}"
    state_yes:             "Sì ⚡"
--- a/locales/ja.yaml
+++ b/locales/ja.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**タイトル:** {title}"
    created:               "**作成日時:** {timestamp}"
    last_activity:         "**最終アクティビティ:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**トークン:** {tokens}"
    agent_running:         "**エージェント実行中:** {state}"
    state_yes:             "はい ⚡"
--- a/locales/ko.yaml
+++ b/locales/ko.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**제목:** {title}"
    created:               "**생성됨:** {timestamp}"
    last_activity:         "**최종 활동:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**토큰:** {tokens}"
    agent_running:         "**에이전트 실행 중:** {state}"
    state_yes:             "예 ⚡"
--- a/locales/pt.yaml
+++ b/locales/pt.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Título:** {title}"
    created:               "**Criada:** {timestamp}"
    last_activity:         "**Última atividade:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Tokens de API cumulativos (reenviados a cada chamada):** {tokens}"
    agent_running:         "**Agente em execução:** {state}"
    state_yes:             "Sim ⚡"
--- a/locales/ru.yaml
+++ b/locales/ru.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Название:** {title}"
    created:               "**Создано:** {timestamp}"
    last_activity:         "**Последняя активность:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Токены:** {tokens}"
    agent_running:         "**Агент активен:** {state}"
    state_yes:             "Да ⚡"
--- a/locales/tr.yaml
+++ b/locales/tr.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Başlık:** {title}"
    created:               "**Oluşturuldu:** {timestamp}"
    last_activity:         "**Son etkinlik:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Token:** {tokens}"
    agent_running:         "**Aracı çalışıyor:** {state}"
    state_yes:             "Evet ⚡"
--- a/locales/uk.yaml
+++ b/locales/uk.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**Назва:** {title}"
    created:               "**Створено:** {timestamp}"
    last_activity:         "**Остання активність:** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Токени:** {tokens}"
    agent_running:         "**Агент активний:** {state}"
    state_yes:             "Так ⚡"
--- a/locales/zh-hant.yaml
+++ b/locales/zh-hant.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**標題：** {title}"
    created:               "**建立時間：** {timestamp}"
    last_activity:         "**最近活動：** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Token 數：** {tokens}"
    agent_running:         "**代理執行中：** {state}"
    state_yes:             "是 ⚡"
--- a/locales/zh.yaml
+++ b/locales/zh.yaml
@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
    title:                 "**标题：** {title}"
    created:               "**创建时间：** {timestamp}"
    last_activity:         "**最近活动：** {timestamp}"
+    model:                 "**Model:** `{model}`"
+    model_provider:        "**Model:** `{model}` ({provider})"
+    context:               "**Context:** {used} / {total} ({pct}%)"
+    context_used:          "**Context:** ~{used} tokens"
    tokens:                "**Token 数：** {tokens}"
    agent_running:         "**代理运行中：** {state}"
    state_yes:             "是 ⚡"
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@ -174,6 +174,79 @@ async def test_status_command_tokens_zero_when_session_db_row_missing():
    assert "**Cumulative API tokens (re-sent each call):** 0" in result


+@pytest.mark.asyncio
+async def test_status_command_includes_live_agent_model_and_context():
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        total_tokens=0,
+    )
+    runner = _make_runner(session_entry)
+    runner._session_db.get_session.return_value = {
+        "input_tokens": 1000,
+        "output_tokens": 250,
+        "cache_read_tokens": 0,
+        "cache_write_tokens": 0,
+        "reasoning_tokens": 0,
+        "model": "openai/gpt-test",
+    }
+    running_agent = SimpleNamespace(
+        model="openai/gpt-test",
+        provider="openai",
+        context_compressor=SimpleNamespace(
+            last_prompt_tokens=12_345,
+            context_length=100_000,
+        ),
+        interrupt=MagicMock(),
+    )
+    runner._running_agents[build_session_key(_make_source())] = running_agent
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert "**Model:** `openai/gpt-test` (openai)" in result
+    assert "**Context:** 12,345 / 100,000 (12%)" in result
+    assert "**Cumulative API tokens (re-sent each call):** 1,250 (cumulative)" in result
+
+
+@pytest.mark.asyncio
+async def test_status_command_includes_persisted_model_and_context_when_agent_not_running(monkeypatch):
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        total_tokens=0,
+        last_prompt_tokens=24_000,
+    )
+    runner = _make_runner(session_entry)
+    runner._session_db.get_session.return_value = {
+        "input_tokens": 2000,
+        "output_tokens": 500,
+        "cache_read_tokens": 0,
+        "cache_write_tokens": 0,
+        "reasoning_tokens": 0,
+        "model": "openai/gpt-persisted",
+        "billing_provider": "openai-codex",
+        "billing_base_url": "https://example.invalid/v1",
+    }
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 272_000,
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert "**Model:** `openai/gpt-persisted` (openai-codex)" in result
+    assert "**Context:** 24,000 / 272,000 (9%)" in result
+    assert "**Cumulative API tokens (re-sent each call):** 2,500 (cumulative)" in result
+
+
@pytest.mark.asyncio
 async def test_agents_command_reports_active_agents_and_processes(monkeypatch):
    session_key = build_session_key(_make_source())