From ead38107a2f2b6d6a71e92e978fe98a684bd3be8 Mon Sep 17 00:00:00 2001 From: Wolfram Ravenwolf Date: Sun, 31 May 2026 17:14:47 +0200 Subject: [PATCH] feat(status): restore model and context in gateway status PROBLEM: The old public /status PR drifted out of the current Amy patch stack, leaving /status without the model/provider, context window, or explicit cumulative token label that Wolfram uses to monitor context pressure from chat. SOLUTION: Re-port the feature onto the current gateway status handler. Prefer live/cached agent runtime metadata, fall back to SessionDB + SessionStore state between turns, add localized status model/context lines, and keep token totals explicitly labeled cumulative. Verification: tests/gateway/test_status_command.py, tests/hermes_cli/test_commands.py --- gateway/slash_commands.py | 128 +++++++++++++++++++++++++-- hermes_cli/commands.py | 2 +- locales/af.yaml | 4 + locales/de.yaml | 4 + locales/en.yaml | 4 + locales/es.yaml | 4 + locales/fr.yaml | 4 + locales/ga.yaml | 4 + locales/hu.yaml | 4 + locales/it.yaml | 4 + locales/ja.yaml | 4 + locales/ko.yaml | 4 + locales/pt.yaml | 4 + locales/ru.yaml | 4 + locales/tr.yaml | 4 + locales/uk.yaml | 4 + locales/zh-hant.yaml | 4 + locales/zh.yaml | 4 + tests/gateway/test_status_command.py | 73 +++++++++++++++ 19 files changed, 257 insertions(+), 10 deletions(-) diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py index 9a463fd249d..e65739eebc2 100644 --- a/gateway/slash_commands.py +++ b/gateway/slash_commands.py @@ -394,20 +394,35 @@ class GatewaySlashCommandsMixin: async def _handle_status_command(self, event: MessageEvent) -> str: """Handle /status command.""" + from gateway.run import _AGENT_PENDING_SENTINEL, _load_gateway_config, _resolve_gateway_model + source = event.source session_entry = self.session_store.get_or_create_session(source) connected_platforms = [p.value for p in self.adapters.keys()] - # Check if there's an active agent + # Check if there's an active agent. Keep the sentinel distinct: a + # starting/pending run should not be treated as a fully usable agent for + # model/context display, but it still occupies the session slot. session_key = session_entry.session_key - is_running = session_key in self._running_agents + agent = self._running_agents.get(session_key) + is_running = agent is not None and agent is not _AGENT_PENDING_SENTINEL # Count pending /queue follow-ups (slot + overflow). adapter = self.adapters.get(source.platform) if source else None queue_depth = self._queue_depth(session_key, adapter=adapter) + def _clean_str(value: Any) -> str: + return value.strip() if isinstance(value, str) and value.strip() else "" + + def _int_value(value: Any) -> int: + try: + return int(value) + except (TypeError, ValueError): + return 0 + title = None + session_row: dict[str, Any] = {} # Pull token totals from the SQLite session DB rather than the # in-memory SessionStore. The agent's per-turn token deltas are # persisted into sessions_db (run_agent.py), not into SessionEntry, @@ -422,17 +437,106 @@ class GatewaySlashCommandsMixin: title = None try: row = self._session_db.get_session(session_entry.session_id) - if row: + if isinstance(row, dict): + session_row = row db_total_tokens = ( - (row.get("input_tokens") or 0) - + (row.get("output_tokens") or 0) - + (row.get("cache_read_tokens") or 0) - + (row.get("cache_write_tokens") or 0) - + (row.get("reasoning_tokens") or 0) + _int_value(row.get("input_tokens")) + + _int_value(row.get("output_tokens")) + + _int_value(row.get("cache_read_tokens")) + + _int_value(row.get("cache_write_tokens")) + + _int_value(row.get("reasoning_tokens")) ) except Exception: db_total_tokens = 0 + # Resolve model/context for cockpit-style status. Prefer the live or + # cached agent because it carries the actual runtime route and context + # compressor. Fall back to persisted SessionDB metadata plus the + # SessionStore's last_prompt_tokens so /status remains useful between + # turns without making billing/account calls. + status_agent = agent if is_running else None + if status_agent is None: + cache_lock = getattr(self, "_agent_cache_lock", None) + cache = getattr(self, "_agent_cache", None) + if cache_lock is not None and cache is not None: + try: + with cache_lock: + cached = cache.get(session_key) + if cached: + status_agent = cached[0] + except Exception: + status_agent = None + + model_name = "" + provider_name = "" + base_url = "" + context_used = 0 + context_total = 0 + if status_agent is not None and status_agent is not _AGENT_PENDING_SENTINEL: + model_name = _clean_str(getattr(status_agent, "model", "")) + provider_name = _clean_str(getattr(status_agent, "provider", "")) + base_url = _clean_str(getattr(status_agent, "base_url", "")) + ctx = getattr(status_agent, "context_compressor", None) + if ctx is not None: + context_used = _int_value(getattr(ctx, "last_prompt_tokens", 0)) + context_total = _int_value(getattr(ctx, "context_length", 0)) + + model_name = model_name or _clean_str(session_row.get("model")) + provider_name = provider_name or _clean_str(session_row.get("billing_provider")) + base_url = base_url or _clean_str(session_row.get("billing_base_url")) + context_used = context_used or _int_value(getattr(session_entry, "last_prompt_tokens", 0)) + + user_config: dict[str, Any] = {} + if not model_name or not provider_name or not context_total: + try: + user_config = _load_gateway_config() + except Exception: + user_config = {} + if not model_name: + model_name = _resolve_gateway_model(user_config) + if not provider_name: + model_cfg = user_config.get("model", {}) if isinstance(user_config, dict) else {} + if isinstance(model_cfg, dict): + provider_name = _clean_str(model_cfg.get("provider")) + if not context_total and model_name: + try: + from agent.model_metadata import get_model_context_length + + model_cfg = user_config.get("model", {}) if isinstance(user_config, dict) else {} + configured_context = None + if isinstance(model_cfg, dict): + configured_context = model_cfg.get("context_length") + custom_providers = user_config.get("custom_providers") if isinstance(user_config, dict) else None + context_total = get_model_context_length( + model_name, + base_url=base_url, + api_key="", + config_context_length=configured_context if isinstance(configured_context, int) else None, + provider=provider_name, + custom_providers=custom_providers if isinstance(custom_providers, list) else None, + ) + except Exception: + context_total = 0 + + model_line = "" + if model_name: + if provider_name: + model_line = t("gateway.status.model_provider", model=model_name, provider=provider_name) + else: + model_line = t("gateway.status.model", model=model_name) + + context_line = "" + if context_total: + pct = min(100, round((context_used / context_total) * 100)) if context_total else 0 + context_line = t( + "gateway.status.context", + used=f"{context_used:,}", + total=f"{context_total:,}", + pct=f"{pct}", + ) + elif context_used: + context_line = t("gateway.status.context_used", used=f"{context_used:,}") + lines = [ t("gateway.status.header"), "", @@ -443,7 +547,13 @@ class GatewaySlashCommandsMixin: lines.extend([ t("gateway.status.created", timestamp=session_entry.created_at.strftime('%Y-%m-%d %H:%M')), t("gateway.status.last_activity", timestamp=session_entry.updated_at.strftime('%Y-%m-%d %H:%M')), - t("gateway.status.tokens", tokens=f"{db_total_tokens:,}"), + ]) + if model_line: + lines.append(model_line) + if context_line: + lines.append(context_line) + lines.extend([ + t("gateway.status.tokens", tokens=f"{db_total_tokens:,} (cumulative)"), t("gateway.status.agent_running", state=t("gateway.status.state_yes") if is_running else t("gateway.status.state_no")), ]) if queue_depth: diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 7812eba7d5f..576eefbf0bd 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -109,7 +109,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="[text | pause | resume | clear | status]"), CommandDef("subgoal", "Add or manage extra criteria on the active goal", "Session", args_hint="[text | remove N | clear]"), - CommandDef("status", "Show session info", "Session"), + CommandDef("status", "Show session, model, token, and context info", "Session"), CommandDef("whoami", "Show your slash command access (admin / user)", "Info"), CommandDef("profile", "Show active profile name and home directory", "Info"), CommandDef("sethome", "Set this chat as the home channel", "Session", diff --git a/locales/af.yaml b/locales/af.yaml index 7a01f51983c..ece46799d98 100644 --- a/locales/af.yaml +++ b/locales/af.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Titel:** {title}" created: "**Geskep:** {timestamp}" last_activity: "**Laaste aktiwiteit:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Kumulatiewe API-tokens (elke oproep weer gestuur):** {tokens}" agent_running: "**Agent loop:** {state}" state_yes: "Ja ⚡" diff --git a/locales/de.yaml b/locales/de.yaml index f3414c1df3b..154268e60dd 100644 --- a/locales/de.yaml +++ b/locales/de.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Titel:** {title}" created: "**Erstellt:** {timestamp}" last_activity: "**Letzte Aktivität:** {timestamp}" + model: "**Modell:** `{model}`" + model_provider: "**Modell:** `{model}` ({provider})" + context: "**Kontext:** {used} / {total} ({pct}%)" + context_used: "**Kontext:** ~{used} Tokens" tokens: "**Kumulierte API-Tokens (bei jedem Aufruf erneut gesendet):** {tokens}" agent_running: "**Agent läuft:** {state}" state_yes: "Ja ⚡" diff --git a/locales/en.yaml b/locales/en.yaml index 00a7654f4f0..a8a132622f4 100644 --- a/locales/en.yaml +++ b/locales/en.yaml @@ -281,6 +281,10 @@ gateway: title: "**Title:** {title}" created: "**Created:** {timestamp}" last_activity: "**Last Activity:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Cumulative API tokens (re-sent each call):** {tokens}" agent_running: "**Agent Running:** {state}" state_yes: "Yes ⚡" diff --git a/locales/es.yaml b/locales/es.yaml index 96967d95632..9e4d827526c 100644 --- a/locales/es.yaml +++ b/locales/es.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Título:** {title}" created: "**Creado:** {timestamp}" last_activity: "**Última actividad:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Tokens de API acumulados (reenviados en cada llamada):** {tokens}" agent_running: "**Agente activo:** {state}" state_yes: "Sí ⚡" diff --git a/locales/fr.yaml b/locales/fr.yaml index 6185f79ec52..692c71221fb 100644 --- a/locales/fr.yaml +++ b/locales/fr.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Titre :** {title}" created: "**Créé :** {timestamp}" last_activity: "**Dernière activité :** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Jetons :** {tokens}" agent_running: "**Agent en cours :** {state}" state_yes: "Oui ⚡" diff --git a/locales/ga.yaml b/locales/ga.yaml index 752e3266053..cdacf94312a 100644 --- a/locales/ga.yaml +++ b/locales/ga.yaml @@ -273,6 +273,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Teideal:** {title}" created: "**Cruthaithe:** {timestamp}" last_activity: "**Gníomhaíocht is déanaí:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Comharthaí:** {tokens}" agent_running: "**Gníomhaire ag rith:** {state}" state_yes: "Tá ⚡" diff --git a/locales/hu.yaml b/locales/hu.yaml index 55d57698364..fec8aac766f 100644 --- a/locales/hu.yaml +++ b/locales/hu.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Cím:** {title}" created: "**Létrehozva:** {timestamp}" last_activity: "**Utolsó tevékenység:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Tokenek:** {tokens}" agent_running: "**Ügynök fut:** {state}" state_yes: "Igen ⚡" diff --git a/locales/it.yaml b/locales/it.yaml index 82cf4ce8500..5e17a835f48 100644 --- a/locales/it.yaml +++ b/locales/it.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Titolo:** {title}" created: "**Creata:** {timestamp}" last_activity: "**Ultima attività:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Token:** {tokens}" agent_running: "**Agente in esecuzione:** {state}" state_yes: "Sì ⚡" diff --git a/locales/ja.yaml b/locales/ja.yaml index 4aeee2a4cf3..b6d9a957588 100644 --- a/locales/ja.yaml +++ b/locales/ja.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**タイトル:** {title}" created: "**作成日時:** {timestamp}" last_activity: "**最終アクティビティ:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**トークン:** {tokens}" agent_running: "**エージェント実行中:** {state}" state_yes: "はい ⚡" diff --git a/locales/ko.yaml b/locales/ko.yaml index 8af6b28fe7a..f07d22837ad 100644 --- a/locales/ko.yaml +++ b/locales/ko.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**제목:** {title}" created: "**생성됨:** {timestamp}" last_activity: "**최종 활동:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**토큰:** {tokens}" agent_running: "**에이전트 실행 중:** {state}" state_yes: "예 ⚡" diff --git a/locales/pt.yaml b/locales/pt.yaml index 69bdb14a9bc..5be22d90b1e 100644 --- a/locales/pt.yaml +++ b/locales/pt.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Título:** {title}" created: "**Criada:** {timestamp}" last_activity: "**Última atividade:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Tokens de API cumulativos (reenviados a cada chamada):** {tokens}" agent_running: "**Agente em execução:** {state}" state_yes: "Sim ⚡" diff --git a/locales/ru.yaml b/locales/ru.yaml index a105f1e68aa..ca5617a4cc4 100644 --- a/locales/ru.yaml +++ b/locales/ru.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Название:** {title}" created: "**Создано:** {timestamp}" last_activity: "**Последняя активность:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Токены:** {tokens}" agent_running: "**Агент активен:** {state}" state_yes: "Да ⚡" diff --git a/locales/tr.yaml b/locales/tr.yaml index 49e8fdc454e..29bacf36ee4 100644 --- a/locales/tr.yaml +++ b/locales/tr.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Başlık:** {title}" created: "**Oluşturuldu:** {timestamp}" last_activity: "**Son etkinlik:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Token:** {tokens}" agent_running: "**Aracı çalışıyor:** {state}" state_yes: "Evet ⚡" diff --git a/locales/uk.yaml b/locales/uk.yaml index 2fa55c14c92..1e20ec7b6ca 100644 --- a/locales/uk.yaml +++ b/locales/uk.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**Назва:** {title}" created: "**Створено:** {timestamp}" last_activity: "**Остання активність:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Токени:** {tokens}" agent_running: "**Агент активний:** {state}" state_yes: "Так ⚡" diff --git a/locales/zh-hant.yaml b/locales/zh-hant.yaml index fd1729203f3..a7aae1adb8a 100644 --- a/locales/zh-hant.yaml +++ b/locales/zh-hant.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**標題:** {title}" created: "**建立時間:** {timestamp}" last_activity: "**最近活動:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Token 數:** {tokens}" agent_running: "**代理執行中:** {state}" state_yes: "是 ⚡" diff --git a/locales/zh.yaml b/locales/zh.yaml index 17b74e4688f..7f9789ee3be 100644 --- a/locales/zh.yaml +++ b/locales/zh.yaml @@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another title: "**标题:** {title}" created: "**创建时间:** {timestamp}" last_activity: "**最近活动:** {timestamp}" + model: "**Model:** `{model}`" + model_provider: "**Model:** `{model}` ({provider})" + context: "**Context:** {used} / {total} ({pct}%)" + context_used: "**Context:** ~{used} tokens" tokens: "**Token 数:** {tokens}" agent_running: "**代理运行中:** {state}" state_yes: "是 ⚡" diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py index 0b88d271808..639beef957c 100644 --- a/tests/gateway/test_status_command.py +++ b/tests/gateway/test_status_command.py @@ -174,6 +174,79 @@ async def test_status_command_tokens_zero_when_session_db_row_missing(): assert "**Cumulative API tokens (re-sent each call):** 0" in result +@pytest.mark.asyncio +async def test_status_command_includes_live_agent_model_and_context(): + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + total_tokens=0, + ) + runner = _make_runner(session_entry) + runner._session_db.get_session.return_value = { + "input_tokens": 1000, + "output_tokens": 250, + "cache_read_tokens": 0, + "cache_write_tokens": 0, + "reasoning_tokens": 0, + "model": "openai/gpt-test", + } + running_agent = SimpleNamespace( + model="openai/gpt-test", + provider="openai", + context_compressor=SimpleNamespace( + last_prompt_tokens=12_345, + context_length=100_000, + ), + interrupt=MagicMock(), + ) + runner._running_agents[build_session_key(_make_source())] = running_agent + + result = await runner._handle_message(_make_event("/status")) + + assert "**Model:** `openai/gpt-test` (openai)" in result + assert "**Context:** 12,345 / 100,000 (12%)" in result + assert "**Cumulative API tokens (re-sent each call):** 1,250 (cumulative)" in result + + +@pytest.mark.asyncio +async def test_status_command_includes_persisted_model_and_context_when_agent_not_running(monkeypatch): + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + total_tokens=0, + last_prompt_tokens=24_000, + ) + runner = _make_runner(session_entry) + runner._session_db.get_session.return_value = { + "input_tokens": 2000, + "output_tokens": 500, + "cache_read_tokens": 0, + "cache_write_tokens": 0, + "reasoning_tokens": 0, + "model": "openai/gpt-persisted", + "billing_provider": "openai-codex", + "billing_base_url": "https://example.invalid/v1", + } + monkeypatch.setattr( + "agent.model_metadata.get_model_context_length", + lambda *_args, **_kwargs: 272_000, + ) + + result = await runner._handle_message(_make_event("/status")) + + assert "**Model:** `openai/gpt-persisted` (openai-codex)" in result + assert "**Context:** 24,000 / 272,000 (9%)" in result + assert "**Cumulative API tokens (re-sent each call):** 2,500 (cumulative)" in result + + @pytest.mark.asyncio async def test_agents_command_reports_active_agents_and_processes(monkeypatch): session_key = build_session_key(_make_source())