feat(status): restore model and context in gateway status

PROBLEM: The old public /status PR drifted out of the current Amy patch stack, leaving /status without the model/provider, context window, or explicit cumulative token label that Wolfram uses to monitor context pressure from chat.

SOLUTION: Re-port the feature onto the current gateway status handler. Prefer live or cached agent runtime metadata; between turns, fall back to SessionDB and SessionStore state, and finally to the gateway config and the model-metadata context-length lookup. Add status model/context lines to the locale files, and keep token totals explicitly labeled as cumulative.

Verification: tests/gateway/test_status_command.py, tests/hermes_cli/test_commands.py
This commit is contained in:
Wolfram Ravenwolf 2026-05-31 17:14:47 +02:00 committed by Teknium
parent 5035fa9029
commit ead38107a2
19 changed files with 257 additions and 10 deletions

View file

@ -394,20 +394,35 @@ class GatewaySlashCommandsMixin:
async def _handle_status_command(self, event: MessageEvent) -> str:
"""Handle /status command."""
from gateway.run import _AGENT_PENDING_SENTINEL, _load_gateway_config, _resolve_gateway_model
source = event.source
session_entry = self.session_store.get_or_create_session(source)
connected_platforms = [p.value for p in self.adapters.keys()]
# Check if there's an active agent
# Check if there's an active agent. Keep the sentinel distinct: a
# starting/pending run should not be treated as a fully usable agent for
# model/context display, but it still occupies the session slot.
session_key = session_entry.session_key
is_running = session_key in self._running_agents
agent = self._running_agents.get(session_key)
is_running = agent is not None and agent is not _AGENT_PENDING_SENTINEL
# Count pending /queue follow-ups (slot + overflow).
adapter = self.adapters.get(source.platform) if source else None
queue_depth = self._queue_depth(session_key, adapter=adapter)
def _clean_str(value: Any) -> str:
return value.strip() if isinstance(value, str) and value.strip() else ""
def _int_value(value: Any) -> int:
try:
return int(value)
except (TypeError, ValueError):
return 0
title = None
session_row: dict[str, Any] = {}
# Pull token totals from the SQLite session DB rather than the
# in-memory SessionStore. The agent's per-turn token deltas are
# persisted into sessions_db (run_agent.py), not into SessionEntry,
@ -422,17 +437,106 @@ class GatewaySlashCommandsMixin:
title = None
try:
row = self._session_db.get_session(session_entry.session_id)
if row:
if isinstance(row, dict):
session_row = row
db_total_tokens = (
(row.get("input_tokens") or 0)
+ (row.get("output_tokens") or 0)
+ (row.get("cache_read_tokens") or 0)
+ (row.get("cache_write_tokens") or 0)
+ (row.get("reasoning_tokens") or 0)
_int_value(row.get("input_tokens"))
+ _int_value(row.get("output_tokens"))
+ _int_value(row.get("cache_read_tokens"))
+ _int_value(row.get("cache_write_tokens"))
+ _int_value(row.get("reasoning_tokens"))
)
except Exception:
db_total_tokens = 0
# Resolve model/context for cockpit-style status. Prefer the live or
# cached agent because it carries the actual runtime route and context
# compressor. Fall back to persisted SessionDB metadata plus the
# SessionStore's last_prompt_tokens so /status remains useful between
# turns without making billing/account calls.
status_agent = agent if is_running else None
if status_agent is None:
cache_lock = getattr(self, "_agent_cache_lock", None)
cache = getattr(self, "_agent_cache", None)
if cache_lock is not None and cache is not None:
try:
with cache_lock:
cached = cache.get(session_key)
if cached:
status_agent = cached[0]
except Exception:
status_agent = None
model_name = ""
provider_name = ""
base_url = ""
context_used = 0
context_total = 0
if status_agent is not None and status_agent is not _AGENT_PENDING_SENTINEL:
model_name = _clean_str(getattr(status_agent, "model", ""))
provider_name = _clean_str(getattr(status_agent, "provider", ""))
base_url = _clean_str(getattr(status_agent, "base_url", ""))
ctx = getattr(status_agent, "context_compressor", None)
if ctx is not None:
context_used = _int_value(getattr(ctx, "last_prompt_tokens", 0))
context_total = _int_value(getattr(ctx, "context_length", 0))
model_name = model_name or _clean_str(session_row.get("model"))
provider_name = provider_name or _clean_str(session_row.get("billing_provider"))
base_url = base_url or _clean_str(session_row.get("billing_base_url"))
context_used = context_used or _int_value(getattr(session_entry, "last_prompt_tokens", 0))
user_config: dict[str, Any] = {}
if not model_name or not provider_name or not context_total:
try:
user_config = _load_gateway_config()
except Exception:
user_config = {}
if not model_name:
model_name = _resolve_gateway_model(user_config)
if not provider_name:
model_cfg = user_config.get("model", {}) if isinstance(user_config, dict) else {}
if isinstance(model_cfg, dict):
provider_name = _clean_str(model_cfg.get("provider"))
if not context_total and model_name:
try:
from agent.model_metadata import get_model_context_length
model_cfg = user_config.get("model", {}) if isinstance(user_config, dict) else {}
configured_context = None
if isinstance(model_cfg, dict):
configured_context = model_cfg.get("context_length")
custom_providers = user_config.get("custom_providers") if isinstance(user_config, dict) else None
context_total = get_model_context_length(
model_name,
base_url=base_url,
api_key="",
config_context_length=configured_context if isinstance(configured_context, int) else None,
provider=provider_name,
custom_providers=custom_providers if isinstance(custom_providers, list) else None,
)
except Exception:
context_total = 0
model_line = ""
if model_name:
if provider_name:
model_line = t("gateway.status.model_provider", model=model_name, provider=provider_name)
else:
model_line = t("gateway.status.model", model=model_name)
context_line = ""
if context_total:
pct = min(100, round((context_used / context_total) * 100)) if context_total else 0
context_line = t(
"gateway.status.context",
used=f"{context_used:,}",
total=f"{context_total:,}",
pct=f"{pct}",
)
elif context_used:
context_line = t("gateway.status.context_used", used=f"{context_used:,}")
lines = [
t("gateway.status.header"),
"",
@ -443,7 +547,13 @@ class GatewaySlashCommandsMixin:
lines.extend([
t("gateway.status.created", timestamp=session_entry.created_at.strftime('%Y-%m-%d %H:%M')),
t("gateway.status.last_activity", timestamp=session_entry.updated_at.strftime('%Y-%m-%d %H:%M')),
t("gateway.status.tokens", tokens=f"{db_total_tokens:,}"),
])
if model_line:
lines.append(model_line)
if context_line:
lines.append(context_line)
lines.extend([
t("gateway.status.tokens", tokens=f"{db_total_tokens:,} (cumulative)"),
t("gateway.status.agent_running", state=t("gateway.status.state_yes") if is_running else t("gateway.status.state_no")),
])
if queue_depth:

View file

@ -109,7 +109,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[text | pause | resume | clear | status]"),
CommandDef("subgoal", "Add or manage extra criteria on the active goal", "Session",
args_hint="[text | remove N | clear]"),
CommandDef("status", "Show session info", "Session"),
CommandDef("status", "Show session, model, token, and context info", "Session"),
CommandDef("whoami", "Show your slash command access (admin / user)", "Info"),
CommandDef("profile", "Show active profile name and home directory", "Info"),
CommandDef("sethome", "Set this chat as the home channel", "Session",

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Titel:** {title}"
created: "**Geskep:** {timestamp}"
last_activity: "**Laaste aktiwiteit:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Kumulatiewe API-tokens (elke oproep weer gestuur):** {tokens}"
agent_running: "**Agent loop:** {state}"
state_yes: "Ja ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Titel:** {title}"
created: "**Erstellt:** {timestamp}"
last_activity: "**Letzte Aktivität:** {timestamp}"
model: "**Modell:** `{model}`"
model_provider: "**Modell:** `{model}` ({provider})"
context: "**Kontext:** {used} / {total} ({pct}%)"
context_used: "**Kontext:** ~{used} Tokens"
tokens: "**Kumulierte API-Tokens (bei jedem Aufruf erneut gesendet):** {tokens}"
agent_running: "**Agent läuft:** {state}"
state_yes: "Ja ⚡"

View file

@ -281,6 +281,10 @@ gateway:
title: "**Title:** {title}"
created: "**Created:** {timestamp}"
last_activity: "**Last Activity:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Cumulative API tokens (re-sent each call):** {tokens}"
agent_running: "**Agent Running:** {state}"
state_yes: "Yes ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Título:** {title}"
created: "**Creado:** {timestamp}"
last_activity: "**Última actividad:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Tokens de API acumulados (reenviados en cada llamada):** {tokens}"
agent_running: "**Agente activo:** {state}"
state_yes: "Sí ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Titre :** {title}"
created: "**Créé :** {timestamp}"
last_activity: "**Dernière activité :** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Jetons :** {tokens}"
agent_running: "**Agent en cours :** {state}"
state_yes: "Oui ⚡"

View file

@ -273,6 +273,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Teideal:** {title}"
created: "**Cruthaithe:** {timestamp}"
last_activity: "**Gníomhaíocht is déanaí:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Comharthaí:** {tokens}"
agent_running: "**Gníomhaire ag rith:** {state}"
state_yes: "Tá ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Cím:** {title}"
created: "**Létrehozva:** {timestamp}"
last_activity: "**Utolsó tevékenység:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Tokenek:** {tokens}"
agent_running: "**Ügynök fut:** {state}"
state_yes: "Igen ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Titolo:** {title}"
created: "**Creata:** {timestamp}"
last_activity: "**Ultima attività:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Token:** {tokens}"
agent_running: "**Agente in esecuzione:** {state}"
state_yes: "Sì ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**タイトル:** {title}"
created: "**作成日時:** {timestamp}"
last_activity: "**最終アクティビティ:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**トークン:** {tokens}"
agent_running: "**エージェント実行中:** {state}"
state_yes: "はい ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**제목:** {title}"
created: "**생성됨:** {timestamp}"
last_activity: "**최종 활동:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**토큰:** {tokens}"
agent_running: "**에이전트 실행 중:** {state}"
state_yes: "예 ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Título:** {title}"
created: "**Criada:** {timestamp}"
last_activity: "**Última atividade:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Tokens de API cumulativos (reenviados a cada chamada):** {tokens}"
agent_running: "**Agente em execução:** {state}"
state_yes: "Sim ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Название:** {title}"
created: "**Создано:** {timestamp}"
last_activity: "**Последняя активность:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Токены:** {tokens}"
agent_running: "**Агент активен:** {state}"
state_yes: "Да ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Başlık:** {title}"
created: "**Oluşturuldu:** {timestamp}"
last_activity: "**Son etkinlik:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Token:** {tokens}"
agent_running: "**Aracı çalışıyor:** {state}"
state_yes: "Evet ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**Назва:** {title}"
created: "**Створено:** {timestamp}"
last_activity: "**Остання активність:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Токени:** {tokens}"
agent_running: "**Агент активний:** {state}"
state_yes: "Так ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**標題:** {title}"
created: "**建立時間:** {timestamp}"
last_activity: "**最近活動:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Token 數:** {tokens}"
agent_running: "**代理執行中:** {state}"
state_yes: "是 ⚡"

View file

@ -269,6 +269,10 @@ Future messages in this room will use that transcript until `/reset` or another
title: "**标题:** {title}"
created: "**创建时间:** {timestamp}"
last_activity: "**最近活动:** {timestamp}"
model: "**Model:** `{model}`"
model_provider: "**Model:** `{model}` ({provider})"
context: "**Context:** {used} / {total} ({pct}%)"
context_used: "**Context:** ~{used} tokens"
tokens: "**Token 数:** {tokens}"
agent_running: "**代理运行中:** {state}"
state_yes: "是 ⚡"

View file

@ -174,6 +174,79 @@ async def test_status_command_tokens_zero_when_session_db_row_missing():
assert "**Cumulative API tokens (re-sent each call):** 0" in result
@pytest.mark.asyncio
async def test_status_command_includes_live_agent_model_and_context():
    """/status shows model, provider and context usage taken from a running agent."""
    key = build_session_key(_make_source())
    entry = SessionEntry(
        session_key=key,
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        total_tokens=0,
    )
    runner = _make_runner(entry)

    # Persisted per-session token counters; the status output sums them.
    token_row = {
        "input_tokens": 1000,
        "output_tokens": 250,
        "cache_read_tokens": 0,
        "cache_write_tokens": 0,
        "reasoning_tokens": 0,
        "model": "openai/gpt-test",
    }
    runner._session_db.get_session.return_value = token_row

    # Stand-in for a live agent: exposes only the attributes the test needs.
    compressor = SimpleNamespace(
        last_prompt_tokens=12_345,
        context_length=100_000,
    )
    live_agent = SimpleNamespace(
        model="openai/gpt-test",
        provider="openai",
        context_compressor=compressor,
        interrupt=MagicMock(),
    )
    runner._running_agents[key] = live_agent

    result = await runner._handle_message(_make_event("/status"))

    expected_fragments = (
        "**Model:** `openai/gpt-test` (openai)",
        "**Context:** 12,345 / 100,000 (12%)",
        "**Cumulative API tokens (re-sent each call):** 1,250 (cumulative)",
    )
    for fragment in expected_fragments:
        assert fragment in result
@pytest.mark.asyncio
async def test_status_command_includes_persisted_model_and_context_when_agent_not_running(monkeypatch):
    """/status shows persisted model/provider and context usage when no agent is running."""

    def _fixed_context_length(*_args, **_kwargs):
        # Replaces the model-metadata lookup with a fixed context window.
        return 272_000

    monkeypatch.setattr(
        "agent.model_metadata.get_model_context_length",
        _fixed_context_length,
    )

    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        total_tokens=0,
        last_prompt_tokens=24_000,
    )
    runner = _make_runner(entry)

    # Persisted session row: token counters plus model/provider metadata.
    persisted_row = {
        "input_tokens": 2000,
        "output_tokens": 500,
        "cache_read_tokens": 0,
        "cache_write_tokens": 0,
        "reasoning_tokens": 0,
        "model": "openai/gpt-persisted",
        "billing_provider": "openai-codex",
        "billing_base_url": "https://example.invalid/v1",
    }
    runner._session_db.get_session.return_value = persisted_row

    result = await runner._handle_message(_make_event("/status"))

    expected_fragments = (
        "**Model:** `openai/gpt-persisted` (openai-codex)",
        "**Context:** 24,000 / 272,000 (9%)",
        "**Cumulative API tokens (re-sent each call):** 2,500 (cumulative)",
    )
    for fragment in expected_fragments:
        assert fragment in result
@pytest.mark.asyncio
async def test_agents_command_reports_active_agents_and_processes(monkeypatch):
session_key = build_session_key(_make_source())