mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(gateway): surface session config on /new, /reset, and auto-reset (#3321)
When a new session starts in the gateway (via /new, /reset, or auto-reset), send the user a summary of the detected configuration:

    ✨ Session reset! Starting fresh.

    ◆ Model: qwen3.5:27b-q4_K_M
    ◆ Provider: custom
    ◆ Context: 8K tokens (config)
    ◆ Endpoint: http://localhost:11434/v1

This makes a misconfigured context length immediately visible — a user running a local 8K model that falls back to the 128K default will see:

    ◆ Context: 128K tokens (default — set model.context_length in config to override)

instead of silently getting no compression and degraded responses.

- _format_session_info() resolves model, provider, context length, and endpoint from config + runtime, matching the hygiene code's resolution chain
- Local/custom endpoints shown; cloud endpoints hidden (not useful)
- Context source annotated: config, detected, or default with hint
- Appended to /new and /reset responses, and auto-reset notifications
- 9 tests covering all formatting paths and failure resilience

Addresses the user-facing side of #2708 — instead of trying to fix every edge case in context detection, surface the values so users can immediately see when something is wrong.
This commit is contained in:
parent
3f95e741a7
commit
58ca875e19
2 changed files with 207 additions and 2 deletions
|
|
@ -1982,6 +1982,12 @@ class GatewayRunner:
|
||||||
f"Use /resume to browse and restore a previous session.\n"
|
f"Use /resume to browse and restore a previous session.\n"
|
||||||
f"Adjust reset timing in config.yaml under session_reset."
|
f"Adjust reset timing in config.yaml under session_reset."
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
session_info = self._format_session_info()
|
||||||
|
if session_info:
|
||||||
|
notice = f"{notice}\n\n{session_info}"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
await adapter.send(
|
await adapter.send(
|
||||||
source.chat_id, notice,
|
source.chat_id, notice,
|
||||||
metadata=getattr(event, 'metadata', None),
|
metadata=getattr(event, 'metadata', None),
|
||||||
|
|
@ -2749,6 +2755,85 @@ class GatewayRunner:
|
||||||
# Clear session env
|
# Clear session env
|
||||||
self._clear_session_env()
|
self._clear_session_env()
|
||||||
|
|
||||||
|
def _format_session_info(self) -> str:
    """Build the "◆ ..." summary block describing the active model setup.

    The block lists the model, the provider, the context length (annotated
    with where that number came from) and, for loopback endpoints only, the
    base URL. Showing these lets a gateway user notice immediately when
    context detection fell through to the fallback value.

    Values are gathered in two best-effort steps, each of which is allowed
    to fail silently:

    1. ``model.context_length``, ``model.provider`` and ``model.base_url``
       are read from ``config.yaml`` under ``_hermes_home``.
    2. ``_resolve_runtime_agent_kwargs()`` fills in provider / base URL
       that the config did not set, and supplies the API key.

    Returns:
        Newline-joined lines, each starting with "◆".
    """
    from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT

    model = _resolve_gateway_model()
    config_context_length = provider = base_url = api_key = None

    # Step 1: explicit settings from config.yaml (missing/broken file is fine).
    try:
        config_file = _hermes_home / "config.yaml"
        if config_file.exists():
            import yaml as _info_yaml

            with open(config_file, encoding="utf-8") as handle:
                parsed = _info_yaml.safe_load(handle) or {}
            model_section = parsed.get("model", {})
            if isinstance(model_section, dict):
                declared_ctx = model_section.get("context_length")
                if declared_ctx is not None:
                    try:
                        config_context_length = int(declared_ctx)
                    except (TypeError, ValueError):
                        # Non-numeric value: treat as "not configured".
                        pass
                # Empty strings are normalised to None so the runtime
                # values below can take over.
                provider = model_section.get("provider") or None
                base_url = model_section.get("base_url") or None
    except Exception:
        pass

    # Step 2: runtime resolution; config values win where both are present.
    try:
        resolved = _resolve_runtime_agent_kwargs()
        provider = provider or resolved.get("provider")
        base_url = base_url or resolved.get("base_url")
        api_key = resolved.get("api_key")
    except Exception:
        pass

    context_length = get_model_context_length(
        model,
        base_url=base_url or "",
        api_key=api_key or "",
        config_context_length=config_context_length,
        provider=provider or "",
    )

    # Where did the context length come from?
    ctx_source = "detected"
    if config_context_length is not None:
        ctx_source = "config"
    elif context_length == DEFAULT_FALLBACK_CONTEXT:
        ctx_source = "default — set model.context_length in config to override"

    # Human-friendly size: "1.0M", "128K", or the raw number below 1000.
    ctx_display = (
        f"{context_length / 1_000_000:.1f}M"
        if context_length >= 1_000_000
        else f"{context_length // 1_000}K"
        if context_length >= 1_000
        else str(context_length)
    )

    summary = [
        f"◆ Model: `{model}`",
        f"◆ Provider: {provider or 'openrouter'}",
        f"◆ Context: {ctx_display} tokens ({ctx_source})",
    ]

    # The endpoint is only shown when the URL mentions a loopback host.
    loopback_markers = ("localhost", "127.0.0.1", "0.0.0.0")
    if base_url and any(marker in base_url for marker in loopback_markers):
        summary.append(f"◆ Endpoint: {base_url}")

    return "\n".join(summary)
|
||||||
|
|
||||||
async def _handle_reset_command(self, event: MessageEvent) -> str:
|
async def _handle_reset_command(self, event: MessageEvent) -> str:
|
||||||
"""Handle /new or /reset command."""
|
"""Handle /new or /reset command."""
|
||||||
source = event.source
|
source = event.source
|
||||||
|
|
@ -2789,12 +2874,22 @@ class GatewayRunner:
|
||||||
"session_key": session_key,
|
"session_key": session_key,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Resolve session config info to surface to the user
|
||||||
|
try:
|
||||||
|
session_info = self._format_session_info()
|
||||||
|
except Exception:
|
||||||
|
session_info = ""
|
||||||
|
|
||||||
if new_entry:
|
if new_entry:
|
||||||
return "✨ Session reset! I've started fresh with no memory of our previous conversation."
|
header = "✨ Session reset! Starting fresh."
|
||||||
else:
|
else:
|
||||||
# No existing session, just create one
|
# No existing session, just create one
|
||||||
self.session_store.get_or_create_session(source, force_new=True)
|
self.session_store.get_or_create_session(source, force_new=True)
|
||||||
return "✨ New session started!"
|
header = "✨ New session started!"
|
||||||
|
|
||||||
|
if session_info:
|
||||||
|
return f"{header}\n\n{session_info}"
|
||||||
|
return header
|
||||||
|
|
||||||
async def _handle_status_command(self, event: MessageEvent) -> str:
|
async def _handle_status_command(self, event: MessageEvent) -> str:
|
||||||
"""Handle /status command."""
|
"""Handle /status command."""
|
||||||
|
|
|
||||||
110
tests/gateway/test_session_info.py
Normal file
110
tests/gateway/test_session_info.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
"""Tests for GatewayRunner._format_session_info — session config surfacing."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def runner():
    """Provide a GatewayRunner instance allocated without running ``__init__``."""
    bare_runner = GatewayRunner.__new__(GatewayRunner)
    return bare_runner
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_info(tmp_path, config_yaml, model, runtime):
    """Build the three patches that feed ``_format_session_info`` test data.

    Args:
        tmp_path: Directory substituted for ``gateway.run._hermes_home``.
            When *config_yaml* is given, ``config.yaml`` is written into it.
        config_yaml: Text for ``config.yaml``, or ``None`` to leave the file
            absent.
        model: Value ``gateway.run._resolve_gateway_model`` returns while
            patched.
        runtime: Value ``gateway.run._resolve_runtime_agent_kwargs`` returns
            while patched.

    Returns:
        A 3-tuple of not-yet-entered ``patch`` objects, in the order
        (home, model resolver, runtime resolver). Unpack and enter them
        together: ``with p1, p2, p3: ...``.
    """
    cfg_path = tmp_path / "config.yaml"
    if config_yaml is not None:
        # The code under test opens config.yaml as UTF-8, so write it with
        # the same encoding instead of the platform's locale default.
        cfg_path.write_text(config_yaml, encoding="utf-8")
    return (
        patch("gateway.run._hermes_home", tmp_path),
        patch("gateway.run._resolve_gateway_model", return_value=model),
        patch("gateway.run._resolve_runtime_agent_kwargs", return_value=runtime),
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatSessionInfo:
    """Formatting and failure-resilience checks for ``_format_session_info``."""

    def test_includes_model_name(self, runner, tmp_path):
        """The model returned by the patched resolver shows up in the output."""
        config_yaml = "model:\n default: anthropic/claude-opus-4.6\n provider: openrouter\n"
        runtime = {"provider": "openrouter", "base_url": "https://openrouter.ai/api/v1", "api_key": "k"}
        patches = _patch_info(tmp_path, config_yaml, "anthropic/claude-opus-4.6", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "claude-opus-4.6" in info

    def test_includes_provider(self, runner, tmp_path):
        """The provider name shows up in the output."""
        config_yaml = "model:\n default: test-model\n provider: openrouter\n"
        runtime = {"provider": "openrouter", "base_url": "", "api_key": ""}
        patches = _patch_info(tmp_path, config_yaml, "test-model", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "openrouter" in info

    def test_config_context_length(self, runner, tmp_path):
        """A context length set in config is displayed and labelled as such."""
        config_yaml = "model:\n default: test-model\n context_length: 32768\n"
        runtime = {"provider": "custom", "base_url": "", "api_key": ""}
        patches = _patch_info(tmp_path, config_yaml, "test-model", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "32K" in info
        assert "config" in info

    def test_default_fallback_hint(self, runner, tmp_path):
        """An unknown model reports 128K together with the override hint."""
        config_yaml = "model:\n default: unknown-model-xyz\n"
        runtime = {"provider": "", "base_url": "", "api_key": ""}
        patches = _patch_info(tmp_path, config_yaml, "unknown-model-xyz", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "128K" in info
        assert "model.context_length" in info

    def test_local_endpoint_shown(self, runner, tmp_path):
        """A localhost base URL is included in the output."""
        config_yaml = (
            "model:\n default: qwen3:8b\n provider: custom\n base_url: http://localhost:11434/v1\n context_length: 8192\n"
        )
        runtime = {"provider": "custom", "base_url": "http://localhost:11434/v1", "api_key": ""}
        patches = _patch_info(tmp_path, config_yaml, "qwen3:8b", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "localhost:11434" in info
        assert "8K" in info

    def test_cloud_endpoint_hidden(self, runner, tmp_path):
        """A non-local base URL produces no Endpoint line."""
        config_yaml = "model:\n default: test-model\n provider: openrouter\n"
        runtime = {"provider": "openrouter", "base_url": "https://openrouter.ai/api/v1", "api_key": "k"}
        patches = _patch_info(tmp_path, config_yaml, "test-model", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "Endpoint" not in info

    def test_million_context_format(self, runner, tmp_path):
        """A context length of 1000000 is rendered as 1.0M."""
        config_yaml = "model:\n default: test-model\n context_length: 1000000\n"
        runtime = {"provider": "", "base_url": "", "api_key": ""}
        patches = _patch_info(tmp_path, config_yaml, "test-model", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "1.0M" in info

    def test_missing_config(self, runner, tmp_path):
        """An absent config.yaml must not crash the formatter."""
        runtime = {"provider": "openrouter", "base_url": "", "api_key": ""}
        # Passing None leaves tmp_path without a config.yaml.
        patches = _patch_info(tmp_path, None, "anthropic/claude-sonnet-4.6", runtime)
        with patches[0], patches[1], patches[2]:
            info = runner._format_session_info()
        assert "Model" in info
        assert "Context" in info

    def test_runtime_resolution_failure_doesnt_crash(self, runner, tmp_path):
        """Output is still produced when runtime resolution raises."""
        (tmp_path / "config.yaml").write_text("model:\n default: test-model\n context_length: 4096\n")
        home_patch = patch("gateway.run._hermes_home", tmp_path)
        model_patch = patch("gateway.run._resolve_gateway_model", return_value="test-model")
        runtime_patch = patch(
            "gateway.run._resolve_runtime_agent_kwargs",
            side_effect=RuntimeError("no creds"),
        )
        with home_patch, model_patch, runtime_patch:
            info = runner._format_session_info()
        assert "4K" in info
        assert "config" in info
|
||||||
Loading…
Add table
Add a link
Reference in a new issue