mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(/model): show provider-enforced context length, not raw models.dev (#15438)
/model gpt-5.5 on openai-codex showed 'Context: 1,050,000 tokens' because the display block used ModelInfo.context_window directly from models.dev. Codex OAuth actually enforces 272K for the same slug, and the agent's compressor already runs at 272K via get_model_context_length() — so the banner + real context budget said 272K while /model lied with 1M. Route the display context through a new resolve_display_context_length() helper that always prefers agent.model_metadata.get_model_context_length (which knows about Codex OAuth, Copilot, Nous caps) and only falls back to models.dev when that returns nothing. Fix applied to all 3 /model display sites: cli.py _handle_model_switch gateway/run.py picker on_model_selected callback gateway/run.py text-fallback confirmation Reported by @emilstridell (Telegram, April 2026).
This commit is contained in:
parent
13038dc747
commit
05d8f11085
4 changed files with 161 additions and 33 deletions
|
|
@@ -5659,9 +5659,17 @@ class GatewayRunner:
|
|||
lines = [f"Model switched to `{result.new_model}`"]
|
||||
lines.append(f"Provider: {plabel}")
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or current_base_url or "",
|
||||
api_key=result.api_key or current_api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
lines.append(f"Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
lines.append(f"Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
|
|
@@ -5795,28 +5803,25 @@ class GatewayRunner:
|
|||
lines = [f"Model switched to `{result.new_model}`"]
|
||||
lines.append(f"Provider: {provider_label}")
|
||||
|
||||
# Rich metadata from models.dev
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry.
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or current_base_url or "",
|
||||
api_key=result.api_key or current_api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
lines.append(f"Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
lines.append(f"Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
lines.append(f"Cost: {mi.format_cost()}")
|
||||
lines.append(f"Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or current_base_url,
|
||||
api_key=result.api_key or current_api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cache notice
|
||||
cache_enabled = (
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue