fix(context): align guidance with 64k minimum

This commit is contained in:
helix4u 2026-05-24 20:31:27 -06:00 committed by Teknium
parent 1d5deac346
commit 3b839f4369
6 changed files with 41 additions and 35 deletions

10
cli.py
View file

@ -4958,20 +4958,22 @@ class HermesCLI:
if os.environ.get("HERMES_DEFER_AGENT_STARTUP") != "1":
self._show_tool_availability_warnings()
# Warn about very low context lengths (common with local servers)
if ctx_len and ctx_len <= 8192:
# Warn about low context lengths (common with local servers). Keep
# this tied to the runtime guard so guidance cannot drift again.
from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
if ctx_len and ctx_len < MINIMUM_CONTEXT_LENGTH:
self._console_print()
self._console_print(
f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — "
f"this is likely too low for agent use with tools.[/]"
)
self._console_print(
"[dim] Hermes needs 16k32k minimum. Tool schemas + system prompt alone use ~4k8k.[/]"
f"[dim] Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens. Tool schemas + system prompt use a large fixed prefix.[/]"
)
base_url = getattr(self, "base_url", "") or ""
if "11434" in base_url or "ollama" in base_url.lower():
self._console_print(
"[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]"
f"[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH={MINIMUM_CONTEXT_LENGTH} ollama serve[/]"
)
elif "1234" in base_url:
self._console_print(