mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
fix(context): honor model.context_length for Ollama num_ctx and all display paths
When a user sets model.context_length in config.yaml, the value was used only for Hermes' internal compression decisions (context_compressor) but NOT for Ollama's num_ctx parameter. Ollama auto-detects context from GGUF metadata (often 256K+) and allocates that much VRAM regardless of the user's config — causing OOM on smaller GPUs like the P100 (16GB).

Root cause: two separate context values existed independently:
- context_compressor.context_length = config value (e.g. 65536) ✓
- _ollama_num_ctx = GGUF metadata value (e.g. 256000) ✗ ignored config

Changes:

1. Cap Ollama num_ctx to config context_length (run_agent.py)
   When model.context_length is explicitly set and no explicit ollama_num_ctx override exists, cap the auto-detected GGUF value to the user's context_length. This is the core fix — it prevents Ollama from allocating more VRAM than the user budgeted.

2. Pass config_context_length through all secondary call sites
   Several paths called get_model_context_length() without the config override, falling through to the 256K default fallback:
   - cli.py: @-reference expansion and /model switch display
   - gateway/run.py: @-reference expansion and /model switch display
   - tui_gateway/server.py: @-reference expansion
   - hermes_cli/model_switch.py: resolve_display_context_length()

3. Normalize root-level context_length in config (hermes_cli/config.py)
   _normalize_root_model_keys() now migrates root-level context_length into the model section, matching existing behavior for provider and base_url. Users who wrote `context_length: 65536` at the YAML root instead of under `model:` had it silently ignored.

4. Fix misleading comments (agent/model_metadata.py)
   DEFAULT_FALLBACK_CONTEXT is 256K (CONTEXT_PROBE_TIERS[0]), not 128K as two comments stated.

Tests: 3 new tests for root-level context_length normalization. All existing context_length tests pass (96 tests).
This commit is contained in:
parent
fbb3775770
commit
0dd373ec43
8 changed files with 109 additions and 10 deletions
|
|
@ -354,6 +354,49 @@ class TestRootLevelProviderOverride:
|
|||
assert result["model"]["provider"] == "correct-provider"
|
||||
assert "provider" not in result # root key still cleaned up
|
||||
|
||||
def test_normalize_root_context_length_migrates_to_model(self):
    """A context_length written at the config root is moved under ``model``.

    The input has no ``model.context_length``; after normalization the value
    must appear there and the root-level key must no longer be present.
    """
    from hermes_cli.config import _normalize_root_model_keys

    raw_config = {"context_length": 128000, "model": {"default": "my-model"}}

    normalized = _normalize_root_model_keys(raw_config)
    model_section = normalized["model"]

    # Value migrated into the model section.
    assert model_section["context_length"] == 128000
    # Root-level key removed once migrated.
    assert "context_length" not in normalized
|
||||
|
||||
def test_normalize_root_context_length_does_not_override_existing(self):
    """An explicit ``model.context_length`` wins over a root-level one.

    Both keys are supplied with different values; the model-section value
    must survive unchanged and the root-level key must still be removed.
    """
    from hermes_cli.config import _normalize_root_model_keys

    raw_config = {
        "context_length": 256000,
        "model": {"default": "my-model", "context_length": 128000},
    }

    normalized = _normalize_root_model_keys(raw_config)
    model_section = normalized["model"]

    # The pre-existing model-level value is preserved.
    assert model_section["context_length"] == 128000
    # The root-level duplicate is dropped regardless.
    assert "context_length" not in normalized
|
||||
|
||||
def test_normalize_root_context_length_with_string_model(self):
    """Migration also works when ``model`` is given as a bare string.

    The string form must be expanded to a dict whose ``default`` is the
    original string, with the root-level context_length migrated into it.
    """
    from hermes_cli.config import _normalize_root_model_keys

    raw_config = {"context_length": 128000, "model": "my-model"}

    normalized = _normalize_root_model_keys(raw_config)
    model_section = normalized["model"]

    # String model is promoted to a dict keyed by "default".
    assert isinstance(model_section, dict)
    assert model_section["default"] == "my-model"
    # Root-level value lands in the model section and leaves the root.
    assert model_section["context_length"] == 128000
    assert "context_length" not in normalized
|
||||
|
||||
|
||||
class TestProviderResolution:
|
||||
def test_api_key_is_string_or_none(self):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue