mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(model_metadata): consult DEFAULT_CONTEXT_LENGTHS before 256K fallback on custom endpoints
Problem: get_model_context_length() had an early return at the end of the custom-endpoint probe branch (step 3) that returned DEFAULT_FALLBACK_CONTEXT (256K) without ever consulting the hardcoded DEFAULT_CONTEXT_LENGTHS catalog (step 8). Models served through a custom/proxied gateway (e.g. a corporate Anthropic proxy) that didn't expose Ollama or local-server endpoints would hit this path and get capped at 256K, even when the model name clearly matched a known entry in the catalog (e.g. claude-opus-4-8 → 1M).

Changes:
- agent/model_metadata.py: Before returning DEFAULT_FALLBACK_CONTEXT at the end of the custom-endpoint branch, consult DEFAULT_CONTEXT_LENGTHS using the same longest-key-first fuzzy matching as step 8. Only fall through to 256K if no catalog entry matches.
- tests/agent/test_model_metadata.py: Updated the existing test and added a new test covering the custom-endpoint → catalog fallback behavior.

Fixes #38865
This commit is contained in:
parent
f1d3afb151
commit
2e61de0638
2 changed files with 89 additions and 3 deletions
|
|
@ -1684,6 +1684,26 @@ def get_model_context_length(
|
|||
"in config.yaml to override.",
|
||||
model, base_url, f"{DEFAULT_FALLBACK_CONTEXT:,}",
|
||||
)
|
||||
# 3b. Before falling back to the hard 256K default, consult the
|
||||
# hardcoded catalog as a last resort. A proxied/custom Anthropic
|
||||
# gateway (e.g. corporate proxy) fails the Ollama/local probes
|
||||
# above, but the model name may still match an entry in
|
||||
# DEFAULT_CONTEXT_LENGTHS (e.g. "claude-opus-4-8" → 1M).
|
||||
# Without this, the early return here short-circuits the catalog
|
||||
# lookup at step 8 and silently caps context at 256K.
|
||||
model_lower = model.lower()
|
||||
for default_model, length in sorted(
|
||||
DEFAULT_CONTEXT_LENGTHS.items(),
|
||||
key=lambda x: len(x[0]),
|
||||
reverse=True,
|
||||
):
|
||||
if default_model in model_lower:
|
||||
logger.info(
|
||||
"Using hardcoded context length %s for model %r "
|
||||
"(custom endpoint, catalog match on %r)",
|
||||
f"{length:,}", model, default_model,
|
||||
)
|
||||
return length
|
||||
return DEFAULT_FALLBACK_CONTEXT
|
||||
|
||||
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ from unittest.mock import patch, MagicMock
|
|||
from agent.model_metadata import (
|
||||
CONTEXT_PROBE_TIERS,
|
||||
DEFAULT_CONTEXT_LENGTHS,
|
||||
DEFAULT_FALLBACK_CONTEXT,
|
||||
_strip_provider_prefix,
|
||||
estimate_tokens_rough,
|
||||
estimate_messages_tokens_rough,
|
||||
|
|
@ -773,17 +774,24 @@ class TestGetModelContextLength:
|
|||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
|
||||
def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch):
|
||||
def test_custom_endpoint_without_metadata_falls_back_to_catalog(self, mock_endpoint_fetch, mock_fetch):
|
||||
"""Custom endpoint with no metadata should fall back to the hardcoded
|
||||
catalog (not 256K) when the model name matches a known entry.
|
||||
|
||||
Previously this returned CONTEXT_PROBE_TIERS[0] (256K) because the
|
||||
custom-endpoint branch short-circuited before the catalog lookup.
|
||||
See #38865.
|
||||
"""
|
||||
mock_fetch.return_value = {}
|
||||
mock_endpoint_fetch.return_value = {}
|
||||
|
||||
# GLM-5-TEE matches the "glm" entry in DEFAULT_CONTEXT_LENGTHS
|
||||
result = get_model_context_length(
|
||||
"zai-org/GLM-5-TEE",
|
||||
base_url="https://llm.chutes.ai/v1",
|
||||
api_key="test-key",
|
||||
)
|
||||
|
||||
assert result == CONTEXT_PROBE_TIERS[0]
|
||||
assert result == 202752 # "glm" entry in DEFAULT_CONTEXT_LENGTHS
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
|
||||
|
|
@ -858,6 +866,64 @@ class TestGetModelContextLength:
|
|||
|
||||
assert result == 200000
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_custom_endpoint_falls_back_to_hardcoded_catalog(self, mock_fetch):
|
||||
"""Custom/proxied endpoint that fails all probes should still resolve
|
||||
via DEFAULT_CONTEXT_LENGTHS instead of returning 256K.
|
||||
|
||||
Regression test for #38865: a corporate Anthropic proxy (custom
|
||||
base_url) caused the custom-endpoint branch to short-circuit before
|
||||
the catalog lookup, capping context at 256K even for models like
|
||||
claude-opus-4-8 that are in the hardcoded catalog with 1M.
|
||||
"""
|
||||
mock_fetch.return_value = {}
|
||||
|
||||
# Patch all the probe functions that the custom-endpoint branch calls
|
||||
# so they all fail (return None/empty), simulating a proxy that
|
||||
# doesn't expose Ollama or local-server endpoints.
|
||||
with (
|
||||
patch(
|
||||
"agent.model_metadata._resolve_endpoint_context_length",
|
||||
return_value=None,
|
||||
),
|
||||
patch(
|
||||
"agent.model_metadata._query_ollama_api_show",
|
||||
return_value=None,
|
||||
),
|
||||
patch(
|
||||
"agent.model_metadata._query_local_context_length",
|
||||
return_value=None,
|
||||
),
|
||||
patch(
|
||||
"agent.model_metadata.is_local_endpoint",
|
||||
return_value=False,
|
||||
),
|
||||
):
|
||||
# A known model behind a custom proxy should resolve to its
|
||||
# catalog value (1M), NOT the 256K fallback.
|
||||
ctx = get_model_context_length(
|
||||
"claude-opus-4-8",
|
||||
base_url="https://my-gateway.example.com/v1/claude",
|
||||
)
|
||||
assert ctx == 1000000, f"Expected 1000000, got {ctx}"
|
||||
|
||||
# Another known model
|
||||
ctx2 = get_model_context_length(
|
||||
"claude-sonnet-4-6",
|
||||
base_url="https://my-gateway.example.com/v1/claude",
|
||||
)
|
||||
assert ctx2 == 1000000, f"Expected 1000000, got {ctx2}"
|
||||
|
||||
# An unknown model on a custom endpoint should still fall back
|
||||
# to 256K (no catalog match).
|
||||
ctx3 = get_model_context_length(
|
||||
"totally-unknown-model",
|
||||
base_url="https://my-gateway.example.com/v1/claude",
|
||||
)
|
||||
assert ctx3 == DEFAULT_FALLBACK_CONTEXT, (
|
||||
f"Expected {DEFAULT_FALLBACK_CONTEXT}, got {ctx3}"
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Bedrock context resolution — must run BEFORE custom-endpoint probe
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue