mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(deepseek): bump V4 family context window to 1M tokens
#14934 added deepseek-v4-pro / deepseek-v4-flash to the DeepSeek native provider but the context-window lookup still falls back to the existing "deepseek" substring entry (128K). DeepSeek V4 ships with a 1M context window, so any caller relying on get_model_context_length() for pre-flight token budgeting (compression, context warnings) under-counts by ~8x. Add explicit lowercase entries for the four DeepSeek model ids that ship 1M context: - deepseek-v4-pro - deepseek-v4-flash - deepseek-chat (legacy alias, server-side maps to v4-flash non-thinking) - deepseek-reasoner (legacy alias, server-side maps to v4-flash thinking) Longest-key-first substring matching means these explicit entries also cover the vendor-prefixed forms (deepseek/deepseek-v4-pro on OpenRouter and Nous Portal) without regressing the existing 128K fallback for older / unknown DeepSeek model ids on custom endpoints. Source: https://api-docs.deepseek.com/zh-cn/quick_start/pricing
This commit is contained in:
parent
6051fba9dc
commit
f88a2cf877
2 changed files with 48 additions and 1 deletions
|
|
@ -142,7 +142,17 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
|
|
|
|||
|
|
@ -192,6 +192,43 @@ class TestDefaultContextLengths:
|
|||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_deepseek_v4_models_1m_context(self):
|
||||
from agent.model_metadata import get_model_context_length
|
||||
from unittest.mock import patch as mock_patch
|
||||
|
||||
expected_keys = {
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
}
|
||||
for key, value in expected_keys.items():
|
||||
assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
|
||||
assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
|
||||
f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
|
||||
)
|
||||
|
||||
# Longest-first substring matching must resolve both the bare V4
|
||||
# ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter
|
||||
# / Nous Portal) to 1M without probing down to the legacy 128K
|
||||
# ``deepseek`` substring fallback.
|
||||
with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
|
||||
cases = [
|
||||
("deepseek-v4-pro", 1_000_000),
|
||||
("deepseek-v4-flash", 1_000_000),
|
||||
("deepseek/deepseek-v4-pro", 1_000_000),
|
||||
("deepseek/deepseek-v4-flash", 1_000_000),
|
||||
("deepseek-chat", 1_000_000),
|
||||
("deepseek-reasoner", 1_000_000),
|
||||
]
|
||||
for model_id, expected_ctx in cases:
|
||||
actual = get_model_context_length(model_id)
|
||||
assert actual == expected_ctx, (
|
||||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_all_values_positive(self):
    """Every entry in the default context-length table is strictly positive."""
    for model_key, ctx_len in DEFAULT_CONTEXT_LENGTHS.items():
        assert ctx_len > 0, f"{model_key} has non-positive context length"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue