fix(metadata): qwen3.6-plus has a 1M context window (#27008)

qwen3.6-plus did not have an explicit entry in DEFAULT_CONTEXT_LENGTHS,
so the longest-substring fallback matched the generic 'qwen': 131072
catch-all. That dropped the effective context limit from 1,048,576
tokens to 131,072, prematurely lowered the compression threshold, and
produced misleading warnings about main/compression context mismatch
in long sessions.

Add an explicit 'qwen3.6-plus': 1048576 entry before the catch-all and
cover it with a regression test (bare, qwen/, and dashscope/ prefixes).

Note: PR #6599 also mentions touching model_metadata.py but the actual
diff only edits hermes_cli/models.py, so this fix is independent and
not duplicated by that PR.

Closes #27008
This commit is contained in:
haran2001 2026-05-17 02:29:27 -07:00 committed by Teknium
parent 5a2a858b84
commit d9abbe7fa4
2 changed files with 11 additions and 0 deletions

View file

@ -194,6 +194,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"llama": 131072,
# Qwen — specific model families before the catch-all.
# Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
"qwen3.6-plus": 1048576, # 1M context (DashScope/Alibaba & OpenRouter)
"qwen3-coder-plus": 1000000, # 1M context
"qwen3-coder": 262144, # 256K context
"qwen": 131072,

View file

@ -746,6 +746,16 @@ class TestGetModelContextLength:
mock_fetch.return_value = {}
assert get_model_context_length("qwen3-coder") == 262144
@patch("agent.model_metadata.fetch_model_metadata")
def test_qwen3_6_plus_context_length(self, mock_fetch):
    """Verify qwen3.6-plus resolves to 1,048,576 tokens, not the generic Qwen value."""
    # The patched metadata fetch returns nothing, so the lookup has to be
    # answered without any fetched (portal / OpenRouter) metadata.
    mock_fetch.return_value = {}
    expected_tokens = 1048576
    # Bare id first, then provider-prefixed ids: each one must land on the
    # same explicit qwen3.6-plus entry through the longest-substring fallback.
    model_ids = (
        "qwen3.6-plus",
        "qwen/qwen3.6-plus",
        "dashscope/qwen3.6-plus",
    )
    for model_id in model_ids:
        assert get_model_context_length(model_id) == expected_tokens
@patch("agent.model_metadata.fetch_model_metadata")
def test_qwen_generic_context_length(self, mock_fetch):
"""Generic qwen models still get the 128K default."""