fix(metadata): qwen3.6-plus has a 1M context window (#27008)

qwen3.6-plus did not have an explicit entry in DEFAULT_CONTEXT_LENGTHS, so the longest-substring fallback matched the generic 'qwen': 131072 catch-all. That dropped the effective context limit from 1,048,576 tokens to 131,072, prematurely lowered the compression threshold, and produced misleading warnings about main/compression context mismatch in long sessions. Add an explicit 'qwen3.6-plus': 1048576 entry before the catch-all and cover it with a regression test (bare, qwen/, and dashscope/ prefixes). Note: PR #6599 also mentions touching model_metadata.py but the actual diff only edits hermes_cli/models.py, so this fix is independent and not duplicated by that PR. Closes #27008
2026-07-16 14:32:34 +00:00 · 2026-05-17 02:29:27 -07:00 · 2026-05-17 02:29:27 -07:00 · d9abbe7fa4
commit d9abbe7fa4
parent 5a2a858b84
2 changed files with 11 additions and 0 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -194,6 +194,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "llama": 131072,
    # Qwen — specific model families before the catch-all.
    # Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
+    "qwen3.6-plus": 1048576,      # 1M context (DashScope/Alibaba & OpenRouter)
    "qwen3-coder-plus": 1000000,  # 1M context
    "qwen3-coder": 262144,        # 256K context
    "qwen": 131072,
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -746,6 +746,16 @@ class TestGetModelContextLength:
        mock_fetch.return_value = {}
        assert get_model_context_length("qwen3-coder") == 262144

+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_6_plus_context_length(self, mock_fetch):
+        """qwen3.6-plus has a 1M context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3.6-plus") == 1048576
+        # Provider-prefixed variants must resolve to the same explicit entry
+        # via the longest-substring fallback (no portal/OR cache available).
+        assert get_model_context_length("qwen/qwen3.6-plus") == 1048576
+        assert get_model_context_length("dashscope/qwen3.6-plus") == 1048576
+
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_qwen_generic_context_length(self, mock_fetch):
        """Generic qwen models still get the 128K default."""