From 1e5bf7bc3f24e5acc41e253f1494770d00221fd4 Mon Sep 17 00:00:00 2001 From: IMHaoyan <657290301@qq.com> Date: Sun, 12 Apr 2026 15:10:14 +0800 Subject: [PATCH] fix: remove early return in custom endpoint context length resolution When a custom endpoint does not expose a /models API, get_model_context_length short-circuits at step 2 and returns the 128K fallback, skipping steps 4-8 (models.dev, OpenRouter metadata, hardcoded defaults like 'glm': 202752). This causes false warnings like: 'Compression model (glm-5.1) context is 128,000 tokens, but the main model's compression threshold is 180,000 tokens.' even though the hardcoded DEFAULT_CONTEXT_LENGTHS table has 'glm': 202752, which would have matched if the lookup were allowed to continue. Fix: Remove the early return at step 2 so the resolution chain falls through to steps 4-8 (models.dev, OpenRouter cache, hardcoded defaults) before reaching the final 128K fallback at step 10. --- agent/model_metadata.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 03f70b3fe..e112aa0e4 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -979,13 +979,8 @@ def get_model_context_length( if local_ctx and local_ctx > 0: save_context_length(model, base_url, local_ctx) return local_ctx - logger.info( - "Could not detect context length for model %r at %s — " - "defaulting to %s tokens (probe-down). Set model.context_length " - "in config.yaml to override.", - model, base_url, f"{DEFAULT_FALLBACK_CONTEXT:,}", - ) - return DEFAULT_FALLBACK_CONTEXT + # Don't early-return the fallback here — continue to steps 4-8 + # so hardcoded defaults and models.dev can still match. # 4. Anthropic /v1/models API (only for regular API keys, not OAuth) if provider == "anthropic" or (