fix: 6 bugs in model metadata, reasoning detection, and delegate tool

Cherry-picked from PR #2169 by @0xbyt4.

1. _strip_provider_prefix: skip Ollama model:tag names (qwen:0.5b)
2. Fuzzy match: remove reverse direction that made claude-sonnet-4
   resolve to 1M instead of 200K
3. _has_content_after_think_block: reuse _strip_think_blocks() to
   handle all tag variants (thinking, reasoning, REASONING_SCRATCHPAD)
4. models.dev lookup: elif→if so nous provider also queries models.dev
5. Disk cache fallback: use 5-min TTL instead of full hour so network
   is retried soon
6. Delegate build: wrap child construction in try/finally so
   _last_resolved_tool_names is always restored on exception
This commit is contained in:
Test 2026-03-20 08:52:37 -07:00
parent 2ea4dd30c6
commit 55ce601502
4 changed files with 50 additions and 30 deletions

View file

@ -34,17 +34,29 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
})
_OLLAMA_TAG_PATTERN = re.compile(
r"^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)",
re.IGNORECASE,
)
def _strip_provider_prefix(model: str) -> str:
"""Strip a recognised provider prefix from a model string.
``"local:my-model"`` ``"my-model"``
``"qwen3.5:27b"`` ``"qwen3.5:27b"`` (unchanged not a provider prefix)
``"qwen:0.5b"`` ``"qwen:0.5b"`` (unchanged Ollama model:tag)
``"deepseek:latest"`` ``"deepseek:latest"``(unchanged Ollama model:tag)
"""
if ":" not in model or model.startswith("http"):
return model
prefix = model.split(":", 1)[0].strip().lower()
if prefix in _PROVIDER_PREFIXES:
return model.split(":", 1)[1]
prefix, suffix = model.split(":", 1)
prefix_lower = prefix.strip().lower()
if prefix_lower in _PROVIDER_PREFIXES:
# Don't strip if suffix looks like an Ollama tag (e.g. "7b", "latest", "q4_0")
if _OLLAMA_TAG_PATTERN.match(suffix.strip()):
return model
return suffix
return model
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
@ -800,7 +812,7 @@ def get_model_context_length(
ctx = _resolve_nous_context_length(model)
if ctx:
return ctx
elif provider:
if provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(provider, model)
if ctx:
@ -812,10 +824,13 @@ def get_model_context_length(
return metadata[model].get("context_length", 128000)
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
# Only check `default_model in model` (is the key a substring of the input).
# The reverse (`model in default_model`) causes shorter names like
# "claude-sonnet-4" to incorrectly match "claude-sonnet-4-6" and return 1M.
for default_model, length in sorted(
DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
):
if default_model in model or model in default_model:
if default_model in model:
return length
# 9. Query local server as last resort