diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 14508745e..1de89a1b4 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -1308,6 +1308,23 @@ def get_model_context_length(
             if inferred:
                 effective_provider = inferred
 
+    # 5a. Copilot live /models API — max_prompt_tokens from the user's account.
+    # This catches account-specific models (e.g. claude-opus-4.6-1m) that
+    # don't exist in models.dev. For models that ARE in models.dev, this
+    # returns the provider-enforced limit which is what users can actually use.
+    if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
+        try:
+            from hermes_cli.models import get_copilot_model_context
+            ctx = get_copilot_model_context(model, api_key=api_key)
+            if ctx:
+                return ctx
+        except Exception as exc:
+            # Best-effort lookup: fall through to models.dev, but leave a trace.
+            import logging
+            logging.getLogger(__name__).debug(
+                "Copilot /models context lookup failed for %s: %s", model, exc
+            )
+
     if effective_provider == "nous":
         ctx = _resolve_nous_context_length(model)
         if ctx:
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 4d2302274..887b33ec7 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -1902,6 +1902,64 @@ def fetch_github_model_catalog(
     return None
 
 
+# ─── Copilot catalog context-window helpers ─────────────────────────────────
+
+# Module-level cache: {model_id: max_prompt_tokens}
+_copilot_context_cache: dict[str, int] = {}
+_copilot_context_cache_time: float = 0.0
+# Lifetime of the current cache contents. A failed or empty fetch is kept
+# only for the short retry interval so an unavailable catalog does not
+# trigger a network request on every lookup.
+_copilot_context_cache_ttl: float = 0.0
+_COPILOT_CONTEXT_CACHE_TTL = 3600  # 1 hour
+_COPILOT_CONTEXT_RETRY_TTL = 60  # 1 minute
+
+
+def get_copilot_model_context(model_id: str, api_key: Optional[str] = None) -> Optional[int]:
+    """Look up max_prompt_tokens for a Copilot model from the live /models API.
+
+    Results are cached in-process for 1 hour to avoid repeated API calls.
+    A failed or empty catalog fetch is remembered for 1 minute, so lookups
+    made while the catalog is unavailable do not each hit the network.
+
+    Args:
+        model_id: Catalog id of the model to look up.
+        api_key: Optional token forwarded to fetch_github_model_catalog().
+
+    Returns:
+        The token limit, or None if not found.
+    """
+    global _copilot_context_cache, _copilot_context_cache_time, _copilot_context_cache_ttl
+
+    # Serve from cache if fresh. A fresh cache that lacks the model means
+    # "not found" — don't re-fetch.
+    if time.time() - _copilot_context_cache_time < _copilot_context_cache_ttl:
+        return _copilot_context_cache.get(model_id)
+
+    # Fetch and populate cache
+    catalog = fetch_github_model_catalog(api_key=api_key)
+
+    cache: dict[str, int] = {}
+    for item in catalog or []:
+        if not isinstance(item, dict):
+            continue
+        mid = str(item.get("id") or "").strip()
+        if not mid:
+            continue
+        caps = item.get("capabilities") or {}
+        limits = caps.get("limits") or {}
+        max_prompt = limits.get("max_prompt_tokens")
+        # bool is a subclass of int, so exclude it explicitly.
+        if isinstance(max_prompt, int) and not isinstance(max_prompt, bool) and max_prompt > 0:
+            cache[mid] = max_prompt
+
+    _copilot_context_cache = cache
+    _copilot_context_cache_time = time.time()
+    _copilot_context_cache_ttl = _COPILOT_CONTEXT_CACHE_TTL if cache else _COPILOT_CONTEXT_RETRY_TTL
+
+    return cache.get(model_id)
+
+
 def _is_github_models_base_url(base_url: Optional[str]) -> bool:
     normalized = (base_url or "").strip().rstrip("/").lower()
     return (
diff --git a/tests/hermes_cli/test_copilot_context.py b/tests/hermes_cli/test_copilot_context.py
new file mode 100644
index 000000000..cb2404897
--- /dev/null
+++ b/tests/hermes_cli/test_copilot_context.py
@@ -0,0 +1,142 @@
+"""Tests for Copilot live /models context-window resolution."""
+
+from __future__ import annotations
+
+import time
+from unittest.mock import patch
+
+import pytest
+
+from hermes_cli.models import get_copilot_model_context
+
+
+# Sample catalog items mimicking the Copilot /models API response
+_SAMPLE_CATALOG = [
+    {
+        "id": "claude-opus-4.6-1m",
+        "capabilities": {
+            "type": "chat",
+            "limits": {"max_prompt_tokens": 1000000, "max_output_tokens": 64000},
+        },
+    },
+    {
+        "id": "gpt-4.1",
+        "capabilities": {
+            "type": "chat",
+            "limits": {"max_prompt_tokens": 128000, "max_output_tokens": 32768},
+        },
+    },
+    {
+        "id": "claude-sonnet-4",
+        "capabilities": {
+            "type": "chat",
+            "limits": {"max_prompt_tokens": 200000, "max_output_tokens": 64000},
+        },
+    },
+    {
+        "id": "model-without-limits",
+        "capabilities": {"type": "chat"},
+    },
+    {
+        "id": "model-zero-limit",
+        "capabilities": {
+            "type": "chat",
+            "limits": {"max_prompt_tokens": 0},
+        },
+    },
+]
+
+
+@pytest.fixture(autouse=True)
+def _clear_cache():
+    """Reset module-level cache before each test."""
+    import hermes_cli.models as mod
+
+    mod._copilot_context_cache = {}
+    mod._copilot_context_cache_time = 0.0
+    mod._copilot_context_cache_ttl = 0.0
+    yield
+    mod._copilot_context_cache = {}
+    mod._copilot_context_cache_time = 0.0
+    mod._copilot_context_cache_ttl = 0.0
+
+
+class TestGetCopilotModelContext:
+    """Tests for get_copilot_model_context()."""
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_returns_max_prompt_tokens(self, mock_fetch):
+        assert get_copilot_model_context("claude-opus-4.6-1m") == 1_000_000
+        assert get_copilot_model_context("gpt-4.1") == 128_000
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_returns_none_for_unknown_model(self, mock_fetch):
+        assert get_copilot_model_context("nonexistent-model") is None
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_skips_models_without_limits(self, mock_fetch):
+        assert get_copilot_model_context("model-without-limits") is None
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_skips_zero_limit(self, mock_fetch):
+        assert get_copilot_model_context("model-zero-limit") is None
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_caches_results(self, mock_fetch):
+        get_copilot_model_context("gpt-4.1")
+        get_copilot_model_context("claude-sonnet-4")
+        # Only one API call despite two lookups
+        assert mock_fetch.call_count == 1
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_cache_expires(self, mock_fetch):
+        import hermes_cli.models as mod
+
+        get_copilot_model_context("gpt-4.1")
+        assert mock_fetch.call_count == 1
+
+        # Expire the cache
+        mod._copilot_context_cache_time = time.time() - 7200
+        get_copilot_model_context("gpt-4.1")
+        assert mock_fetch.call_count == 2
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=None)
+    def test_returns_none_when_catalog_unavailable(self, mock_fetch):
+        assert get_copilot_model_context("gpt-4.1") is None
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=[])
+    def test_returns_none_for_empty_catalog(self, mock_fetch):
+        assert get_copilot_model_context("gpt-4.1") is None
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=None)
+    def test_failed_fetch_is_not_retried_immediately(self, mock_fetch):
+        assert get_copilot_model_context("gpt-4.1") is None
+        assert get_copilot_model_context("gpt-4.1") is None
+        # The failure is remembered briefly instead of re-hitting the API
+        assert mock_fetch.call_count == 1
+
+
+class TestModelMetadataCopilotIntegration:
+    """Test that get_model_context_length() uses Copilot live API for copilot provider."""
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_copilot_provider_uses_live_api(self, mock_fetch):
+        from agent.model_metadata import get_model_context_length
+
+        ctx = get_model_context_length("claude-opus-4.6-1m", provider="copilot")
+        assert ctx == 1_000_000
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG)
+    def test_copilot_acp_provider_uses_live_api(self, mock_fetch):
+        from agent.model_metadata import get_model_context_length
+
+        ctx = get_model_context_length("claude-sonnet-4", provider="copilot-acp")
+        assert ctx == 200_000
+
+    @patch("hermes_cli.models.fetch_github_model_catalog", return_value=None)
+    def test_falls_through_when_catalog_unavailable(self, mock_fetch):
+        from agent.model_metadata import get_model_context_length
+
+        # Should not raise, should fall through to models.dev or defaults
+        ctx = get_model_context_length("gpt-4.1", provider="copilot")
+        assert isinstance(ctx, int)
+        assert ctx > 0