fix: provider/model resolution — salvage 4 PRs + MiniMax aux URL fix (#5983)

Salvaged fixes from community PRs: - fix(model_switch): _read_auth_store → _load_auth_store + fix auth store key lookup (was checking top-level dict instead of store['providers']). OAuth providers now correctly detected in /model picker. Cherry-picked from PR #5911 by Xule Lin (linxule). - fix(ollama): pass num_ctx to override 2048 default context window. Ollama defaults to 2048 context regardless of model capabilities. Now auto-detects from /api/show metadata and injects num_ctx into every request. Config override via model.ollama_num_ctx. Fixes #2708. Cherry-picked from PR #5929 by kshitij (kshitijk4poor). - fix(aux): normalize provider aliases for vision/auxiliary routing. Adds _normalize_aux_provider() with 17 aliases (google→gemini, claude→anthropic, glm→zai, etc). Fixes vision routing failure when provider is set to 'google' instead of 'gemini'. Cherry-picked from PR #5793 by e11i (Elizabeth1979). - fix(aux): rewrite MiniMax /anthropic base URLs to /v1 for OpenAI SDK. MiniMax's inference_base_url ends in /anthropic (Anthropic Messages API), but auxiliary client uses OpenAI SDK which appends /chat/completions → 404 at /anthropic/chat/completions. Generic _to_openai_base_url() helper rewrites terminal /anthropic to /v1 for OpenAI-compatible endpoint. Inspired by PR #5786 by Lempkey. Added debug logging to silent exception blocks across all fixes. Co-authored-by: Hermes Agent <hermes@nousresearch.com>
2026-04-25 00:51:20 +00:00 · 2026-04-07 22:23:28 -07:00 · 2026-04-07 22:23:28 -07:00 · 5c03f2e7cc
commit 5c03f2e7cc
parent 8d7a98d2ff
7 changed files with 378 additions and 31 deletions
--- a/tests/test_ollama_num_ctx.py
+++ b/tests/test_ollama_num_ctx.py
@ -0,0 +1,135 @@
+"""Tests for Ollama num_ctx context length detection and injection.
+
+Covers:
+  agent/model_metadata.py — query_ollama_num_ctx()
+  run_agent.py — _ollama_num_ctx detection + extra_body injection
+"""
+
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from agent.model_metadata import query_ollama_num_ctx
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# Level 1: query_ollama_num_ctx — Ollama API interaction
+# ═══════════════════════════════════════════════════════════════════════
+
+
+def _mock_httpx_client(show_response_data, status_code=200):
+    """Create a mock httpx.Client context manager that returns given /api/show data."""
+    mock_resp = MagicMock(status_code=status_code)
+    mock_resp.json.return_value = show_response_data
+    mock_client = MagicMock()
+    mock_client.post.return_value = mock_resp
+    mock_ctx = MagicMock()
+    mock_ctx.__enter__ = MagicMock(return_value=mock_client)
+    mock_ctx.__exit__ = MagicMock(return_value=False)
+    return mock_ctx, mock_client
+
+
+class TestQueryOllamaNumCtx:
+    """Test the Ollama /api/show context length query."""
+
+    def test_returns_context_from_model_info(self):
+        """Should extract context_length from GGUF model_info metadata."""
+        show_data = {
+            "model_info": {"llama.context_length": 131072},
+            "parameters": "",
+        }
+        mock_ctx, _ = _mock_httpx_client(show_data)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
+            # httpx is imported inside the function — patch the module import
+            import httpx
+            with patch.object(httpx, "Client", return_value=mock_ctx):
+                result = query_ollama_num_ctx("llama3.1:8b", "http://localhost:11434/v1")
+
+        assert result == 131072
+
+    def test_prefers_explicit_num_ctx_from_modelfile(self):
+        """If the Modelfile sets num_ctx explicitly, that should take priority."""
+        show_data = {
+            "model_info": {"llama.context_length": 131072},
+            "parameters": "num_ctx 32768\ntemperature 0.7",
+        }
+        mock_ctx, _ = _mock_httpx_client(show_data)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
+            import httpx
+            with patch.object(httpx, "Client", return_value=mock_ctx):
+                result = query_ollama_num_ctx("custom-model", "http://localhost:11434")
+
+        assert result == 32768
+
+    def test_returns_none_for_non_ollama_server(self):
+        """Should return None if the server is not Ollama."""
+        with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"):
+            result = query_ollama_num_ctx("model", "http://localhost:1234")
+        assert result is None
+
+    def test_returns_none_on_connection_error(self):
+        """Should return None if the server is unreachable."""
+        with patch("agent.model_metadata.detect_local_server_type", side_effect=Exception("timeout")):
+            result = query_ollama_num_ctx("model", "http://localhost:11434")
+        assert result is None
+
+    def test_returns_none_on_404(self):
+        """Should return None if the model is not found."""
+        mock_ctx, _ = _mock_httpx_client({}, status_code=404)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
+            import httpx
+            with patch.object(httpx, "Client", return_value=mock_ctx):
+                result = query_ollama_num_ctx("nonexistent", "http://localhost:11434")
+
+        assert result is None
+
+    def test_strips_provider_prefix(self):
+        """Should strip 'local:' prefix from model name before querying."""
+        show_data = {
+            "model_info": {"qwen2.context_length": 32768},
+            "parameters": "",
+        }
+        mock_ctx, mock_client = _mock_httpx_client(show_data)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
+            import httpx
+            with patch.object(httpx, "Client", return_value=mock_ctx):
+                result = query_ollama_num_ctx("local:qwen2.5:7b", "http://localhost:11434/v1")
+
+        # Verify the post was called with stripped name (no "local:" prefix)
+        call_args = mock_client.post.call_args
+        assert call_args[1]["json"]["name"] == "qwen2.5:7b" or call_args[0][1] is not None
+        assert result == 32768
+
+    def test_handles_qwen2_architecture_key(self):
+        """Different model architectures use different key prefixes in model_info."""
+        show_data = {
+            "model_info": {"qwen2.context_length": 65536},
+            "parameters": "",
+        }
+        mock_ctx, _ = _mock_httpx_client(show_data)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
+            import httpx
+            with patch.object(httpx, "Client", return_value=mock_ctx):
+                result = query_ollama_num_ctx("qwen2.5:32b", "http://localhost:11434")
+
+        assert result == 65536
+
+    def test_returns_none_when_model_info_empty(self):
+        """Should return None if model_info has no context_length key."""
+        show_data = {
+            "model_info": {"llama.embedding_length": 4096},
+            "parameters": "",
+        }
+        mock_ctx, _ = _mock_httpx_client(show_data)
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
+            import httpx
+            with patch.object(httpx, "Client", return_value=mock_ctx):
+                result = query_ollama_num_ctx("model", "http://localhost:11434")
+
+        assert result is None