fix: provider/model resolution — salvage 4 PRs + MiniMax aux URL fix (#5983)

Salvaged fixes from community PRs: - fix(model_switch): _read_auth_store → _load_auth_store + fix auth store key lookup (was checking top-level dict instead of store['providers']). OAuth providers now correctly detected in /model picker. Cherry-picked from PR #5911 by Xule Lin (linxule). - fix(ollama): pass num_ctx to override 2048 default context window. Ollama defaults to 2048 context regardless of model capabilities. Now auto-detects from /api/show metadata and injects num_ctx into every request. Config override via model.ollama_num_ctx. Fixes #2708. Cherry-picked from PR #5929 by kshitij (kshitijk4poor). - fix(aux): normalize provider aliases for vision/auxiliary routing. Adds _normalize_aux_provider() with 17 aliases (google→gemini, claude→anthropic, glm→zai, etc). Fixes vision routing failure when provider is set to 'google' instead of 'gemini'. Cherry-picked from PR #5793 by e11i (Elizabeth1979). - fix(aux): rewrite MiniMax /anthropic base URLs to /v1 for OpenAI SDK. MiniMax's inference_base_url ends in /anthropic (Anthropic Messages API), but auxiliary client uses OpenAI SDK which appends /chat/completions → 404 at /anthropic/chat/completions. Generic _to_openai_base_url() helper rewrites terminal /anthropic to /v1 for OpenAI-compatible endpoint. Inspired by PR #5786 by Lempkey. Added debug logging to silent exception blocks across all fixes. Co-authored-by: Hermes Agent <hermes@nousresearch.com>
2026-07-20 15:33:54 +00:00 · 2026-04-07 22:23:28 -07:00 · 2026-04-07 22:23:28 -07:00 · 5c03f2e7cc
commit 5c03f2e7cc
parent 8d7a98d2ff
7 changed files with 378 additions and 31 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -611,6 +611,59 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


+def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
+    """Query an Ollama server for the model's context length.
+
+    Returns the model's maximum context from GGUF metadata via ``/api/show``,
+    or the explicit ``num_ctx`` from the Modelfile if set.  Returns None if
+    the server is unreachable or not Ollama.
+
+    This is the value that should be passed as ``num_ctx`` in Ollama chat
+    requests to override the default 2048.
+    """
+    import httpx
+
+    bare_model = _strip_provider_prefix(model)
+    server_url = base_url.rstrip("/")
+    if server_url.endswith("/v1"):
+        server_url = server_url[:-3]
+
+    try:
+        server_type = detect_local_server_type(base_url)
+    except Exception:
+        return None
+    if server_type != "ollama":
+        return None
+
+    try:
+        with httpx.Client(timeout=3.0) as client:
+            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
+            if resp.status_code != 200:
+                return None
+            data = resp.json()
+
+            # Prefer explicit num_ctx from Modelfile parameters (user override)
+            params = data.get("parameters", "")
+            if "num_ctx" in params:
+                for line in params.split("\n"):
+                    if "num_ctx" in line:
+                        parts = line.strip().split()
+                        if len(parts) >= 2:
+                            try:
+                                return int(parts[-1])
+                            except ValueError:
+                                pass
+
+            # Fall back to GGUF model_info context_length (training max)
+            model_info = data.get("model_info", {})
+            for key, value in model_info.items():
+                if "context_length" in key and isinstance(value, (int, float)):
+                    return int(value)
+    except Exception:
+        pass
+    return None
+
+
 def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx