mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix: provider/model resolution — salvage 4 PRs + MiniMax aux URL fix (#5983)
Salvaged fixes from community PRs:
- fix(model_switch): _read_auth_store → _load_auth_store + fix auth store key lookup (was checking top-level dict instead of store['providers']). OAuth providers now correctly detected in /model picker. Cherry-picked from PR #5911 by Xule Lin (linxule).
- fix(ollama): pass num_ctx to override 2048 default context window. Ollama defaults to 2048 context regardless of model capabilities. Now auto-detects from /api/show metadata and injects num_ctx into every request. Config override via model.ollama_num_ctx. Fixes #2708. Cherry-picked from PR #5929 by kshitij (kshitijk4poor).
- fix(aux): normalize provider aliases for vision/auxiliary routing. Adds _normalize_aux_provider() with 17 aliases (google→gemini, claude→anthropic, glm→zai, etc). Fixes vision routing failure when provider is set to 'google' instead of 'gemini'. Cherry-picked from PR #5793 by e11i (Elizabeth1979).
- fix(aux): rewrite MiniMax /anthropic base URLs to /v1 for OpenAI SDK. MiniMax's inference_base_url ends in /anthropic (Anthropic Messages API), but auxiliary client uses OpenAI SDK which appends /chat/completions → 404 at /anthropic/chat/completions. Generic _to_openai_base_url() helper rewrites terminal /anthropic to /v1 for OpenAI-compatible endpoint. Inspired by PR #5786 by Lempkey.
Added debug logging to silent exception blocks across all fixes.
Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
parent
8d7a98d2ff
commit
5c03f2e7cc
7 changed files with 378 additions and 31 deletions
|
|
@ -471,6 +471,23 @@ class TestExplicitProviderRouting:
|
|||
client, model = resolve_provider_client("zai")
|
||||
assert client is not None
|
||||
|
||||
def test_explicit_google_alias_uses_gemini_credentials(self):
    """The 'google' alias must resolve via the gemini API-key credentials.

    The credential lookup and the OpenAI constructor are both patched, so
    the test only checks which key and base URL reach the client.
    """
    gemini_creds = {
        "api_key": "gemini-key",
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
    }
    with (
        patch(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
            return_value=gemini_creds,
        ),
        patch("agent.auxiliary_client.OpenAI") as mock_openai,
    ):
        mock_openai.return_value = MagicMock()
        client, model = resolve_provider_client("google", model="gemini-3.1-pro-preview")

    # The requested model is passed through untouched.
    assert client is not None
    assert model == "gemini-3.1-pro-preview"

    # The OpenAI client was built from the gemini credentials.
    openai_kwargs = mock_openai.call_args.kwargs
    assert openai_kwargs["api_key"] == "gemini-key"
    assert openai_kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
|
||||
def test_explicit_unknown_returns_none(self, monkeypatch):
|
||||
"""Unknown provider should return None."""
|
||||
client, model = resolve_provider_client("nonexistent-provider")
|
||||
|
|
@ -822,6 +839,31 @@ class TestAuxiliaryPoolAwareness:
|
|||
assert model == "google/gemini-3-flash-preview"
|
||||
assert client is not None
|
||||
|
||||
def test_vision_config_google_provider_uses_gemini_credentials(self, monkeypatch):
    """auxiliary.vision.provider='google' must resolve to the gemini provider.

    The loaded config is replaced with one that selects the 'google' alias
    for vision; the credential lookup and OpenAI constructor are patched so
    only the routing decision is exercised.
    """
    vision_settings = {
        "provider": "google",
        "model": "gemini-3.1-pro-preview",
    }
    config = {"auxiliary": {"vision": vision_settings}}
    monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)

    gemini_creds = {
        "api_key": "gemini-key",
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
    }
    with (
        patch(
            "hermes_cli.auth.resolve_api_key_provider_credentials",
            return_value=gemini_creds,
        ),
        patch("agent.auxiliary_client.OpenAI") as mock_openai,
    ):
        resolved_provider, client, model = resolve_vision_provider_client()

    # The alias is reported under its canonical provider name.
    assert resolved_provider == "gemini"
    assert client is not None
    assert model == "gemini-3.1-pro-preview"

    # The OpenAI client was built from the gemini credentials.
    openai_kwargs = mock_openai.call_args.kwargs
    assert openai_kwargs["api_key"] == "gemini-key"
    assert openai_kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
|
||||
def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
|
||||
"""When explicitly forced to 'main', vision CAN use custom endpoint."""
|
||||
config = {
|
||||
|
|
|
|||
42
tests/agent/test_minimax_auxiliary_url.py
Normal file
42
tests/agent/test_minimax_auxiliary_url.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
"""Tests for MiniMax auxiliary client URL normalization.
|
||||
|
||||
MiniMax and MiniMax-CN set inference_base_url to the /anthropic path.
|
||||
The auxiliary client uses the OpenAI SDK, which needs /v1 instead.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from agent.auxiliary_client import _to_openai_base_url
|
||||
|
||||
|
||||
class TestToOpenaiBaseUrl:
    """Checks for _to_openai_base_url(): terminal /anthropic becomes /v1."""

    def test_minimax_global_anthropic_suffix_replaced(self):
        """The global MiniMax endpoint has its /anthropic suffix rewritten."""
        rewritten = _to_openai_base_url("https://api.minimax.io/anthropic")
        assert rewritten == "https://api.minimax.io/v1"

    def test_minimax_cn_anthropic_suffix_replaced(self):
        """The MiniMax-CN endpoint has its /anthropic suffix rewritten."""
        rewritten = _to_openai_base_url("https://api.minimaxi.com/anthropic")
        assert rewritten == "https://api.minimaxi.com/v1"

    def test_trailing_slash_stripped_before_replace(self):
        """A trailing slash after /anthropic does not prevent the rewrite."""
        rewritten = _to_openai_base_url("https://api.minimax.io/anthropic/")
        assert rewritten == "https://api.minimax.io/v1"

    def test_v1_url_unchanged(self):
        """A URL that already ends in /v1 is returned as-is."""
        url = "https://api.openai.com/v1"
        assert _to_openai_base_url(url) == url

    def test_openrouter_url_unchanged(self):
        """The OpenRouter /api/v1 URL is returned as-is."""
        url = "https://openrouter.ai/api/v1"
        assert _to_openai_base_url(url) == url

    def test_anthropic_domain_unchanged(self):
        """api.anthropic.com doesn't end with /anthropic — should be untouched."""
        url = "https://api.anthropic.com"
        assert _to_openai_base_url(url) == url

    def test_anthropic_in_subpath_unchanged(self):
        """/anthropic followed by further path segments is not rewritten."""
        url = "https://example.com/anthropic/extra"
        assert _to_openai_base_url(url) == url

    def test_empty_string(self):
        """An empty string maps to an empty string."""
        assert _to_openai_base_url("") == ""

    def test_none(self):
        """None maps to an empty string."""
        assert _to_openai_base_url(None) == ""
|
||||
135
tests/test_ollama_num_ctx.py
Normal file
135
tests/test_ollama_num_ctx.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""Tests for Ollama num_ctx context length detection and injection.
|
||||
|
||||
Covers:
|
||||
agent/model_metadata.py — query_ollama_num_ctx()
|
||||
run_agent.py — _ollama_num_ctx detection + extra_body injection
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.model_metadata import query_ollama_num_ctx
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Level 1: query_ollama_num_ctx — Ollama API interaction
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
def _mock_httpx_client(show_response_data, status_code=200):
|
||||
"""Create a mock httpx.Client context manager that returns given /api/show data."""
|
||||
mock_resp = MagicMock(status_code=status_code)
|
||||
mock_resp.json.return_value = show_response_data
|
||||
mock_client = MagicMock()
|
||||
mock_client.post.return_value = mock_resp
|
||||
mock_ctx = MagicMock()
|
||||
mock_ctx.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_ctx.__exit__ = MagicMock(return_value=False)
|
||||
return mock_ctx, mock_client
|
||||
|
||||
|
||||
class TestQueryOllamaNumCtx:
    """Test the Ollama /api/show context length query.

    Every test patches ``agent.model_metadata.detect_local_server_type`` to
    control server detection and, where a request is expected, replaces
    ``httpx.Client`` with the fake built by ``_mock_httpx_client``.
    """

    def test_returns_context_from_model_info(self):
        """Should extract context_length from GGUF model_info metadata."""
        show_data = {
            "model_info": {"llama.context_length": 131072},
            "parameters": "",
        }
        mock_ctx, _ = _mock_httpx_client(show_data)

        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
            # httpx is imported inside the function — patch the module import
            import httpx
            with patch.object(httpx, "Client", return_value=mock_ctx):
                result = query_ollama_num_ctx("llama3.1:8b", "http://localhost:11434/v1")

        assert result == 131072

    def test_prefers_explicit_num_ctx_from_modelfile(self):
        """If the Modelfile sets num_ctx explicitly, that should take priority."""
        show_data = {
            "model_info": {"llama.context_length": 131072},
            "parameters": "num_ctx 32768\ntemperature 0.7",
        }
        mock_ctx, _ = _mock_httpx_client(show_data)

        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
            import httpx
            with patch.object(httpx, "Client", return_value=mock_ctx):
                result = query_ollama_num_ctx("custom-model", "http://localhost:11434")

        assert result == 32768

    def test_returns_none_for_non_ollama_server(self):
        """Should return None if the server is not Ollama."""
        with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"):
            result = query_ollama_num_ctx("model", "http://localhost:1234")
        assert result is None

    def test_returns_none_on_connection_error(self):
        """Should return None if the server is unreachable."""
        with patch("agent.model_metadata.detect_local_server_type", side_effect=Exception("timeout")):
            result = query_ollama_num_ctx("model", "http://localhost:11434")
        assert result is None

    def test_returns_none_on_404(self):
        """Should return None if the model is not found."""
        mock_ctx, _ = _mock_httpx_client({}, status_code=404)

        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
            import httpx
            with patch.object(httpx, "Client", return_value=mock_ctx):
                result = query_ollama_num_ctx("nonexistent", "http://localhost:11434")

        assert result is None

    def test_strips_provider_prefix(self):
        """Should strip 'local:' prefix from model name before querying."""
        show_data = {
            "model_info": {"qwen2.context_length": 32768},
            "parameters": "",
        }
        mock_ctx, mock_client = _mock_httpx_client(show_data)

        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
            import httpx
            with patch.object(httpx, "Client", return_value=mock_ctx):
                result = query_ollama_num_ctx("local:qwen2.5:7b", "http://localhost:11434/v1")

        # Verify the post was called with stripped name (no "local:" prefix).
        # This is a strict check: the previous `... or call_args[0][1] is not
        # None` fallback could never usefully verify the payload.
        request_payload = mock_client.post.call_args.kwargs["json"]
        assert request_payload["name"] == "qwen2.5:7b"
        assert result == 32768

    def test_handles_qwen2_architecture_key(self):
        """Different model architectures use different key prefixes in model_info."""
        show_data = {
            "model_info": {"qwen2.context_length": 65536},
            "parameters": "",
        }
        mock_ctx, _ = _mock_httpx_client(show_data)

        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
            import httpx
            with patch.object(httpx, "Client", return_value=mock_ctx):
                result = query_ollama_num_ctx("qwen2.5:32b", "http://localhost:11434")

        assert result == 65536

    def test_returns_none_when_model_info_empty(self):
        """Should return None if model_info has no context_length key."""
        show_data = {
            "model_info": {"llama.embedding_length": 4096},
            "parameters": "",
        }
        mock_ctx, _ = _mock_httpx_client(show_data)

        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"):
            import httpx
            with patch.object(httpx, "Client", return_value=mock_ctx):
                result = query_ollama_num_ctx("model", "http://localhost:11434")

        assert result is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue