mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(agent): also treat provider=ollama as an Ollama GLM backend
Follow-up to the #13971 fix: a genuine native Ollama provider reached through a reverse proxy carries no ollama/:11434 URL signature, so the restricted detection would miss it. Add provider=="ollama" as an explicit True case (idea from #14789, @Tranquil-Flow) and cover both it and the #13971 LiteLLM-proxy-to-zai false-positive with E2E tests.
This commit is contained in:
parent
266521b55f
commit
f062cf076b
2 changed files with 85 additions and 1 deletions
10
run_agent.py
10
run_agent.py
|
|
@ -1373,12 +1373,20 @@ class AIAgent:
|
|||
Detection relies on explicit Ollama signatures:
|
||||
- Port 11434 (Ollama default)
|
||||
- "ollama" in the base URL (e.g. ollama.local, /ollama/ path)
|
||||
- provider explicitly set to "ollama"
|
||||
|
||||
Crucially it does NOT match arbitrary local/private endpoints
|
||||
(LiteLLM/sglang/vLLM/LM Studio proxies, Tailscale boxes), which
|
||||
report finish_reason correctly and were the source of #13971's
|
||||
false-positive truncation continuations.
|
||||
"""
|
||||
model_lower = (self.model or "").lower()
|
||||
provider_lower = (self.provider or "").lower()
|
||||
if "glm" not in model_lower and provider_lower != "zai":
|
||||
return False
|
||||
return "ollama" in self._base_url_lower or ":11434" in self._base_url_lower
|
||||
if "ollama" in self._base_url_lower or ":11434" in self._base_url_lower:
|
||||
return True
|
||||
return provider_lower == "ollama"
|
||||
|
||||
def _should_treat_stop_as_truncated(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -4602,6 +4602,82 @@ class TestRunConversation:
|
|||
third_call_messages = agent.client.chat.completions.create.call_args_list[2].kwargs["messages"]
|
||||
assert "truncated by the output length limit" in third_call_messages[-1]["content"]
|
||||
|
||||
def test_ollama_provider_without_url_signature_still_triggers_heuristic(self, agent):
    """An explicit ``provider='ollama'`` alone enables the truncation heuristic.

    The base URL here points at a reverse proxy and contains neither
    ``ollama`` nor ``:11434``. A ``stop`` reply that ends mid-sentence after
    a tool turn is still expected to produce a third (continuation) call.
    """
    self._setup_agent(agent)
    agent.base_url = "http://my-proxy.internal:9000/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.provider = "ollama"
    agent.model = "glm-5.1:cloud"

    # Scripted replies, in call order: tool call, cut-off "stop", continuation.
    agent.client.chat.completions.create.side_effect = [
        _mock_response(
            content="",
            finish_reason="tool_calls",
            tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
        ),
        _mock_response(
            content="Based on the search results, the best next",
            finish_reason="stop",
        ),
        _mock_response(
            content=" step is to update the config.",
            finish_reason="stop",
        ),
    ]

    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is True
    assert outcome["api_calls"] == 3

    # The third request must end with the truncation-continuation prompt.
    recorded_calls = agent.client.chat.completions.create.call_args_list
    continuation_messages = recorded_calls[2].kwargs["messages"]
    last_message = continuation_messages[-1]
    assert "truncated by the output length limit" in last_message["content"]
|
||||
|
||||
def test_zai_via_local_proxy_does_not_trigger_heuristic(self, agent):
    """Regression test for issue #13971: a local proxy is not an Ollama backend.

    With ``provider='zai'`` on ``localhost:8000`` (no ``ollama``/``:11434``
    signature), a ``stop`` reply must be delivered unchanged, with no
    continuation request issued.
    """
    self._setup_agent(agent)
    agent.base_url = "http://localhost:8000/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.provider = "zai"
    agent.model = "glm-5-turbo"

    expected_text = "Done — the config has been updated"

    agent.client.chat.completions.create.side_effect = [
        _mock_response(
            content="",
            finish_reason="tool_calls",
            tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
        ),
        # A complete answer that does not end in ASCII punctuation; it must
        # NOT be reclassified as truncated.
        _mock_response(
            content=expected_text,
            finish_reason="stop",
        ),
    ]

    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is True
    # Exactly two calls: the tool turn and the final answer.
    assert outcome["api_calls"] == 2
    assert outcome["final_response"] == expected_text
|
||||
|
||||
def test_length_thinking_exhausted_skips_continuation(self, agent):
|
||||
"""When finish_reason='length' but content is only thinking, skip retries."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue