fix(agent): also treat provider=ollama as an Ollama GLM backend

Follow-up to the #13971 fix: a genuine native Ollama provider reached
through a reverse proxy carries no ollama/:11434 URL signature, so the
restricted detection would miss it. Add provider=="ollama" as an
explicit True case (idea from #14789, @Tranquil-Flow) and cover both it
and the #13971 LiteLLM-proxy-to-zai false-positive with E2E tests.
This commit is contained in:
teknium1 2026-06-27 03:49:36 -07:00 committed by Teknium
parent 266521b55f
commit f062cf076b
2 changed files with 85 additions and 1 deletions

View file

@@ -1373,12 +1373,20 @@ class AIAgent:
Detection relies on explicit Ollama signatures:
- Port 11434 (Ollama default)
- "ollama" in the base URL (e.g. ollama.local, /ollama/ path)
- provider explicitly set to "ollama"
Crucially it does NOT match arbitrary local/private endpoints
(LiteLLM/sglang/vLLM/LM Studio proxies, Tailscale boxes), which
report finish_reason correctly and were the source of #13971's
false-positive truncation continuations.
"""
model_lower = (self.model or "").lower()
provider_lower = (self.provider or "").lower()
if "glm" not in model_lower and provider_lower != "zai":
return False
return "ollama" in self._base_url_lower or ":11434" in self._base_url_lower
if "ollama" in self._base_url_lower or ":11434" in self._base_url_lower:
return True
return provider_lower == "ollama"
def _should_treat_stop_as_truncated(
self,

View file

@@ -4602,6 +4602,82 @@ class TestRunConversation:
third_call_messages = agent.client.chat.completions.create.call_args_list[2].kwargs["messages"]
assert "truncated by the output length limit" in third_call_messages[-1]["content"]
def test_ollama_provider_without_url_signature_still_triggers_heuristic(self, agent):
    """An explicit ``provider="ollama"`` alone enables the truncation heuristic.

    The base URL points at a proxy and contains neither ``ollama`` nor
    ``:11434``, so the provider setting is the only Ollama signature.  After
    a tool turn, a ``stop`` reply that breaks off mid-sentence must be
    followed by a third request carrying the truncation notice.
    """
    self._setup_agent(agent)

    # Proxy-style endpoint: no Ollama hint anywhere in the URL.
    agent.base_url = "http://my-proxy.internal:9000/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.provider = "ollama"
    agent.model = "glm-5.1:cloud"

    search_call = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
    # Scripted replies, in order: the tool call, a "stop" that ends
    # mid-sentence, and the text that completes it.
    agent.client.chat.completions.create.side_effect = [
        _mock_response(
            content="",
            finish_reason="tool_calls",
            tool_calls=[search_call],
        ),
        _mock_response(
            content="Based on the search results, the best next",
            finish_reason="stop",
        ),
        _mock_response(
            content=" step is to update the config.",
            finish_reason="stop",
        ),
    ]

    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is True
    # Three requests: tool turn, cut-off answer, continuation.
    assert outcome["api_calls"] == 3

    continuation_request = agent.client.chat.completions.create.call_args_list[2]
    last_message = continuation_request.kwargs["messages"][-1]
    assert "truncated by the output length limit" in last_message["content"]
def test_zai_via_local_proxy_does_not_trigger_heuristic(self, agent):
    """Regression test for issue #13971 (LiteLLM proxy in front of Z.AI).

    With ``provider="zai"`` on ``localhost:8000`` and no ``ollama`` /
    ``:11434`` signature in the URL, the endpoint must NOT be treated as an
    Ollama backend: its ``stop`` is taken at face value and the reply is
    delivered unchanged, with no continuation request.
    """
    self._setup_agent(agent)

    # Local proxy endpoint without any Ollama signature.
    agent.base_url = "http://localhost:8000/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.provider = "zai"
    agent.model = "glm-5-turbo"

    search_call = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
    agent.client.chat.completions.create.side_effect = [
        _mock_response(
            content="",
            finish_reason="tool_calls",
            tool_calls=[search_call],
        ),
        # A finished answer that happens to end without ASCII punctuation;
        # it must not be reclassified as truncated.
        _mock_response(
            content="Done — the config has been updated",
            finish_reason="stop",
        ),
    ]

    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is True
    # Exactly two requests: the tool turn and the final answer.
    assert outcome["api_calls"] == 2
    assert outcome["final_response"] == "Done — the config has been updated"
def test_length_thinking_exhausted_skips_continuation(self, agent):
"""When finish_reason='length' but content is only thinking, skip retries."""