From 77e04a29d5742e50add9c0d84a396e1eee3c4356 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 22 Apr 2026 06:11:47 -0700 Subject: [PATCH] fix(error_classifier): don't classify generic 404 as model_not_found (#14013) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 404 branch in _classify_by_status had dead code: the generic fallback below the _MODEL_NOT_FOUND_PATTERNS check returned the exact same classification (model_not_found + should_fallback=True), so every 404 — regardless of message — was treated as a missing model. This bites local-endpoint users (llama.cpp, Ollama, vLLM) whose 404s usually mean a wrong endpoint path, proxy routing glitch, or transient backend issue — not a missing model. Claiming 'model not found' misleads the next turn and silently falls back to another provider when the real problem was a URL typo the user should see. Fix: only classify 404 as model_not_found when the message actually matches _MODEL_NOT_FOUND_PATTERNS ("invalid model", "model not found", etc.). Otherwise fall through as unknown (retryable) so the real error surfaces in the retry loop. Test updated to match the new behavior. 103 error_classifier tests pass. --- agent/error_classifier.py | 13 +++++++++---- tests/agent/test_error_classifier.py | 8 +++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/agent/error_classifier.py b/agent/error_classifier.py index fcdb8ba676..14a2609d83 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -470,11 +470,16 @@ def _classify_by_status( retryable=False, should_fallback=True, ) - # Generic 404 — could be model or endpoint + # Generic 404 with no "model not found" signal — could be a wrong + # endpoint path (common with local llama.cpp / Ollama / vLLM when + # the URL is slightly misconfigured), a proxy routing glitch, or + # a transient backend issue. Classifying these as model_not_found + # silently falls back to a different provider and tells the model + # the model is missing, which is wrong and wastes a turn. Treat + # as unknown so the retry loop surfaces the real error instead. return result_fn( - FailoverReason.model_not_found, - retryable=False, - should_fallback=True, + FailoverReason.unknown, + retryable=True, ) if status_code == 413: diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index be4775a4d3..44e7059a9b 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -298,9 +298,15 @@ class TestClassifyApiError: assert result.retryable is False def test_404_generic(self): + # Generic 404 with no "model not found" signal — common for local + # llama.cpp/Ollama/vLLM endpoints with slightly wrong paths. Treat + # as unknown (retryable) so the real error surfaces, rather than + # claiming the model is missing and silently falling back. e = MockAPIError("Not Found", status_code=404) result = classify_api_error(e) - assert result.reason == FailoverReason.model_not_found + assert result.reason == FailoverReason.unknown + assert result.retryable is True + assert result.should_fallback is False # ── Payload too large ──