diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index fcdb8ba676..14a2609d83 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -470,11 +470,16 @@ def _classify_by_status(
                 retryable=False,
                 should_fallback=True,
             )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue. Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn. Treat
+        # as unknown so the retry loop surfaces the real error instead.
         return result_fn(
-            FailoverReason.model_not_found,
-            retryable=False,
-            should_fallback=True,
+            FailoverReason.unknown,
+            retryable=True,
         )
 
     if status_code == 413:
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index be4775a4d3..44e7059a9b 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -298,9 +298,15 @@ class TestClassifyApiError:
         assert result.retryable is False
 
     def test_404_generic(self):
+        # Generic 404 with no "model not found" signal — common for local
+        # llama.cpp/Ollama/vLLM endpoints with slightly wrong paths. Treat
+        # as unknown (retryable) so the real error surfaces, rather than
+        # claiming the model is missing and silently falling back.
         e = MockAPIError("Not Found", status_code=404)
         result = classify_api_error(e)
-        assert result.reason == FailoverReason.model_not_found
+        assert result.reason == FailoverReason.unknown
+        assert result.retryable is True
+        assert result.should_fallback is False
 
     # ── Payload too large ──
 