diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 4d11804f4cb..39fa378a914 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -2096,7 +2096,13 @@ def _is_payment_error(exc: Exception) -> bool:
     """Detect payment/credit/quota exhaustion errors.
 
     Returns True for HTTP 402 (Payment Required) and for 429/other errors
-    whose message indicates billing exhaustion rather than rate limiting.
+    whose message indicates billing exhaustion or daily quota exhaustion
+    rather than transient rate limiting.
+
+    Daily token quota errors (e.g. Bedrock "Too many tokens per day",
+    Vertex AI "quota exceeded") are functionally equivalent to credit
+    exhaustion — the provider cannot serve the request until the quota
+    resets — and should trigger the same provider-fallback logic.
     """
     status = getattr(exc, "status_code", None)
     if status == 402:
@@ -2104,10 +2110,19 @@ def _is_payment_error(exc: Exception) -> bool:
     err_lower = str(exc).lower()
     # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
     # but sometimes wrap them in 429 or other codes.
+    # Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
+    # uses different language but is semantically identical to credit exhaustion.
     if status in {402, 429, None}:
-        if any(kw in err_lower for kw in ("credits", "insufficient funds",
-                                           "can only afford", "billing",
-                                           "payment required")):
+        if any(kw in err_lower for kw in (
+            "credits", "insufficient funds",
+            "can only afford", "billing",
+            "payment required",
+            # Daily / monthly quota exhaustion keywords
+            "quota exceeded", "quota_exceeded",
+            "too many tokens per day", "daily limit",
+            "tokens per day", "daily quota",
+            "resource exhausted",  # Vertex AI wording; NOTE(review): underscore form "resource_exhausted" is not matched
+        )):
             return True
     return False
 
@@ -4538,11 +4553,17 @@ def call_llm(
             or _is_connection_error(first_err)
             or _is_rate_limit_error(first_err)
         )
-        # Only try alternative providers when the user didn't explicitly
-        # configure this task's provider.  Explicit provider = hard constraint;
-        # auto (the default) = best-effort fallback chain.  (#7559)
+        # Respect an explicit provider choice for transient errors (e.g. plain
+        # rate limiting) but allow fallback when the provider clearly cannot
+        # serve the request due to capacity: payment/quota exhaustion and
+        # connection failures are capacity problems, not request constraints.
+        # See #26803: daily token quota (429 + "too many tokens per day") must
+        # fall back just like a 402 credit error.
         is_auto = resolved_provider in {"auto", "", None}
-        if should_fallback and is_auto:
+        # Capacity errors bypass the explicit-provider gate: the provider
+        # literally cannot serve this request regardless of user intent.
+        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
+        if should_fallback and (is_auto or is_capacity_error):
             if _is_payment_error(first_err):
                 reason = "payment error"
                 # Resolve the actual provider label (resolved_provider may be
@@ -4870,8 +4891,12 @@ async def async_call_llm(
             or _is_connection_error(first_err)
             or _is_rate_limit_error(first_err)
         )
+        # Capacity errors (payment/quota/connection) bypass the explicit-provider
+        # gate — the provider cannot serve the request regardless of user intent.
+        # See #26803: daily token quota must fall back like a 402 credit error.
         is_auto = resolved_provider in {"auto", "", None}
-        if should_fallback and is_auto:
+        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
+        if should_fallback and (is_auto or is_capacity_error):
             if _is_payment_error(first_err):
                 reason = "payment error"
                 _mark_provider_unhealthy(
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 61af7585a21..6194d586928 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -925,6 +925,44 @@ class TestIsPaymentError:
         exc = Exception("connection reset")
         assert _is_payment_error(exc) is False
 
+    # ── Daily / monthly quota exhaustion (#26803) ────────────────────────────
+
+    def test_429_quota_exceeded(self):
+        """Cloud provider quota exhaustion (e.g. Vertex AI) is a payment error."""
+        exc = Exception("RESOURCE_EXHAUSTED: quota exceeded for project")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is True
+
+    def test_429_too_many_tokens_per_day(self):
+        """Bedrock / LiteLLM daily token limit is a payment error."""
+        exc = Exception("Too many tokens per day: 1000000 used, 1000000 limit")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is True
+
+    def test_429_daily_limit_phrase(self):
+        """Generic 'daily limit' phrasing is a payment error."""
+        exc = Exception("You have exceeded your daily limit.")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is True
+
+    def test_429_resource_exhausted_grpc(self):
+        """Space-separated 'resource exhausted' phrasing is a payment error."""
+        exc = Exception("resource exhausted")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is True
+
+    def test_429_daily_quota_phrase(self):
+        """'daily quota' phrasing is a payment error."""
+        exc = Exception("Daily quota of 500 requests reached.")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is True
+
+    def test_429_transient_rate_limit_not_quota(self):
+        """Transient 429 rate limit without quota keywords is NOT a payment error."""
+        exc = Exception("Rate limit exceeded. Retry after 10s.")
+        exc.status_code = 429
+        assert _is_payment_error(exc) is False
+
 
 class TestIsRateLimitError:
     """_is_rate_limit_error detects 429 rate-limit errors warranting fallback."""