From 738f0bac1373b90e9aebeea942b61569d0bc8b30 Mon Sep 17 00:00:00 2001
From: aaronagent <1115117931@qq.com>
Date: Fri, 10 Apr 2026 12:00:31 +0800
Subject: [PATCH] fix: align auth-by-message classification with status-code
 path, decode URLs before secret check

error_classifier.py: Message-only auth errors ("invalid api key", "unauthorized",
etc.) were classified with retryable=False and should_rotate_credential=True but
without should_fallback=True (around line 735), inconsistent with the HTTP 401
path (line 432), which uses retryable=False + should_fallback=True.  Because of
the mismatch, a message-only auth error does not request fallback, while a 401
error immediately attempts it.  Align the message-based path with the
status-code path by adding should_fallback=True; retryable=False is unchanged.

web_tools.py: The _PREFIX_RE secret-detection check in web_extract_tool() runs
against the raw URL string (line 1196).  URL-encoded secrets like %73k-1234...
(i.e. sk-1234...) bypass the filter because the regex expects literal ASCII.
Also run the check against urllib.parse.unquote(url) so percent-encoded variants
are caught; the raw URL is still checked as before, and only a single decoding
pass is applied.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 agent/error_classifier.py | 1 +
 tools/web_tools.py        | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index 30a2ad491..158105030 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -734,6 +734,7 @@ def _classify_by_message(
             FailoverReason.auth,
             retryable=False,
             should_rotate_credential=True,
+            should_fallback=True,
         )
 
     # Model not found patterns
diff --git a/tools/web_tools.py b/tools/web_tools.py
index f743c4272..21a6c8a86 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -1190,10 +1190,12 @@ async def web_extract_tool(
     Raises:
         Exception: If extraction fails or API key is not set
     """
-    # Block URLs containing embedded secrets (exfiltration prevention)
+    # Block URLs containing embedded secrets (exfiltration prevention).
+    # URL-decode first so percent-encoded secrets (%73k- = sk-) are caught.
     from agent.redact import _PREFIX_RE
+    from urllib.parse import unquote
     for _url in urls:
-        if _PREFIX_RE.search(_url):
+        if _PREFIX_RE.search(_url) or _PREFIX_RE.search(unquote(_url)):
             return json.dumps({
                 "success": False,
                 "error": "Blocked: URL contains what appears to be an API key or token. "