diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index b85f77a9d23..a3f3d2261ba 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -42,26 +42,26 @@ ADAPTIVE_EFFORT_MAP = {
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
     # Claude 4.6
-    "claude-opus-4-6":   128_000,
-    "claude-sonnet-4-6":  64_000,
+    "claude-opus-4-6": 128_000,
+    "claude-sonnet-4-6": 64_000,
     # Claude 4.5
-    "claude-opus-4-5":    64_000,
-    "claude-sonnet-4-5":  64_000,
-    "claude-haiku-4-5":   64_000,
+    "claude-opus-4-5": 64_000,
+    "claude-sonnet-4-5": 64_000,
+    "claude-haiku-4-5": 64_000,
     # Claude 4
-    "claude-opus-4":      32_000,
-    "claude-sonnet-4":    64_000,
+    "claude-opus-4": 32_000,
+    "claude-sonnet-4": 64_000,
     # Claude 3.7
     "claude-3-7-sonnet": 128_000,
     # Claude 3.5
-    "claude-3-5-sonnet":   8_192,
-    "claude-3-5-haiku":    8_192,
+    "claude-3-5-sonnet": 8_192,
+    "claude-3-5-haiku": 8_192,
     # Claude 3
-    "claude-3-opus":       4_096,
-    "claude-3-sonnet":     4_096,
-    "claude-3-haiku":      4_096,
+    "claude-3-opus": 4_096,
+    "claude-3-sonnet": 4_096,
+    "claude-3-haiku": 4_096,
     # Third-party Anthropic-compatible providers
-    "minimax":            131_072,
+    "minimax": 131_072,
 }
 
 # For any model not in the table, assume the highest current limit.
@@ -138,7 +138,9 @@ def _detect_claude_code_version() -> str:
         try:
             result = _sp.run(
                 [cmd, "--version"],
-                capture_output=True, text=True, timeout=5,
+                capture_output=True,
+                text=True,
+                timeout=5,
             )
             if result.returncode == 0 and result.stdout.strip():
                 # Output is like "2.1.74 (Claude Code)" or just "2.1.74"
@@ -224,7 +226,9 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
     if not normalized:
         return False
     normalized = normalized.rstrip("/").lower()
-    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
+    return normalized.startswith(
+        ("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")
+    )
 
 
 def _common_betas_for_base_url(base_url: str | None) -> list[str]:
@@ -357,7 +361,9 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
     return now_ms < (expires_at - 60_000)
 
 
-def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
+def refresh_anthropic_oauth_pure(
+    refresh_token: str, *, use_json: bool = False
+) -> Dict[str, Any]:
     """Refresh an Anthropic OAuth token without mutating local credential files."""
     import time
     import urllib.parse
@@ -368,18 +374,22 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
 
     client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
     if use_json:
-        data = json.dumps({
-            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "client_id": client_id,
-        }).encode()
+        data = json.dumps(
+            {
+                "grant_type": "refresh_token",
+                "refresh_token": refresh_token,
+                "client_id": client_id,
+            }
+        ).encode()
         content_type = "application/json"
     else:
-        data = urllib.parse.urlencode({
-            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "client_id": client_id,
-        }).encode()
+        data = urllib.parse.urlencode(
+            {
+                "grant_type": "refresh_token",
+                "refresh_token": refresh_token,
+                "client_id": client_id,
+            }
+        ).encode()
         content_type = "application/x-www-form-urlencoded"
 
     token_endpoints = [
@@ -485,7 +495,9 @@ def _write_claude_code_credentials(
         logger.debug("Failed to write refreshed credentials: %s", e)
 
 
-def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
+def _resolve_claude_code_token_from_credentials(
+    creds: Optional[Dict[str, Any]] = None,
+) -> Optional[str]:
     """Resolve a token from Claude Code credential files, refreshing if needed."""
     creds = creds or read_claude_code_credentials()
     if creds and is_claude_code_token_valid(creds):
@@ -496,11 +508,15 @@ def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]]
         refreshed = _refresh_oauth_token(creds)
         if refreshed:
             return refreshed
-        logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
+        logger.debug(
+            "Token refresh failed — re-run 'claude setup-token' to reauthenticate"
+        )
     return None
 
 
-def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
+def _prefer_refreshable_claude_code_token(
+    env_token: str, creds: Optional[Dict[str, Any]]
+) -> Optional[str]:
     """Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
 
     Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
@@ -624,9 +640,11 @@ def _generate_pkce() -> tuple:
     import secrets
 
     verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
-    challenge = base64.urlsafe_b64encode(
-        hashlib.sha256(verifier.encode()).digest()
-    ).rstrip(b"=").decode()
+    challenge = (
+        base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest())
+        .rstrip(b"=")
+        .decode()
+    )
     return verifier, challenge
 
 
@@ -687,14 +705,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
     try:
         import urllib.request
 
-        exchange_data = json.dumps({
-            "grant_type": "authorization_code",
-            "client_id": _OAUTH_CLIENT_ID,
-            "code": code,
-            "state": state,
-            "redirect_uri": _OAUTH_REDIRECT_URI,
-            "code_verifier": verifier,
-        }).encode()
+        exchange_data = json.dumps(
+            {
+                "grant_type": "authorization_code",
+                "client_id": _OAUTH_CLIENT_ID,
+                "code": code,
+                "state": state,
+                "redirect_uri": _OAUTH_REDIRECT_URI,
+                "code_verifier": verifier,
+            }
+        ).encode()
 
         req = urllib.request.Request(
             _OAUTH_TOKEN_URL,
@@ -755,7 +775,7 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
     """
     lower = model.lower()
     if lower.startswith("anthropic/"):
-        model = model[len("anthropic/"):]
+        model = model[len("anthropic/") :]
     if not preserve_dots:
         # OpenRouter uses dots for version separators (claude-opus-4.6),
         # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
@@ -770,6 +790,7 @@ def _sanitize_tool_id(tool_id: str) -> str:
     characters with underscores and ensure non-empty.
     """
     import re
+
     if not tool_id:
         return "tool_0"
     sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
@@ -783,11 +804,15 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
     result = []
     for t in tools:
         fn = t.get("function", {})
-        result.append({
-            "name": fn.get("name", ""),
-            "description": fn.get("description", ""),
-            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
-        })
+        result.append(
+            {
+                "name": fn.get("name", ""),
+                "description": fn.get("description", ""),
+                "input_schema": fn.get(
+                    "parameters", {"type": "object", "properties": {}}
+                ),
+            }
+        )
     return result
 
 
@@ -801,7 +826,7 @@ def _image_source_from_openai_url(url: str) -> Dict[str, str]:
         header, _, data = url.partition(",")
         media_type = "image/jpeg"
         if header.startswith("data:"):
-            mime_part = header[len("data:"):].split(";", 1)[0].strip()
+            mime_part = header[len("data:") :].split(";", 1)[0].strip()
             if mime_part.startswith("image/"):
                 media_type = mime_part
         return {
@@ -828,7 +853,11 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
         block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
     elif ptype in {"image_url", "input_image"}:
         image_value = part.get("image_url", {})
-        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
+        url = (
+            image_value.get("url", "")
+            if isinstance(image_value, dict)
+            else str(image_value or "")
+        )
         block = {"type": "image", "source": _image_source_from_openai_url(url)}
     else:
         block = dict(part)
@@ -864,7 +893,10 @@ def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None)
         return result
     if isinstance(value, dict):
         _path.add(obj_id)
-        result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
+        result = {
+            k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
+            for k, v in value.items()
+        }
         _path.discard(obj_id)
         return result
     if isinstance(value, (list, tuple)):
@@ -925,9 +957,10 @@ def convert_messages_to_anthropic(
     system_prompt is a string or list of content blocks (when cache_control present).
 
     When *base_url* is provided and points to a third-party Anthropic-compatible
-    endpoint, all thinking block signatures are stripped.  Signatures are
-    Anthropic-proprietary — third-party endpoints cannot validate them and will
-    reject them with HTTP 400 "Invalid signature in thinking block".
+    endpoint, Anthropic thinking signatures are removed. Thinking blocks are
+    downgraded to plain text blocks to preserve reasoning context (blocks with
+    empty thinking text are dropped), and redacted_thinking blocks are dropped.
+    Third-party endpoints cannot validate signatures and may reject them (HTTP 400).
     """
     system = None
     result = []
@@ -970,12 +1003,14 @@ def convert_messages_to_anthropic(
                     parsed_args = json.loads(args) if isinstance(args, str) else args
                 except (json.JSONDecodeError, ValueError):
                     parsed_args = {}
-                blocks.append({
-                    "type": "tool_use",
-                    "id": _sanitize_tool_id(tc.get("id", "")),
-                    "name": fn.get("name", ""),
-                    "input": parsed_args,
-                })
+                blocks.append(
+                    {
+                        "type": "tool_use",
+                        "id": _sanitize_tool_id(tc.get("id", "")),
+                        "name": fn.get("name", ""),
+                        "input": parsed_args,
+                    }
+                )
             # Anthropic rejects empty assistant content
             effective = blocks or content
             if not effective or effective == "":
@@ -985,7 +1020,9 @@ def convert_messages_to_anthropic(
 
         if role == "tool":
             # Sanitize tool_use_id and ensure non-empty content
-            result_content = content if isinstance(content, str) else json.dumps(content)
+            result_content = (
+                content if isinstance(content, str) else json.dumps(content)
+            )
             if not result_content:
                 result_content = "(no output)"
             tool_result = {
@@ -1057,7 +1094,8 @@ def convert_messages_to_anthropic(
             m["content"] = [
                 b
                 for b in m["content"]
-                if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
+                if b.get("type") != "tool_result"
+                or b.get("tool_use_id") in tool_use_ids
             ]
             if not m["content"]:
                 m["content"] = [{"type": "text", "text": "(tool result removed)"}]
@@ -1088,8 +1126,12 @@ def convert_messages_to_anthropic(
                 # and becomes invalid once merged.
                 if isinstance(m["content"], list):
                     m["content"] = [
-                        b for b in m["content"]
-                        if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
+                        b
+                        for b in m["content"]
+                        if not (
+                            isinstance(b, dict)
+                            and b.get("type") in ("thinking", "redacted_thinking")
+                        )
                     ]
                 prev_blocks = fixed[-1]["content"]
                 curr_blocks = m["content"]
@@ -1117,9 +1159,8 @@ def convert_messages_to_anthropic(
     # Signatures are Anthropic-proprietary.  Third-party endpoints
     # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
     # them and will reject them outright.  When targeting a third-party
-    # endpoint, strip ALL thinking/redacted_thinking blocks from every
-    # assistant message — the third-party will generate its own
-    # thinking blocks if it supports extended thinking.
+    # endpoint, downgrade thinking blocks to plain text and drop
+    # redacted_thinking blocks.
     #
     # For direct Anthropic (strategy following clawdbot/OpenClaw):
     # 1. Strip thinking/redacted_thinking from all assistant messages
@@ -1142,12 +1183,33 @@ def convert_messages_to_anthropic(
         if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
             continue
 
-        if _is_third_party or idx != last_assistant_idx:
-            # Third-party endpoint: strip ALL thinking blocks from every
-            # assistant message — signatures are Anthropic-proprietary.
-            # Direct Anthropic: strip from non-latest assistant messages only.
+        if _is_third_party:
+            # Third-party endpoint: Anthropic signatures are proprietary
+            # and will be rejected.  Downgrade thinking blocks to plain
+            # text so the model retains reasoning context across turns.
+            # (Direct Anthropic would validate signatures; third-party
+            # endpoints like z.ai / GLM-5.1 don't use signatures at all.)
+            _tp_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    _tp_content.append(b)
+                    continue
+                # redacted_thinking carries opaque data — drop it.
+                if b.get("type") == "redacted_thinking":
+                    continue
+                # Thinking → text block (dropped when empty) to keep reasoning context.
+                thinking_text = b.get("thinking", "")
+                if thinking_text:
+                    _tp_content.append({"type": "text", "text": thinking_text})
+            m["content"] = _tp_content or [
+                {"type": "text", "text": "(thinking elided)"}
+            ]
+        elif idx != last_assistant_idx:
+            # Direct Anthropic: strip thinking from non-latest assistant
+            # messages to avoid stale-signature 400s.
             stripped = [
-                b for b in m["content"]
+                b
+                for b in m["content"]
                 if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
             ]
             m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
@@ -1235,7 +1297,9 @@ def build_anthropic_kwargs(
     Currently only supported on native Anthropic endpoints (not third-party
     compatible ones).
     """
-    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
+    system, anthropic_messages = convert_messages_to_anthropic(
+        messages, base_url=base_url
+    )
     anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
 
     model = normalize_model_name(model, preserve_dots=preserve_dots)
@@ -1287,7 +1351,10 @@ def build_anthropic_kwargs(
                         if block.get("type") == "tool_use" and "name" in block:
                             if not block["name"].startswith(_MCP_TOOL_PREFIX):
                                 block["name"] = _MCP_TOOL_PREFIX + block["name"]
-                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                        elif (
+                            block.get("type") == "tool_result"
+                            and "tool_use_id" in block
+                        ):
                             pass  # tool_result uses ID, not name
 
     kwargs: Dict[str, Any] = {
@@ -1319,7 +1386,10 @@ def build_anthropic_kwargs(
     # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
     # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
     if reasoning_config and isinstance(reasoning_config, dict):
-        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
+        if (
+            reasoning_config.get("enabled") is not False
+            and "haiku" not in model.lower()
+        ):
             effort = str(reasoning_config.get("effort", "medium")).lower()
             budget = THINKING_BUDGET.get(effort, 8000)
             if _supports_adaptive_thinking(model):
@@ -1378,7 +1448,7 @@ def normalize_anthropic_response(
         elif block.type == "tool_use":
             name = block.name
             if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
+                name = name[len(_MCP_TOOL_PREFIX) :]
             tool_calls.append(
                 SimpleNamespace(
                     id=block.id,
diff --git a/run_agent.py b/run_agent.py
index efaeba82945..bc917884e75 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -15,7 +15,7 @@ Features:
 
 Usage:
     from run_agent import AIAgent
-    
+
     agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
     response = agent.run_conversation("Tell me about the latest Python updates")
 """
@@ -27,6 +27,7 @@ import copy
 import hashlib
 import json
 import logging
+
 logger = logging.getLogger(__name__)
 import os
 import random
@@ -50,8 +51,10 @@ from hermes_constants import get_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
 
 _hermes_home = get_hermes_home()
-_project_env = Path(__file__).parent / '.env'
-_loaded_env_paths = load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
+_project_env = Path(__file__).parent / ".env"
+_loaded_env_paths = load_hermes_dotenv(
+    hermes_home=_hermes_home, project_env=_project_env
+)
 if _loaded_env_paths:
     for _env_path in _loaded_env_paths:
         logger.info("Loaded environment variables from %s", _env_path)
@@ -79,37 +82,55 @@ from agent.memory_manager import build_memory_context_block
 from agent.retry_utils import jittered_backoff
 from agent.error_classifier import classify_api_error, FailoverReason
 from agent.prompt_builder import (
-    DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
-    MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
+    DEFAULT_AGENT_IDENTITY,
+    PLATFORM_HINTS,
+    MEMORY_GUIDANCE,
+    SESSION_SEARCH_GUIDANCE,
+    SKILLS_GUIDANCE,
     build_nous_subscription_prompt,
 )
 from agent.model_metadata import (
     fetch_model_metadata,
-    estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough,
-    get_next_probe_tier, parse_context_limit_from_error,
+    estimate_tokens_rough,
+    estimate_messages_tokens_rough,
+    estimate_request_tokens_rough,
+    get_next_probe_tier,
+    parse_context_limit_from_error,
     parse_available_output_tokens_from_error,
-    save_context_length, is_local_endpoint,
+    save_context_length,
+    is_local_endpoint,
     query_ollama_num_ctx,
 )
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import (
+    build_skills_system_prompt,
+    build_context_files_prompt,
+    build_environment_hints,
+    load_soul_md,
+    TOOL_USE_ENFORCEMENT_GUIDANCE,
+    TOOL_USE_ENFORCEMENT_MODELS,
+    DEVELOPER_ROLE_MODELS,
+    GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
+    OPENAI_MODEL_EXECUTION_GUIDANCE,
+)
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
-    KawaiiSpinner, build_tool_preview as _build_tool_preview,
+    KawaiiSpinner,
+    build_tool_preview as _build_tool_preview,
     get_cute_tool_message as _get_cute_tool_message_impl,
     _detect_tool_failure,
     get_tool_emoji as _get_tool_emoji,
 )
 from agent.trajectory import (
-    convert_scratchpad_to_think, has_incomplete_scratchpad,
+    convert_scratchpad_to_think,
+    has_incomplete_scratchpad,
     save_trajectory as _save_trajectory_to_file,
 )
 from utils import atomic_json_write, env_var_enabled
 
 
-
 class _SafeWriter:
     """Transparent stdio wrapper that catches OSError/ValueError from broken pipes.
 
@@ -216,19 +237,21 @@ class IterationBudget:
 _NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
 
 # Read-only tools with no shared mutable session state.
-_PARALLEL_SAFE_TOOLS = frozenset({
-    "ha_get_state",
-    "ha_list_entities",
-    "ha_list_services",
-    "read_file",
-    "search_files",
-    "session_search",
-    "skill_view",
-    "skills_list",
-    "vision_analyze",
-    "web_extract",
-    "web_search",
-})
+_PARALLEL_SAFE_TOOLS = frozenset(
+    {
+        "ha_get_state",
+        "ha_list_entities",
+        "ha_list_services",
+        "read_file",
+        "search_files",
+        "session_search",
+        "skill_view",
+        "skills_list",
+        "vision_analyze",
+        "web_extract",
+        "web_search",
+    }
+)
 
 # File tools can run concurrently when they target independent paths.
 _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
@@ -250,7 +273,7 @@ _DESTRUCTIVE_PATTERNS = re.compile(
     re.VERBOSE,
 )
 # Output redirects that overwrite files (> but not >>)
-_REDIRECT_OVERWRITE = re.compile(r'[^>]>[^>]|^>[^>]')
+_REDIRECT_OVERWRITE = re.compile(r"[^>]>[^>]|^>[^>]")
 
 
 def _is_destructive_command(cmd: str) -> bool:
@@ -297,7 +320,9 @@ def _should_parallelize_tool_batch(tool_calls) -> bool:
             scoped_path = _extract_parallel_scope_path(tool_name, function_args)
             if scoped_path is None:
                 return False
-            if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
+            if any(
+                _paths_overlap(scoped_path, existing) for existing in reserved_paths
+            ):
                 return False
             reserved_paths.append(scoped_path)
             continue
@@ -336,10 +361,7 @@ def _paths_overlap(left: Path, right: Path) -> bool:
     return left_parts[:common_len] == right_parts[:common_len]
 
 
-
-_SURROGATE_RE = re.compile(r'[\ud800-\udfff]')
-
-
+_SURROGATE_RE = re.compile(r"[\ud800-\udfff]")
 
 
 def _sanitize_surrogates(text: str) -> str:
@@ -349,7 +371,7 @@ def _sanitize_surrogates(text: str) -> str:
     OpenAI SDK.  This is a fast no-op when the text contains no surrogates.
     """
     if _SURROGATE_RE.search(text):
-        return _SURROGATE_RE.sub('\ufffd', text)
+        return _SURROGATE_RE.sub("\ufffd", text)
     return text
 
 
@@ -366,18 +388,18 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
             continue
         content = msg.get("content")
         if isinstance(content, str) and _SURROGATE_RE.search(content):
-            msg["content"] = _SURROGATE_RE.sub('\ufffd', content)
+            msg["content"] = _SURROGATE_RE.sub("\ufffd", content)
             found = True
         elif isinstance(content, list):
             for part in content:
                 if isinstance(part, dict):
                     text = part.get("text")
                     if isinstance(text, str) and _SURROGATE_RE.search(text):
-                        part["text"] = _SURROGATE_RE.sub('\ufffd', text)
+                        part["text"] = _SURROGATE_RE.sub("\ufffd", text)
                         found = True
         name = msg.get("name")
         if isinstance(name, str) and _SURROGATE_RE.search(name):
-            msg["name"] = _SURROGATE_RE.sub('\ufffd', name)
+            msg["name"] = _SURROGATE_RE.sub("\ufffd", name)
             found = True
         tool_calls = msg.get("tool_calls")
         if isinstance(tool_calls, list):
@@ -386,17 +408,17 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
                     continue
                 tc_id = tc.get("id")
                 if isinstance(tc_id, str) and _SURROGATE_RE.search(tc_id):
-                    tc["id"] = _SURROGATE_RE.sub('\ufffd', tc_id)
+                    tc["id"] = _SURROGATE_RE.sub("\ufffd", tc_id)
                     found = True
                 fn = tc.get("function")
                 if isinstance(fn, dict):
                     fn_name = fn.get("name")
                     if isinstance(fn_name, str) and _SURROGATE_RE.search(fn_name):
-                        fn["name"] = _SURROGATE_RE.sub('\ufffd', fn_name)
+                        fn["name"] = _SURROGATE_RE.sub("\ufffd", fn_name)
                         found = True
                     fn_args = fn.get("arguments")
                     if isinstance(fn_args, str) and _SURROGATE_RE.search(fn_args):
-                        fn["arguments"] = _SURROGATE_RE.sub('\ufffd', fn_args)
+                        fn["arguments"] = _SURROGATE_RE.sub("\ufffd", fn_args)
                         found = True
     return found
 
@@ -407,7 +429,7 @@ def _strip_non_ascii(text: str) -> str:
     Used as a last resort when the system encoding is ASCII and can't handle
     any non-ASCII characters (e.g. LANG=C on Chromebooks).
     """
-    return text.encode('ascii', errors='ignore').decode('ascii')
+    return text.encode("ascii", errors="ignore").decode("ascii")
 
 
 def _sanitize_messages_non_ascii(messages: list) -> bool:
@@ -494,9 +516,6 @@ def _sanitize_structure_non_ascii(payload: Any) -> bool:
     return found
 
 
-
-
-
 # =========================================================================
 # Large tool result handler — save oversized output to temp file
 # =========================================================================
@@ -663,7 +682,9 @@ class AIAgent:
         # instead of going directly to stdout where patch_stdout's StdoutProxy
         # would mangle the escape sequences.  None = use builtins.print.
         self._print_fn = None
-        self.background_review_callback = None  # Optional sync callback for gateway delivery
+        self.background_review_callback = (
+            None  # Optional sync callback for gateway delivery
+        )
         self.skip_context_files = skip_context_files
         self.pass_session_id = pass_session_id
         self.persist_session = persist_session
@@ -672,7 +693,11 @@ class AIAgent:
         self.log_prefix = f"{log_prefix} " if log_prefix else ""
         # Store effective base URL for feature detection (prompt caching, reasoning, etc.)
         self.base_url = base_url or ""
-        provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
+        provider_name = (
+            provider.strip().lower()
+            if isinstance(provider, str) and provider.strip()
+            else None
+        )
         self.provider = provider_name or ""
         self.acp_command = acp_command or command
         self.acp_args = list(acp_args or args or [])
@@ -680,10 +705,14 @@ class AIAgent:
             self.api_mode = api_mode
         elif self.provider == "openai-codex":
             self.api_mode = "codex_responses"
-        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
+        elif (
+            provider_name is None
+        ) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
             self.api_mode = "codex_responses"
             self.provider = "openai-codex"
-        elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
+        elif self.provider == "anthropic" or (
+            provider_name is None and "api.anthropic.com" in self._base_url_lower
+        ):
             self.api_mode = "anthropic_messages"
             self.provider = "anthropic"
         elif self._base_url_lower.rstrip("/").endswith("/anthropic"):
@@ -745,7 +774,6 @@ class AIAgent:
         self.status_callback = status_callback
         self.tool_gen_callback = tool_gen_callback
 
-        
         # Tool execution state — allows _vprint during tool execution
         # even when stream consumers are registered (no tokens streaming then)
         self._executing_tools = False
@@ -755,12 +783,12 @@ class AIAgent:
         self._interrupt_message = None  # Optional message that triggered interrupt
         self._execution_thread_id: int | None = None  # Set at run_conversation() start
         self._client_lock = threading.RLock()
-        
+
         # Subagent delegation state
-        self._delegate_depth = 0        # 0 = top-level agent, incremented for children
-        self._active_children = []      # Running child AIAgents (for interrupt propagation)
+        self._delegate_depth = 0  # 0 = top-level agent, incremented for children
+        self._active_children = []  # Running child AIAgents (for interrupt propagation)
         self._active_children_lock = threading.Lock()
-        
+
         # Store OpenRouter provider preferences
         self.providers_allowed = providers_allowed
         self.providers_ignored = providers_ignored
@@ -772,24 +800,28 @@ class AIAgent:
         # Store toolset filtering options
         self.enabled_toolsets = enabled_toolsets
         self.disabled_toolsets = disabled_toolsets
-        
+
         # Model response configuration
         self.max_tokens = max_tokens  # None = use model default
-        self.reasoning_config = reasoning_config  # None = use default (medium for OpenRouter)
+        self.reasoning_config = (
+            reasoning_config  # None = use default (medium for OpenRouter)
+        )
         self.service_tier = service_tier
         self.request_overrides = dict(request_overrides or {})
         self.prefill_messages = prefill_messages or []  # Prefilled conversation turns
         self._force_ascii_payload = False
-        
+
         # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
         # Reduces input costs by ~75% on multi-turn conversations by caching the
         # conversation prefix. Uses system_and_3 strategy (4 breakpoints).
         is_openrouter = self._is_openrouter_url()
         is_claude = "claude" in self.model.lower()
-        is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic"
+        is_native_anthropic = (
+            self.api_mode == "anthropic_messages" and self.provider == "anthropic"
+        )
         self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
         self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)
-        
+
         # Iteration budget: the LLM is only notified when it actually exhausts
         # the iteration budget (api_call_count >= max_iterations).  At that
         # point we inject ONE message, allow one final API call, and if the
@@ -822,6 +854,7 @@ class AIAgent:
         # both live under ~/.hermes/logs/.  Idempotent, so gateway mode
         # (which creates a new AIAgent per message) won't duplicate handlers.
         from hermes_logging import setup_logging, setup_verbose_logging
+
         setup_logging(hermes_home=_hermes_home)
 
         if self.verbose_logging:
@@ -834,14 +867,14 @@ class AIAgent:
                 # for status; logger INFO/WARNING messages just clutter it.
                 # File handlers (agent.log, errors.log) still capture everything.
                 for quiet_logger in [
-                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
-                    'run_agent',            # agent runner internals
-                    'trajectory_compressor',
-                    'cron',                 # scheduler (only relevant in daemon mode)
-                    'hermes_cli',           # CLI helpers
+                    "tools",  # all tools.* (terminal, browser, web, file, etc.)
+                    "run_agent",  # agent runner internals
+                    "trajectory_compressor",
+                    "cron",  # scheduler (only relevant in daemon mode)
+                    "hermes_cli",  # CLI helpers
                 ]:
                     logging.getLogger(quiet_logger).setLevel(logging.ERROR)
-        
+
         # Internal stream callback (set during streaming TTS).
         # Initialized here so _vprint can reference it before run_conversation.
         self._stream_callback = None
@@ -874,23 +907,34 @@ class AIAgent:
         self._is_anthropic_oauth = False
 
         if self.api_mode == "anthropic_messages":
-            from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+            from agent.anthropic_adapter import (
+                build_anthropic_client,
+                resolve_anthropic_token,
+            )
+
             # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
             # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key.
             # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
             _is_native_anthropic = self.provider == "anthropic"
-            effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
+            effective_key = (
+                (api_key or resolve_anthropic_token() or "")
+                if _is_native_anthropic
+                else (api_key or "")
+            )
             self.api_key = effective_key
             self._anthropic_api_key = effective_key
             self._anthropic_base_url = base_url
             from agent.anthropic_adapter import _is_oauth_token as _is_oat
+
             self._is_anthropic_oauth = _is_oat(effective_key)
             self._anthropic_client = build_anthropic_client(effective_key, base_url)
             # No OpenAI client needed for Anthropic mode
             self.client = None
             self._client_kwargs = {}
             if not self.quiet_mode:
-                print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
+                print(
+                    f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)"
+                )
                 if effective_key and len(effective_key) > 12:
                     print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
         else:
@@ -921,16 +965,23 @@ class AIAgent:
             else:
                 # No explicit creds — use the centralized provider router
                 from agent.auxiliary_client import resolve_provider_client
+
                 _routed_client, _ = resolve_provider_client(
-                    self.provider or "auto", model=self.model, raw_codex=True)
+                    self.provider or "auto", model=self.model, raw_codex=True
+                )
                 if _routed_client is not None:
                     client_kwargs = {
                         "api_key": _routed_client.api_key,
                         "base_url": str(_routed_client.base_url),
                     }
                     # Preserve any default_headers the router set
-                    if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
-                        client_kwargs["default_headers"] = dict(_routed_client._default_headers)
+                    if (
+                        hasattr(_routed_client, "_default_headers")
+                        and _routed_client._default_headers
+                    ):
+                        client_kwargs["default_headers"] = dict(
+                            _routed_client._default_headers
+                        )
                 else:
                     # When the user explicitly chose a non-OpenRouter provider
                     # but no credentials were found, fail fast with a clear
@@ -952,7 +1003,7 @@ class AIAgent:
                             "X-OpenRouter-Categories": "productivity,cli-agent",
                         },
                     }
-            
+
             self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
 
             # Enable fine-grained tool streaming for Claude on OpenRouter.
@@ -962,7 +1013,10 @@ class AIAgent:
             # stream tool call arguments token-by-token, keeping the
             # connection alive.
             _effective_base = str(client_kwargs.get("base_url", "")).lower()
-            if "openrouter" in _effective_base and "claude" in (self.model or "").lower():
+            if (
+                "openrouter" in _effective_base
+                and "claude" in (self.model or "").lower()
+            ):
                 headers = client_kwargs.get("default_headers") or {}
                 existing_beta = headers.get("x-anthropic-beta", "")
                 _FINE_GRAINED = "fine-grained-tool-streaming-2025-05-14"
@@ -976,7 +1030,9 @@ class AIAgent:
             self.api_key = client_kwargs.get("api_key", "")
             self.base_url = client_kwargs.get("base_url", self.base_url)
             try:
-                self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
+                self.client = self._create_openai_client(
+                    client_kwargs, reason="agent_init", shared=True
+                )
                 if not self.quiet_mode:
                     print(f"🤖 AI Agent initialized with model: {self.model}")
                     if base_url:
@@ -986,20 +1042,27 @@ class AIAgent:
                     if key_used and key_used != "dummy-key" and len(key_used) > 12:
                         print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
                     else:
-                        print(f"⚠️  Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
+                        print(
+                            f"⚠️  Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')"
+                        )
             except Exception as e:
                 raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
-        
+
         # Provider fallback chain — ordered list of backup providers tried
         # when the primary is exhausted (rate-limit, overload, connection
         # failure).  Supports both legacy single-dict ``fallback_model`` and
         # new list ``fallback_providers`` format.
         if isinstance(fallback_model, list):
             self._fallback_chain = [
-                f for f in fallback_model
+                f
+                for f in fallback_model
                 if isinstance(f, dict) and f.get("provider") and f.get("model")
             ]
-        elif isinstance(fallback_model, dict) and fallback_model.get("provider") and fallback_model.get("model"):
+        elif (
+            isinstance(fallback_model, dict)
+            and fallback_model.get("provider")
+            and fallback_model.get("model")
+        ):
             self._fallback_chain = [fallback_model]
         else:
             self._fallback_chain = []
@@ -1012,8 +1075,12 @@ class AIAgent:
                 fb = self._fallback_chain[0]
                 print(f"🔄 Fallback model: {fb['model']} ({fb['provider']})")
             else:
-                print(f"🔄 Fallback chain ({len(self._fallback_chain)} providers): " +
-                      " → ".join(f"{f['model']} ({f['provider']})" for f in self._fallback_chain))
+                print(
+                    f"🔄 Fallback chain ({len(self._fallback_chain)} providers): "
+                    + " → ".join(
+                        f"{f['model']} ({f['provider']})" for f in self._fallback_chain
+                    )
+                )
 
         # Get available tools with filtering
         self.tools = get_tool_definitions(
@@ -1021,7 +1088,7 @@ class AIAgent:
             disabled_toolsets=disabled_toolsets,
             quiet_mode=self.quiet_mode,
         )
-        
+
         # Show tool configuration and store valid tool names for validation
         self.valid_tool_names = set()
         if self.tools:
@@ -1029,7 +1096,7 @@ class AIAgent:
             tool_names = sorted(self.valid_tool_names)
             if not self.quiet_mode:
                 print(f"🛠️  Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
-                
+
                 # Show filtering info if applied
                 if enabled_toolsets:
                     print(f"   ✅ Enabled toolsets: {', '.join(enabled_toolsets)}")
@@ -1037,28 +1104,40 @@ class AIAgent:
                     print(f"   ❌ Disabled toolsets: {', '.join(disabled_toolsets)}")
         elif not self.quiet_mode:
             print("🛠️  No tools loaded (all tools filtered out or unavailable)")
-        
+
         # Check tool requirements
         if self.tools and not self.quiet_mode:
             requirements = check_toolset_requirements()
-            missing_reqs = [name for name, available in requirements.items() if not available]
+            missing_reqs = [
+                name for name, available in requirements.items() if not available
+            ]
             if missing_reqs:
-                print(f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}")
-        
+                print(
+                    f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}"
+                )
+
         # Show trajectory saving status
         if self.save_trajectories and not self.quiet_mode:
             print("📝 Trajectory saving enabled")
-        
+
         # Show ephemeral system prompt status
         if self.ephemeral_system_prompt and not self.quiet_mode:
-            prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
-            print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")
-        
+            prompt_preview = (
+                self.ephemeral_system_prompt[:60] + "..."
+                if len(self.ephemeral_system_prompt) > 60
+                else self.ephemeral_system_prompt
+            )
+            print(
+                f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)"
+            )
+
         # Show prompt caching status
         if self._use_prompt_caching and not self.quiet_mode:
-            source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
+            source = (
+                "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
+            )
             print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)")
-        
+
         # Session logging setup - auto-save conversation trajectories for debugging
         self.session_start = datetime.now()
         if session_id:
@@ -1069,35 +1148,39 @@ class AIAgent:
             timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
             short_uuid = uuid.uuid4().hex[:6]
             self.session_id = f"{timestamp_str}_{short_uuid}"
-        
+
         # Session logs go into ~/.hermes/sessions/ alongside gateway sessions
         hermes_home = get_hermes_home()
         self.logs_dir = hermes_home / "sessions"
         self.logs_dir.mkdir(parents=True, exist_ok=True)
         self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"
-        
+
         # Track conversation messages for session logging
         self._session_messages: List[Dict[str, Any]] = []
-        
+
         # Cached system prompt -- built once per session, only rebuilt on compression
         self._cached_system_prompt: Optional[str] = None
-        
+
         # Filesystem checkpoint manager (transparent — not a tool)
         from tools.checkpoint_manager import CheckpointManager
+
         self._checkpoint_mgr = CheckpointManager(
             enabled=checkpoints_enabled,
             max_snapshots=checkpoint_max_snapshots,
         )
-        
+
         # SQLite session store (optional -- provided by CLI or gateway)
         self._session_db = session_db
         self._parent_session_id = parent_session_id
-        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
+        self._last_flushed_db_idx = (
+            0  # tracks DB-write cursor to prevent duplicate writes
+        )
         if self._session_db:
             try:
                 self._session_db.create_session(
                     session_id=self.session_id,
-                    source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
+                    source=self.platform
+                    or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
                     model=self.model,
                     model_config={
                         "max_iterations": self.max_iterations,
@@ -1115,16 +1198,19 @@ class AIAgent:
                 # lock clears.  The session row may be missing from the index
                 # for this run, but that is recoverable (flushes upsert rows).
                 logger.warning(
-                    "Session DB create_session failed (session_search still available): %s", e
+                    "Session DB create_session failed (session_search still available): %s",
+                    e,
                 )
-        
+
         # In-memory todo list for task planning (one per agent/session)
         from tools.todo_tool import TodoStore
+
         self._todo_store = TodoStore()
-        
+
         # Load config once for memory, skills, and compression sections
         try:
             from hermes_cli.config import load_config as _load_agent_config
+
             _agent_cfg = _load_agent_config()
         except Exception:
             _agent_cfg = {}
@@ -1141,11 +1227,14 @@ class AIAgent:
             try:
                 mem_config = _agent_cfg.get("memory", {})
                 self._memory_enabled = mem_config.get("memory_enabled", False)
-                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
+                self._user_profile_enabled = mem_config.get(
+                    "user_profile_enabled", False
+                )
                 self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
                 self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
                 if self._memory_enabled or self._user_profile_enabled:
                     from tools.memory_tool import MemoryStore
+
                     self._memory_store = MemoryStore(
                         memory_char_limit=mem_config.get("memory_char_limit", 2200),
                         user_char_limit=mem_config.get("user_char_limit", 1375),
@@ -1153,15 +1242,15 @@ class AIAgent:
                     self._memory_store.load_from_disk()
             except Exception:
                 pass  # Memory is optional -- don't break agent init
-        
-
 
         # Memory provider plugin (external — one at a time, alongside built-in)
         # Reads memory.provider from config to select which plugin to activate.
         self._memory_manager = None
         if not skip_memory:
             try:
-                _mem_provider_name = mem_config.get("provider", "") if mem_config else ""
+                _mem_provider_name = (
+                    mem_config.get("provider", "") if mem_config else ""
+                )
 
                 # Auto-migrate: if Honcho was actively configured (enabled +
                 # credentials) but memory.provider is not set, activate the
@@ -1170,20 +1259,29 @@ class AIAgent:
                 # file may be from a different tool.
                 if not _mem_provider_name:
                     try:
-                        from plugins.memory.honcho.client import HonchoClientConfig as _HCC
+                        from plugins.memory.honcho.client import (
+                            HonchoClientConfig as _HCC,
+                        )
+
                         _hcfg = _HCC.from_global_config()
                         if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url):
                             _mem_provider_name = "honcho"
                             # Persist so this only auto-migrates once
                             try:
-                                from hermes_cli.config import load_config as _lc, save_config as _sc
+                                from hermes_cli.config import (
+                                    load_config as _lc,
+                                    save_config as _sc,
+                                )
+
                                 _cfg = _lc()
                                 _cfg.setdefault("memory", {})["provider"] = "honcho"
                                 _sc(_cfg)
                             except Exception:
                                 pass
                             if not self.quiet_mode:
-                                print("  ✓ Auto-migrated Honcho to memory provider plugin.")
+                                print(
+                                    "  ✓ Auto-migrated Honcho to memory provider plugin."
+                                )
                                 print("    Your config and data are preserved.\n")
                     except Exception:
                         pass
@@ -1191,12 +1289,14 @@ class AIAgent:
                 if _mem_provider_name:
                     from agent.memory_manager import MemoryManager as _MemoryManager
                     from plugins.memory import load_memory_provider as _load_mem
+
                     self._memory_manager = _MemoryManager()
                     _mp = _load_mem(_mem_provider_name)
                     if _mp and _mp.is_available():
                         self._memory_manager.add_provider(_mp)
                     if self._memory_manager.providers:
                         from hermes_constants import get_hermes_home as _ghh
+
                         _init_kwargs = {
                             "session_id": self.session_id,
                             "platform": platform or "cli",
@@ -1209,15 +1309,21 @@ class AIAgent:
                         # Profile identity for per-profile provider scoping
                         try:
                             from hermes_cli.profiles import get_active_profile_name
+
                             _profile = get_active_profile_name()
                             _init_kwargs["agent_identity"] = _profile
                             _init_kwargs["agent_workspace"] = "hermes"
                         except Exception:
                             pass
                         self._memory_manager.initialize_all(**_init_kwargs)
-                        logger.info("Memory provider '%s' activated", _mem_provider_name)
+                        logger.info(
+                            "Memory provider '%s' activated", _mem_provider_name
+                        )
                     else:
-                        logger.debug("Memory provider '%s' not found or not available", _mem_provider_name)
+                        logger.debug(
+                            "Memory provider '%s' not found or not available",
+                            _mem_provider_name,
+                        )
                         self._memory_manager = None
             except Exception as _mpe:
                 logger.warning("Memory provider plugin init failed: %s", _mpe)
@@ -1236,7 +1342,9 @@ class AIAgent:
         self._skill_nudge_interval = 10
         try:
             skills_config = _agent_cfg.get("skills", {})
-            self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 10))
+            self._skill_nudge_interval = int(
+                skills_config.get("creation_nudge_interval", 10)
+            )
         except Exception:
             pass
 
@@ -1254,7 +1362,11 @@ class AIAgent:
         if not isinstance(_compression_cfg, dict):
             _compression_cfg = {}
         compression_threshold = float(_compression_cfg.get("threshold", 0.50))
-        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
+        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in (
+            "true",
+            "1",
+            "yes",
+        )
         compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
         compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))
 
@@ -1275,6 +1387,7 @@ class AIAgent:
                     _config_context_length,
                 )
                 import sys
+
                 print(
                     f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n"
                     f"  Must be a plain integer (e.g. 256000, not '256K').\n"
@@ -1290,6 +1403,7 @@ class AIAgent:
         if _config_context_length is None:
             try:
                 from hermes_cli.config import get_compatible_custom_providers
+
                 _custom_providers = get_compatible_custom_providers(_agent_cfg)
             except Exception:
                 _custom_providers = _agent_cfg.get("custom_providers")
@@ -1314,9 +1428,11 @@ class AIAgent:
                                         "custom_providers: %r — must be a plain "
                                         "integer (e.g. 256000, not '256K'). "
                                         "Falling back to auto-detection.",
-                                        self.model, _cp_ctx,
+                                        self.model,
+                                        _cp_ctx,
                                     )
                                     import sys
+
                                     print(
                                         f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
                                         f"  Must be a plain integer (e.g. 256000, not '256K').\n"
@@ -1324,7 +1440,7 @@ class AIAgent:
                                         file=sys.stderr,
                                     )
                     break
-        
+
         # Select context engine: config-driven (like memory providers).
         # 1. Check config.yaml context.engine setting
         # 2. Check plugins/context_engine/<name>/ directory (repo-shipped)
@@ -1333,7 +1449,9 @@ class AIAgent:
         _selected_engine = None
         _engine_name = "compressor"  # default
         try:
-            _ctx_cfg = _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {}
+            _ctx_cfg = (
+                _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {}
+            )
             _engine_name = _ctx_cfg.get("engine", "compressor") or "compressor"
         except Exception:
             pass
@@ -1342,14 +1460,18 @@ class AIAgent:
             # Try loading from plugins/context_engine/<name>/
             try:
                 from plugins.context_engine import load_context_engine
+
                 _selected_engine = load_context_engine(_engine_name)
             except Exception as _ce_load_err:
-                logger.debug("Context engine load from plugins/context_engine/: %s", _ce_load_err)
+                logger.debug(
+                    "Context engine load from plugins/context_engine/: %s", _ce_load_err
+                )
 
             # Try general plugin system as fallback
             if _selected_engine is None:
                 try:
                     from hermes_cli.plugins import get_plugin_context_engine
+
                     _candidate = get_plugin_context_engine()
                     if _candidate and _candidate.name == _engine_name:
                         _selected_engine = _candidate
@@ -1367,6 +1489,7 @@ class AIAgent:
             self.context_compressor = _selected_engine
             # Resolve context_length for plugin engines — mirrors switch_model() path
             from agent.model_metadata import get_model_context_length
+
             _plugin_ctx_len = get_model_context_length(
                 self.model,
                 base_url=self.base_url,
@@ -1403,6 +1526,7 @@ class AIAgent:
         # Reject models whose context window is below the minimum required
         # for reliable tool-calling workflows (64K tokens).
         from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
+
         _ctx = getattr(self.context_compressor, "context_length", 0)
         if _ctx and _ctx < MINIMUM_CONTEXT_LENGTH:
             raise ValueError(
@@ -1415,7 +1539,11 @@ class AIAgent:
 
         # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand)
         self._context_engine_tool_names: set = set()
-        if hasattr(self, "context_compressor") and self.context_compressor and self.tools is not None:
+        if (
+            hasattr(self, "context_compressor")
+            and self.context_compressor
+            and self.tools is not None
+        ):
             for _schema in self.context_compressor.get_tool_schemas():
                 _wrapped = {"type": "function", "function": _schema}
                 self.tools.append(_wrapped)
@@ -1432,7 +1560,9 @@ class AIAgent:
                     hermes_home=str(get_hermes_home()),
                     platform=self.platform or "cli",
                     model=self.model,
-                    context_length=getattr(self.context_compressor, "context_length", 0),
+                    context_length=getattr(
+                        self.context_compressor, "context_length", 0
+                    ),
                 )
             except Exception as _ce_err:
                 logger.debug("Context engine on_session_start: %s", _ce_err)
@@ -1455,7 +1585,7 @@ class AIAgent:
         self.session_estimated_cost_usd = 0.0
         self.session_cost_status = "unknown"
         self.session_cost_source = "none"
-        
+
         # ── Ollama num_ctx injection ──
         # Ollama defaults to 2048 context regardless of the model's capabilities.
         # When running against an Ollama server, detect the model's max context
@@ -1469,8 +1599,14 @@ class AIAgent:
             try:
                 self._ollama_num_ctx = int(_ollama_num_ctx_override)
             except (TypeError, ValueError):
-                logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override)
-        if self._ollama_num_ctx is None and self.base_url and is_local_endpoint(self.base_url):
+                logger.debug(
+                    "Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override
+                )
+        if (
+            self._ollama_num_ctx is None
+            and self.base_url
+            and is_local_endpoint(self.base_url)
+        ):
             try:
                 _detected = query_ollama_num_ctx(self.model, self.base_url)
                 if _detected and _detected > 0:
@@ -1485,9 +1621,13 @@ class AIAgent:
 
         if not self.quiet_mode:
             if compression_enabled:
-                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})")
+                print(
+                    f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold * 100)}% = {self.context_compressor.threshold_tokens:,})"
+                )
             else:
-                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
+                print(
+                    f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)"
+                )
 
         # Check immediately so CLI users see the warning at startup.
         # Gateway status_callback is not yet wired, so any warning is stored
@@ -1519,15 +1659,17 @@ class AIAgent:
             "compressor_threshold_tokens": _cc.threshold_tokens,
         }
         if self.api_mode == "anthropic_messages":
-            self._primary_runtime.update({
-                "anthropic_api_key": self._anthropic_api_key,
-                "anthropic_base_url": self._anthropic_base_url,
-                "is_anthropic_oauth": self._is_anthropic_oauth,
-            })
+            self._primary_runtime.update(
+                {
+                    "anthropic_api_key": self._anthropic_api_key,
+                    "anthropic_base_url": self._anthropic_base_url,
+                    "is_anthropic_oauth": self._is_anthropic_oauth,
+                }
+            )
 
     def reset_session_state(self):
         """Reset all session-scoped token counters to 0 for a fresh session.
-        
+
         This method encapsulates the reset logic for all session-level metrics
         including:
         - Token usage counters (input, output, total, prompt, completion)
@@ -1536,10 +1678,10 @@ class AIAgent:
         - Reasoning tokens
         - Estimated cost tracking
         - Context compressor internal counters
-        
+
         The method safely handles optional attributes (e.g., context compressor)
         using ``hasattr`` checks.
-        
+
         This keeps the counter reset logic DRY and maintainable in one place
         rather than scattering it across multiple methods.
         """
@@ -1556,15 +1698,17 @@ class AIAgent:
         self.session_estimated_cost_usd = 0.0
         self.session_cost_status = "unknown"
         self.session_cost_source = "none"
-        
+
         # Turn counter (added after reset_session_state was first written — #2635)
         self._user_turn_count = 0
 
         # Context engine reset (works for both built-in compressor and plugins)
         if hasattr(self, "context_compressor") and self.context_compressor:
             self.context_compressor.on_session_reset()
-    
-    def switch_model(self, new_model, new_provider, api_key='', base_url='', api_mode=''):
+
+    def switch_model(
+        self, new_model, new_provider, api_key="", base_url="", api_mode=""
+    ):
         """Switch the model/provider in-place for a live agent.
 
         Called by the /model command handlers (CLI and gateway) after
@@ -1603,16 +1747,24 @@ class AIAgent:
                 resolve_anthropic_token,
                 _is_oauth_token,
             )
+
             # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
             # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own
             # API key — falling back would send Anthropic credentials to third-party endpoints.
             _is_native_anthropic = new_provider == "anthropic"
-            effective_key = (api_key or self.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or self.api_key or "")
+            effective_key = (
+                (api_key or self.api_key or resolve_anthropic_token() or "")
+                if _is_native_anthropic
+                else (api_key or self.api_key or "")
+            )
             self.api_key = effective_key
             self._anthropic_api_key = effective_key
-            self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None)
+            self._anthropic_base_url = base_url or getattr(
+                self, "_anthropic_base_url", None
+            )
             self._anthropic_client = build_anthropic_client(
-                effective_key, self._anthropic_base_url,
+                effective_key,
+                self._anthropic_base_url,
             )
             self._is_anthropic_oauth = _is_oauth_token(effective_key)
             self.client = None
@@ -1631,15 +1783,18 @@ class AIAgent:
             )
 
         # ── Re-evaluate prompt caching ──
-        is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic"
-        self._use_prompt_caching = (
-            ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower())
-            or is_native_anthropic
+        is_native_anthropic = (
+            api_mode == "anthropic_messages" and new_provider == "anthropic"
         )
+        self._use_prompt_caching = (
+            "openrouter" in (self.base_url or "").lower()
+            and "claude" in new_model.lower()
+        ) or is_native_anthropic
 
         # ── Update context compressor ──
         if hasattr(self, "context_compressor") and self.context_compressor:
             from agent.model_metadata import get_model_context_length
+
             new_context_length = get_model_context_length(
                 self.model,
                 base_url=self.base_url,
@@ -1660,7 +1815,11 @@ class AIAgent:
         self._cached_system_prompt = None
 
         # ── Update _primary_runtime so the change persists across turns ──
-        _cc = self.context_compressor if hasattr(self, "context_compressor") and self.context_compressor else None
+        _cc = (
+            self.context_compressor
+            if hasattr(self, "context_compressor") and self.context_compressor
+            else None
+        )
         self._primary_runtime = {
             "model": self.model,
             "provider": self.provider,
@@ -1669,19 +1828,27 @@ class AIAgent:
             "api_key": getattr(self, "api_key", ""),
             "client_kwargs": dict(self._client_kwargs),
             "use_prompt_caching": self._use_prompt_caching,
-            "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
-            "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
+            "compressor_model": getattr(_cc, "model", self.model)
+            if _cc
+            else self.model,
+            "compressor_base_url": getattr(_cc, "base_url", self.base_url)
+            if _cc
+            else self.base_url,
             "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
-            "compressor_provider": getattr(_cc, "provider", self.provider) if _cc else self.provider,
+            "compressor_provider": getattr(_cc, "provider", self.provider)
+            if _cc
+            else self.provider,
             "compressor_context_length": _cc.context_length if _cc else 0,
             "compressor_threshold_tokens": _cc.threshold_tokens if _cc else 0,
         }
         if api_mode == "anthropic_messages":
-            self._primary_runtime.update({
-                "anthropic_api_key": self._anthropic_api_key,
-                "anthropic_base_url": self._anthropic_base_url,
-                "is_anthropic_oauth": self._is_anthropic_oauth,
-            })
+            self._primary_runtime.update(
+                {
+                    "anthropic_api_key": self._anthropic_api_key,
+                    "anthropic_base_url": self._anthropic_base_url,
+                    "is_anthropic_oauth": self._is_anthropic_oauth,
+                }
+            )
 
         # ── Reset fallback state ──
         self._fallback_activated = False
@@ -1689,7 +1856,10 @@ class AIAgent:
 
         logging.info(
             "Model switched in-place: %s (%s) -> %s (%s)",
-            old_model, old_provider, new_model, new_provider,
+            old_model,
+            old_provider,
+            new_model,
+            new_provider,
         )
 
     def _safe_print(self, *args, **kwargs):
@@ -1844,7 +2014,9 @@ class AIAgent:
             # ignoring the explicit config value.  Pass it as the highest-
             # priority hint so the configured value is always respected.
             _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {})
-            _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None
+            _aux_context_config = (
+                _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None
+            )
             if _aux_context_config is not None:
                 try:
                     _aux_context_config = int(_aux_context_config)
@@ -1862,7 +2034,9 @@ class AIAgent:
             if aux_context < threshold:
                 # Suggest a threshold that would fit the aux model,
                 # rounded down to a clean percentage.
-                safe_pct = int((aux_context / self.context_compressor.context_length) * 100)
+                safe_pct = int(
+                    (aux_context / self.context_compressor.context_length) * 100
+                )
                 msg = (
                     f"⚠ Compression model ({aux_model}) context "
                     f"is {aux_context:,} tokens, but the main model's "
@@ -1892,9 +2066,7 @@ class AIAgent:
                     threshold,
                 )
         except Exception as exc:
-            logger.debug(
-                "Compression feasibility check failed (non-fatal): %s", exc
-            )
+            logger.debug("Compression feasibility check failed (non-fatal): %s", exc)
 
     def _replay_compression_warning(self) -> None:
         """Re-send the compression warning through ``status_callback``.
@@ -1939,7 +2111,7 @@ class AIAgent:
 
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
-        
+
         OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
         'max_completion_tokens'. OpenRouter, local models, and older
         OpenAI models use 'max_tokens'.
@@ -1970,19 +2142,33 @@ class AIAgent:
 
         # Check if there's any non-whitespace content remaining
         return bool(cleaned.strip())
-    
+
     def _strip_think_blocks(self, content: str) -> str:
         """Remove reasoning/thinking blocks from content, returning only visible text."""
         if not content:
             return ""
         # Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
         # <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
-        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
-        content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
-        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
-        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+        content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
+        content = re.sub(
+            r"<thinking>.*?</thinking>", "", content, flags=re.DOTALL | re.IGNORECASE
+        )
+        content = re.sub(r"<reasoning>.*?</reasoning>", "", content, flags=re.DOTALL)
+        content = re.sub(
+            r"<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>",
+            "",
+            content,
+            flags=re.DOTALL,
+        )
+        content = re.sub(
+            r"<thought>.*?</thought>", "", content, flags=re.DOTALL | re.IGNORECASE
+        )
+        content = re.sub(
+            r"</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*",
+            "",
+            content,
+            flags=re.IGNORECASE,
+        )
         return content
 
     def _looks_like_codex_intermediate_ack(
@@ -1995,14 +2181,19 @@ class AIAgent:
         if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
             return False
 
-        assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower()
+        assistant_text = (
+            self._strip_think_blocks(assistant_content or "").strip().lower()
+        )
         if not assistant_text:
             return False
         if len(assistant_text) > 1200:
             return False
 
         has_future_ack = bool(
-            re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text)
+            re.search(
+                r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b",
+                assistant_text,
+            )
         )
         if not has_future_ack:
             return False
@@ -2050,51 +2241,60 @@ class AIAgent:
             or "~/" in user_text
             or "/" in user_text
         )
-        assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
+        assistant_mentions_action = any(
+            marker in assistant_text for marker in action_markers
+        )
         assistant_targets_workspace = any(
             marker in assistant_text for marker in workspace_markers
         )
-        return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
-    
-    
+        return (
+            user_targets_workspace or assistant_targets_workspace
+        ) and assistant_mentions_action
+
     def _extract_reasoning(self, assistant_message) -> Optional[str]:
         """
         Extract reasoning/thinking content from an assistant message.
-        
+
         OpenRouter and various providers can return reasoning in multiple formats:
         1. message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.)
         2. message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.)
         3. message.reasoning_details - Array of {type, summary, ...} objects (OpenRouter unified)
-        
+
         Args:
             assistant_message: The assistant message object from the API response
-            
+
         Returns:
             Combined reasoning text, or None if no reasoning found
         """
         reasoning_parts = []
-        
+
         # Check direct reasoning field
-        if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
+        if hasattr(assistant_message, "reasoning") and assistant_message.reasoning:
             reasoning_parts.append(assistant_message.reasoning)
-        
+
         # Check reasoning_content field (alternative name used by some providers)
-        if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
+        if (
+            hasattr(assistant_message, "reasoning_content")
+            and assistant_message.reasoning_content
+        ):
             # Don't duplicate if same as reasoning
             if assistant_message.reasoning_content not in reasoning_parts:
                 reasoning_parts.append(assistant_message.reasoning_content)
-        
+
         # Check reasoning_details array (OpenRouter unified format)
         # Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
-        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+        if (
+            hasattr(assistant_message, "reasoning_details")
+            and assistant_message.reasoning_details
+        ):
             for detail in assistant_message.reasoning_details:
                 if isinstance(detail, dict):
                     # Extract summary from reasoning detail object
                     summary = (
-                        detail.get('summary')
-                        or detail.get('thinking')
-                        or detail.get('content')
-                        or detail.get('text')
+                        detail.get("summary")
+                        or detail.get("thinking")
+                        or detail.get("content")
+                        or detail.get("text")
                     )
                     if summary and summary not in reasoning_parts:
                         reasoning_parts.append(summary)
@@ -2117,11 +2317,11 @@ class AIAgent:
                     cleaned = block.strip()
                     if cleaned and cleaned not in reasoning_parts:
                         reasoning_parts.append(cleaned)
-        
+
         # Combine all reasoning parts
         if reasoning_parts:
             return "\n\n".join(reasoning_parts)
-        
+
         return None
 
     def _cleanup_task_resources(self, task_id: str) -> None:
@@ -2217,11 +2417,14 @@ class AIAgent:
 
         def _run_review():
             import contextlib, os as _os
+
             review_agent = None
             try:
-                with open(_os.devnull, "w") as _devnull, \
-                     contextlib.redirect_stdout(_devnull), \
-                     contextlib.redirect_stderr(_devnull):
+                with (
+                    open(_os.devnull, "w") as _devnull,
+                    contextlib.redirect_stdout(_devnull),
+                    contextlib.redirect_stderr(_devnull),
+                ):
                     review_agent = AIAgent(
                         model=self.model,
                         max_iterations=8,
@@ -2258,14 +2461,34 @@ class AIAgent:
                         actions.append(message)
                     elif "updated" in message.lower():
                         actions.append(message)
-                    elif "added" in message.lower() or (target and "add" in message.lower()):
-                        label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+                    elif "added" in message.lower() or (
+                        target and "add" in message.lower()
+                    ):
+                        label = (
+                            "Memory"
+                            if target == "memory"
+                            else "User profile"
+                            if target == "user"
+                            else target
+                        )
                         actions.append(f"{label} updated")
                     elif "Entry added" in message:
-                        label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+                        label = (
+                            "Memory"
+                            if target == "memory"
+                            else "User profile"
+                            if target == "user"
+                            else target
+                        )
                         actions.append(f"{label} updated")
                     elif "removed" in message.lower() or "replaced" in message.lower():
-                        label = "Memory" if target == "memory" else "User profile" if target == "user" else target
+                        label = (
+                            "Memory"
+                            if target == "memory"
+                            else "User profile"
+                            if target == "user"
+                            else target
+                        )
                         actions.append(f"{label} updated")
 
                 if actions:
@@ -2311,7 +2534,9 @@ class AIAgent:
             if isinstance(msg, dict) and msg.get("role") == "user":
                 msg["content"] = override
 
-    def _persist_session(self, messages: List[Dict], conversation_history: List[Dict] = None):
+    def _persist_session(
+        self, messages: List[Dict], conversation_history: List[Dict] = None
+    ):
         """Save session state to both JSON log and SQLite on any exit path.
 
         Ensures conversations are never lost, even on errors or early returns.
@@ -2324,7 +2549,9 @@ class AIAgent:
         self._save_session_log(messages)
         self._flush_messages_to_session_db(messages, conversation_history)
 
-    def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
+    def _flush_messages_to_session_db(
+        self, messages: List[Dict], conversation_history: List[Dict] = None
+    ):
         """Persist any un-flushed messages to the SQLite session store.
 
         Uses _last_flushed_db_idx to track which messages have already been
@@ -2365,8 +2592,12 @@ class AIAgent:
                     tool_call_id=msg.get("tool_call_id"),
                     finish_reason=msg.get("finish_reason"),
                     reasoning=msg.get("reasoning") if role == "assistant" else None,
-                    reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
-                    codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
+                    reasoning_details=msg.get("reasoning_details")
+                    if role == "assistant"
+                    else None,
+                    codex_reasoning_items=msg.get("codex_reasoning_items")
+                    if role == "assistant"
+                    else None,
                 )
             self._last_flushed_db_idx = len(messages)
         except Exception as e:
@@ -2375,44 +2606,44 @@ class AIAgent:
     def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
         """
         Get messages up to (but not including) the last assistant turn.
-        
+
         This is used when we need to "roll back" to the last successful point
         in the conversation, typically when the final assistant message is
         incomplete or malformed.
-        
+
         Args:
             messages: Full message list
-            
+
         Returns:
             Messages up to the last complete assistant turn (ending with user/tool message)
         """
         if not messages:
             return []
-        
+
         # Find the index of the last assistant message
         last_assistant_idx = None
         for i in range(len(messages) - 1, -1, -1):
             if messages[i].get("role") == "assistant":
                 last_assistant_idx = i
                 break
-        
+
         if last_assistant_idx is None:
             # No assistant message found, return all messages
             return messages.copy()
-        
+
         # Return everything up to (not including) the last assistant message
         return messages[:last_assistant_idx]
-    
+
     def _format_tools_for_system_message(self) -> str:
         """
         Format tool definitions for the system message in the trajectory format.
-        
+
         Returns:
             str: JSON string representation of tool definitions
         """
         if not self.tools:
             return "[]"
-        
+
         # Convert tool definitions to the format expected in trajectories
         formatted_tools = []
         for tool in self.tools:
@@ -2421,26 +2652,28 @@ class AIAgent:
                 "name": func["name"],
                 "description": func.get("description", ""),
                 "parameters": func.get("parameters", {}),
-                "required": None  # Match the format in the example
+                "required": None,  # Match the format in the example
             }
             formatted_tools.append(formatted_tool)
-        
+
         return json.dumps(formatted_tools, ensure_ascii=False)
-    
-    def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
+
+    def _convert_to_trajectory_format(
+        self, messages: List[Dict[str, Any]], user_query: str, completed: bool
+    ) -> List[Dict[str, Any]]:
         """
         Convert internal message format to trajectory format for saving.
-        
+
         Args:
             messages (List[Dict]): Internal message history
             user_query (str): Original user query
             completed (bool): Whether the conversation completed successfully
-            
+
         Returns:
             List[Dict]: Messages in trajectory format
         """
         trajectory = []
-        
+
         # Add system message with tool definitions
         system_msg = (
             "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
@@ -2455,71 +2688,69 @@ class AIAgent:
             "Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
             "Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
         )
-        
-        trajectory.append({
-            "from": "system",
-            "value": system_msg
-        })
-        
+
+        trajectory.append({"from": "system", "value": system_msg})
+
         # Add the actual user prompt (from the dataset) as the first human message
-        trajectory.append({
-            "from": "human",
-            "value": user_query
-        })
-        
+        trajectory.append({"from": "human", "value": user_query})
+
         # Skip the first message (the user query) since we already added it above.
         # Prefill messages are injected at API-call time only (not in the messages
         # list), so no offset adjustment is needed here.
         i = 1
-        
+
         while i < len(messages):
             msg = messages[i]
-            
+
             if msg["role"] == "assistant":
                 # Check if this message has tool calls
                 if "tool_calls" in msg and msg["tool_calls"]:
                     # Format assistant message with tool calls
                     # Add <think> tags around reasoning for trajectory storage
                     content = ""
-                    
+
                     # Prepend reasoning in <think> tags if available (native thinking tokens)
                     if msg.get("reasoning") and msg["reasoning"].strip():
                         content = f"<think>\n{msg['reasoning']}\n</think>\n"
-                    
+
                     if msg.get("content") and msg["content"].strip():
                         # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
                         # (used when native thinking is disabled and model reasons via XML)
                         content += convert_scratchpad_to_think(msg["content"]) + "\n"
-                    
+
                     # Add tool calls wrapped in XML tags
                     for tool_call in msg["tool_calls"]:
-                        if not tool_call or not isinstance(tool_call, dict): continue
+                        if not tool_call or not isinstance(tool_call, dict):
+                            continue
                         # Parse arguments - should always succeed since we validate during conversation
                         # but keep try-except as safety net
                         try:
-                            arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
+                            arguments = (
+                                json.loads(tool_call["function"]["arguments"])
+                                if isinstance(tool_call["function"]["arguments"], str)
+                                else tool_call["function"]["arguments"]
+                            )
                         except json.JSONDecodeError:
                             # This shouldn't happen since we validate and retry during conversation,
                             # but if it does, log warning and use empty dict
-                            logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
+                            logging.warning(
+                                f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}"
+                            )
                             arguments = {}
-                        
+
                         tool_call_json = {
                             "name": tool_call["function"]["name"],
-                            "arguments": arguments
+                            "arguments": arguments,
                         }
                         content += f"<tool_call>\n{json.dumps(tool_call_json, ensure_ascii=False)}\n</tool_call>\n"
-                    
+
                     # Ensure every gpt turn has a <think> block (empty if no reasoning)
                     # so the format is consistent for training data
                     if "<think>" not in content:
                         content = "<think>\n</think>\n" + content
-                    
-                    trajectory.append({
-                        "from": "gpt",
-                        "value": content.rstrip()
-                    })
-                    
+
+                    trajectory.append({"from": "gpt", "value": content.rstrip()})
+
                     # Collect all subsequent tool responses
                     tool_responses = []
                     j = i + 1
@@ -2527,7 +2758,7 @@ class AIAgent:
                         tool_msg = messages[j]
                         # Format tool response with XML tags
                         tool_response = "<tool_response>\n"
-                        
+
                         # Try to parse tool content as JSON if it looks like JSON
                         tool_content = tool_msg["content"]
                         try:
@@ -2535,67 +2766,65 @@ class AIAgent:
                                 tool_content = json.loads(tool_content)
                         except (json.JSONDecodeError, AttributeError):
                             pass  # Keep as string if not valid JSON
-                        
+
                         tool_index = len(tool_responses)
                         tool_name = (
                             msg["tool_calls"][tool_index]["function"]["name"]
                             if tool_index < len(msg["tool_calls"])
                             else "unknown"
                         )
-                        tool_response += json.dumps({
-                            "tool_call_id": tool_msg.get("tool_call_id", ""),
-                            "name": tool_name,
-                            "content": tool_content
-                        }, ensure_ascii=False)
+                        tool_response += json.dumps(
+                            {
+                                "tool_call_id": tool_msg.get("tool_call_id", ""),
+                                "name": tool_name,
+                                "content": tool_content,
+                            },
+                            ensure_ascii=False,
+                        )
                         tool_response += "\n</tool_response>"
                         tool_responses.append(tool_response)
                         j += 1
-                    
+
                     # Add all tool responses as a single message
                     if tool_responses:
-                        trajectory.append({
-                            "from": "tool",
-                            "value": "\n".join(tool_responses)
-                        })
+                        trajectory.append(
+                            {"from": "tool", "value": "\n".join(tool_responses)}
+                        )
                         i = j - 1  # Skip the tool messages we just processed
-                
+
                 else:
                     # Regular assistant message without tool calls
                     # Add <think> tags around reasoning for trajectory storage
                     content = ""
-                    
+
                     # Prepend reasoning in <think> tags if available (native thinking tokens)
                     if msg.get("reasoning") and msg["reasoning"].strip():
                         content = f"<think>\n{msg['reasoning']}\n</think>\n"
-                    
+
                     # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
                     # (used when native thinking is disabled and model reasons via XML)
                     raw_content = msg["content"] or ""
                     content += convert_scratchpad_to_think(raw_content)
-                    
+
                     # Ensure every gpt turn has a <think> block (empty if no reasoning)
                     if "<think>" not in content:
                         content = "<think>\n</think>\n" + content
-                    
-                    trajectory.append({
-                        "from": "gpt",
-                        "value": content.strip()
-                    })
-            
+
+                    trajectory.append({"from": "gpt", "value": content.strip()})
+
             elif msg["role"] == "user":
-                trajectory.append({
-                    "from": "human",
-                    "value": msg["content"]
-                })
-            
+                trajectory.append({"from": "human", "value": msg["content"]})
+
             i += 1
-        
+
         return trajectory
-    
-    def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
+
+    def _save_trajectory(
+        self, messages: List[Dict[str, Any]], user_query: str, completed: bool
+    ):
         """
         Save conversation trajectory to JSONL file.
-        
+
         Args:
             messages (List[Dict]): Complete message history
             user_query (str): Original user query
@@ -2603,10 +2832,10 @@ class AIAgent:
         """
         if not self.save_trajectories:
             return
-        
+
         trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
         _save_trajectory_to_file(trajectory, self.model, completed)
-    
+
     @staticmethod
     def _summarize_api_error(error: Exception) -> str:
         """Extract a human-readable one-liner from an API error.
@@ -2616,6 +2845,7 @@ class AIAgent:
         str(error) for everything else.
         """
         import re as _re
+
         raw = str(error)
 
         # Cloudflare / proxy HTML pages: grab the <title> for a clean summary
@@ -2637,7 +2867,11 @@ class AIAgent:
         # JSON body errors from OpenAI/Anthropic SDKs
         body = getattr(error, "body", None)
         if isinstance(body, dict):
-            msg = body.get("error", {}).get("message") if isinstance(body.get("error"), dict) else body.get("message")
+            msg = (
+                body.get("error", {}).get("message")
+                if isinstance(body.get("error"), dict)
+                else body.get("message")
+            )
             if msg:
                 status_code = getattr(error, "status_code", None)
                 prefix = f"HTTP {status_code}: " if status_code else ""
@@ -2658,27 +2892,27 @@ class AIAgent:
     def _clean_error_message(self, error_msg: str) -> str:
         """
         Clean up error messages for user display, removing HTML content and truncating.
-        
+
         Args:
             error_msg: Raw error message from API or exception
-            
+
         Returns:
             Clean, user-friendly error message
         """
         if not error_msg:
             return "Unknown error"
-            
+
         # Remove HTML content (common with CloudFlare and gateway error pages)
-        if error_msg.strip().startswith('<!DOCTYPE html') or '<html' in error_msg:
+        if error_msg.strip().startswith("<!DOCTYPE html") or "<html" in error_msg:
             return "Service temporarily unavailable (HTML error page returned)"
-            
+
         # Remove newlines and excessive whitespace
-        cleaned = ' '.join(error_msg.split())
-        
+        cleaned = " ".join(error_msg.split())
+
         # Truncate if too long
         if len(cleaned) > 150:
             cleaned = cleaned[:150] + "..."
-            
+
         return cleaned
 
     @staticmethod
@@ -2730,10 +2964,18 @@ class AIAgent:
         if "reset_at" not in context:
             message = context.get("message") or ""
             if isinstance(message, str):
-                delay_match = re.search(r"quotaResetDelay[:\s\"]+(\\d+(?:\\.\\d+)?)(ms|s)", message, re.IGNORECASE)
+                delay_match = re.search(
+                    r"quotaResetDelay[:\s\"]+(\d+(?:\.\d+)?)(ms|s)",  # group 1: number, group 2: unit (e.g. 1500ms / 2.5s)
+                    message,
+                    re.IGNORECASE,
+                )
                 if delay_match:
                     value = float(delay_match.group(1))
-                    seconds = value / 1000.0 if delay_match.group(2).lower() == "ms" else value
+                    seconds = (
+                        value / 1000.0
+                        if delay_match.group(2).lower() == "ms"
+                        else value
+                    )
                     context["reset_at"] = time.time() + seconds
                 else:
                     sec_match = re.search(
@@ -2746,7 +2988,9 @@ class AIAgent:
 
         return context
 
-    def _usage_summary_for_api_request_hook(self, response: Any) -> Optional[Dict[str, Any]]:
+    def _usage_summary_for_api_request_hook(
+        self, response: Any
+    ) -> Optional[Dict[str, Any]]:
         """Token buckets for ``post_api_request`` plugins (no raw ``response`` object)."""
         if response is None:
             return None
@@ -2819,7 +3063,9 @@ class AIAgent:
                 response_obj = getattr(error, "response", None)
                 if response_obj is not None:
                     try:
-                        error_info["response_status"] = getattr(response_obj, "status_code", None)
+                        error_info["response_status"] = getattr(
+                            response_obj, "status_code", None
+                        )
                         error_info["response_text"] = response_obj.text
                     except Exception as e:
                         logger.debug("Could not extract error response details: %s", e)
@@ -2827,21 +3073,29 @@ class AIAgent:
                 dump_payload["error"] = error_info
 
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-            dump_file = self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
+            dump_file = (
+                self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
+            )
             dump_file.write_text(
                 json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
                 encoding="utf-8",
             )
 
-            self._vprint(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
+            self._vprint(
+                f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}"
+            )
 
             if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"):
-                print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
+                print(
+                    json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)
+                )
 
             return dump_file
         except Exception as dump_error:
             if self.verbose_logging:
-                logging.warning(f"Failed to dump API request debug payload: {dump_error}")
+                logging.warning(
+                    f"Failed to dump API request debug payload: {dump_error}"
+                )
             return None
 
     @staticmethod
@@ -2850,8 +3104,8 @@ class AIAgent:
         if not content:
             return content
         content = convert_scratchpad_to_think(content)
-        content = re.sub(r'\n+(<think>)', r'\n\1', content)
-        content = re.sub(r'(</think>)\n+', r'\1\n', content)
+        content = re.sub(r"\n+(<think>)", r"\n\1", content)
+        content = re.sub(r"(</think>)\n+", r"\1\n", content)
         return content.strip()
 
     def _save_session_log(self, messages: List[Dict[str, Any]] = None):
@@ -2885,12 +3139,17 @@ class AIAgent:
             # with partial history and would otherwise clobber the full JSON log.
             if self.session_log_file.exists():
                 try:
-                    existing = json.loads(self.session_log_file.read_text(encoding="utf-8"))
-                    existing_count = existing.get("message_count", len(existing.get("messages", [])))
+                    existing = json.loads(
+                        self.session_log_file.read_text(encoding="utf-8")
+                    )
+                    existing_count = existing.get(
+                        "message_count", len(existing.get("messages", []))
+                    )
                     if existing_count > len(cleaned):
                         logging.debug(
                             "Skipping session log overwrite: existing has %d messages, current has %d",
-                            existing_count, len(cleaned),
+                            existing_count,
+                            len(cleaned),
                         )
                         return
                 except Exception:
@@ -2919,26 +3178,26 @@ class AIAgent:
         except Exception as e:
             if self.verbose_logging:
                 logging.warning(f"Failed to save session log: {e}")
-    
+
     def interrupt(self, message: str = None) -> None:
         """
         Request the agent to interrupt its current tool-calling loop.
-        
+
         Call this from another thread (e.g., input handler, message receiver)
         to gracefully stop the agent and process a new message.
-        
+
         Also signals long-running tool executions (e.g. terminal commands)
         to terminate early, so the agent can respond immediately.
-        
+
         Args:
             message: Optional new message that triggered the interrupt.
                      If provided, the agent will include this in its response context.
-        
+
         Example (CLI):
             # In a separate input thread:
             if user_typed_something:
                 agent.interrupt(user_input)
-        
+
         Example (Messaging):
             # When new message arrives for active session:
             if session_has_running_agent:
@@ -2959,8 +3218,17 @@ class AIAgent:
             except Exception as e:
                 logger.debug("Failed to propagate interrupt to child agent: %s", e)
         if not self.quiet_mode:
-            print("\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else ""))
-    
+            print(
+                "\n⚡ Interrupt requested"
+                + (
+                    f": '{message[:40]}...'"
+                    if message and len(message) > 40
+                    else f": '{message}'"
+                    if message
+                    else ""
+                )
+            )
+
     def clear_interrupt(self) -> None:
         """Clear any pending interrupt request and the per-thread tool interrupt signal."""
         self._interrupt_requested = False
@@ -2985,6 +3253,7 @@ class AIAgent:
             return
         try:
             from agent.rate_limit_tracker import parse_rate_limit_headers
+
             state = parse_rate_limit_headers(headers, provider=self.provider)
             if state is not None:
                 self._rate_limit_state = state
@@ -3039,7 +3308,7 @@ class AIAgent:
                 )
             except Exception:
                 pass
-    
+
     def close(self) -> None:
         """Release all resources held by this agent instance.
 
@@ -3058,6 +3327,7 @@ class AIAgent:
         # 1. Kill background processes for this task
         try:
             from tools.process_registry import process_registry
+
             process_registry.kill_all(task_id=task_id)
         except Exception:
             pass
@@ -3065,6 +3335,7 @@ class AIAgent:
         # 2. Clean terminal sandbox environments
         try:
             from tools.terminal_tool import cleanup_vm
+
             cleanup_vm(task_id)
         except Exception:
             pass
@@ -3072,6 +3343,7 @@ class AIAgent:
         # 3. Clean browser daemon sessions
         try:
             from tools.browser_tool import cleanup_browser
+
             cleanup_browser(task_id)
         except Exception:
             pass
@@ -3101,7 +3373,7 @@ class AIAgent:
     def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
         """
         Recover todo state from conversation history.
-        
+
         The gateway creates a fresh AIAgent per message, so the in-memory
         TodoStore is empty. We scan the history for the most recent todo
         tool response and replay it to reconstruct the state.
@@ -3122,32 +3394,25 @@ class AIAgent:
                     break
             except (json.JSONDecodeError, TypeError):
                 continue
-        
+
         if last_todo_response:
             # Replay the items into the store (replace mode)
             self._todo_store.write(last_todo_response, merge=False)
             if not self.quiet_mode:
-                self._vprint(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
+                self._vprint(
+                    f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history"
+                )
         _set_interrupt(False)
-    
+
     @property
     def is_interrupted(self) -> bool:
         """Check if an interrupt has been requested."""
         return self._interrupt_requested
 
-
-
-
-
-
-
-
-
-
     def _build_system_prompt(self, system_message: str = None) -> str:
         """
         Assemble the full system prompt from all layers.
-        
+
         Called once per session (cached on self._cached_system_prompt) and only
         rebuilt after context compression events. This ensures the system prompt
         is stable across all turns in a session, maximizing prefix cache hits.
@@ -3197,13 +3462,21 @@ class AIAgent:
         if self.valid_tool_names:
             _enforce = self._tool_use_enforcement
             _inject = False
-            if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")):
+            if _enforce is True or (
+                isinstance(_enforce, str)
+                and _enforce.lower() in ("true", "always", "yes", "on")
+            ):
                 _inject = True
-            elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")):
+            elif _enforce is False or (
+                isinstance(_enforce, str)
+                and _enforce.lower() in ("false", "never", "no", "off")
+            ):
                 _inject = False
             elif isinstance(_enforce, list):
                 model_lower = (self.model or "").lower()
-                _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
+                _inject = any(
+                    p.lower() in model_lower for p in _enforce if isinstance(p, str)
+                )
             else:
                 # "auto" or any unrecognised value — use hardcoded defaults
                 model_lower = (self.model or "").lower()
@@ -3247,12 +3520,16 @@ class AIAgent:
             except Exception:
                 pass
 
-        has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
+        has_skills_tools = any(
+            name in self.valid_tool_names
+            for name in ["skills_list", "skill_view", "skill_manage"]
+        )
         if has_skills_tools:
             avail_toolsets = {
                 toolset
                 for toolset in (
-                    get_toolset_for_tool(tool_name) for tool_name in self.valid_tool_names
+                    get_toolset_for_tool(tool_name)
+                    for tool_name in self.valid_tool_names
                 )
                 if toolset
             }
@@ -3272,13 +3549,17 @@ class AIAgent:
             # other dev files — inflating token usage by ~10k for no benefit.
             _context_cwd = os.getenv("TERMINAL_CWD") or None
             context_files_prompt = build_context_files_prompt(
-                cwd=_context_cwd, skip_soul=_soul_loaded)
+                cwd=_context_cwd, skip_soul=_soul_loaded
+            )
             if context_files_prompt:
                 prompt_parts.append(context_files_prompt)
 
         from hermes_time import now as _hermes_now
+
         now = _hermes_now()
-        timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
+        timestamp_line = (
+            f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
+        )
         if self.pass_session_id and self.session_id:
             timestamp_line += f"\nSession ID: {self.session_id}"
         if self.model:
@@ -3291,7 +3572,9 @@ class AIAgent:
         # of the requested model. Inject explicit model identity into the system prompt
         # so the agent can correctly report which model it is (workaround for API bug).
         if self.provider == "alibaba":
-            _model_short = self.model.split("/")[-1] if "/" in self.model else self.model
+            _model_short = (
+                self.model.split("/")[-1] if "/" in self.model else self.model
+            )
             prompt_parts.append(
                 f"You are powered by the model named {_model_short}. "
                 f"The exact model ID is {self.model}. "
@@ -3322,7 +3605,9 @@ class AIAgent:
             return tc.get("id", "") or ""
         return getattr(tc, "id", "") or ""
 
-    _VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"})
+    _VALID_API_ROLES = frozenset(
+        {"system", "user", "assistant", "tool", "function", "developer"}
+    )
 
     @staticmethod
     def _sanitize_api_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -3364,8 +3649,12 @@ class AIAgent:
         orphaned_results = result_call_ids - surviving_call_ids
         if orphaned_results:
             messages = [
-                m for m in messages
-                if not (m.get("role") == "tool" and m.get("tool_call_id") in orphaned_results)
+                m
+                for m in messages
+                if not (
+                    m.get("role") == "tool"
+                    and m.get("tool_call_id") in orphaned_results
+                )
             ]
             logger.debug(
                 "Pre-call sanitizer: removed %d orphaned tool result(s)",
@@ -3382,11 +3671,13 @@ class AIAgent:
                     for tc in msg.get("tool_calls") or []:
                         cid = AIAgent._get_tool_call_id_static(tc)
                         if cid in missing_results:
-                            patched.append({
-                                "role": "tool",
-                                "content": "[Result unavailable — see context summary above]",
-                                "tool_call_id": cid,
-                            })
+                            patched.append(
+                                {
+                                    "role": "tool",
+                                    "content": "[Result unavailable — see context summary above]",
+                                    "tool_call_id": cid,
+                                }
+                            )
             messages = patched
             logger.debug(
                 "Pre-call sanitizer: added %d stub tool result(s)",
@@ -3405,8 +3696,11 @@ class AIAgent:
         Returns the original list if no truncation was needed.
         """
         from tools.delegate_tool import _get_max_concurrent_children
+
         max_children = _get_max_concurrent_children()
-        delegate_count = sum(1 for tc in tool_calls if tc.function.name == "delegate_task")
+        delegate_count = sum(
+            1 for tc in tool_calls if tc.function.name == "delegate_task"
+        )
         if delegate_count <= max_children:
             return tool_calls
         kept_delegates = 0
@@ -3421,7 +3715,8 @@ class AIAgent:
         logger.warning(
             "Truncated %d excess delegate_task call(s) to enforce "
             "max_concurrent_children=%d limit",
-            delegate_count - max_children, max_children,
+            delegate_count - max_children,
+            max_children,
         )
         return truncated
 
@@ -3474,7 +3769,7 @@ class AIAgent:
     def _invalidate_system_prompt(self):
         """
         Invalidate the cached system prompt, forcing a rebuild on the next turn.
-        
+
         Called after context compression events. Also reloads memory from disk
         so the rebuilt prompt captures any writes from this session.
         """
@@ -3482,7 +3777,9 @@ class AIAgent:
         if self._memory_store:
             self._memory_store.load_from_disk()
 
-    def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
+    def _responses_tools(
+        self, tools: Optional[List[Dict[str, Any]]] = None
+    ) -> Optional[List[Dict[str, Any]]]:
         """Convert chat-completions tool schemas to Responses function-tool schemas."""
         source_tools = tools if tools is not None else self.tools
         if not source_tools:
@@ -3494,13 +3791,17 @@ class AIAgent:
             name = fn.get("name")
             if not isinstance(name, str) or not name.strip():
                 continue
-            converted.append({
-                "type": "function",
-                "name": name,
-                "description": fn.get("description", ""),
-                "strict": False,
-                "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
-            })
+            converted.append(
+                {
+                    "type": "function",
+                    "name": name,
+                    "description": fn.get("description", ""),
+                    "strict": False,
+                    "parameters": fn.get(
+                        "parameters", {"type": "object", "properties": {}}
+                    ),
+                }
+            )
         return converted or None
 
     @staticmethod
@@ -3512,6 +3813,7 @@ class AIAgent:
         make every API call's prefix unique, breaking OpenAI's prompt cache.
         """
         import hashlib
+
         seed = f"{fn_name}:{arguments}:{index}"
         digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
         return f"call_{digest}"
@@ -3548,13 +3850,13 @@ class AIAgent:
         if source.startswith("fc_"):
             return source
         if source.startswith("call_") and len(source) > len("call_"):
-            return f"fc_{source[len('call_'):]}"
+            return f"fc_{source[len('call_') :]}"
 
         sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
         if sanitized.startswith("fc_"):
             return sanitized
         if sanitized.startswith("call_") and len(sanitized) > len("call_"):
-            return f"fc_{sanitized[len('call_'):]}"
+            return f"fc_{sanitized[len('call_') :]}"
         if sanitized:
             return f"fc_{sanitized[:48]}"
 
@@ -3562,7 +3864,9 @@ class AIAgent:
         digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
         return f"fc_{digest}"
 
-    def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def _chat_messages_to_responses_input(
+        self, messages: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
         """Convert internal chat-style messages to Responses input items."""
         items: List[Dict[str, Any]] = []
         seen_item_ids: set = set()
@@ -3619,8 +3923,8 @@ class AIAgent:
                             if not isinstance(fn_name, str) or not fn_name.strip():
                                 continue
 
-                            embedded_call_id, embedded_response_item_id = self._split_responses_tool_id(
-                                tc.get("id")
+                            embedded_call_id, embedded_response_item_id = (
+                                self._split_responses_tool_id(tc.get("id"))
                             )
                             call_id = tc.get("call_id")
                             if not isinstance(call_id, str) or not call_id.strip():
@@ -3631,10 +3935,12 @@ class AIAgent:
                                     and embedded_response_item_id.startswith("fc_")
                                     and len(embedded_response_item_id) > len("fc_")
                                 ):
-                                    call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
+                                    call_id = f"call_{embedded_response_item_id[len('fc_') :]}"
                                 else:
                                     _raw_args = str(fn.get("arguments", "{}"))
-                                    call_id = self._deterministic_call_id(fn_name, _raw_args, len(items))
+                                    call_id = self._deterministic_call_id(
+                                        fn_name, _raw_args, len(items)
+                                    )
                             call_id = call_id.strip()
 
                             arguments = fn.get("arguments", "{}")
@@ -3644,12 +3950,14 @@ class AIAgent:
                                 arguments = str(arguments)
                             arguments = arguments.strip() or "{}"
 
-                            items.append({
-                                "type": "function_call",
-                                "call_id": call_id,
-                                "name": fn_name,
-                                "arguments": arguments,
-                            })
+                            items.append(
+                                {
+                                    "type": "function_call",
+                                    "call_id": call_id,
+                                    "name": fn_name,
+                                    "arguments": arguments,
+                                }
+                            )
                     continue
 
                 items.append({"role": role, "content": content_text})
@@ -3663,11 +3971,13 @@ class AIAgent:
                         call_id = raw_tool_call_id.strip()
                 if not isinstance(call_id, str) or not call_id.strip():
                     continue
-                items.append({
-                    "type": "function_call_output",
-                    "call_id": call_id,
-                    "output": str(msg.get("content", "") or ""),
-                })
+                items.append(
+                    {
+                        "type": "function_call_output",
+                        "call_id": call_id,
+                        "output": str(msg.get("content", "") or ""),
+                    }
+                )
 
         return items
 
@@ -3686,9 +3996,13 @@ class AIAgent:
                 call_id = item.get("call_id")
                 name = item.get("name")
                 if not isinstance(call_id, str) or not call_id.strip():
-                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] function_call is missing call_id."
+                    )
                 if not isinstance(name, str) or not name.strip():
-                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] function_call is missing name."
+                    )
 
                 arguments = item.get("arguments", "{}")
                 if isinstance(arguments, dict):
@@ -3710,7 +4024,9 @@ class AIAgent:
             if item_type == "function_call_output":
                 call_id = item.get("call_id")
                 if not isinstance(call_id, str) or not call_id.strip():
-                    raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] function_call_output is missing call_id."
+                    )
                 output = item.get("output", "")
                 if output is None:
                     output = ""
@@ -3734,7 +4050,10 @@ class AIAgent:
                         if item_id in seen_ids:
                             continue
                         seen_ids.add(item_id)
-                    reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
+                    reasoning_item = {
+                        "type": "reasoning",
+                        "encrypted_content": encrypted,
+                    }
                     # Do NOT include the "id" in the outgoing item — with
                     # store=False (our default) the API tries to resolve the
                     # id server-side and returns 404.  The id is still used
@@ -3776,11 +4095,15 @@ class AIAgent:
         required = {"model", "instructions", "input"}
         missing = [key for key in required if key not in api_kwargs]
         if missing:
-            raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
+            raise ValueError(
+                f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}."
+            )
 
         model = api_kwargs.get("model")
         if not isinstance(model, str) or not model.strip():
-            raise ValueError("Codex Responses request 'model' must be a non-empty string.")
+            raise ValueError(
+                "Codex Responses request 'model' must be a non-empty string."
+            )
         model = model.strip()
 
         instructions = api_kwargs.get("instructions")
@@ -3796,20 +4119,28 @@ class AIAgent:
         normalized_tools = None
         if tools is not None:
             if not isinstance(tools, list):
-                raise ValueError("Codex Responses request 'tools' must be a list when provided.")
+                raise ValueError(
+                    "Codex Responses request 'tools' must be a list when provided."
+                )
             normalized_tools = []
             for idx, tool in enumerate(tools):
                 if not isinstance(tool, dict):
                     raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
                 if tool.get("type") != "function":
-                    raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
+                    raise ValueError(
+                        f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}."
+                    )
 
                 name = tool.get("name")
                 parameters = tool.get("parameters")
                 if not isinstance(name, str) or not name.strip():
-                    raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
+                    raise ValueError(
+                        f"Codex Responses tools[{idx}] is missing a valid name."
+                    )
                 if not isinstance(parameters, dict):
-                    raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
+                    raise ValueError(
+                        f"Codex Responses tools[{idx}] is missing valid parameters."
+                    )
 
                 description = tool.get("description", "")
                 if description is None:
@@ -3836,9 +4167,19 @@ class AIAgent:
             raise ValueError("Codex Responses contract requires 'store' to be false.")
 
         allowed_keys = {
-            "model", "instructions", "input", "tools", "store",
-            "reasoning", "include", "max_output_tokens", "temperature",
-            "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
+            "model",
+            "instructions",
+            "input",
+            "tools",
+            "store",
+            "reasoning",
+            "include",
+            "max_output_tokens",
+            "temperature",
+            "tool_choice",
+            "parallel_tool_calls",
+            "prompt_cache_key",
+            "service_tier",
         }
         normalized: Dict[str, Any] = {
             "model": model,
@@ -3869,7 +4210,11 @@ class AIAgent:
             normalized["temperature"] = float(temperature)
 
         # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
-        for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
+        for passthrough_key in (
+            "tool_choice",
+            "parallel_tool_calls",
+            "prompt_cache_key",
+        ):
             val = api_kwargs.get(passthrough_key)
             if val is not None:
                 normalized[passthrough_key] = val
@@ -3882,7 +4227,9 @@ class AIAgent:
                 normalized["stream"] = True
             allowed_keys.add("stream")
         elif "stream" in api_kwargs:
-            raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
+            raise ValueError(
+                "Codex Responses stream flag is only allowed in fallback streaming requests."
+            )
 
         unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
         if unexpected:
@@ -3935,12 +4282,19 @@ class AIAgent:
             if isinstance(out_text, str) and out_text.strip():
                 logger.debug(
                     "Codex response has empty output but output_text is present (%d chars); "
-                    "synthesizing output item.", len(out_text.strip()),
+                    "synthesizing output item.",
+                    len(out_text.strip()),
                 )
-                output = [SimpleNamespace(
-                    type="message", role="assistant", status="completed",
-                    content=[SimpleNamespace(type="output_text", text=out_text.strip())],
-                )]
+                output = [
+                    SimpleNamespace(
+                        type="message",
+                        role="assistant",
+                        status="completed",
+                        content=[
+                            SimpleNamespace(type="output_text", text=out_text.strip())
+                        ],
+                    )
+                ]
                 response.output = output
             else:
                 raise RuntimeError("Responses API returned no output items")
@@ -3956,14 +4310,22 @@ class AIAgent:
             if isinstance(error_obj, dict):
                 error_msg = error_obj.get("message") or str(error_obj)
             else:
-                error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
+                error_msg = (
+                    str(error_obj)
+                    if error_obj
+                    else f"Responses API returned status '{response_status}'"
+                )
             raise RuntimeError(error_msg)
 
         content_parts: List[str] = []
         reasoning_parts: List[str] = []
         reasoning_items_raw: List[Dict[str, Any]] = []
         tool_calls: List[Any] = []
-        has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+        has_incomplete_items = response_status in {
+            "queued",
+            "in_progress",
+            "incomplete",
+        }
         saw_commentary_phase = False
         saw_final_answer_phase = False
 
@@ -4009,7 +4371,9 @@ class AIAgent:
                         for part in summary:
                             text = getattr(part, "text", None)
                             if isinstance(text, str):
-                                raw_summary.append({"type": "summary_text", "text": text})
+                                raw_summary.append(
+                                    {"type": "summary_text", "text": text}
+                                )
                         raw_item["summary"] = raw_summary
                     reasoning_items_raw.append(raw_item)
             elif item_type == "function_call":
@@ -4022,19 +4386,29 @@ class AIAgent:
                 raw_call_id = getattr(item, "call_id", None)
                 raw_item_id = getattr(item, "id", None)
                 embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
-                call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+                call_id = (
+                    raw_call_id
+                    if isinstance(raw_call_id, str) and raw_call_id.strip()
+                    else embedded_call_id
+                )
                 if not isinstance(call_id, str) or not call_id.strip():
-                    call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls))
+                    call_id = self._deterministic_call_id(
+                        fn_name, arguments, len(tool_calls)
+                    )
                 call_id = call_id.strip()
                 response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
-                response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
-                tool_calls.append(SimpleNamespace(
-                    id=call_id,
-                    call_id=call_id,
-                    response_item_id=response_item_id,
-                    type="function",
-                    function=SimpleNamespace(name=fn_name, arguments=arguments),
-                ))
+                response_item_id = self._derive_responses_function_call_id(
+                    call_id, response_item_id
+                )
+                tool_calls.append(
+                    SimpleNamespace(
+                        id=call_id,
+                        call_id=call_id,
+                        response_item_id=response_item_id,
+                        type="function",
+                        function=SimpleNamespace(name=fn_name, arguments=arguments),
+                    )
+                )
             elif item_type == "custom_tool_call":
                 fn_name = getattr(item, "name", "") or ""
                 arguments = getattr(item, "input", "{}")
@@ -4043,19 +4417,29 @@ class AIAgent:
                 raw_call_id = getattr(item, "call_id", None)
                 raw_item_id = getattr(item, "id", None)
                 embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
-                call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+                call_id = (
+                    raw_call_id
+                    if isinstance(raw_call_id, str) and raw_call_id.strip()
+                    else embedded_call_id
+                )
                 if not isinstance(call_id, str) or not call_id.strip():
-                    call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls))
+                    call_id = self._deterministic_call_id(
+                        fn_name, arguments, len(tool_calls)
+                    )
                 call_id = call_id.strip()
                 response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
-                response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
-                tool_calls.append(SimpleNamespace(
-                    id=call_id,
-                    call_id=call_id,
-                    response_item_id=response_item_id,
-                    type="function",
-                    function=SimpleNamespace(name=fn_name, arguments=arguments),
-                ))
+                response_item_id = self._derive_responses_function_call_id(
+                    call_id, response_item_id
+                )
+                tool_calls.append(
+                    SimpleNamespace(
+                        id=call_id,
+                        call_id=call_id,
+                        response_item_id=response_item_id,
+                        type="function",
+                        function=SimpleNamespace(name=fn_name, arguments=arguments),
+                    )
+                )
 
         final_text = "\n".join([p for p in content_parts if p]).strip()
         if not final_text and hasattr(response, "output_text"):
@@ -4074,7 +4458,9 @@ class AIAgent:
 
         if tool_calls:
             finish_reason = "tool_calls"
-        elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+        elif has_incomplete_items or (
+            saw_commentary_phase and not saw_final_answer_phase
+        ):
             finish_reason = "incomplete"
         elif reasoning_items_raw and not final_text:
             # Response contains only reasoning (encrypted thinking state) with
@@ -4138,8 +4524,12 @@ class AIAgent:
             return bool(getattr(http_client, "is_closed", False))
         return False
 
-    def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
-        if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
+    def _create_openai_client(
+        self, client_kwargs: dict, *, reason: str, shared: bool
+    ) -> Any:
+        if self.provider == "copilot-acp" or str(
+            client_kwargs.get("base_url", "")
+        ).startswith("acp://copilot"):
             from agent.copilot_acp_client import CopilotACPClient
 
             client = CopilotACPClient(**client_kwargs)
@@ -4192,9 +4582,8 @@ class AIAgent:
                 or []
             )
             for conn in list(connections):
-                stream = (
-                    getattr(conn, "_network_stream", None)
-                    or getattr(conn, "_stream", None)
+                stream = getattr(conn, "_network_stream", None) or getattr(
+                    conn, "_stream", None
                 )
                 if stream is None:
                     continue
@@ -4246,7 +4635,9 @@ class AIAgent:
         with self._openai_client_lock():
             old_client = getattr(self, "client", None)
             try:
-                new_client = self._create_openai_client(self._client_kwargs, reason=reason, shared=True)
+                new_client = self._create_openai_client(
+                    self._client_kwargs, reason=reason, shared=True
+                )
             except Exception as exc:
                 logger.warning(
                     "Failed to rebuild shared OpenAI client (%s) %s error=%s",
@@ -4305,9 +4696,8 @@ class AIAgent:
             dead_count = 0
             for conn in list(connections):
                 # Check for connections that are idle but have closed sockets
-                stream = (
-                    getattr(conn, "_network_stream", None)
-                    or getattr(conn, "_stream", None)
+                stream = getattr(conn, "_network_stream", None) or getattr(
+                    conn, "_stream", None
                 )
                 if stream is None:
                     continue
@@ -4320,6 +4710,7 @@ class AIAgent:
                     continue
                 # Probe socket health with a non-blocking recv peek
                 import socket as _socket
+
                 try:
                     sock.setblocking(False)
                     data = sock.recv(1, _socket.MSG_PEEK | _socket.MSG_DONTWAIT)
@@ -4358,11 +4749,15 @@ class AIAgent:
     def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
         self._close_openai_client(client, reason=reason, shared=False)
 
-    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
+    def _run_codex_stream(
+        self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None
+    ):
         """Execute one streaming Responses API request and return the final response."""
         import httpx as _httpx
 
-        active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct")
+        active_client = client or self._ensure_primary_openai_client(
+            reason="codex_stream_direct"
+        )
         max_stream_retries = 1
         has_tool_calls = False
         first_delta_fired = False
@@ -4380,7 +4775,10 @@ class AIAgent:
                             break
                         event_type = getattr(event, "type", "")
                         # Fire callbacks on text content deltas (suppress during tool calls)
-                        if "output_text.delta" in event_type or event_type == "response.output_text.delta":
+                        if (
+                            "output_text.delta" in event_type
+                            or event_type == "response.output_text.delta"
+                        ):
                             delta_text = getattr(event, "delta", "")
                             if delta_text:
                                 self._codex_streamed_text_parts.append(delta_text)
@@ -4412,12 +4810,20 @@ class AIAgent:
                         # Log non-completed terminal events for diagnostics
                         elif event_type in ("response.incomplete", "response.failed"):
                             resp_obj = getattr(event, "response", None)
-                            status = getattr(resp_obj, "status", None) if resp_obj else None
-                            incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
+                            status = (
+                                getattr(resp_obj, "status", None) if resp_obj else None
+                            )
+                            incomplete_details = (
+                                getattr(resp_obj, "incomplete_details", None)
+                                if resp_obj
+                                else None
+                            )
                             logger.warning(
                                 "Codex Responses stream received terminal event %s "
                                 "(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
-                                event_type, status, incomplete_details,
+                                event_type,
+                                status,
+                                incomplete_details,
                                 sum(len(p) for p in self._codex_streamed_text_parts),
                                 self._client_log_context(),
                             )
@@ -4435,18 +4841,30 @@ class AIAgent:
                             )
                         elif self._codex_streamed_text_parts and not has_tool_calls:
                             assembled = "".join(self._codex_streamed_text_parts)
-                            final_response.output = [SimpleNamespace(
-                                type="message",
-                                role="assistant",
-                                status="completed",
-                                content=[SimpleNamespace(type="output_text", text=assembled)],
-                            )]
+                            final_response.output = [
+                                SimpleNamespace(
+                                    type="message",
+                                    role="assistant",
+                                    status="completed",
+                                    content=[
+                                        SimpleNamespace(
+                                            type="output_text", text=assembled
+                                        )
+                                    ],
+                                )
+                            ]
                             logger.debug(
                                 "Codex stream: synthesized output from %d text deltas (%d chars)",
-                                len(self._codex_streamed_text_parts), len(assembled),
+                                len(self._codex_streamed_text_parts),
+                                len(assembled),
                             )
                     return final_response
-            except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
+            except (
+                _httpx.RemoteProtocolError,
+                _httpx.ReadTimeout,
+                _httpx.ConnectError,
+                ConnectionError,
+            ) as exc:
                 if attempt < max_stream_retries:
                     logger.debug(
                         "Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
@@ -4461,7 +4879,9 @@ class AIAgent:
                     self._client_log_context(),
                     exc,
                 )
-                return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
+                return self._run_codex_create_stream_fallback(
+                    api_kwargs, client=active_client
+                )
             except RuntimeError as exc:
                 err_text = str(exc)
                 missing_completed = "response.completed" in err_text
@@ -4478,15 +4898,21 @@ class AIAgent:
                         "Responses stream did not emit response.completed; falling back to create(stream=True). %s",
                         self._client_log_context(),
                     )
-                    return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
+                    return self._run_codex_create_stream_fallback(
+                        api_kwargs, client=active_client
+                    )
                 raise
 
     def _run_codex_create_stream_fallback(self, api_kwargs: dict, client: Any = None):
         """Fallback path for stream completion edge cases on Codex-style Responses backends."""
-        active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback")
+        active_client = client or self._ensure_primary_openai_client(
+            reason="codex_create_stream_fallback"
+        )
         fallback_kwargs = dict(api_kwargs)
         fallback_kwargs["stream"] = True
-        fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
+        fallback_kwargs = self._preflight_codex_api_kwargs(
+            fallback_kwargs, allow_stream=True
+        )
         stream_or_response = active_client.responses.create(**fallback_kwargs)
 
         # Compatibility shim for mocks or providers that still return a concrete response.
@@ -4519,7 +4945,11 @@ class AIAgent:
                     if delta:
                         collected_text_deltas.append(delta)
 
-                if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
+                if event_type not in {
+                    "response.completed",
+                    "response.incomplete",
+                    "response.failed",
+                }:
                     continue
 
                 terminal_response = getattr(event, "response", None)
@@ -4537,14 +4967,22 @@ class AIAgent:
                             )
                         elif collected_text_deltas:
                             assembled = "".join(collected_text_deltas)
-                            terminal_response.output = [SimpleNamespace(
-                                type="message", role="assistant",
-                                status="completed",
-                                content=[SimpleNamespace(type="output_text", text=assembled)],
-                            )]
+                            terminal_response.output = [
+                                SimpleNamespace(
+                                    type="message",
+                                    role="assistant",
+                                    status="completed",
+                                    content=[
+                                        SimpleNamespace(
+                                            type="output_text", text=assembled
+                                        )
+                                    ],
+                                )
+                            ]
                             logger.debug(
                                 "Codex fallback stream: synthesized from %d deltas (%d chars)",
-                                len(collected_text_deltas), len(assembled),
+                                len(collected_text_deltas),
+                                len(assembled),
                             )
                     return terminal_response
         finally:
@@ -4557,7 +4995,9 @@ class AIAgent:
 
         if terminal_response is not None:
             return terminal_response
-        raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
+        raise RuntimeError(
+            "Responses create(stream=True) fallback did not emit a terminal response."
+        )
 
     def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
         if self.api_mode != "codex_responses" or self.provider != "openai-codex":
@@ -4596,7 +5036,9 @@ class AIAgent:
             from hermes_cli.auth import resolve_nous_runtime_credentials
 
             creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+                min_key_ttl_seconds=max(
+                    60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))
+                ),
                 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
                 force_mint=force,
             )
@@ -4624,7 +5066,9 @@ class AIAgent:
         return True
 
     def _try_refresh_anthropic_client_credentials(self) -> bool:
-        if self.api_mode != "anthropic_messages" or not hasattr(self, "_anthropic_api_key"):
+        if self.api_mode != "anthropic_messages" or not hasattr(
+            self, "_anthropic_api_key"
+        ):
             return False
         # Only refresh credentials for the native Anthropic provider.
         # Other anthropic_messages providers (MiniMax, Alibaba, etc.) use their own keys.
@@ -4632,7 +5076,10 @@ class AIAgent:
             return False
 
         try:
-            from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
+            from agent.anthropic_adapter import (
+                resolve_anthropic_token,
+                build_anthropic_client,
+            )
 
             new_token = resolve_anthropic_token()
         except Exception as exc:
@@ -4651,14 +5098,19 @@ class AIAgent:
             pass
 
         try:
-            self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None))
+            self._anthropic_client = build_anthropic_client(
+                new_token, getattr(self, "_anthropic_base_url", None)
+            )
         except Exception as exc:
-            logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc)
+            logger.warning(
+                "Failed to rebuild Anthropic client after credential refresh: %s", exc
+            )
             return False
 
         self._anthropic_api_key = new_token
         # Update OAuth flag — token type may have changed (API key ↔ OAuth)
         from agent.anthropic_adapter import _is_oauth_token
+
         self._is_anthropic_oauth = _is_oauth_token(new_token)
         return True
 
@@ -4680,8 +5132,14 @@ class AIAgent:
             self._client_kwargs.pop("default_headers", None)
 
     def _swap_credential(self, entry) -> None:
-        runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
-        runtime_base = getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or self.base_url
+        runtime_key = getattr(entry, "runtime_api_key", None) or getattr(
+            entry, "access_token", ""
+        )
+        runtime_base = (
+            getattr(entry, "runtime_base_url", None)
+            or getattr(entry, "base_url", None)
+            or self.base_url
+        )
 
         if self.api_mode == "anthropic_messages":
             from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token
@@ -4700,7 +5158,9 @@ class AIAgent:
             return
 
         self.api_key = runtime_key
-        self.base_url = runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base
+        self.base_url = (
+            runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base
+        )
         self._client_kwargs["api_key"] = self.api_key
         self._client_kwargs["base_url"] = self.base_url
         self._apply_client_headers_for_base_url(self.base_url)
@@ -4743,7 +5203,9 @@ class AIAgent:
 
         if effective_reason == FailoverReason.billing:
             rotate_status = status_code if status_code is not None else 402
-            next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context)
+            next_entry = pool.mark_exhausted_and_rotate(
+                status_code=rotate_status, error_context=error_context
+            )
             if next_entry is not None:
                 logger.info(
                     "Credential %s (billing) — rotated to pool entry %s",
@@ -4758,7 +5220,9 @@ class AIAgent:
             if not has_retried_429:
                 return False, True
             rotate_status = status_code if status_code is not None else 429
-            next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context)
+            next_entry = pool.mark_exhausted_and_rotate(
+                status_code=rotate_status, error_context=error_context
+            )
             if next_entry is not None:
                 logger.info(
                     "Credential %s (rate limit) — rotated to pool entry %s",
@@ -4772,13 +5236,17 @@ class AIAgent:
         if effective_reason == FailoverReason.auth:
             refreshed = pool.try_refresh_current()
             if refreshed is not None:
-                logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}")
+                logger.info(
+                    f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}"
+                )
                 self._swap_credential(refreshed)
                 return True, has_retried_429
             # Refresh failed — rotate to next credential instead of giving up.
             # The failed entry is already marked exhausted by try_refresh_current().
             rotate_status = status_code if status_code is not None else 401
-            next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context)
+            next_entry = pool.mark_exhausted_and_rotate(
+                status_code=rotate_status, error_context=error_context
+            )
             if next_entry is not None:
                 logger.info(
                     "Credential %s (auth refresh failed) — rotated to pool entry %s",
@@ -4815,7 +5283,11 @@ class AIAgent:
         def _call():
             try:
                 if self.api_mode == "codex_responses":
-                    request_client_holder["client"] = self._create_request_openai_client(reason="codex_stream_request")
+                    request_client_holder["client"] = (
+                        self._create_request_openai_client(
+                            reason="codex_stream_request"
+                        )
+                    )
                     result["response"] = self._run_codex_stream(
                         api_kwargs,
                         client=request_client_holder["client"],
@@ -4824,14 +5296,22 @@ class AIAgent:
                 elif self.api_mode == "anthropic_messages":
                     result["response"] = self._anthropic_messages_create(api_kwargs)
                 else:
-                    request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
-                    result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
+                    request_client_holder["client"] = (
+                        self._create_request_openai_client(
+                            reason="chat_completion_request"
+                        )
+                    )
+                    result["response"] = request_client_holder[
+                        "client"
+                    ].chat.completions.create(**api_kwargs)
             except Exception as e:
                 result["error"] = e
             finally:
                 request_client = request_client_holder.get("client")
                 if request_client is not None:
-                    self._close_request_openai_client(request_client, reason="request_complete")
+                    self._close_request_openai_client(
+                        request_client, reason="request_complete"
+                    )
 
         # ── Stale-call timeout (mirrors streaming stale detector) ────────
         # Non-streaming calls return nothing until the full response is
@@ -4878,8 +5358,10 @@ class AIAgent:
                 logger.warning(
                     "Non-streaming API call stale for %.0fs (threshold %.0fs). "
                     "model=%s context=~%s tokens. Killing connection.",
-                    _elapsed, _stale_timeout,
-                    api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
+                    _elapsed,
+                    _stale_timeout,
+                    api_kwargs.get("model", "unknown"),
+                    f"{_est_ctx:,}",
                 )
                 self._emit_status(
                     f"⚠️ No response from provider for {int(_elapsed)}s "
@@ -4898,7 +5380,9 @@ class AIAgent:
                     else:
                         rc = request_client_holder.get("client")
                         if rc is not None:
-                            self._close_request_openai_client(rc, reason="stale_call_kill")
+                            self._close_request_openai_client(
+                                rc, reason="stale_call_kill"
+                            )
                 except Exception:
                     pass
                 self._touch_activity(
@@ -4929,7 +5413,9 @@ class AIAgent:
                     else:
                         request_client = request_client_holder.get("client")
                         if request_client is not None:
-                            self._close_request_openai_client(request_client, reason="interrupt_abort")
+                            self._close_request_openai_client(
+                                request_client, reason="interrupt_abort"
+                            )
                 except Exception:
                     pass
                 raise InterruptedError("Agent interrupted during API call")
@@ -4963,7 +5449,9 @@ class AIAgent:
         if not visible_content:
             return False
         streamed = self._normalize_interim_visible_text(
-            self._strip_think_blocks(getattr(self, "_current_streamed_assistant_text", "") or "")
+            self._strip_think_blocks(
+                getattr(self, "_current_streamed_assistant_text", "") or ""
+            )
         )
         return bool(streamed) and streamed == visible_content
 
@@ -4991,7 +5479,11 @@ class AIAgent:
         if getattr(self, "_stream_needs_break", False) and text and text.strip():
             self._stream_needs_break = False
             text = "\n\n" + text
-        callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
+        callbacks = [
+            cb
+            for cb in (self.stream_delta_callback, self._stream_callback)
+            if cb is not None
+        ]
         delivered = False
         for cb in callbacks:
             try:
@@ -5065,7 +5557,9 @@ class AIAgent:
         result = {"response": None, "error": None}
         request_client_holder = {"client": None}
         first_delta_fired = {"done": False}
-        deltas_were_sent = {"yes": False}  # Track if any deltas were fired (for fallback)
+        deltas_were_sent = {
+            "yes": False
+        }  # Track if any deltas were fired (for fallback)
         # Wall-clock timestamp of the last real streaming chunk.  The outer
         # poll loop uses this to detect stale connections that keep receiving
         # SSE keep-alive pings but no actual data.
@@ -5082,17 +5576,23 @@ class AIAgent:
         def _call_chat_completions():
             """Stream a chat completions response."""
             import httpx as _httpx
+
             _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
             _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
             # Local providers (Ollama, llama.cpp, vLLM) can take minutes for
             # prefill on large contexts before producing the first token.
             # Auto-increase the httpx read timeout unless the user explicitly
             # overrode HERMES_STREAM_READ_TIMEOUT.
-            if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url):
+            if (
+                _stream_read_timeout == 120.0
+                and self.base_url
+                and is_local_endpoint(self.base_url)
+            ):
                 _stream_read_timeout = _base_timeout
                 logger.debug(
                     "Local provider detected (%s) — stream read timeout raised to %.0fs",
-                    self.base_url, _stream_read_timeout,
+                    self.base_url,
+                    _stream_read_timeout,
                 )
             stream_kwargs = {
                 **api_kwargs,
@@ -5112,7 +5612,9 @@ class AIAgent:
             # attempt's start, not a previous attempt's last chunk.
             last_chunk_time["t"] = time.time()
             self._touch_activity("waiting for provider response (streaming)")
-            stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
+            stream = request_client_holder["client"].chat.completions.create(
+                **stream_kwargs
+            )
 
             # Capture rate limit headers from the initial HTTP response.
             # The OpenAI SDK Stream object exposes the underlying httpx
@@ -5126,7 +5628,7 @@ class AIAgent:
             # in a parallel batch, distinguishing them only by id.  Track
             # the last seen id per raw index so we can detect a new tool
             # call starting at the same index and redirect it to a fresh slot.
-            _last_id_at_idx: dict = {}      # raw_index -> last seen non-empty id
+            _last_id_at_idx: dict = {}  # raw_index -> last seen non-empty id
             _active_slot_by_idx: dict = {}  # raw_index -> current slot in tool_calls_acc
             finish_reason = None
             model_name = None
@@ -5153,7 +5655,9 @@ class AIAgent:
                     model_name = chunk.model
 
                 # Accumulate reasoning content
-                reasoning_text = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
+                reasoning_text = getattr(delta, "reasoning_content", None) or getattr(
+                    delta, "reasoning", None
+                )
                 if reasoning_text:
                     reasoning_parts.append(reasoning_text)
                     _fire_first_delta()
@@ -5220,7 +5724,9 @@ class AIAgent:
                             if tc_delta.function.name:
                                 entry["function"]["name"] += tc_delta.function.name
                             if tc_delta.function.arguments:
-                                entry["function"]["arguments"] += tc_delta.function.arguments
+                                entry["function"]["arguments"] += (
+                                    tc_delta.function.arguments
+                                )
                         extra = getattr(tc_delta, "extra_content", None)
                         if extra is None and hasattr(tc_delta, "model_extra"):
                             extra = (tc_delta.model_extra or {}).get("extra_content")
@@ -5256,15 +5762,17 @@ class AIAgent:
                             json.loads(arguments)
                         except json.JSONDecodeError:
                             has_truncated_tool_args = True
-                    mock_tool_calls.append(SimpleNamespace(
-                        id=tc["id"],
-                        type=tc["type"],
-                        extra_content=tc.get("extra_content"),
-                        function=SimpleNamespace(
-                            name=tc["function"]["name"],
-                            arguments=arguments,
-                        ),
-                    ))
+                    mock_tool_calls.append(
+                        SimpleNamespace(
+                            id=tc["id"],
+                            type=tc["type"],
+                            extra_content=tc.get("extra_content"),
+                            function=SimpleNamespace(
+                                name=tc["function"]["name"],
+                                arguments=arguments,
+                            ),
+                        )
+                    )
 
             effective_finish_reason = finish_reason or "stop"
             if has_truncated_tool_args:
@@ -5366,16 +5874,27 @@ class AIAgent:
                             # delivered.  Don't retry or fall back — partial
                             # content already reached the user.
                             logger.warning(
-                                "Streaming failed after partial delivery, not retrying: %s", e
+                                "Streaming failed after partial delivery, not retrying: %s",
+                                e,
                             )
                             result["error"] = e
                             return
 
                         _is_timeout = isinstance(
-                            e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
+                            e,
+                            (
+                                _httpx.ReadTimeout,
+                                _httpx.ConnectTimeout,
+                                _httpx.PoolTimeout,
+                            ),
                         )
                         _is_conn_err = isinstance(
-                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
+                            e,
+                            (
+                                _httpx.ConnectError,
+                                _httpx.RemoteProtocolError,
+                                ConnectionError,
+                            ),
                         )
 
                         # SSE error events from proxies (e.g. OpenRouter sends
@@ -5389,7 +5908,10 @@ class AIAgent:
                         _is_sse_conn_err = False
                         if not _is_timeout and not _is_conn_err:
                             from openai import APIError as _APIError
-                            if isinstance(e, _APIError) and not getattr(e, "status_code", None):
+
+                            if isinstance(e, _APIError) and not getattr(
+                                e, "status_code", None
+                            ):
                                 _err_lower_sse = str(e).lower()
                                 _SSE_CONN_PHRASES = (
                                     "connection lost",
@@ -5459,8 +5981,7 @@ class AIAgent:
                         else:
                             _err_lower = str(e).lower()
                             _is_stream_unsupported = (
-                                "stream" in _err_lower
-                                and "not supported" in _err_lower
+                                "stream" in _err_lower and "not supported" in _err_lower
                             )
                             if _is_stream_unsupported:
                                 self._disable_streaming = True
@@ -5485,15 +6006,26 @@ class AIAgent:
             finally:
                 request_client = request_client_holder.get("client")
                 if request_client is not None:
-                    self._close_request_openai_client(request_client, reason="stream_request_complete")
+                    self._close_request_openai_client(
+                        request_client, reason="stream_request_complete"
+                    )
 
-        _stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0))
+        _stream_stale_timeout_base = float(
+            os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0)
+        )
         # Local providers (Ollama, oMLX, llama-cpp) can take 300+ seconds
         # for prefill on large contexts.  Disable the stale detector unless
         # the user explicitly set HERMES_STREAM_STALE_TIMEOUT.
-        if _stream_stale_timeout_base == 180.0 and self.base_url and is_local_endpoint(self.base_url):
+        if (
+            _stream_stale_timeout_base == 180.0
+            and self.base_url
+            and is_local_endpoint(self.base_url)
+        ):
             _stream_stale_timeout = float("inf")
-            logger.debug("Local provider detected (%s) — stale stream timeout disabled", self.base_url)
+            logger.debug(
+                "Local provider detected (%s) — stale stream timeout disabled",
+                self.base_url,
+            )
         else:
             # Scale the stale timeout for large contexts: slow models (like Opus)
             # can legitimately think for minutes before producing the first token
@@ -5522,8 +6054,10 @@ class AIAgent:
                 logger.warning(
                     "Stream stale for %.0fs (threshold %.0fs) — no chunks received. "
                     "model=%s context=~%s tokens. Killing connection.",
-                    _stale_elapsed, _stream_stale_timeout,
-                    api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
+                    _stale_elapsed,
+                    _stream_stale_timeout,
+                    api_kwargs.get("model", "unknown"),
+                    f"{_est_ctx:,}",
                 )
                 self._emit_status(
                     f"⚠️ No response from provider for {int(_stale_elapsed)}s "
@@ -5534,13 +6068,17 @@ class AIAgent:
                 try:
                     rc = request_client_holder.get("client")
                     if rc is not None:
-                        self._close_request_openai_client(rc, reason="stale_stream_kill")
+                        self._close_request_openai_client(
+                            rc, reason="stale_stream_kill"
+                        )
                 except Exception:
                     pass
                 # Rebuild the primary client too — its connection pool
                 # may hold dead sockets from the same provider outage.
                 try:
-                    self._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
+                    self._replace_primary_openai_client(
+                        reason="stale_stream_pool_cleanup"
+                    )
                 except Exception:
                     pass
                 # Reset the timer so we don't kill repeatedly while
@@ -5563,7 +6101,9 @@ class AIAgent:
                     else:
                         request_client = request_client_holder.get("client")
                         if request_client is not None:
-                            self._close_request_openai_client(request_client, reason="stream_interrupt_abort")
+                            self._close_request_openai_client(
+                                request_client, reason="stream_interrupt_abort"
+                            )
                 except Exception:
                     pass
                 raise InterruptedError("Agent interrupted during streaming API call")
@@ -5589,15 +6129,21 @@ class AIAgent:
                     result["error"],
                 )
                 _stub_msg = SimpleNamespace(
-                    role="assistant", content=_partial_text, tool_calls=None,
+                    role="assistant",
+                    content=_partial_text,
+                    tool_calls=None,
                     reasoning_content=None,
                 )
                 return SimpleNamespace(
                     id="partial-stream-stub",
                     model=getattr(self, "model", "unknown"),
-                    choices=[SimpleNamespace(
-                        index=0, message=_stub_msg, finish_reason="stop",
-                    )],
+                    choices=[
+                        SimpleNamespace(
+                            index=0,
+                            message=_stub_msg,
+                            finish_reason="stop",
+                        )
+                    ],
                     usage=None,
                 )
             raise result["error"]
@@ -5632,6 +6178,7 @@ class AIAgent:
         # access for Codex providers.
         try:
             from agent.auxiliary_client import resolve_provider_client
+
             # Pass base_url and api_key from fallback config so custom
             # endpoints (e.g. Ollama Cloud) resolve correctly instead of
             # falling through to OpenRouter defaults.
@@ -5639,16 +6186,23 @@ class AIAgent:
             fb_api_key_hint = (fb.get("api_key") or "").strip() or None
             # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
             # when no explicit key is in the fallback config.
-            if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
+            if (
+                fb_base_url_hint
+                and "ollama.com" in fb_base_url_hint.lower()
+                and not fb_api_key_hint
+            ):
                 fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
             fb_client, _resolved_fb_model = resolve_provider_client(
-                fb_provider, model=fb_model, raw_codex=True,
+                fb_provider,
+                model=fb_model,
+                raw_codex=True,
                 explicit_base_url=fb_base_url_hint,
-                explicit_api_key=fb_api_key_hint)
+                explicit_api_key=fb_api_key_hint,
+            )
             if fb_client is None:
                 logging.warning(
-                    "Fallback to %s failed: provider not configured",
-                    fb_provider)
+                    "Fallback to %s failed: provider not configured", fb_provider
+                )
                 return self._try_activate_fallback()  # try next in chain
             try:
                 from hermes_cli.model_normalize import normalize_model_for_provider
@@ -5662,7 +6216,9 @@ class AIAgent:
             fb_base_url = str(fb_client.base_url)
             if fb_provider == "openai-codex":
                 fb_api_mode = "codex_responses"
-            elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
+            elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith(
+                "/anthropic"
+            ):
                 fb_api_mode = "anthropic_messages"
             elif self._is_direct_openai_url(fb_base_url):
                 fb_api_mode = "codex_responses"
@@ -5680,12 +6236,23 @@ class AIAgent:
 
             if fb_api_mode == "anthropic_messages":
                 # Build native Anthropic client instead of using OpenAI client
-                from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
-                effective_key = (fb_client.api_key or resolve_anthropic_token() or "") if fb_provider == "anthropic" else (fb_client.api_key or "")
+                from agent.anthropic_adapter import (
+                    build_anthropic_client,
+                    resolve_anthropic_token,
+                    _is_oauth_token,
+                )
+
+                effective_key = (
+                    (fb_client.api_key or resolve_anthropic_token() or "")
+                    if fb_provider == "anthropic"
+                    else (fb_client.api_key or "")
+                )
                 self.api_key = effective_key
                 self._anthropic_api_key = effective_key
                 self._anthropic_base_url = fb_base_url
-                self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
+                self._anthropic_client = build_anthropic_client(
+                    effective_key, self._anthropic_base_url
+                )
                 self._is_anthropic_oauth = _is_oauth_token(effective_key)
                 self.client = None
                 self._client_kwargs = {}
@@ -5711,21 +6278,25 @@ class AIAgent:
                 }
 
             # Re-evaluate prompt caching for the new provider/model
-            is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
-            self._use_prompt_caching = (
-                ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower())
-                or is_native_anthropic
+            is_native_anthropic = (
+                fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
             )
+            self._use_prompt_caching = (
+                "openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()
+            ) or is_native_anthropic
 
             # Update context compressor limits for the fallback model.
             # Without this, compression decisions use the primary model's
             # context window (e.g. 200K) instead of the fallback's (e.g. 32K),
             # causing oversized sessions to overflow the fallback.
-            if hasattr(self, 'context_compressor') and self.context_compressor:
+            if hasattr(self, "context_compressor") and self.context_compressor:
                 from agent.model_metadata import get_model_context_length
+
                 fb_context_length = get_model_context_length(
-                    self.model, base_url=self.base_url,
-                    api_key=self.api_key, provider=self.provider,
+                    self.model,
+                    base_url=self.base_url,
+                    api_key=self.api_key,
+                    provider=self.provider,
                 )
                 self.context_compressor.update_model(
                     model=self.model,
@@ -5741,7 +6312,9 @@ class AIAgent:
             )
             logging.info(
                 "Fallback activated: %s → %s (%s)",
-                old_model, fb_model, fb_provider,
+                old_model,
+                fb_model,
+                fb_provider,
             )
             return True
         except Exception as e:
@@ -5769,7 +6342,7 @@ class AIAgent:
             # ── Core runtime state ──
             self.model = rt["model"]
             self.provider = rt["provider"]
-            self.base_url = rt["base_url"]           # setter updates _base_url_lower
+            self.base_url = rt["base_url"]  # setter updates _base_url_lower
             self.api_mode = rt["api_mode"]
             self.api_key = rt["api_key"]
             self._client_kwargs = dict(rt["client_kwargs"])
@@ -5778,10 +6351,12 @@ class AIAgent:
             # ── Rebuild client for the primary provider ──
             if self.api_mode == "anthropic_messages":
                 from agent.anthropic_adapter import build_anthropic_client
+
                 self._anthropic_api_key = rt["anthropic_api_key"]
                 self._anthropic_base_url = rt["anthropic_base_url"]
                 self._anthropic_client = build_anthropic_client(
-                    rt["anthropic_api_key"], rt["anthropic_base_url"],
+                    rt["anthropic_api_key"],
+                    rt["anthropic_base_url"],
                 )
                 self._is_anthropic_oauth = rt["is_anthropic_oauth"]
                 self.client = None
@@ -5808,7 +6383,8 @@ class AIAgent:
 
             logging.info(
                 "Primary runtime restored for new turn: %s (%s)",
-                self.model, self.provider,
+                self.model,
+                self.provider,
             )
             return True
         except Exception as e:
@@ -5817,14 +6393,24 @@ class AIAgent:
 
     # Which error types indicate a transient transport failure worth
     # one more attempt with a rebuilt client / connection pool.
-    _TRANSIENT_TRANSPORT_ERRORS = frozenset({
-        "ReadTimeout", "ConnectTimeout", "PoolTimeout",
-        "ConnectError", "RemoteProtocolError",
-        "APIConnectionError", "APITimeoutError",
-    })
+    _TRANSIENT_TRANSPORT_ERRORS = frozenset(
+        {
+            "ReadTimeout",
+            "ConnectTimeout",
+            "PoolTimeout",
+            "ConnectError",
+            "RemoteProtocolError",
+            "APIConnectionError",
+            "APITimeoutError",
+        }
+    )
 
     def _try_recover_primary_transport(
-        self, api_error: Exception, *, retry_count: int, max_retries: int,
+        self,
+        api_error: Exception,
+        *,
+        retry_count: int,
+        max_retries: int,
     ) -> bool:
         """Attempt one extra primary-provider recovery cycle for transient transport failures.
 
@@ -5858,7 +6444,9 @@ class AIAgent:
             if getattr(self, "client", None) is not None:
                 try:
                     self._close_openai_client(
-                        self.client, reason="primary_recovery", shared=True,
+                        self.client,
+                        reason="primary_recovery",
+                        shared=True,
                     )
                 except Exception:
                     pass
@@ -5874,10 +6462,12 @@ class AIAgent:
 
             if self.api_mode == "anthropic_messages":
                 from agent.anthropic_adapter import build_anthropic_client
+
                 self._anthropic_api_key = rt["anthropic_api_key"]
                 self._anthropic_base_url = rt["anthropic_base_url"]
                 self._anthropic_client = build_anthropic_client(
-                    rt["anthropic_api_key"], rt["anthropic_base_url"],
+                    rt["anthropic_api_key"],
+                    rt["anthropic_base_url"],
                 )
                 self._is_anthropic_oauth = rt["is_anthropic_oauth"]
                 self.client = None
@@ -5907,7 +6497,10 @@ class AIAgent:
         if not isinstance(content, list):
             return False
         for part in content:
-            if isinstance(part, dict) and part.get("type") in {"image_url", "input_image"}:
+            if isinstance(part, dict) and part.get("type") in {
+                "image_url",
+                "input_image",
+            }:
                 return True
         return False
 
@@ -5916,7 +6509,7 @@ class AIAgent:
         header, _, data = str(image_url or "").partition(",")
         mime = "image/jpeg"
         if header.startswith("data:"):
-            mime_part = header[len("data:"):].split(";", 1)[0].strip()
+            mime_part = header[len("data:") :].split(";", 1)[0].strip()
             if mime_part.startswith("image/"):
                 mime = mime_part
         suffix = {
@@ -5926,7 +6519,9 @@ class AIAgent:
             "image/jpeg": ".jpg",
             "image/jpg": ".jpg",
         }.get(mime, ".jpg")
-        tmp = tempfile.NamedTemporaryFile(prefix="anthropic_image_", suffix=suffix, delete=False)
+        tmp = tempfile.NamedTemporaryFile(
+            prefix="anthropic_image_", suffix=suffix, delete=False
+        )
         with tmp:
             tmp.write(base64.b64decode(data))
         path = Path(tmp.name)
@@ -5951,14 +6546,18 @@ class AIAgent:
         vision_source = str(image_url or "")
         cleanup_path: Optional[Path] = None
         if vision_source.startswith("data:"):
-            vision_source, cleanup_path = self._materialize_data_url_for_vision(vision_source)
+            vision_source, cleanup_path = self._materialize_data_url_for_vision(
+                vision_source
+            )
 
         description = ""
         try:
             from tools.vision_tools import vision_analyze_tool
 
             result_json = asyncio.run(
-                vision_analyze_tool(image_url=vision_source, user_prompt=analysis_prompt)
+                vision_analyze_tool(
+                    image_url=vision_source, user_prompt=analysis_prompt
+                )
             )
             result = json.loads(result_json) if isinstance(result_json, str) else {}
             description = (result.get("analysis") or "").strip()
@@ -5976,9 +6575,7 @@ class AIAgent:
 
         note = f"[The {role_label} attached an image. Here's what it contains:\n{description}]"
         if vision_source and not str(image_url or "").startswith("data:"):
-            note += (
-                f"\n[If you need a closer look, use vision_analyze with image_url: {vision_source}]"
-            )
+            note += f"\n[If you need a closer look, use vision_analyze with image_url: {vision_source}]"
 
         self._anthropic_image_fallback_cache[cache_key] = note
         return note
@@ -6006,11 +6603,19 @@ class AIAgent:
 
             if ptype in {"image_url", "input_image"}:
                 image_data = part.get("image_url", {})
-                image_url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data or "")
+                image_url = (
+                    image_data.get("url", "")
+                    if isinstance(image_data, dict)
+                    else str(image_data or "")
+                )
                 if image_url:
-                    image_notes.append(self._describe_image_for_anthropic_fallback(image_url, role))
+                    image_notes.append(
+                        self._describe_image_for_anthropic_fallback(image_url, role)
+                    )
                 else:
-                    image_notes.append("[An image was attached but no image source was available.]")
+                    image_notes.append(
+                        "[An image was attached but no image source was available.]"
+                    )
                 continue
 
             text = str(part.get("text", "") or "").strip()
@@ -6025,7 +6630,9 @@ class AIAgent:
             return prefix
         if suffix:
             return suffix
-        return "[A multimodal message was converted to text for Anthropic compatibility.]"
+        return (
+            "[A multimodal message was converted to text for Anthropic compatibility.]"
+        )
 
     def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
         if not any(
@@ -6050,10 +6657,23 @@ class AIAgent:
         MiniMax keeps dots (e.g. MiniMax-M2.7).
         OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free).
         ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1)."""
-        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}:
+        if (getattr(self, "provider", "") or "").lower() in {
+            "alibaba",
+            "minimax",
+            "minimax-cn",
+            "opencode-go",
+            "opencode-zen",
+            "zai",
+        }:
             return True
         base = (getattr(self, "base_url", "") or "").lower()
-        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base
+        return (
+            "dashscope" in base
+            or "aliyuncs" in base
+            or "minimax" in base
+            or "opencode.ai/zen/" in base
+            or "bigmodel.cn" in base
+        )
 
     def _is_qwen_portal(self) -> bool:
         """Return True when the base URL targets Qwen Portal."""
@@ -6086,7 +6706,11 @@ class AIAgent:
         for msg in prepared:
             if isinstance(msg, dict) and msg.get("role") == "system":
                 content = msg.get("content")
-                if isinstance(content, list) and content and isinstance(content[-1], dict):
+                if (
+                    isinstance(content, list)
+                    and content
+                    and isinstance(content[-1], dict)
+                ):
                     content[-1]["cache_control"] = {"type": "ephemeral"}
                 break
 
@@ -6116,7 +6740,11 @@ class AIAgent:
         for msg in messages:
             if isinstance(msg, dict) and msg.get("role") == "system":
                 content = msg.get("content")
-                if isinstance(content, list) and content and isinstance(content[-1], dict):
+                if (
+                    isinstance(content, list)
+                    and content
+                    and isinstance(content[-1], dict)
+                ):
                     content[-1]["cache_control"] = {"type": "ephemeral"}
                 break
 
@@ -6124,6 +6752,7 @@ class AIAgent:
         """Build the keyword arguments dict for the active API mode."""
         if self.api_mode == "anthropic_messages":
             from agent.anthropic_adapter import build_anthropic_kwargs
+
             anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
             # Pass context_length (total input+output window) so the adapter can
             # clamp max_tokens (output cap) when the user configured a smaller
@@ -6140,7 +6769,9 @@ class AIAgent:
                 model=self.model,
                 messages=anthropic_messages,
                 tools=self.tools,
-                max_tokens=ephemeral_out if ephemeral_out is not None else self.max_tokens,
+                max_tokens=ephemeral_out
+                if ephemeral_out is not None
+                else self.max_tokens,
                 reasoning_config=self.reasoning_config,
                 is_oauth=self._is_anthropic_oauth,
                 preserve_dots=self._anthropic_preserve_dots(),
@@ -6204,7 +6835,10 @@ class AIAgent:
                     if github_reasoning is not None:
                         kwargs["reasoning"] = github_reasoning
                 else:
-                    kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+                    kwargs["reasoning"] = {
+                        "effort": reasoning_effort,
+                        "summary": "auto",
+                    }
                     kwargs["include"] = ["reasoning.encrypted_content"]
             elif not is_github_responses:
                 kwargs["include"] = []
@@ -6259,7 +6893,9 @@ class AIAgent:
         if self._is_qwen_portal():
             if sanitized_messages is api_messages:
                 # No sanitization was done — we need our own copy.
-                sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages)
+                sanitized_messages = self._qwen_prepare_chat_messages(
+                    sanitized_messages
+                )
             else:
                 # Already a deepcopy — transform in place to avoid a second deepcopy.
                 self._qwen_prepare_chat_messages_inplace(sanitized_messages)
@@ -6315,7 +6951,9 @@ class AIAgent:
             # (the documented max output for qwen3-coder models) so the
             # model has adequate output budget for tool calls.
             api_kwargs.update(self._max_tokens_param(65536))
-        elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
+        elif (
+            self._is_openrouter_url() or "nousresearch" in self._base_url_lower
+        ) and "claude" in (self.model or "").lower():
             # OpenRouter and Nous Portal translate requests to Anthropic's
             # Messages API, which requires max_tokens as a mandatory field.
             # When we omit it, the proxy picks a default that can be too
@@ -6325,6 +6963,7 @@ class AIAgent:
             # limit ensures full capacity.
             try:
                 from agent.anthropic_adapter import _get_anthropic_max_output
+
                 _model_output_limit = _get_anthropic_max_output(self.model)
                 api_kwargs["max_tokens"] = _model_output_limit
             except Exception:
@@ -6361,10 +7000,7 @@ class AIAgent:
                     else:
                         extra_body["reasoning"] = rc
                 else:
-                    extra_body["reasoning"] = {
-                        "enabled": True,
-                        "effort": "medium"
-                    }
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
 
         # Nous Portal product attribution
         if _is_nous:
@@ -6388,7 +7024,11 @@ class AIAgent:
         # xAI prompt caching: send x-grok-conv-id header to route requests
         # to the same server, maximizing automatic cache hits.
         # https://docs.x.ai/developers/advanced-api-usage/prompt-caching
-        if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
+        if (
+            "x.ai" in self._base_url_lower
+            and hasattr(self, "session_id")
+            and self.session_id
+        ):
             api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
 
         # Priority Processing / generic request overrides (e.g. service_tier).
@@ -6409,7 +7049,10 @@ class AIAgent:
             return True
         if "ai-gateway.vercel.sh" in self._base_url_lower:
             return True
-        if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower:
+        if (
+            "models.github.ai" in self._base_url_lower
+            or "api.githubcopilot.com" in self._base_url_lower
+        ):
             try:
                 from hermes_cli.models import github_model_reasoning_efforts
 
@@ -6446,9 +7089,9 @@ class AIAgent:
         if self.reasoning_config and isinstance(self.reasoning_config, dict):
             if self.reasoning_config.get("enabled") is False:
                 return None
-            requested_effort = str(
-                self.reasoning_config.get("effort", "medium")
-            ).strip().lower()
+            requested_effort = (
+                str(self.reasoning_config.get("effort", "medium")).strip().lower()
+            )
         else:
             requested_effort = "medium"
 
@@ -6478,13 +7121,15 @@ class AIAgent:
         # directly in the content rather than returning separate API fields).
         if not reasoning_text:
             content = assistant_message.content or ""
-            think_blocks = re.findall(r'<think>(.*?)</think>', content, flags=re.DOTALL)
+            think_blocks = re.findall(r"<think>(.*?)</think>", content, flags=re.DOTALL)
             if think_blocks:
                 combined = "\n\n".join(b.strip() for b in think_blocks if b.strip())
                 reasoning_text = combined or None
 
         if reasoning_text and self.verbose_logging:
-            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}")
+            logging.debug(
+                f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}"
+            )
 
         if reasoning_text and self.reasoning_callback:
             # Skip callback when streaming is active — reasoning was already
@@ -6508,7 +7153,10 @@ class AIAgent:
             "finish_reason": finish_reason,
         }
 
-        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+        if (
+            hasattr(assistant_message, "reasoning_details")
+            and assistant_message.reasoning_details
+        ):
             # Pass reasoning_details back unmodified so providers (OpenRouter,
             # Anthropic, OpenAI) can maintain reasoning continuity across turns.
             # Each provider may include opaque fields (signature, encrypted_content)
@@ -6546,11 +7194,16 @@ class AIAgent:
                         _fn = getattr(tool_call, "function", None)
                         _fn_name = getattr(_fn, "name", "") if _fn else ""
                         _fn_args = getattr(_fn, "arguments", "{}") if _fn else "{}"
-                        call_id = self._deterministic_call_id(_fn_name, _fn_args, len(tool_calls))
+                        call_id = self._deterministic_call_id(
+                            _fn_name, _fn_args, len(tool_calls)
+                        )
                 call_id = call_id.strip()
 
                 response_item_id = getattr(tool_call, "response_item_id", None)
-                if not isinstance(response_item_id, str) or not response_item_id.strip():
+                if (
+                    not isinstance(response_item_id, str)
+                    or not response_item_id.strip()
+                ):
                     _, embedded_response_item_id = self._split_responses_tool_id(raw_id)
                     response_item_id = embedded_response_item_id
 
@@ -6566,7 +7219,7 @@ class AIAgent:
                     "type": tool_call.type,
                     "function": {
                         "name": tool_call.function.name,
-                        "arguments": tool_call.function.arguments
+                        "arguments": tool_call.function.arguments,
                     },
                 }
                 # Preserve extra_content (e.g. Gemini thought_signature) so it
@@ -6605,7 +7258,8 @@ class AIAgent:
         _STRIP_KEYS = {"call_id", "response_item_id"}
         api_msg["tool_calls"] = [
             {k: v for k, v in tc.items() if k not in _STRIP_KEYS}
-            if isinstance(tc, dict) else tc
+            if isinstance(tc, dict)
+            else tc
             for tc in tool_calls
         ]
         return api_msg
@@ -6641,12 +7295,14 @@ class AIAgent:
             return
         if "memory" not in self.valid_tool_names or not self._memory_store:
             return
-        effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
+        effective_min = (
+            min_turns if min_turns is not None else self._memory_flush_min_turns
+        )
         if self._user_turn_count < effective_min:
             return
 
         if messages is None:
-            messages = getattr(self, '_session_messages', None)
+            messages = getattr(self, "_session_messages", None)
         if not messages or len(messages) < 3:
             return
 
@@ -6656,7 +7312,11 @@ class AIAgent:
             "corrections, and recurring patterns over task-specific details.]"
         )
         _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
-        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
+        flush_msg = {
+            "role": "user",
+            "content": flush_content,
+            "_flush_sentinel": _sentinel,
+        }
         messages.append(flush_msg)
 
         try:
@@ -6678,11 +7338,13 @@ class AIAgent:
                 api_messages.append(api_msg)
 
             if self._cached_system_prompt:
-                api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
+                api_messages = [
+                    {"role": "system", "content": self._cached_system_prompt}
+                ] + api_messages
 
             # Make one API call with only the memory tool available
             memory_tool_def = None
-            for t in (self.tools or []):
+            for t in self.tools or []:
                 if t.get("function", {}).get("name") == "memory":
                     memory_tool_def = t
                     break
@@ -6694,6 +7356,7 @@ class AIAgent:
             # Use auxiliary client for the flush call when available --
             # it's cheaper and avoids Codex Responses API incompatibility.
             from agent.auxiliary_client import call_llm as _call_llm
+
             _aux_available = True
             try:
                 response = _call_llm(
@@ -6718,10 +7381,15 @@ class AIAgent:
                 response = self._run_codex_stream(codex_kwargs)
             elif not _aux_available and self.api_mode == "anthropic_messages":
                 # Native Anthropic — use the Anthropic client directly
-                from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs
+                from agent.anthropic_adapter import (
+                    build_anthropic_kwargs as _build_ant_kwargs,
+                )
+
                 ant_kwargs = _build_ant_kwargs(
-                    model=self.model, messages=api_messages,
-                    tools=[memory_tool_def], max_tokens=5120,
+                    model=self.model,
+                    messages=api_messages,
+                    tools=[memory_tool_def],
+                    max_tokens=5120,
                     reasoning_config=None,
                     preserve_dots=self._anthropic_preserve_dots(),
                 )
@@ -6735,7 +7403,10 @@ class AIAgent:
                     **self._max_tokens_param(5120),
                 }
                 from agent.auxiliary_client import _get_task_timeout
-                response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
+
+                response = self._ensure_primary_openai_client(
+                    reason="flush_memories"
+                ).chat.completions.create(
                     **api_kwargs, timeout=_get_task_timeout("flush_memories")
                 )
 
@@ -6746,8 +7417,13 @@ class AIAgent:
                 if assistant_msg and assistant_msg.tool_calls:
                     tool_calls = assistant_msg.tool_calls
             elif self.api_mode == "anthropic_messages" and not _aux_available:
-                from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush
-                _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth)
+                from agent.anthropic_adapter import (
+                    normalize_anthropic_response as _nar_flush,
+                )
+
+                _flush_msg, _ = _nar_flush(
+                    response, strip_tool_prefix=self._is_anthropic_oauth
+                )
                 if _flush_msg and _flush_msg.tool_calls:
                     tool_calls = _flush_msg.tool_calls
             elif hasattr(response, "choices") and response.choices:
@@ -6761,6 +7437,7 @@ class AIAgent:
                         args = json.loads(tc.function.arguments)
                         flush_target = args.get("target", "memory")
                         from tools.memory_tool import memory_tool as _memory_tool
+
                         _memory_tool(
                             action=args.get("action"),
                             target=flush_target,
@@ -6769,7 +7446,9 @@ class AIAgent:
                             store=self._memory_store,
                         )
                         if not self.quiet_mode:
-                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
+                            print(
+                                f"  🧠 Memory flush: saved to {args.get('target', 'memory')}"
+                            )
                     except Exception as e:
                         logger.debug("Memory flush tool call failed: %s", e)
         except Exception as e:
@@ -6784,7 +7463,15 @@ class AIAgent:
             if messages and messages[-1].get("_flush_sentinel") == _sentinel:
                 messages.pop()
 
-    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
+    def _compress_context(
+        self,
+        messages: list,
+        system_message: str,
+        *,
+        approx_tokens: int = None,
+        task_id: str = "default",
+        focus_topic: str = None,
+    ) -> tuple:
         """Compress conversation context and split the session in SQLite.
 
         Args:
@@ -6798,8 +7485,10 @@ class AIAgent:
         _pre_msg_count = len(messages)
         logger.info(
             "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
-            self.session_id or "none", _pre_msg_count,
-            f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
+            self.session_id or "none",
+            _pre_msg_count,
+            f"{approx_tokens:,}" if approx_tokens else "unknown",
+            self.model,
             focus_topic,
         )
         # Pre-compression memory flush: let the model save memories before they're lost
@@ -6812,7 +7501,9 @@ class AIAgent:
             except Exception:
                 pass
 
-        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
+        compressed = self.context_compressor.compress(
+            messages, current_tokens=approx_tokens, focus_topic=focus_topic
+        )
 
         todo_snapshot = self._todo_store.format_for_injection()
         if todo_snapshot:
@@ -6828,27 +7519,39 @@ class AIAgent:
                 old_title = self._session_db.get_session_title(self.session_id)
                 self._session_db.end_session(self.session_id, "compression")
                 old_session_id = self.session_id
-                self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+                self.session_id = (
+                    f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+                )
                 # Update session_log_file to point to the new session's JSON file
-                self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"
+                self.session_log_file = (
+                    self.logs_dir / f"session_{self.session_id}.json"
+                )
                 self._session_db.create_session(
                     session_id=self.session_id,
-                    source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
+                    source=self.platform
+                    or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
                     model=self.model,
                     parent_session_id=old_session_id,
                 )
                 # Auto-number the title for the continuation session
                 if old_title:
                     try:
-                        new_title = self._session_db.get_next_title_in_lineage(old_title)
+                        new_title = self._session_db.get_next_title_in_lineage(
+                            old_title
+                        )
                         self._session_db.set_session_title(self.session_id, new_title)
                     except (ValueError, Exception) as e:
                         logger.debug("Could not propagate title on compression: %s", e)
-                self._session_db.update_system_prompt(self.session_id, new_system_prompt)
+                self._session_db.update_system_prompt(
+                    self.session_id, new_system_prompt
+                )
                 # Reset flush cursor — new session starts with no messages written
                 self._last_flushed_db_idx = 0
             except Exception as e:
-                logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
+                logger.warning(
+                    "Session DB compression split failed — new session will NOT be indexed: %s",
+                    e,
+                )
 
         # Warn on repeated compressions (quality degrades with each pass)
         _cc = self.context_compressor.compression_count
@@ -6861,10 +7564,9 @@ class AIAgent:
 
         # Update token estimate after compaction so pressure calculations
         # use the post-compression count, not the stale pre-compression one.
-        _compressed_est = (
-            estimate_tokens_rough(new_system_prompt)
-            + estimate_messages_tokens_rough(compressed)
-        )
+        _compressed_est = estimate_tokens_rough(
+            new_system_prompt
+        ) + estimate_messages_tokens_rough(compressed)
         self.context_compressor.last_prompt_tokens = _compressed_est
         self.context_compressor.last_completion_tokens = 0
 
@@ -6887,18 +7589,27 @@ class AIAgent:
         # file it needs the full content, not a "file unchanged" stub.
         try:
             from tools.file_tools import reset_file_dedup
+
             reset_file_dedup(task_id)
         except Exception:
             pass
 
         logger.info(
             "context compression done: session=%s messages=%d->%d tokens=~%s",
-            self.session_id or "none", _pre_msg_count, len(compressed),
+            self.session_id or "none",
+            _pre_msg_count,
+            len(compressed),
             f"{_compressed_est:,}",
         )
         return compressed, new_system_prompt
 
-    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
+    def _execute_tool_calls(
+        self,
+        assistant_message,
+        messages: list,
+        effective_task_id: str,
+        api_call_count: int = 0,
+    ) -> None:
         """Execute tool calls from the assistant message and append results to messages.
 
         Dispatches to concurrent execution only for batches that look
@@ -6921,8 +7632,13 @@ class AIAgent:
         finally:
             self._executing_tools = False
 
-    def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str,
-                     tool_call_id: Optional[str] = None) -> str:
+    def _invoke_tool(
+        self,
+        function_name: str,
+        function_args: dict,
+        effective_task_id: str,
+        tool_call_id: Optional[str] = None,
+    ) -> str:
         """Invoke a single tool and return the result string. No display logic.
 
         Handles both agent-level tools (todo, memory, etc.) and registry-dispatched
@@ -6933,8 +7649,11 @@ class AIAgent:
         block_message: Optional[str] = None
         try:
             from hermes_cli.plugins import get_pre_tool_call_block_message
+
             block_message = get_pre_tool_call_block_message(
-                function_name, function_args, task_id=effective_task_id or "",
+                function_name,
+                function_args,
+                task_id=effective_task_id or "",
             )
         except Exception:
             pass
@@ -6943,6 +7662,7 @@ class AIAgent:
 
         if function_name == "todo":
             from tools.todo_tool import todo_tool as _todo_tool
+
             return _todo_tool(
                 todos=function_args.get("todos"),
                 merge=function_args.get("merge", False),
@@ -6950,8 +7670,11 @@ class AIAgent:
             )
         elif function_name == "session_search":
             if not self._session_db:
-                return json.dumps({"success": False, "error": "Session database not available."})
+                return json.dumps(
+                    {"success": False, "error": "Session database not available."}
+                )
             from tools.session_search_tool import session_search as _session_search
+
             return _session_search(
                 query=function_args.get("query", ""),
                 role_filter=function_args.get("role_filter"),
@@ -6962,6 +7685,7 @@ class AIAgent:
         elif function_name == "memory":
             target = function_args.get("target", "memory")
             from tools.memory_tool import memory_tool as _memory_tool
+
             result = _memory_tool(
                 action=function_args.get("action"),
                 target=target,
@@ -6970,7 +7694,10 @@ class AIAgent:
                 store=self._memory_store,
             )
             # Bridge: notify external memory provider of built-in memory writes
-            if self._memory_manager and function_args.get("action") in ("add", "replace"):
+            if self._memory_manager and function_args.get("action") in (
+                "add",
+                "replace",
+            ):
                 try:
                     self._memory_manager.on_memory_write(
                         function_args.get("action", ""),
@@ -6984,6 +7711,7 @@ class AIAgent:
             return self._memory_manager.handle_tool_call(function_name, function_args)
         elif function_name == "clarify":
             from tools.clarify_tool import clarify_tool as _clarify_tool
+
             return _clarify_tool(
                 question=function_args.get("question", ""),
                 choices=function_args.get("choices"),
@@ -6991,6 +7719,7 @@ class AIAgent:
             )
         elif function_name == "delegate_task":
             from tools.delegate_tool import delegate_task as _delegate_task
+
             return _delegate_task(
                 goal=function_args.get("goal"),
                 context=function_args.get("context"),
@@ -7001,10 +7730,14 @@ class AIAgent:
             )
         else:
             return handle_function_call(
-                function_name, function_args, effective_task_id,
+                function_name,
+                function_args,
+                effective_task_id,
                 tool_call_id=tool_call_id,
                 session_id=self.session_id or "",
-                enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+                enabled_tools=list(self.valid_tool_names)
+                if self.valid_tool_names
+                else None,
                 skip_pre_tool_call_hook=True,
             )
 
@@ -7019,6 +7752,7 @@ class AIAgent:
         """
         import shutil as _shutil
         import textwrap as _tw
+
         cols = _shutil.get_terminal_size((120, 24)).columns
         wrap_width = max(40, cols - len(indent))
         out_lines: list[str] = []
@@ -7026,14 +7760,23 @@ class AIAgent:
             if len(raw_line) <= wrap_width:
                 out_lines.append(raw_line)
             else:
-                wrapped = _tw.wrap(raw_line, width=wrap_width,
-                                   break_long_words=True,
-                                   break_on_hyphens=False)
+                wrapped = _tw.wrap(
+                    raw_line,
+                    width=wrap_width,
+                    break_long_words=True,
+                    break_on_hyphens=False,
+                )
                 out_lines.extend(wrapped or [raw_line])
         body = ("\n" + indent).join(out_lines)
         return f"{indent}{label}{body}"
 
-    def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
+    def _execute_tool_calls_concurrent(
+        self,
+        assistant_message,
+        messages: list,
+        effective_task_id: str,
+        api_call_count: int = 0,
+    ) -> None:
         """Execute multiple tool calls concurrently using a thread pool.
 
         Results are collected in the original tool-call order and appended to
@@ -7046,11 +7789,13 @@ class AIAgent:
         if self._interrupt_requested:
             print(f"{self.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)")
             for tc in tool_calls:
-                messages.append({
-                    "role": "tool",
-                    "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
-                    "tool_call_id": tc.id,
-                })
+                messages.append(
+                    {
+                        "role": "tool",
+                        "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
+                        "tool_call_id": tc.id,
+                    }
+                )
             return
 
         # ── Parse args + pre-execution bookkeeping ───────────────────────
@@ -7072,12 +7817,19 @@ class AIAgent:
                 function_args = {}
 
             # Checkpoint for file-mutating tools
-            if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
+            if (
+                function_name in ("write_file", "patch")
+                and self._checkpoint_mgr.enabled
+            ):
                 try:
                     file_path = function_args.get("path", "")
                     if file_path:
-                        work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
-                        self._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
+                        work_dir = self._checkpoint_mgr.get_working_dir_for_path(
+                            file_path
+                        )
+                        self._checkpoint_mgr.ensure_checkpoint(
+                            work_dir, f"before {function_name}"
+                        )
                 except Exception:
                     pass
 
@@ -7086,7 +7838,9 @@ class AIAgent:
                 try:
                     cmd = function_args.get("command", "")
                     if _is_destructive_command(cmd):
-                        cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
+                        cwd = function_args.get("workdir") or os.getenv(
+                            "TERMINAL_CWD", os.getcwd()
+                        )
                         self._checkpoint_mgr.ensure_checkpoint(
                             cwd, f"before terminal: {cmd[:60]}"
                         )
@@ -7103,10 +7857,20 @@ class AIAgent:
                 args_str = json.dumps(args, ensure_ascii=False)
                 if self.verbose_logging:
                     print(f"  📞 Tool {i}: {name}({list(args.keys())})")
-                    print(self._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
+                    print(
+                        self._wrap_verbose(
+                            "Args: ", json.dumps(args, indent=2, ensure_ascii=False)
+                        )
+                    )
                 else:
-                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
-                    print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
+                    args_preview = (
+                        args_str[: self.log_prefix_chars] + "..."
+                        if len(args_str) > self.log_prefix_chars
+                        else args_str
+                    )
+                    print(
+                        f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}"
+                    )
 
         for tc, name, args in parsed_calls:
             if self.tool_progress_callback:
@@ -7131,28 +7895,51 @@ class AIAgent:
             """Worker function executed in a thread."""
             start = time.time()
             try:
-                result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id)
+                result = self._invoke_tool(
+                    function_name, function_args, effective_task_id, tool_call.id
+                )
             except Exception as tool_error:
                 result = f"Error executing tool '{function_name}': {tool_error}"
-                logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
+                logger.error(
+                    "_invoke_tool raised for %s: %s",
+                    function_name,
+                    tool_error,
+                    exc_info=True,
+                )
             duration = time.time() - start
             is_error, _ = _detect_tool_failure(function_name, result)
             if is_error:
-                logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
+                logger.info(
+                    "tool %s failed (%.2fs): %s", function_name, duration, result[:200]
+                )
             else:
-                logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
+                logger.info(
+                    "tool %s completed (%.2fs, %d chars)",
+                    function_name,
+                    duration,
+                    len(result),
+                )
             results[index] = (function_name, function_args, result, duration, is_error)
 
         # Start spinner for CLI mode (skip when TUI handles tool progress)
         spinner = None
-        if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
+        if (
+            self._should_emit_quiet_tool_messages()
+            and self._should_start_quiet_spinner()
+        ):
             face = random.choice(KawaiiSpinner.KAWAII_WAITING)
-            spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn)
+            spinner = KawaiiSpinner(
+                f"{face} ⚡ running {num_tools} tools concurrently",
+                spinner_type="dots",
+                print_fn=self._print_fn,
+            )
             spinner.start()
 
         try:
             max_workers = min(num_tools, _MAX_TOOL_WORKERS)
-            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            with concurrent.futures.ThreadPoolExecutor(
+                max_workers=max_workers
+            ) as executor:
                 futures = []
                 for i, (tc, name, args) in enumerate(parsed_calls):
                     f = executor.submit(_run_tool, i, tc, name, args)
@@ -7165,46 +7952,81 @@ class AIAgent:
                 # Build a summary message for the spinner stop
                 completed = sum(1 for r in results if r is not None)
                 total_dur = sum(r[3] for r in results if r is not None)
-                spinner.stop(f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total")
+                spinner.stop(
+                    f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total"
+                )
 
         # ── Post-execution: display per-tool results ─────────────────────
         for i, (tc, name, args) in enumerate(parsed_calls):
             r = results[i]
             if r is None:
                 # Shouldn't happen, but safety fallback
-                function_result = f"Error executing tool '{name}': thread did not return a result"
+                function_result = (
+                    f"Error executing tool '{name}': thread did not return a result"
+                )
                 tool_duration = 0.0
             else:
-                function_name, function_args, function_result, tool_duration, is_error = r
+                (
+                    function_name,
+                    function_args,
+                    function_result,
+                    tool_duration,
+                    is_error,
+                ) = r
 
                 if is_error:
-                    result_preview = function_result[:200] if len(function_result) > 200 else function_result
-                    logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
+                    result_preview = (
+                        function_result[:200]
+                        if len(function_result) > 200
+                        else function_result
+                    )
+                    logger.warning(
+                        "Tool %s returned error (%.2fs): %s",
+                        function_name,
+                        tool_duration,
+                        result_preview,
+                    )
 
                 if self.tool_progress_callback:
                     try:
                         self.tool_progress_callback(
-                            "tool.completed", function_name, None, None,
-                            duration=tool_duration, is_error=is_error,
+                            "tool.completed",
+                            function_name,
+                            None,
+                            None,
+                            duration=tool_duration,
+                            is_error=is_error,
                         )
                     except Exception as cb_err:
                         logging.debug(f"Tool progress callback error: {cb_err}")
 
                 if self.verbose_logging:
-                    logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
-                    logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
+                    logging.debug(
+                        f"Tool {function_name} completed in {tool_duration:.2f}s"
+                    )
+                    logging.debug(
+                        f"Tool result ({len(function_result)} chars): {function_result}"
+                    )
 
             # Print cute message per tool
             if self._should_emit_quiet_tool_messages():
-                cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
+                cute_msg = _get_cute_tool_message_impl(
+                    name, args, tool_duration, result=function_result
+                )
                 self._safe_print(f"  {cute_msg}")
             elif not self.quiet_mode:
                 if self.verbose_logging:
-                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
+                    print(f"  ✅ Tool {i + 1} completed in {tool_duration:.2f}s")
                     print(self._wrap_verbose("Result: ", function_result))
                 else:
-                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
-                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
+                    response_preview = (
+                        function_result[: self.log_prefix_chars] + "..."
+                        if len(function_result) > self.log_prefix_chars
+                        else function_result
+                    )
+                    print(
+                        f"  ✅ Tool {i + 1} completed in {tool_duration:.2f}s - {response_preview}"
+                    )
 
             self._current_tool = None
             self._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")
@@ -7239,16 +8061,25 @@ class AIAgent:
             turn_tool_msgs = messages[-num_tools:]
             enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
 
-    def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
+    def _execute_tool_calls_sequential(
+        self,
+        assistant_message,
+        messages: list,
+        effective_task_id: str,
+        api_call_count: int = 0,
+    ) -> None:
         """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
         for i, tool_call in enumerate(assistant_message.tool_calls, 1):
             # SAFETY: check interrupt BEFORE starting each tool.
             # If the user sent "stop" during a previous tool's execution,
             # do NOT start any more tools -- skip them all immediately.
             if self._interrupt_requested:
-                remaining_calls = assistant_message.tool_calls[i-1:]
+                remaining_calls = assistant_message.tool_calls[i - 1 :]
                 if remaining_calls:
-                    self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
+                    self._vprint(
+                        f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)",
+                        force=True,
+                    )
                 for skipped_tc in remaining_calls:
                     skipped_name = skipped_tc.function.name
                     skip_msg = {
@@ -7273,8 +8104,11 @@ class AIAgent:
             _block_msg: Optional[str] = None
             try:
                 from hermes_cli.plugins import get_pre_tool_call_block_message
+
                 _block_msg = get_pre_tool_call_block_message(
-                    function_name, function_args, task_id=effective_task_id or "",
+                    function_name,
+                    function_args,
+                    task_id=effective_task_id or "",
                 )
             except Exception:
                 pass
@@ -7293,11 +8127,24 @@ class AIAgent:
             if not self.quiet_mode:
                 args_str = json.dumps(function_args, ensure_ascii=False)
                 if self.verbose_logging:
-                    print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
-                    print(self._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
+                    print(
+                        f"  📞 Tool {i}: {function_name}({list(function_args.keys())})"
+                    )
+                    print(
+                        self._wrap_verbose(
+                            "Args: ",
+                            json.dumps(function_args, indent=2, ensure_ascii=False),
+                        )
+                    )
                 else:
-                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
-                    print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
+                    args_preview = (
+                        args_str[: self.log_prefix_chars] + "..."
+                        if len(args_str) > self.log_prefix_chars
+                        else args_str
+                    )
+                    print(
+                        f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}"
+                    )
 
             if _block_msg is None:
                 self._current_tool = function_name
@@ -7309,6 +8156,7 @@ class AIAgent:
             if _block_msg is None:
                 try:
                     from tools.environments.base import set_activity_callback
+
                     set_activity_callback(self._touch_activity)
                 except Exception:
                     pass
@@ -7316,7 +8164,9 @@ class AIAgent:
             if _block_msg is None and self.tool_progress_callback:
                 try:
                     preview = _build_tool_preview(function_name, function_args)
-                    self.tool_progress_callback("tool.started", function_name, preview, function_args)
+                    self.tool_progress_callback(
+                        "tool.started", function_name, preview, function_args
+                    )
                 except Exception as cb_err:
                     logging.debug(f"Tool progress callback error: {cb_err}")
 
@@ -7327,11 +8177,17 @@ class AIAgent:
                     logging.debug(f"Tool start callback error: {cb_err}")
 
             # Checkpoint: snapshot working dir before file-mutating tools
-            if _block_msg is None and function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
+            if (
+                _block_msg is None
+                and function_name in ("write_file", "patch")
+                and self._checkpoint_mgr.enabled
+            ):
                 try:
                     file_path = function_args.get("path", "")
                     if file_path:
-                        work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
+                        work_dir = self._checkpoint_mgr.get_working_dir_for_path(
+                            file_path
+                        )
                         self._checkpoint_mgr.ensure_checkpoint(
                             work_dir, f"before {function_name}"
                         )
@@ -7339,11 +8195,17 @@ class AIAgent:
                     pass  # never block tool execution
 
             # Checkpoint before destructive terminal commands
-            if _block_msg is None and function_name == "terminal" and self._checkpoint_mgr.enabled:
+            if (
+                _block_msg is None
+                and function_name == "terminal"
+                and self._checkpoint_mgr.enabled
+            ):
                 try:
                     cmd = function_args.get("command", "")
                     if _is_destructive_command(cmd):
-                        cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
+                        cwd = function_args.get("workdir") or os.getenv(
+                            "TERMINAL_CWD", os.getcwd()
+                        )
                         self._checkpoint_mgr.ensure_checkpoint(
                             cwd, f"before terminal: {cmd[:60]}"
                         )
@@ -7358,6 +8220,7 @@ class AIAgent:
                 tool_duration = 0.0
             elif function_name == "todo":
                 from tools.todo_tool import todo_tool as _todo_tool
+
                 function_result = _todo_tool(
                     todos=function_args.get("todos"),
                     merge=function_args.get("merge", False),
@@ -7365,12 +8228,19 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self._should_emit_quiet_tool_messages():
-                    self._vprint(f"  {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
+                    self._vprint(
+                        f"  {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}"
+                    )
             elif function_name == "session_search":
                 if not self._session_db:
-                    function_result = json.dumps({"success": False, "error": "Session database not available."})
+                    function_result = json.dumps(
+                        {"success": False, "error": "Session database not available."}
+                    )
                 else:
-                    from tools.session_search_tool import session_search as _session_search
+                    from tools.session_search_tool import (
+                        session_search as _session_search,
+                    )
+
                     function_result = _session_search(
                         query=function_args.get("query", ""),
                         role_filter=function_args.get("role_filter"),
@@ -7380,10 +8250,13 @@ class AIAgent:
                     )
                 tool_duration = time.time() - tool_start_time
                 if self._should_emit_quiet_tool_messages():
-                    self._vprint(f"  {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
+                    self._vprint(
+                        f"  {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}"
+                    )
             elif function_name == "memory":
                 target = function_args.get("target", "memory")
                 from tools.memory_tool import memory_tool as _memory_tool
+
                 function_result = _memory_tool(
                     action=function_args.get("action"),
                     target=target,
@@ -7393,9 +8266,12 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self._should_emit_quiet_tool_messages():
-                    self._vprint(f"  {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
+                    self._vprint(
+                        f"  {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}"
+                    )
             elif function_name == "clarify":
                 from tools.clarify_tool import clarify_tool as _clarify_tool
+
                 function_result = _clarify_tool(
                     question=function_args.get("question", ""),
                     choices=function_args.get("choices"),
@@ -7403,19 +8279,31 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self._should_emit_quiet_tool_messages():
-                    self._vprint(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
+                    self._vprint(
+                        f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}"
+                    )
             elif function_name == "delegate_task":
                 from tools.delegate_tool import delegate_task as _delegate_task
+
                 tasks_arg = function_args.get("tasks")
                 if tasks_arg and isinstance(tasks_arg, list):
                     spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
                 else:
                     goal_preview = (function_args.get("goal") or "")[:30]
-                    spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
+                    spinner_label = (
+                        f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
+                    )
                 spinner = None
-                if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
+                if (
+                    self._should_emit_quiet_tool_messages()
+                    and self._should_start_quiet_spinner()
+                ):
                     face = random.choice(KawaiiSpinner.KAWAII_WAITING)
-                    spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn)
+                    spinner = KawaiiSpinner(
+                        f"{face} {spinner_label}",
+                        spinner_type="dots",
+                        print_fn=self._print_fn,
+                    )
                     spinner.start()
                 self._delegate_spinner = spinner
                 _delegate_result = None
@@ -7432,30 +8320,58 @@ class AIAgent:
                 finally:
                     self._delegate_spinner = None
                     tool_duration = time.time() - tool_start_time
-                    cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
+                    cute_msg = _get_cute_tool_message_impl(
+                        "delegate_task",
+                        function_args,
+                        tool_duration,
+                        result=_delegate_result,
+                    )
                     if spinner:
                         spinner.stop(cute_msg)
                     elif self._should_emit_quiet_tool_messages():
                         self._vprint(f"  {cute_msg}")
-            elif self._context_engine_tool_names and function_name in self._context_engine_tool_names:
+            elif (
+                self._context_engine_tool_names
+                and function_name in self._context_engine_tool_names
+            ):
                 # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
                 spinner = None
                 if self.quiet_mode and not self.tool_progress_callback:
                     face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                     emoji = _get_tool_emoji(function_name)
-                    preview = _build_tool_preview(function_name, function_args) or function_name
-                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
+                    preview = (
+                        _build_tool_preview(function_name, function_args)
+                        or function_name
+                    )
+                    spinner = KawaiiSpinner(
+                        f"{face} {emoji} {preview}",
+                        spinner_type="dots",
+                        print_fn=self._print_fn,
+                    )
                     spinner.start()
                 _ce_result = None
                 try:
-                    function_result = self.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
+                    function_result = self.context_compressor.handle_tool_call(
+                        function_name, function_args, messages=messages
+                    )
                     _ce_result = function_result
                 except Exception as tool_error:
-                    function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
-                    logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
+                    function_result = json.dumps(
+                        {
+                            "error": f"Context engine tool '{function_name}' failed: {tool_error}"
+                        }
+                    )
+                    logger.error(
+                        "context_engine.handle_tool_call raised for %s: %s",
+                        function_name,
+                        tool_error,
+                        exc_info=True,
+                    )
                 finally:
                     tool_duration = time.time() - tool_start_time
-                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
+                    cute_msg = _get_cute_tool_message_impl(
+                        function_name, function_args, tool_duration, result=_ce_result
+                    )
                     if spinner:
                         spinner.stop(cute_msg)
                     elif self.quiet_mode:
@@ -7464,50 +8380,97 @@ class AIAgent:
                 # Memory provider tools (hindsight_retain, honcho_search, etc.)
                 # These are not in the tool registry — route through MemoryManager.
                 spinner = None
-                if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
+                if (
+                    self._should_emit_quiet_tool_messages()
+                    and self._should_start_quiet_spinner()
+                ):
                     face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                     emoji = _get_tool_emoji(function_name)
-                    preview = _build_tool_preview(function_name, function_args) or function_name
-                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
+                    preview = (
+                        _build_tool_preview(function_name, function_args)
+                        or function_name
+                    )
+                    spinner = KawaiiSpinner(
+                        f"{face} {emoji} {preview}",
+                        spinner_type="dots",
+                        print_fn=self._print_fn,
+                    )
                     spinner.start()
                 _mem_result = None
                 try:
-                    function_result = self._memory_manager.handle_tool_call(function_name, function_args)
+                    function_result = self._memory_manager.handle_tool_call(
+                        function_name, function_args
+                    )
                     _mem_result = function_result
                 except Exception as tool_error:
-                    function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"})
-                    logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
+                    function_result = json.dumps(
+                        {"error": f"Memory tool '{function_name}' failed: {tool_error}"}
+                    )
+                    logger.error(
+                        "memory_manager.handle_tool_call raised for %s: %s",
+                        function_name,
+                        tool_error,
+                        exc_info=True,
+                    )
                 finally:
                     tool_duration = time.time() - tool_start_time
-                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result)
+                    cute_msg = _get_cute_tool_message_impl(
+                        function_name, function_args, tool_duration, result=_mem_result
+                    )
                     if spinner:
                         spinner.stop(cute_msg)
                     elif self._should_emit_quiet_tool_messages():
                         self._vprint(f"  {cute_msg}")
             elif self.quiet_mode:
                 spinner = None
-                if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
+                if (
+                    self._should_emit_quiet_tool_messages()
+                    and self._should_start_quiet_spinner()
+                ):
                     face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                     emoji = _get_tool_emoji(function_name)
-                    preview = _build_tool_preview(function_name, function_args) or function_name
-                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
+                    preview = (
+                        _build_tool_preview(function_name, function_args)
+                        or function_name
+                    )
+                    spinner = KawaiiSpinner(
+                        f"{face} {emoji} {preview}",
+                        spinner_type="dots",
+                        print_fn=self._print_fn,
+                    )
                     spinner.start()
                 _spinner_result = None
                 try:
                     function_result = handle_function_call(
-                        function_name, function_args, effective_task_id,
+                        function_name,
+                        function_args,
+                        effective_task_id,
                         tool_call_id=tool_call.id,
                         session_id=self.session_id or "",
-                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+                        enabled_tools=list(self.valid_tool_names)
+                        if self.valid_tool_names
+                        else None,
                         skip_pre_tool_call_hook=True,
                     )
                     _spinner_result = function_result
                 except Exception as tool_error:
-                    function_result = f"Error executing tool '{function_name}': {tool_error}"
-                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
+                    function_result = (
+                        f"Error executing tool '{function_name}': {tool_error}"
+                    )
+                    logger.error(
+                        "handle_function_call raised for %s: %s",
+                        function_name,
+                        tool_error,
+                        exc_info=True,
+                    )
                 finally:
                     tool_duration = time.time() - tool_start_time
-                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
+                    cute_msg = _get_cute_tool_message_impl(
+                        function_name,
+                        function_args,
+                        tool_duration,
+                        result=_spinner_result,
+                    )
                     if spinner:
                         spinner.stop(cute_msg)
                     elif self._should_emit_quiet_tool_messages():
@@ -7515,48 +8478,85 @@ class AIAgent:
             else:
                 try:
                     function_result = handle_function_call(
-                        function_name, function_args, effective_task_id,
+                        function_name,
+                        function_args,
+                        effective_task_id,
                         tool_call_id=tool_call.id,
                         session_id=self.session_id or "",
-                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+                        enabled_tools=list(self.valid_tool_names)
+                        if self.valid_tool_names
+                        else None,
                         skip_pre_tool_call_hook=True,
                     )
                 except Exception as tool_error:
-                    function_result = f"Error executing tool '{function_name}': {tool_error}"
-                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
+                    function_result = (
+                        f"Error executing tool '{function_name}': {tool_error}"
+                    )
+                    logger.error(
+                        "handle_function_call raised for %s: %s",
+                        function_name,
+                        tool_error,
+                        exc_info=True,
+                    )
                 tool_duration = time.time() - tool_start_time
 
-            result_preview = function_result if self.verbose_logging else (
-                function_result[:200] if len(function_result) > 200 else function_result
+            result_preview = (
+                function_result
+                if self.verbose_logging
+                else (
+                    function_result[:200]
+                    if len(function_result) > 200
+                    else function_result
+                )
             )
 
             # Log tool errors to the persistent error log so [error] tags
             # in the UI always have a corresponding detailed entry on disk.
             _is_error_result, _ = _detect_tool_failure(function_name, function_result)
             if _is_error_result:
-                logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
+                logger.warning(
+                    "Tool %s returned error (%.2fs): %s",
+                    function_name,
+                    tool_duration,
+                    result_preview,
+                )
             else:
-                logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result))
+                logger.info(
+                    "tool %s completed (%.2fs, %d chars)",
+                    function_name,
+                    tool_duration,
+                    len(function_result),
+                )
 
             if self.tool_progress_callback:
                 try:
                     self.tool_progress_callback(
-                        "tool.completed", function_name, None, None,
-                        duration=tool_duration, is_error=_is_error_result,
+                        "tool.completed",
+                        function_name,
+                        None,
+                        None,
+                        duration=tool_duration,
+                        is_error=_is_error_result,
                     )
                 except Exception as cb_err:
                     logging.debug(f"Tool progress callback error: {cb_err}")
 
             self._current_tool = None
-            self._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)")
+            self._touch_activity(
+                f"tool completed: {function_name} ({tool_duration:.1f}s)"
+            )
 
             if self.verbose_logging:
                 logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
-                logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
+                logging.debug(
+                    f"Tool result ({len(function_result)} chars): {function_result}"
+                )
 
             if self.tool_complete_callback:
                 try:
-                    self.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
+                    self.tool_complete_callback(
+                        tool_call.id, function_name, function_args, function_result
+                    )
                 except Exception as cb_err:
                     logging.debug(f"Tool complete callback error: {cb_err}")
 
@@ -7568,14 +8568,16 @@ class AIAgent:
             )
 
             # Discover subdirectory context files from tool arguments
-            subdir_hints = self._subdirectory_hints.check_tool_call(function_name, function_args)
+            subdir_hints = self._subdirectory_hints.check_tool_call(
+                function_name, function_args
+            )
             if subdir_hints:
                 function_result += subdir_hints
 
             tool_msg = {
                 "role": "tool",
                 "content": function_result,
-                "tool_call_id": tool_call.id
+                "tool_call_id": tool_call.id,
             }
             messages.append(tool_msg)
 
@@ -7584,18 +8586,27 @@ class AIAgent:
                     print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s")
                     print(self._wrap_verbose("Result: ", function_result))
                 else:
-                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
-                    print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
+                    response_preview = (
+                        function_result[: self.log_prefix_chars] + "..."
+                        if len(function_result) > self.log_prefix_chars
+                        else function_result
+                    )
+                    print(
+                        f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}"
+                    )
 
             if self._interrupt_requested and i < len(assistant_message.tool_calls):
                 remaining = len(assistant_message.tool_calls) - i
-                self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
+                self._vprint(
+                    f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)",
+                    force=True,
+                )
                 for skipped_tc in assistant_message.tool_calls[i:]:
                     skipped_name = skipped_tc.function.name
                     skip_msg = {
                         "role": "tool",
                         "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
-                        "tool_call_id": skipped_tc.id
+                        "tool_call_id": skipped_tc.id,
                     }
                     messages.append(skip_msg)
                 break
@@ -7606,9 +8617,9 @@ class AIAgent:
         # ── Per-turn aggregate budget enforcement ─────────────────────────
         num_tools_seq = len(assistant_message.tool_calls)
         if num_tools_seq > 0:
-            enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
-
-
+            enforce_turn_budget(
+                messages[-num_tools_seq:], env=get_active_env(effective_task_id)
+            )
 
     def _emit_context_pressure(self, compaction_progress: float, compressor) -> None:
         """Notify the user that context is approaching the compaction threshold.
@@ -7621,9 +8632,16 @@ class AIAgent:
         For CLI: prints a formatted line with a progress bar.
         For gateway: fires status_callback so the platform can send a chat message.
         """
-        from agent.display import format_context_pressure, format_context_pressure_gateway
+        from agent.display import (
+            format_context_pressure,
+            format_context_pressure_gateway,
+        )
 
-        threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5
+        threshold_pct = (
+            compressor.threshold_tokens / compressor.context_length
+            if compressor.context_length
+            else 0.5
+        )
 
         # CLI output — always shown (these are user-facing status notifications,
         # not verbose debug output, so they bypass quiet_mode).
@@ -7651,7 +8669,9 @@ class AIAgent:
 
     def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
         """Request a summary when max iterations are reached. Returns the final response text."""
-        print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Requesting summary...")
+        print(
+            f"⚠️  Reached maximum iterations ({self.max_iterations}). Requesting summary..."
+        )
 
         summary_request = (
             "You've reached the maximum number of tool-calling iterations allowed. "
@@ -7667,7 +8687,11 @@ class AIAgent:
             api_messages = []
             for msg in messages:
                 api_msg = msg.copy()
-                for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
+                for internal_field in (
+                    "reasoning",
+                    "finish_reason",
+                    "_thinking_prefill",
+                ):
                     api_msg.pop(internal_field, None)
                 if _needs_sanitize:
                     self._sanitize_tool_calls_for_strict_api(api_msg)
@@ -7675,9 +8699,13 @@ class AIAgent:
 
             effective_system = self._cached_system_prompt or ""
             if self.ephemeral_system_prompt:
-                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+                effective_system = (
+                    effective_system + "\n\n" + self.ephemeral_system_prompt
+                ).strip()
             if effective_system:
-                api_messages = [{"role": "system", "content": effective_system}] + api_messages
+                api_messages = [
+                    {"role": "system", "content": effective_system}
+                ] + api_messages
             if self.prefill_messages:
                 sys_offset = 1 if effective_system else 0
                 for idx, pfm in enumerate(self.prefill_messages):
@@ -7691,7 +8719,7 @@ class AIAgent:
                 else:
                     summary_extra_body["reasoning"] = {
                         "enabled": True,
-                        "effort": "medium"
+                        "effort": "medium",
                     }
             if _is_nous:
                 summary_extra_body["tags"] = ["product=hermes-agent"]
@@ -7701,7 +8729,11 @@ class AIAgent:
                 codex_kwargs.pop("tools", None)
                 summary_response = self._run_codex_stream(codex_kwargs)
                 assistant_message, _ = self._normalize_codex_response(summary_response)
-                final_response = (assistant_message.content or "").strip() if assistant_message else ""
+                final_response = (
+                    (assistant_message.content or "").strip()
+                    if assistant_message
+                    else ""
+                )
             else:
                 summary_kwargs = {
                     "model": self.model,
@@ -7727,29 +8759,49 @@ class AIAgent:
                     summary_kwargs["extra_body"] = summary_extra_body
 
                 if self.api_mode == "anthropic_messages":
-                    from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar
-                    _ant_kw = _bak(model=self.model, messages=api_messages, tools=None,
-                                   max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
-                                   is_oauth=self._is_anthropic_oauth,
-                                   preserve_dots=self._anthropic_preserve_dots())
+                    from agent.anthropic_adapter import (
+                        build_anthropic_kwargs as _bak,
+                        normalize_anthropic_response as _nar,
+                    )
+
+                    _ant_kw = _bak(
+                        model=self.model,
+                        messages=api_messages,
+                        tools=None,
+                        max_tokens=self.max_tokens,
+                        reasoning_config=self.reasoning_config,
+                        is_oauth=self._is_anthropic_oauth,
+                        preserve_dots=self._anthropic_preserve_dots(),
+                    )
                     summary_response = self._anthropic_messages_create(_ant_kw)
-                    _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    _msg, _ = _nar(
+                        summary_response, strip_tool_prefix=self._is_anthropic_oauth
+                    )
                     final_response = (_msg.content or "").strip()
                 else:
-                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
+                    summary_response = self._ensure_primary_openai_client(
+                        reason="iteration_limit_summary"
+                    ).chat.completions.create(**summary_kwargs)
 
-                    if summary_response.choices and summary_response.choices[0].message.content:
+                    if (
+                        summary_response.choices
+                        and summary_response.choices[0].message.content
+                    ):
                         final_response = summary_response.choices[0].message.content
                     else:
                         final_response = ""
 
             if final_response:
                 if "<think>" in final_response:
-                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
+                    final_response = re.sub(
+                        r"<think>.*?</think>\s*", "", final_response, flags=re.DOTALL
+                    ).strip()
                 if final_response:
                     messages.append({"role": "assistant", "content": final_response})
                 else:
-                    final_response = "I reached the iteration limit and couldn't generate a summary."
+                    final_response = (
+                        "I reached the iteration limit and couldn't generate a summary."
+                    )
             else:
                 # Retry summary generation
                 if self.api_mode == "codex_responses":
@@ -7757,15 +8809,28 @@ class AIAgent:
                     codex_kwargs.pop("tools", None)
                     retry_response = self._run_codex_stream(codex_kwargs)
                     retry_msg, _ = self._normalize_codex_response(retry_response)
-                    final_response = (retry_msg.content or "").strip() if retry_msg else ""
+                    final_response = (
+                        (retry_msg.content or "").strip() if retry_msg else ""
+                    )
                 elif self.api_mode == "anthropic_messages":
-                    from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2
-                    _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None,
-                                    is_oauth=self._is_anthropic_oauth,
-                                    max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
-                                    preserve_dots=self._anthropic_preserve_dots())
+                    from agent.anthropic_adapter import (
+                        build_anthropic_kwargs as _bak2,
+                        normalize_anthropic_response as _nar2,
+                    )
+
+                    _ant_kw2 = _bak2(
+                        model=self.model,
+                        messages=api_messages,
+                        tools=None,
+                        is_oauth=self._is_anthropic_oauth,
+                        max_tokens=self.max_tokens,
+                        reasoning_config=self.reasoning_config,
+                        preserve_dots=self._anthropic_preserve_dots(),
+                    )
                     retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    _retry_msg, _ = _nar2(
+                        retry_response, strip_tool_prefix=self._is_anthropic_oauth
+                    )
                     final_response = (_retry_msg.content or "").strip()
                 else:
                     summary_kwargs = {
@@ -7777,22 +8842,36 @@ class AIAgent:
                     if summary_extra_body:
                         summary_kwargs["extra_body"] = summary_extra_body
 
-                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs)
+                    summary_response = self._ensure_primary_openai_client(
+                        reason="iteration_limit_summary_retry"
+                    ).chat.completions.create(**summary_kwargs)
 
-                    if summary_response.choices and summary_response.choices[0].message.content:
+                    if (
+                        summary_response.choices
+                        and summary_response.choices[0].message.content
+                    ):
                         final_response = summary_response.choices[0].message.content
                     else:
                         final_response = ""
 
                 if final_response:
                     if "<think>" in final_response:
-                        final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
+                        final_response = re.sub(
+                            r"<think>.*?</think>\s*",
+                            "",
+                            final_response,
+                            flags=re.DOTALL,
+                        ).strip()
                     if final_response:
-                        messages.append({"role": "assistant", "content": final_response})
+                        messages.append(
+                            {"role": "assistant", "content": final_response}
+                        )
                     else:
                         final_response = "I reached the iteration limit and couldn't generate a summary."
                 else:
-                    final_response = "I reached the iteration limit and couldn't generate a summary."
+                    final_response = (
+                        "I reached the iteration limit and couldn't generate a summary."
+                    )
 
         except Exception as e:
             logging.warning(f"Failed to get summary response: {e}")
@@ -7835,6 +8914,7 @@ class AIAgent:
         # Tag all log records on this thread with the session ID so
         # ``hermes logs --session <id>`` can filter a single conversation.
         from hermes_logging import set_session_context
+
         set_session_context(self.session_id)
 
         # If the previous turn activated fallback, restore the primary
@@ -7856,7 +8936,7 @@ class AIAgent:
         self._persist_user_message_override = persist_user_message
         # Generate unique task_id if not provided to isolate VMs between concurrent tasks
         effective_task_id = task_id or str(uuid.uuid4())
-        
+
         # Reset retry counters and iteration budget at the start of each turn
         # so subagent usage from a previous turn doesn't eat into the next one.
         self._invalid_tool_retries = 0
@@ -7895,12 +8975,17 @@ class AIAgent:
         self.iteration_budget = IterationBudget(self.max_iterations)
 
         # Log conversation turn start for debugging/observability
-        _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message
+        _msg_preview = (
+            (user_message[:80] + "...") if len(user_message) > 80 else user_message
+        )
         _msg_preview = _msg_preview.replace("\n", " ")
         logger.info(
             "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r",
-            self.session_id or "none", self.model, self.provider or "unknown",
-            self.platform or "unknown", len(conversation_history or []),
+            self.session_id or "none",
+            self.model,
+            self.provider or "unknown",
+            self.platform or "unknown",
+            len(conversation_history or []),
             _msg_preview,
         )
 
@@ -7912,25 +8997,29 @@ class AIAgent:
         # recover the todo state from the most recent todo tool response in history)
         if conversation_history and not self._todo_store.has_items():
             self._hydrate_todo_store(conversation_history)
-        
+
         # Prefill messages (few-shot priming) are injected at API-call time only,
         # never stored in the messages list. This keeps them ephemeral: they won't
         # be saved to session DB, session logs, or batch trajectories, but they're
         # automatically re-applied on every API call (including session continuations).
-        
+
         # Track user turns for memory flush and periodic nudge logic
         self._user_turn_count += 1
 
         # Preserve the original user message (no nudge injection).
-        original_user_message = persist_user_message if persist_user_message is not None else user_message
+        original_user_message = (
+            persist_user_message if persist_user_message is not None else user_message
+        )
 
         # Track memory nudge trigger (turn-based, checked here).
         # Skill trigger is checked AFTER the agent loop completes, based on
         # how many tool iterations THIS turn used.
         _should_review_memory = False
-        if (self._memory_nudge_interval > 0
-                and "memory" in self.valid_tool_names
-                and self._memory_store):
+        if (
+            self._memory_nudge_interval > 0
+            and "memory" in self.valid_tool_names
+            and self._memory_store
+        ):
             self._turns_since_memory += 1
             if self._turns_since_memory >= self._memory_nudge_interval:
                 _should_review_memory = True
@@ -7941,10 +9030,12 @@ class AIAgent:
         messages.append(user_msg)
         current_turn_user_idx = len(messages) - 1
         self._persist_user_message_idx = current_turn_user_idx
-        
+
         if not self.quiet_mode:
-            self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
-        
+            self._safe_print(
+                f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'"
+            )
+
         # ── System prompt (cached per session for prefix caching) ──
         # Built once on first call, reused for all subsequent calls.
         # Only rebuilt after context compression events (which invalidate
@@ -7979,6 +9070,7 @@ class AIAgent:
                 # session-scoped state (e.g. warm a memory cache).
                 try:
                     from hermes_cli.plugins import invoke_hook as _invoke_hook
+
                     _invoke_hook(
                         "on_session_start",
                         session_id=self.session_id,
@@ -7991,7 +9083,9 @@ class AIAgent:
                 # Store the system prompt snapshot in SQLite
                 if self._session_db:
                     try:
-                        self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
+                        self._session_db.update_system_prompt(
+                            self.session_id, self._cached_system_prompt
+                        )
                     except Exception as e:
                         logger.debug("Session DB update_system_prompt failed: %s", e)
 
@@ -8006,8 +9100,10 @@ class AIAgent:
         # 4xx and abort the request entirely).
         if (
             self.compression_enabled
-            and len(messages) > self.context_compressor.protect_first_n
-                                + self.context_compressor.protect_last_n + 1
+            and len(messages)
+            > self.context_compressor.protect_first_n
+            + self.context_compressor.protect_last_n
+            + 1
         ):
             # Include tool schema tokens — with many tools these can add
             # 20-30K+ tokens that the old sys+msg estimate missed entirely.
@@ -8035,7 +9131,9 @@ class AIAgent:
                 for _pass in range(3):
                     _orig_len = len(messages)
                     messages, active_system_prompt = self._compress_context(
-                        messages, system_message, approx_tokens=_preflight_tokens,
+                        messages,
+                        system_message,
+                        approx_tokens=_preflight_tokens,
                         task_id=effective_task_id,
                     )
                     if len(messages) >= _orig_len:
@@ -8079,6 +9177,7 @@ class AIAgent:
         _plugin_user_context = ""
         try:
             from hermes_cli.plugins import invoke_hook as _invoke_hook
+
             _pre_results = _invoke_hook(
                 "pre_llm_call",
                 session_id=self.session_id,
@@ -8110,7 +9209,7 @@ class AIAgent:
         truncated_response_prefix = ""
         compression_attempts = 0
         _turn_exit_reason = "unknown"  # Diagnostic: why the loop ended
-        
+
         # Record the execution thread so interrupt()/clear_interrupt() can
         # scope the tool-level interrupt signal to THIS agent's thread only.
         # Must be set before clear_interrupt() which uses it.
@@ -8127,12 +9226,18 @@ class AIAgent:
         _ext_prefetch_cache = ""
         if self._memory_manager:
             try:
-                _query = original_user_message if isinstance(original_user_message, str) else ""
+                _query = (
+                    original_user_message
+                    if isinstance(original_user_message, str)
+                    else ""
+                )
                 _ext_prefetch_cache = self._memory_manager.prefetch_all(_query) or ""
             except Exception:
                 pass
 
-        while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) or self._budget_grace_call:
+        while (
+            api_call_count < self.max_iterations and self.iteration_budget.remaining > 0
+        ) or self._budget_grace_call:
             # Reset per-turn checkpoint dedup so each iteration can take one snapshot
             self._checkpoint_mgr.new_turn()
 
@@ -8141,9 +9246,11 @@ class AIAgent:
                 interrupted = True
                 _turn_exit_reason = "interrupted_by_user"
                 if not self.quiet_mode:
-                    self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...")
+                    self._safe_print(
+                        "\n⚡ Breaking out of tool loop due to interrupt..."
+                    )
                 break
-            
+
             api_call_count += 1
             self._api_call_count = api_call_count
             self._touch_activity(f"starting API call #{api_call_count}")
@@ -8156,7 +9263,9 @@ class AIAgent:
             elif not self.iteration_budget.consume():
                 _turn_exit_reason = "budget_exhausted"
                 if not self.quiet_mode:
-                    self._safe_print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
+                    self._safe_print(
+                        f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)"
+                    )
                 break
 
             # Fire step_callback for gateway hooks (agent:step event)
@@ -8184,14 +9293,20 @@ class AIAgent:
                             break
                     self.step_callback(api_call_count, prev_tools)
                 except Exception as _step_err:
-                    logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err)
+                    logger.debug(
+                        "step_callback error (iteration %s): %s",
+                        api_call_count,
+                        _step_err,
+                    )
 
             # Track tool-calling iterations for skill nudge.
             # Counter resets whenever skill_manage is actually used.
-            if (self._skill_nudge_interval > 0
-                    and "skill_manage" in self.valid_tool_names):
+            if (
+                self._skill_nudge_interval > 0
+                and "skill_manage" in self.valid_tool_names
+            ):
                 self._iters_since_skill += 1
-            
+
             # Prepare messages for API call
             # If we have an ephemeral system prompt, prepend it to the messages
             # Note: Reasoning is embedded in content via <think> tags for trajectory storage.
@@ -8217,7 +9332,9 @@ class AIAgent:
                     if _injections:
                         _base = api_msg.get("content", "")
                         if isinstance(_base, str):
-                            api_msg["content"] = _base + "\n\n" + "\n\n".join(_injections)
+                            api_msg["content"] = (
+                                _base + "\n\n" + "\n\n".join(_injections)
+                            )
 
                 # For ALL assistant messages, pass reasoning back to the API
                 # This ensures multi-turn reasoning context is preserved
@@ -8252,13 +9369,17 @@ class AIAgent:
             # prompt, so the stable cache prefix remains unchanged.
             effective_system = active_system_prompt or ""
             if self.ephemeral_system_prompt:
-                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+                effective_system = (
+                    effective_system + "\n\n" + self.ephemeral_system_prompt
+                ).strip()
             # NOTE: Plugin context from pre_llm_call hooks is injected into the
             # user message (see injection block above), NOT the system prompt.
             # This is intentional — system prompt modifications break the prompt
             # cache prefix.  The system prompt is reserved for Hermes internals.
             if effective_system:
-                api_messages = [{"role": "system", "content": effective_system}] + api_messages
+                api_messages = [
+                    {"role": "system", "content": effective_system}
+                ] + api_messages
 
             # Inject ephemeral prefill messages right after the system prompt
             # but before conversation history. Same API-call-time-only pattern.
@@ -8272,7 +9393,11 @@ class AIAgent:
             # inject cache_control breakpoints (system + last 3 messages) to reduce
             # input token costs by ~75% on multi-turn conversations.
             if self._use_prompt_caching:
-                api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages'))
+                api_messages = apply_anthropic_cache_control(
+                    api_messages,
+                    cache_ttl=self._cache_ttl,
+                    native_anthropic=(self.api_mode == "anthropic_messages"),
+                )
 
             # Safety net: strip orphaned tool results / add stubs for missing
             # results before sending to the API.  Runs unconditionally — not
@@ -8298,13 +9423,17 @@ class AIAgent:
                     if isinstance(tc, dict) and "function" in tc:
                         try:
                             args_obj = json.loads(tc["function"]["arguments"])
-                            tc = {**tc, "function": {
-                                **tc["function"],
-                                "arguments": json.dumps(
-                                    args_obj, separators=(",", ":"),
-                                    sort_keys=True,
-                                ),
-                            }}
+                            tc = {
+                                **tc,
+                                "function": {
+                                    **tc["function"],
+                                    "arguments": json.dumps(
+                                        args_obj,
+                                        separators=(",", ":"),
+                                        sort_keys=True,
+                                    ),
+                                },
+                            }
                         except Exception:
                             pass
                     new_tcs.append(tc)
@@ -8313,14 +9442,20 @@ class AIAgent:
             # Calculate approximate request size for logging
             total_chars = sum(len(str(msg)) for msg in api_messages)
             approx_tokens = estimate_messages_tokens_rough(api_messages)
-            
+
             # Thinking spinner for quiet mode (animated during API call)
             thinking_spinner = None
-            
+
             if not self.quiet_mode:
-                self._vprint(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
-                self._vprint(f"{self.log_prefix}   📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
-                self._vprint(f"{self.log_prefix}   🔧 Available tools: {len(self.tools) if self.tools else 0}")
+                self._vprint(
+                    f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}..."
+                )
+                self._vprint(
+                    f"{self.log_prefix}   📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)"
+                )
+                self._vprint(
+                    f"{self.log_prefix}   🔧 Available tools: {len(self.tools) if self.tools else 0}"
+                )
             else:
                 # Animated thinking spinner in quiet mode
                 face = random.choice(KawaiiSpinner.KAWAII_THINKING)
@@ -8329,27 +9464,40 @@ class AIAgent:
                     # CLI TUI mode: use prompt_toolkit widget instead of raw spinner
                     # (works in both streaming and non-streaming modes)
                     self.thinking_callback(f"{face} {verb}...")
-                elif not self._has_stream_consumers() and self._should_start_quiet_spinner():
+                elif (
+                    not self._has_stream_consumers()
+                    and self._should_start_quiet_spinner()
+                ):
                     # Raw KawaiiSpinner only when no streaming consumers and the
                     # spinner output has a safe sink.
-                    spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
-                    thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type, print_fn=self._print_fn)
+                    spinner_type = random.choice(
+                        ["brain", "sparkle", "pulse", "moon", "star"]
+                    )
+                    thinking_spinner = KawaiiSpinner(
+                        f"{face} {verb}...",
+                        spinner_type=spinner_type,
+                        print_fn=self._print_fn,
+                    )
                     thinking_spinner.start()
-            
+
             # Log request details if verbose
             if self.verbose_logging:
-                logging.debug(f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}")
-                logging.debug(f"Last message role: {messages[-1]['role'] if messages else 'none'}")
+                logging.debug(
+                    f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}"
+                )
+                logging.debug(
+                    f"Last message role: {messages[-1]['role'] if messages else 'none'}"
+                )
                 logging.debug(f"Total message size: ~{approx_tokens:,} tokens")
-            
+
             api_start_time = time.time()
             retry_count = 0
             max_retries = 3
             primary_recovery_attempted = False
             max_compression_attempts = 3
-            codex_auth_retry_attempted=False
-            anthropic_auth_retry_attempted=False
-            nous_auth_retry_attempted=False
+            codex_auth_retry_attempted = False
+            anthropic_auth_retry_attempted = False
+            nous_auth_retry_attempted = False
             thinking_sig_retry_attempted = False
             has_retried_429 = False
             restart_with_compressed_messages = False
@@ -8366,10 +9514,13 @@ class AIAgent:
                     if self._force_ascii_payload:
                         _sanitize_structure_non_ascii(api_kwargs)
                     if self.api_mode == "codex_responses":
-                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
+                        api_kwargs = self._preflight_codex_api_kwargs(
+                            api_kwargs, allow_stream=False
+                        )
 
                     try:
                         from hermes_cli.plugins import invoke_hook as _invoke_hook
+
                         _invoke_hook(
                             "pre_api_request",
                             task_id=effective_task_id,
@@ -8422,6 +9573,7 @@ class AIAgent:
                         # health checking, but skip for Mock clients in tests
                         # (mocks return SimpleNamespace, not stream iterators).
                         from unittest.mock import Mock
+
                         if isinstance(getattr(self, "client", None), Mock):
                             _use_streaming = False
 
@@ -8431,9 +9583,9 @@ class AIAgent:
                         )
                     else:
                         response = self._interruptible_api_call(api_kwargs)
-                    
+
                     api_duration = time.time() - api_start_time
-                    
+
                     # Stop thinking spinner silently -- the response box or tool
                     # execution messages that follow are more informative.
                     if thinking_spinner:
@@ -8441,20 +9593,30 @@ class AIAgent:
                         thinking_spinner = None
                     if self.thinking_callback:
                         self.thinking_callback("")
-                    
+
                     if not self.quiet_mode:
-                        self._vprint(f"{self.log_prefix}⏱️  API call completed in {api_duration:.2f}s")
-                    
+                        self._vprint(
+                            f"{self.log_prefix}⏱️  API call completed in {api_duration:.2f}s"
+                        )
+
                     if self.verbose_logging:
                         # Log response with provider info if available
-                        resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
-                        logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
-                    
+                        resp_model = (
+                            getattr(response, "model", "N/A") if response else "N/A"
+                        )
+                        logging.debug(
+                            f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}"
+                        )
+
                     # Validate response shape before proceeding
                     response_invalid = False
                     error_details = []
                     if self.api_mode == "codex_responses":
-                        output_items = getattr(response, "output", None) if response is not None else None
+                        output_items = (
+                            getattr(response, "output", None)
+                            if response is not None
+                            else None
+                        )
                         if response is None:
                             response_invalid = True
                             error_details.append("response is None")
@@ -8467,7 +9629,9 @@ class AIAgent:
                             # from response.output_text. Only mark invalid
                             # when that fallback is also absent.
                             _out_text = getattr(response, "output_text", None)
-                            _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else ""
+                            _out_text_stripped = (
+                                _out_text.strip() if isinstance(_out_text, str) else ""
+                            )
                             if _out_text_stripped:
                                 logger.debug(
                                     "Codex response.output is empty but output_text is present "
@@ -8476,18 +9640,25 @@ class AIAgent:
                                 )
                             else:
                                 _resp_status = getattr(response, "status", None)
-                                _resp_incomplete = getattr(response, "incomplete_details", None)
+                                _resp_incomplete = getattr(
+                                    response, "incomplete_details", None
+                                )
                                 logger.warning(
                                     "Codex response.output is empty after stream backfill "
                                     "(status=%s, incomplete_details=%s, model=%s). %s",
-                                    _resp_status, _resp_incomplete,
+                                    _resp_status,
+                                    _resp_incomplete,
                                     getattr(response, "model", None),
                                     f"api_mode={self.api_mode} provider={self.provider}",
                                 )
                                 response_invalid = True
                                 error_details.append("response.output is empty")
                     elif self.api_mode == "anthropic_messages":
-                        content_blocks = getattr(response, "content", None) if response is not None else None
+                        content_blocks = (
+                            getattr(response, "content", None)
+                            if response is not None
+                            else None
+                        )
                         if response is None:
                             response_invalid = True
                             error_details.append("response is None")
@@ -8498,12 +9669,19 @@ class AIAgent:
                             response_invalid = True
                             error_details.append("response.content is empty")
                     else:
-                        if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices:
+                        if (
+                            response is None
+                            or not hasattr(response, "choices")
+                            or response.choices is None
+                            or not response.choices
+                        ):
                             response_invalid = True
                             if response is None:
                                 error_details.append("response is None")
-                            elif not hasattr(response, 'choices'):
-                                error_details.append("response has no 'choices' attribute")
+                            elif not hasattr(response, "choices"):
+                                error_details.append(
+                                    "response has no 'choices' attribute"
+                                )
                             elif response.choices is None:
                                 error_details.append("response.choices is None")
                             else:
@@ -8516,16 +9694,18 @@ class AIAgent:
                             thinking_spinner = None
                         if self.thinking_callback:
                             self.thinking_callback("")
-                        
+
                         # Invalid response — could be rate limiting, provider timeout,
                         # upstream server error, or malformed response.
                         retry_count += 1
-                        
+
                         # Eager fallback: empty/malformed responses are a common
                         # rate-limit symptom.  Switch to fallback immediately
                         # rather than retrying with extended backoff.
                         if self._fallback_index < len(self._fallback_chain):
-                            self._emit_status("⚠️ Empty/malformed response — switching to fallback...")
+                            self._emit_status(
+                                "⚠️ Empty/malformed response — switching to fallback..."
+                            )
                         if self._try_activate_fallback():
                             retry_count = 0
                             compression_attempts = 0
@@ -8535,31 +9715,51 @@ class AIAgent:
                         # Check for error field in response (some providers include this)
                         error_msg = "Unknown"
                         provider_name = "Unknown"
-                        if response and hasattr(response, 'error') and response.error:
+                        if response and hasattr(response, "error") and response.error:
                             error_msg = str(response.error)
                             # Try to extract provider from error metadata
-                            if hasattr(response.error, 'metadata') and response.error.metadata:
-                                provider_name = response.error.metadata.get('provider_name', 'Unknown')
-                        elif response and hasattr(response, 'message') and response.message:
+                            if (
+                                hasattr(response.error, "metadata")
+                                and response.error.metadata
+                            ):
+                                provider_name = response.error.metadata.get(
+                                    "provider_name", "Unknown"
+                                )
+                        elif (
+                            response
+                            and hasattr(response, "message")
+                            and response.message
+                        ):
                             error_msg = str(response.message)
-                        
+
                         # Try to get provider from model field (OpenRouter often returns actual model used)
-                        if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
+                        if (
+                            provider_name == "Unknown"
+                            and response
+                            and hasattr(response, "model")
+                            and response.model
+                        ):
                             provider_name = f"model={response.model}"
-                        
+
                         # Check for x-openrouter-provider or similar metadata
                         if provider_name == "Unknown" and response:
                             # Log all response attributes for debugging
-                            resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
+                            resp_attrs = {
+                                k: str(v)[:100]
+                                for k, v in vars(response).items()
+                                if not k.startswith("_")
+                            }
                             if self.verbose_logging:
-                                logging.debug(f"Response attributes for invalid response: {resp_attrs}")
-                        
+                                logging.debug(
+                                    f"Response attributes for invalid response: {resp_attrs}"
+                                )
+
                         # Extract error code from response for contextual diagnostics
                         _resp_error_code = None
-                        if response and hasattr(response, 'error') and response.error:
-                            _code_raw = getattr(response.error, 'code', None)
+                        if response and hasattr(response, "error") and response.error:
+                            _code_raw = getattr(response.error, "code", None)
                             if _code_raw is None and isinstance(response.error, dict):
-                                _code_raw = response.error.get('code')
+                                _code_raw = response.error.get("code")
                             if _code_raw is not None:
                                 try:
                                     _resp_error_code = int(_code_raw)
@@ -8571,13 +9771,17 @@ class AIAgent:
                         if _resp_error_code == 524:
                             _failure_hint = f"upstream provider timed out (Cloudflare 524, {api_duration:.0f}s)"
                         elif _resp_error_code == 504:
-                            _failure_hint = f"upstream gateway timeout (504, {api_duration:.0f}s)"
+                            _failure_hint = (
+                                f"upstream gateway timeout (504, {api_duration:.0f}s)"
+                            )
                         elif _resp_error_code == 429:
                             _failure_hint = f"rate limited by upstream provider (429)"
                         elif _resp_error_code in (500, 502):
                             _failure_hint = f"upstream server error ({_resp_error_code}, {api_duration:.0f}s)"
                         elif _resp_error_code in (503, 529):
-                            _failure_hint = f"upstream provider overloaded ({_resp_error_code})"
+                            _failure_hint = (
+                                f"upstream provider overloaded ({_resp_error_code})"
+                            )
                         elif _resp_error_code is not None:
                             _failure_hint = f"upstream error (code {_resp_error_code}, {api_duration:.0f}s)"
                         elif api_duration < 10:
@@ -8587,42 +9791,69 @@ class AIAgent:
                         else:
                             _failure_hint = f"response time {api_duration:.1f}s"
 
-                        self._vprint(f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True)
-                        self._vprint(f"{self.log_prefix}   🏢 Provider: {provider_name}", force=True)
+                        self._vprint(
+                            f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}   🏢 Provider: {provider_name}",
+                            force=True,
+                        )
                         cleaned_provider_error = self._clean_error_message(error_msg)
-                        self._vprint(f"{self.log_prefix}   📝 Provider message: {cleaned_provider_error}", force=True)
-                        self._vprint(f"{self.log_prefix}   ⏱️  {_failure_hint}", force=True)
-                        
+                        self._vprint(
+                            f"{self.log_prefix}   📝 Provider message: {cleaned_provider_error}",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}   ⏱️  {_failure_hint}", force=True
+                        )
+
                         if retry_count >= max_retries:
                             # Try fallback before giving up
-                            self._emit_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
+                            self._emit_status(
+                                f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback..."
+                            )
                             if self._try_activate_fallback():
                                 retry_count = 0
                                 compression_attempts = 0
                                 primary_recovery_attempted = False
                                 continue
-                            self._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
-                            logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
+                            self._emit_status(
+                                f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up."
+                            )
+                            logging.error(
+                                f"{self.log_prefix}Invalid API response after {max_retries} retries."
+                            )
                             self._persist_session(messages, conversation_history)
                             return {
                                 "messages": messages,
                                 "completed": False,
                                 "api_calls": api_call_count,
                                 "error": f"Invalid API response after {max_retries} retries: {_failure_hint}",
-                                "failed": True  # Mark as failure for filtering
+                                "failed": True,  # Mark as failure for filtering
                             }
-                        
+
                         # Backoff before retry — jittered exponential: 5s base, 120s cap
-                        wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
-                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
-                        logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
-                        
+                        wait_time = jittered_backoff(
+                            retry_count, base_delay=5.0, max_delay=120.0
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...",
+                            force=True,
+                        )
+                        logging.warning(
+                            f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}"
+                        )
+
                         # Sleep in small increments to stay responsive to interrupts
                         sleep_end = time.time() + wait_time
                         _backoff_touch_counter = 0
                         while time.time() < sleep_end:
                             if self._interrupt_requested:
-                                self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
+                                self._vprint(
+                                    f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.",
+                                    force=True,
+                                )
                                 self._persist_session(messages, conversation_history)
                                 self.clear_interrupt()
                                 return {
@@ -8646,24 +9877,41 @@ class AIAgent:
                     # Check finish_reason before proceeding
                     if self.api_mode == "codex_responses":
                         status = getattr(response, "status", None)
-                        incomplete_details = getattr(response, "incomplete_details", None)
+                        incomplete_details = getattr(
+                            response, "incomplete_details", None
+                        )
                         incomplete_reason = None
                         if isinstance(incomplete_details, dict):
                             incomplete_reason = incomplete_details.get("reason")
                         else:
-                            incomplete_reason = getattr(incomplete_details, "reason", None)
-                        if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}:
+                            incomplete_reason = getattr(
+                                incomplete_details, "reason", None
+                            )
+                        if status == "incomplete" and incomplete_reason in {
+                            "max_output_tokens",
+                            "length",
+                        }:
                             finish_reason = "length"
                         else:
                             finish_reason = "stop"
                     elif self.api_mode == "anthropic_messages":
-                        stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"}
-                        finish_reason = stop_reason_map.get(response.stop_reason, "stop")
+                        stop_reason_map = {
+                            "end_turn": "stop",
+                            "tool_use": "tool_calls",
+                            "max_tokens": "length",
+                            "stop_sequence": "stop",
+                        }
+                        finish_reason = stop_reason_map.get(
+                            response.stop_reason, "stop"
+                        )
                     else:
                         finish_reason = response.choices[0].finish_reason
 
                     if finish_reason == "length":
-                        self._vprint(f"{self.log_prefix}⚠️  Response truncated (finish_reason='length') - model hit max output tokens", force=True)
+                        self._vprint(
+                            f"{self.log_prefix}⚠️  Response truncated (finish_reason='length') - model hit max output tokens",
+                            force=True,
+                        )
 
                         # ── Detect thinking-budget exhaustion ──────────────
                         # When the model spends ALL output tokens on reasoning
@@ -8673,16 +9921,30 @@ class AIAgent:
                         _trunc_content = None
                         _trunc_has_tool_calls = False
                         if self.api_mode == "chat_completions":
-                            _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
-                            _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
-                            _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
+                            _trunc_msg = (
+                                response.choices[0].message
+                                if (hasattr(response, "choices") and response.choices)
+                                else None
+                            )
+                            _trunc_content = (
+                                getattr(_trunc_msg, "content", None)
+                                if _trunc_msg
+                                else None
+                            )
+                            _trunc_has_tool_calls = (
+                                bool(getattr(_trunc_msg, "tool_calls", None))
+                                if _trunc_msg
+                                else False
+                            )
                         elif self.api_mode == "anthropic_messages":
                             # Anthropic response.content is a list of blocks
                             _text_parts = []
                             for _blk in getattr(response, "content", []):
                                 if getattr(_blk, "type", None) == "text":
                                     _text_parts.append(getattr(_blk, "text", ""))
-                            _trunc_content = "\n".join(_text_parts) if _text_parts else None
+                            _trunc_content = (
+                                "\n".join(_text_parts) if _text_parts else None
+                            )
 
                         # A response is "thinking exhausted" only when the model
                         # actually produced reasoning blocks but no visible text after
@@ -8692,8 +9954,9 @@ class AIAgent:
                         # truncations that deserve continuation retries, not as
                         # thinking-budget exhaustion.
                         _has_think_tags = bool(
-                            _trunc_content and re.search(
-                                r'<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)[^>]*>',
+                            _trunc_content
+                            and re.search(
+                                r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)[^>]*>",
                                 _trunc_content,
                                 re.IGNORECASE,
                             )
@@ -8702,7 +9965,12 @@ class AIAgent:
                             not _trunc_has_tool_calls
                             and _has_think_tags
                             and (
-                                (_trunc_content is not None and not self._has_content_after_think_block(_trunc_content))
+                                (
+                                    _trunc_content is not None
+                                    and not self._has_content_after_think_block(
+                                        _trunc_content
+                                    )
+                                )
                                 or _trunc_content is None
                             )
                         )
@@ -8745,10 +10013,14 @@ class AIAgent:
                             assistant_message = response.choices[0].message
                             if not assistant_message.tool_calls:
                                 length_continue_retries += 1
-                                interim_msg = self._build_assistant_message(assistant_message, finish_reason)
+                                interim_msg = self._build_assistant_message(
+                                    assistant_message, finish_reason
+                                )
                                 messages.append(interim_msg)
                                 if assistant_message.content:
-                                    truncated_response_prefix += assistant_message.content
+                                    truncated_response_prefix += (
+                                        assistant_message.content
+                                    )
 
                                 if length_continue_retries < 3:
                                     self._vprint(
@@ -8769,7 +10041,9 @@ class AIAgent:
                                     restart_with_length_continuation = True
                                     break
 
-                                partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
+                                partial_response = self._strip_think_blocks(
+                                    truncated_response_prefix
+                                ).strip()
                                 self._cleanup_task_resources(effective_task_id)
                                 self._persist_session(messages, conversation_history)
                                 return {
@@ -8811,8 +10085,12 @@ class AIAgent:
 
                         # If we have prior messages, roll back to last complete state
                         if len(messages) > 1:
-                            self._vprint(f"{self.log_prefix}   ⏪ Rolling back to last complete assistant turn")
-                            rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
+                            self._vprint(
+                                f"{self.log_prefix}   ⏪ Rolling back to last complete assistant turn"
+                            )
+                            rolled_back_messages = (
+                                self._get_messages_up_to_last_assistant(messages)
+                            )
 
                             self._cleanup_task_resources(effective_task_id)
                             self._persist_session(messages, conversation_history)
@@ -8823,11 +10101,14 @@ class AIAgent:
                                 "api_calls": api_call_count,
                                 "completed": False,
                                 "partial": True,
-                                "error": "Response truncated due to output length limit"
+                                "error": "Response truncated due to output length limit",
                             }
                         else:
                             # First message was truncated - mark as failed
-                            self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover", force=True)
+                            self._vprint(
+                                f"{self.log_prefix}❌ First response truncated - cannot recover",
+                                force=True,
+                            )
                             self._persist_session(messages, conversation_history)
                             return {
                                 "final_response": None,
@@ -8835,11 +10116,11 @@ class AIAgent:
                                 "api_calls": api_call_count,
                                 "completed": False,
                                 "failed": True,
-                                "error": "First response truncated due to output length limit"
+                                "error": "First response truncated due to output length limit",
                             }
-                    
+
                     # Track actual token usage from response for context management
-                    if hasattr(response, 'usage') and response.usage:
+                    if hasattr(response, "usage") and response.usage:
                         canonical_usage = normalize_usage(
                             response.usage,
                             provider=self.provider,
@@ -8860,9 +10141,15 @@ class AIAgent:
                         # from the error message), not guessed probe tiers.
                         if getattr(self.context_compressor, "_context_probed", False):
                             ctx = self.context_compressor.context_length
-                            if getattr(self.context_compressor, "_context_probe_persistable", False):
+                            if getattr(
+                                self.context_compressor,
+                                "_context_probe_persistable",
+                                False,
+                            ):
                                 save_context_length(self.model, self.base_url, ctx)
-                                self._safe_print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
+                                self._safe_print(
+                                    f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}"
+                                )
                             self.context_compressor._context_probed = False
                             self.context_compressor._context_probe_persistable = False
 
@@ -8872,19 +10159,30 @@ class AIAgent:
                         self.session_api_calls += 1
                         self.session_input_tokens += canonical_usage.input_tokens
                         self.session_output_tokens += canonical_usage.output_tokens
-                        self.session_cache_read_tokens += canonical_usage.cache_read_tokens
-                        self.session_cache_write_tokens += canonical_usage.cache_write_tokens
-                        self.session_reasoning_tokens += canonical_usage.reasoning_tokens
+                        self.session_cache_read_tokens += (
+                            canonical_usage.cache_read_tokens
+                        )
+                        self.session_cache_write_tokens += (
+                            canonical_usage.cache_write_tokens
+                        )
+                        self.session_reasoning_tokens += (
+                            canonical_usage.reasoning_tokens
+                        )
 
                         # Log API call details for debugging/observability
                         _cache_pct = ""
                         if canonical_usage.cache_read_tokens and prompt_tokens:
-                            _cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100*canonical_usage.cache_read_tokens/prompt_tokens:.0f}%)"
+                            _cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100 * canonical_usage.cache_read_tokens / prompt_tokens:.0f}%)"
                         logger.info(
                             "API call #%d: model=%s provider=%s in=%d out=%d total=%d latency=%.1fs%s",
-                            self.session_api_calls, self.model, self.provider or "unknown",
-                            prompt_tokens, completion_tokens, total_tokens,
-                            api_duration, _cache_pct,
+                            self.session_api_calls,
+                            self.model,
+                            self.provider or "unknown",
+                            prompt_tokens,
+                            completion_tokens,
+                            total_tokens,
+                            api_duration,
+                            _cache_pct,
                         )
 
                         cost_result = estimate_usage_cost(
@@ -8895,7 +10193,9 @@ class AIAgent:
                             api_key=getattr(self, "api_key", ""),
                         )
                         if cost_result.amount_usd is not None:
-                            self.session_estimated_cost_usd += float(cost_result.amount_usd)
+                            self.session_estimated_cost_usd += float(
+                                cost_result.amount_usd
+                            )
                         self.session_cost_status = cost_result.status
                         self.session_cost_source = cost_result.source
 
@@ -8916,37 +10216,63 @@ class AIAgent:
                                     cache_write_tokens=canonical_usage.cache_write_tokens,
                                     reasoning_tokens=canonical_usage.reasoning_tokens,
                                     estimated_cost_usd=float(cost_result.amount_usd)
-                                    if cost_result.amount_usd is not None else None,
+                                    if cost_result.amount_usd is not None
+                                    else None,
                                     cost_status=cost_result.status,
                                     cost_source=cost_result.source,
                                     billing_provider=self.provider,
                                     billing_base_url=self.base_url,
                                     billing_mode="subscription_included"
-                                    if cost_result.status == "included" else None,
+                                    if cost_result.status == "included"
+                                    else None,
                                     model=self.model,
                                 )
                             except Exception:
                                 pass  # never block the agent loop
-                        
+
                         if self.verbose_logging:
-                            logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
-                        
+                            logging.debug(
+                                f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}"
+                            )
+
                         # Log cache hit stats when prompt caching is active
                         if self._use_prompt_caching:
                             if self.api_mode == "anthropic_messages":
                                 # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens
-                                cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0
-                                written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
+                                cached = (
+                                    getattr(
+                                        response.usage, "cache_read_input_tokens", 0
+                                    )
+                                    or 0
+                                )
+                                written = (
+                                    getattr(
+                                        response.usage, "cache_creation_input_tokens", 0
+                                    )
+                                    or 0
+                                )
                             else:
                                 # OpenRouter uses prompt_tokens_details.cached_tokens
-                                details = getattr(response.usage, 'prompt_tokens_details', None)
-                                cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
-                                written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
+                                details = getattr(
+                                    response.usage, "prompt_tokens_details", None
+                                )
+                                cached = (
+                                    getattr(details, "cached_tokens", 0) or 0
+                                    if details
+                                    else 0
+                                )
+                                written = (
+                                    getattr(details, "cache_write_tokens", 0) or 0
+                                    if details
+                                    else 0
+                                )
                             prompt = usage_dict["prompt_tokens"]
                             hit_pct = (cached / prompt * 100) if prompt > 0 else 0
                             if not self.quiet_mode:
-                                self._vprint(f"{self.log_prefix}   💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
-                    
+                                self._vprint(
+                                    f"{self.log_prefix}   💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)"
+                                )
+
                     has_retried_429 = False  # Reset on success
                     self._touch_activity(f"API call #{api_call_count} completed")
                     break  # Success, exit retry loop
@@ -8958,7 +10284,9 @@ class AIAgent:
                     if self.thinking_callback:
                         self.thinking_callback("")
                     api_elapsed = time.time() - api_start_time
-                    self._vprint(f"{self.log_prefix}⚡ Interrupted during API call.", force=True)
+                    self._vprint(
+                        f"{self.log_prefix}⚡ Interrupted during API call.", force=True
+                    )
                     self._persist_session(messages, conversation_history)
                     interrupted = True
                     final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
@@ -8983,7 +10311,10 @@ class AIAgent:
                     # first to strip surrogates, then once more for pure
                     # ASCII-only locale sanitization if needed.
                     # -----------------------------------------------------------
-                    if isinstance(api_error, UnicodeEncodeError) and getattr(self, '_unicode_sanitization_passes', 0) < 2:
+                    if (
+                        isinstance(api_error, UnicodeEncodeError)
+                        and getattr(self, "_unicode_sanitization_passes", 0) < 2
+                    ):
                         _err_str = str(api_error).lower()
                         _is_ascii_codec = "'ascii'" in _err_str or "ascii" in _err_str
                         _surrogates_found = _sanitize_messages_surrogates(messages)
@@ -9001,8 +10332,12 @@ class AIAgent:
                             # non-ASCII content from messages/tool schemas and retry.
                             _messages_sanitized = _sanitize_messages_non_ascii(messages)
                             _prefill_sanitized = False
-                            if isinstance(getattr(self, "prefill_messages", None), list):
-                                _prefill_sanitized = _sanitize_messages_non_ascii(self.prefill_messages)
+                            if isinstance(
+                                getattr(self, "prefill_messages", None), list
+                            ):
+                                _prefill_sanitized = _sanitize_messages_non_ascii(
+                                    self.prefill_messages
+                                )
 
                             _tools_sanitized = False
                             if isinstance(getattr(self, "tools", None), list):
@@ -9010,13 +10345,19 @@ class AIAgent:
 
                             _system_sanitized = False
                             if isinstance(active_system_prompt, str):
-                                _sanitized_system = _strip_non_ascii(active_system_prompt)
+                                _sanitized_system = _strip_non_ascii(
+                                    active_system_prompt
+                                )
                                 if _sanitized_system != active_system_prompt:
                                     active_system_prompt = _sanitized_system
                                     self._cached_system_prompt = _sanitized_system
                                     _system_sanitized = True
-                            if isinstance(getattr(self, "ephemeral_system_prompt", None), str):
-                                _sanitized_ephemeral = _strip_non_ascii(self.ephemeral_system_prompt)
+                            if isinstance(
+                                getattr(self, "ephemeral_system_prompt", None), str
+                            ):
+                                _sanitized_ephemeral = _strip_non_ascii(
+                                    self.ephemeral_system_prompt
+                                )
                                 if _sanitized_ephemeral != self.ephemeral_system_prompt:
                                     self.ephemeral_system_prompt = _sanitized_ephemeral
                                     _system_sanitized = True
@@ -9024,11 +10365,15 @@ class AIAgent:
                             _headers_sanitized = False
                             _default_headers = (
                                 self._client_kwargs.get("default_headers")
-                                if isinstance(getattr(self, "_client_kwargs", None), dict)
+                                if isinstance(
+                                    getattr(self, "_client_kwargs", None), dict
+                                )
                                 else None
                             )
                             if isinstance(_default_headers, dict):
-                                _headers_sanitized = _sanitize_structure_non_ascii(_default_headers)
+                                _headers_sanitized = _sanitize_structure_non_ascii(
+                                    _default_headers
+                                )
 
                             # Sanitize the API key — non-ASCII characters in
                             # credentials (e.g. ʋ instead of v from a bad
@@ -9042,12 +10387,16 @@ class AIAgent:
                                 _clean_key = _strip_non_ascii(_raw_key)
                                 if _clean_key != _raw_key:
                                     self.api_key = _clean_key
-                                    if isinstance(getattr(self, "_client_kwargs", None), dict):
+                                    if isinstance(
+                                        getattr(self, "_client_kwargs", None), dict
+                                    ):
                                         self._client_kwargs["api_key"] = _clean_key
                                     # Also update the live client — it holds its
                                     # own copy of api_key which auth_headers reads
                                     # dynamically on every request.
-                                    if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"):
+                                    if getattr(
+                                        self, "client", None
+                                    ) is not None and hasattr(self.client, "api_key"):
                                         self.client.api_key = _clean_key
                                     _credential_sanitized = True
                                     self._vprint(
@@ -9079,7 +10428,11 @@ class AIAgent:
 
                     # ── Classify the error for structured recovery decisions ──
                     _compressor = getattr(self, "context_compressor", None)
-                    _ctx_len = getattr(_compressor, "context_length", 200000) if _compressor else 200000
+                    _ctx_len = (
+                        getattr(_compressor, "context_length", 200000)
+                        if _compressor
+                        else 200000
+                    )
                     classified = classify_api_error(
                         api_error,
                         provider=getattr(self, "provider", "") or "",
@@ -9090,16 +10443,21 @@ class AIAgent:
                     )
                     logger.debug(
                         "Error classified: reason=%s status=%s retryable=%s compress=%s rotate=%s fallback=%s",
-                        classified.reason.value, classified.status_code,
-                        classified.retryable, classified.should_compress,
-                        classified.should_rotate_credential, classified.should_fallback,
+                        classified.reason.value,
+                        classified.status_code,
+                        classified.retryable,
+                        classified.should_compress,
+                        classified.should_rotate_credential,
+                        classified.should_fallback,
                     )
 
-                    recovered_with_pool, has_retried_429 = self._recover_with_credential_pool(
-                        status_code=status_code,
-                        has_retried_429=has_retried_429,
-                        classified_reason=classified.reason,
-                        error_context=error_context,
+                    recovered_with_pool, has_retried_429 = (
+                        self._recover_with_credential_pool(
+                            status_code=status_code,
+                            has_retried_429=has_retried_429,
+                            classified_reason=classified.reason,
+                            error_context=error_context,
+                        )
                     )
                     if recovered_with_pool:
                         continue
@@ -9111,7 +10469,9 @@ class AIAgent:
                     ):
                         codex_auth_retry_attempted = True
                         if self._try_refresh_codex_client_credentials(force=True):
-                            self._vprint(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
+                            self._vprint(
+                                f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request..."
+                            )
                             continue
                     if (
                         self.api_mode == "chat_completions"
@@ -9121,34 +10481,62 @@ class AIAgent:
                     ):
                         nous_auth_retry_attempted = True
                         if self._try_refresh_nous_client_credentials(force=True):
-                            print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
+                            print(
+                                f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request..."
+                            )
                             continue
                     if (
                         self.api_mode == "anthropic_messages"
                         and status_code == 401
-                        and hasattr(self, '_anthropic_api_key')
+                        and hasattr(self, "_anthropic_api_key")
                         and not anthropic_auth_retry_attempted
                     ):
                         anthropic_auth_retry_attempted = True
                         from agent.anthropic_adapter import _is_oauth_token
+
                         if self._try_refresh_anthropic_client_credentials():
-                            print(f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
+                            print(
+                                f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request..."
+                            )
                             continue
                         # Credential refresh didn't help — show diagnostic info
                         key = self._anthropic_api_key
-                        auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
-                        print(f"{self.log_prefix}🔐 Anthropic 401 — authentication failed.")
+                        auth_method = (
+                            "Bearer (OAuth/setup-token)"
+                            if _is_oauth_token(key)
+                            else "x-api-key (API key)"
+                        )
+                        print(
+                            f"{self.log_prefix}🔐 Anthropic 401 — authentication failed."
+                        )
                         print(f"{self.log_prefix}   Auth method: {auth_method}")
-                        print(f"{self.log_prefix}   Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{self.log_prefix}   Token: (empty or short)")
+                        print(
+                            f"{self.log_prefix}   Token prefix: {key[:12]}..."
+                            if key and len(key) > 12
+                            else f"{self.log_prefix}   Token: (empty or short)"
+                        )
                         print(f"{self.log_prefix}   Troubleshooting:")
                         from hermes_constants import display_hermes_home as _dhh_fn
+
                         _dhh = _dhh_fn()
-                        print(f"{self.log_prefix}     • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens")
-                        print(f"{self.log_prefix}     • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values")
-                        print(f"{self.log_prefix}     • For API keys: verify at https://console.anthropic.com/settings/keys")
-                        print(f"{self.log_prefix}     • For Claude Code: run 'claude /login' to refresh, then retry")
-                        print(f"{self.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
-                        print(f"{self.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")
+                        print(
+                            f"{self.log_prefix}     • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens"
+                        )
+                        print(
+                            f"{self.log_prefix}     • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values"
+                        )
+                        print(
+                            f"{self.log_prefix}     • For API keys: verify at https://console.anthropic.com/settings/keys"
+                        )
+                        print(
+                            f"{self.log_prefix}     • For Claude Code: run 'claude /login' to refresh, then retry"
+                        )
+                        print(
+                            f'{self.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_TOKEN ""'
+                        )
+                        print(
+                            f'{self.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_API_KEY ""'
+                        )
 
                     # ── Thinking block signature recovery ─────────────────
                     # Anthropic signs thinking blocks against the full turn
@@ -9173,7 +10561,8 @@ class AIAgent:
                         logging.warning(
                             "%sThinking block signature recovery: stripped "
                             "reasoning_details from %d messages",
-                            self.log_prefix, len(messages),
+                            self.log_prefix,
+                            len(messages),
                         )
                         continue
 
@@ -9182,7 +10571,7 @@ class AIAgent:
                     self._touch_activity(
                         f"API error recovery (attempt {retry_count}/{max_retries})"
                     )
-                    
+
                     error_type = type(api_error).__name__
                     error_msg = str(api_error).lower()
                     _error_summary = self._summarize_api_error(api_error)
@@ -9199,25 +10588,37 @@ class AIAgent:
                     _base = getattr(self, "base_url", "unknown")
                     _model = getattr(self, "model", "unknown")
                     _status_code_str = f" [HTTP {status_code}]" if status_code else ""
-                    self._vprint(f"{self.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}", force=True)
-                    self._vprint(f"{self.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
-                    self._vprint(f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True)
-                    self._vprint(f"{self.log_prefix}   📝 Error: {_error_summary}", force=True)
+                    self._vprint(
+                        f"{self.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}",
+                        force=True,
+                    )
+                    self._vprint(
+                        f"{self.log_prefix}   🔌 Provider: {_provider}  Model: {_model}",
+                        force=True,
+                    )
+                    self._vprint(
+                        f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True
+                    )
+                    self._vprint(
+                        f"{self.log_prefix}   📝 Error: {_error_summary}", force=True
+                    )
                     if status_code and status_code < 500:
                         _err_body = getattr(api_error, "body", None)
                         _err_body_str = str(_err_body)[:300] if _err_body else None
                         if _err_body_str:
-                            self._vprint(f"{self.log_prefix}   📋 Details: {_err_body_str}", force=True)
-                    self._vprint(f"{self.log_prefix}   ⏱️  Elapsed: {elapsed_time:.2f}s  Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens")
+                            self._vprint(
+                                f"{self.log_prefix}   📋 Details: {_err_body_str}",
+                                force=True,
+                            )
+                    self._vprint(
+                        f"{self.log_prefix}   ⏱️  Elapsed: {elapsed_time:.2f}s  Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens"
+                    )
 
                     # Actionable hint for OpenRouter "no tool endpoints" error.
                     # This fires regardless of whether fallback succeeds — the
                     # user needs to know WHY their model failed so they can fix
                     # their provider routing, not just silently fall back.
-                    if (
-                        self._is_openrouter_url()
-                        and "support tool use" in error_msg
-                    ):
+                    if self._is_openrouter_url() and "support tool use" in error_msg:
                         self._vprint(
                             f"{self.log_prefix}   💡 No OpenRouter providers for {_model} support tool calling with your current settings.",
                             force=True,
@@ -9238,7 +10639,10 @@ class AIAgent:
 
                     # Check for interrupt before deciding to retry
                     if self._interrupt_requested:
-                        self._vprint(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", force=True)
+                        self._vprint(
+                            f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.",
+                            force=True,
+                        )
                         self._persist_session(messages, conversation_history)
                         self.clear_interrupt()
                         return {
@@ -9248,7 +10652,7 @@ class AIAgent:
                             "completed": False,
                             "interrupted": True,
                         }
-                    
+
                     # Check for 413 payload-too-large BEFORE generic 4xx handler.
                     # A 413 is a payload-size error — the correct response is to
                     # compress history and retry, not abort immediately.
@@ -9293,7 +10697,8 @@ class AIAgent:
                         if compression_attempts <= max_compression_attempts:
                             original_len = len(messages)
                             messages, active_system_prompt = self._compress_context(
-                                messages, system_message,
+                                messages,
+                                system_message,
                                 approx_tokens=approx_tokens,
                                 task_id=effective_task_id,
                             )
@@ -9320,7 +10725,9 @@ class AIAgent:
                         FailoverReason.rate_limit,
                         FailoverReason.billing,
                     )
-                    if is_rate_limited and self._fallback_index < len(self._fallback_chain):
+                    if is_rate_limited and self._fallback_index < len(
+                        self._fallback_chain
+                    ):
                         # Don't eagerly fallback if credential pool rotation may
                         # still recover.  The pool's retry-then-rotate cycle needs
                         # at least one more attempt to fire — jumping to a fallback
@@ -9328,7 +10735,9 @@ class AIAgent:
                         pool = self._credential_pool
                         pool_may_recover = pool is not None and pool.has_available()
                         if not pool_may_recover:
-                            self._emit_status("⚠️ Rate limited — switching to fallback provider...")
+                            self._emit_status(
+                                "⚠️ Rate limited — switching to fallback provider..."
+                            )
                             if self._try_activate_fallback():
                                 retry_count = 0
                                 compression_attempts = 0
@@ -9342,9 +10751,17 @@ class AIAgent:
                     if is_payload_too_large:
                         compression_attempts += 1
                         if compression_attempts > max_compression_attempts:
-                            self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
-                            self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                            logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
+                            self._vprint(
+                                f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.",
+                                force=True,
+                            )
+                            self._vprint(
+                                f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.",
+                                force=True,
+                            )
+                            logging.error(
+                                f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts."
+                            )
                             self._persist_session(messages, conversation_history)
                             return {
                                 "messages": messages,
@@ -9355,11 +10772,15 @@ class AIAgent:
                                 "failed": True,
                                 "compression_exhausted": True,
                             }
-                        self._emit_status(f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
+                        self._emit_status(
+                            f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}..."
+                        )
 
                         original_len = len(messages)
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens,
+                            messages,
+                            system_message,
+                            approx_tokens=approx_tokens,
                             task_id=effective_task_id,
                         )
                         # Compression created a new session — clear history
@@ -9368,14 +10789,24 @@ class AIAgent:
                         conversation_history = None
 
                         if len(messages) < original_len:
-                            self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                            self._emit_status(
+                                f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying..."
+                            )
                             time.sleep(2)  # Brief pause between compression retries
                             restart_with_compressed_messages = True
                             break
                         else:
-                            self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
-                            self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                            logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
+                            self._vprint(
+                                f"{self.log_prefix}❌ Payload too large and cannot compress further.",
+                                force=True,
+                            )
+                            self._vprint(
+                                f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.",
+                                force=True,
+                            )
+                            logging.error(
+                                f"{self.log_prefix}413 payload too large. Cannot compress further."
+                            )
                             self._persist_session(messages, conversation_history)
                             return {
                                 "messages": messages,
@@ -9409,7 +10840,9 @@ class AIAgent:
                         #
                         # Note: max_tokens = output token cap (one response).
                         #       context_length = total window (input + output combined).
-                        available_out = parse_available_output_tokens_from_error(error_msg)
+                        available_out = parse_available_output_tokens_from_error(
+                            error_msg
+                        )
                         if available_out is not None:
                             # Error is purely about the output cap being too large.
                             # Cap output to the available space and retry without
@@ -9426,9 +10859,17 @@ class AIAgent:
                             # loop forever if the error keeps recurring.
                             compression_attempts += 1
                             if compression_attempts > max_compression_attempts:
-                                self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
-                                self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                                logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
+                                self._vprint(
+                                    f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.",
+                                    force=True,
+                                )
+                                self._vprint(
+                                    f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.",
+                                    force=True,
+                                )
+                                logging.error(
+                                    f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts."
+                                )
                                 self._persist_session(messages, conversation_history)
                                 return {
                                     "messages": messages,
@@ -9447,7 +10888,10 @@ class AIAgent:
                         parsed_limit = parse_context_limit_from_error(error_msg)
                         if parsed_limit and parsed_limit < old_ctx:
                             new_ctx = parsed_limit
-                            self._vprint(f"{self.log_prefix}⚠️  Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})",
+                                force=True,
+                            )
                         else:
                             # Step down to the next probe tier
                             new_ctx = get_next_probe_tier(old_ctx)
@@ -9472,15 +10916,29 @@ class AIAgent:
                                 compressor._context_probe_persistable = bool(
                                     parsed_limit and parsed_limit == new_ctx
                                 )
-                            self._vprint(f"{self.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", force=True)
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens",
+                                force=True,
+                            )
                         else:
-                            self._vprint(f"{self.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...", force=True)
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...",
+                                force=True,
+                            )
 
                         compression_attempts += 1
                         if compression_attempts > max_compression_attempts:
-                            self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
-                            self._vprint(f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                            logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
+                            self._vprint(
+                                f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.",
+                                force=True,
+                            )
+                            self._vprint(
+                                f"{self.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.",
+                                force=True,
+                            )
+                            logging.error(
+                                f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts."
+                            )
                             self._persist_session(messages, conversation_history)
                             return {
                                 "messages": messages,
@@ -9491,11 +10949,15 @@ class AIAgent:
                                 "failed": True,
                                 "compression_exhausted": True,
                             }
-                        self._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")
+                        self._emit_status(
+                            f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})..."
+                        )
 
                         original_len = len(messages)
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens,
+                            messages,
+                            system_message,
+                            approx_tokens=approx_tokens,
                             task_id=effective_task_id,
                         )
                         # Compression created a new session — clear history
@@ -9503,17 +10965,31 @@ class AIAgent:
                         # messages to the new session, not skipping them.
                         conversation_history = None
 
-                        if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
+                        if (
+                            len(messages) < original_len
+                            or new_ctx
+                            and new_ctx < old_ctx
+                        ):
                             if len(messages) < original_len:
-                                self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                                self._emit_status(
+                                    f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying..."
+                                )
                             time.sleep(2)  # Brief pause between compression retries
                             restart_with_compressed_messages = True
                             break
                         else:
                             # Can't compress further and already at minimum tier
-                            self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
-                            self._vprint(f"{self.log_prefix}   💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
-                            logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
+                            self._vprint(
+                                f"{self.log_prefix}❌ Context length exceeded and cannot compress further.",
+                                force=True,
+                            )
+                            self._vprint(
+                                f"{self.log_prefix}   💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.",
+                                force=True,
+                            )
+                            logging.error(
+                                f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further."
+                            )
                             self._persist_session(messages, conversation_history)
                             return {
                                 "messages": messages,
@@ -9529,16 +11005,16 @@ class AIAgent:
                     # already accounts for 413, 429, 529 (transient), context
                     # overflow, and generic-400 heuristics.  Local validation
                     # errors (ValueError, TypeError) are programming bugs.
-                    is_local_validation_error = (
-                        isinstance(api_error, (ValueError, TypeError))
-                        and not isinstance(api_error, UnicodeEncodeError)
-                    )
+                    is_local_validation_error = isinstance(
+                        api_error, (ValueError, TypeError)
+                    ) and not isinstance(api_error, UnicodeEncodeError)
                     is_client_error = (
                         is_local_validation_error
                         or (
                             not classified.retryable
                             and not classified.should_compress
-                            and classified.reason not in (
+                            and classified.reason
+                            not in (
                                 FailoverReason.rate_limit,
                                 FailoverReason.billing,
                                 FailoverReason.overloaded,
@@ -9553,7 +11029,9 @@ class AIAgent:
                     if is_client_error:
                         # Try fallback before aborting — a different provider
                         # may not have the same issue (rate limit, auth, etc.)
-                        self._emit_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
+                        self._emit_status(
+                            f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback..."
+                        )
                         if self._try_activate_fallback():
                             retry_count = 0
                             compression_attempts = 0
@@ -9561,37 +11039,81 @@ class AIAgent:
                             continue
                         if api_kwargs is not None:
                             self._dump_api_request_debug(
-                                api_kwargs, reason="non_retryable_client_error", error=api_error,
+                                api_kwargs,
+                                reason="non_retryable_client_error",
+                                error=api_error,
                             )
                         self._emit_status(
                             f"❌ Non-retryable error (HTTP {status_code}): "
                             f"{self._summarize_api_error(api_error)}"
                         )
-                        self._vprint(f"{self.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
-                        self._vprint(f"{self.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
-                        self._vprint(f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True)
+                        self._vprint(
+                            f"{self.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}   🔌 Provider: {_provider}  Model: {_model}",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True
+                        )
                         # Actionable guidance for common auth errors
-                        if classified.is_auth or classified.reason == FailoverReason.billing:
+                        if (
+                            classified.is_auth
+                            or classified.reason == FailoverReason.billing
+                        ):
                             if _provider == "openai-codex" and status_code == 401:
-                                self._vprint(f"{self.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
-                                self._vprint(f"{self.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
-                                self._vprint(f"{self.log_prefix}      1. Run `codex` in your terminal to generate fresh tokens.", force=True)
-                                self._vprint(f"{self.log_prefix}      2. Then run `hermes auth` to re-authenticate.", force=True)
+                                self._vprint(
+                                    f"{self.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been",
+                                    force=True,
+                                )
+                                self._vprint(
+                                    f"{self.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:",
+                                    force=True,
+                                )
+                                self._vprint(
+                                    f"{self.log_prefix}      1. Run `codex` in your terminal to generate fresh tokens.",
+                                    force=True,
+                                )
+                                self._vprint(
+                                    f"{self.log_prefix}      2. Then run `hermes auth` to re-authenticate.",
+                                    force=True,
+                                )
                             else:
-                                self._vprint(f"{self.log_prefix}   💡 Your API key was rejected by the provider. Check:", force=True)
-                                self._vprint(f"{self.log_prefix}      • Is the key valid? Run: hermes setup", force=True)
-                                self._vprint(f"{self.log_prefix}      • Does your account have access to {_model}?", force=True)
+                                self._vprint(
+                                    f"{self.log_prefix}   💡 Your API key was rejected by the provider. Check:",
+                                    force=True,
+                                )
+                                self._vprint(
+                                    f"{self.log_prefix}      • Is the key valid? Run: hermes setup",
+                                    force=True,
+                                )
+                                self._vprint(
+                                    f"{self.log_prefix}      • Does your account have access to {_model}?",
+                                    force=True,
+                                )
                                 if "openrouter" in str(_base).lower():
-                                    self._vprint(f"{self.log_prefix}      • Check credits: https://openrouter.ai/settings/credits", force=True)
+                                    self._vprint(
+                                        f"{self.log_prefix}      • Check credits: https://openrouter.ai/settings/credits",
+                                        force=True,
+                                    )
                         else:
-                            self._vprint(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
-                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
+                            self._vprint(
+                                f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.",
+                                force=True,
+                            )
+                        logging.error(
+                            f"{self.log_prefix}Non-retryable client error: {api_error}"
+                        )
                         # Skip session persistence when the error is likely
                         # context-overflow related (status 400 + large session).
                         # Persisting the failed user message would make the
                         # session even larger, causing the same failure on the
                         # next attempt. (#1630)
-                        if status_code == 400 and (approx_tokens > 50000 or len(api_messages) > 80):
+                        if status_code == 400 and (
+                            approx_tokens > 50000 or len(api_messages) > 80
+                        ):
                             self._vprint(
                                 f"{self.log_prefix}⚠️  Skipping session persistence "
                                 f"for large failed session to prevent growth loop.",
@@ -9613,14 +11135,21 @@ class AIAgent:
                         # client once for transient transport errors (stale
                         # connection pool, TCP reset).  Only attempted once
                         # per API call block.
-                        if not primary_recovery_attempted and self._try_recover_primary_transport(
-                            api_error, retry_count=retry_count, max_retries=max_retries,
+                        if (
+                            not primary_recovery_attempted
+                            and self._try_recover_primary_transport(
+                                api_error,
+                                retry_count=retry_count,
+                                max_retries=max_retries,
+                            )
                         ):
                             primary_recovery_attempted = True
                             retry_count = 0
                             continue
                         # Try fallback before giving up entirely
-                        self._emit_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
+                        self._emit_status(
+                            f"⚠️ Max retries ({max_retries}) exhausted — trying fallback..."
+                        )
                         if self._try_activate_fallback():
                             retry_count = 0
                             compression_attempts = 0
@@ -9628,23 +11157,35 @@ class AIAgent:
                             continue
                         _final_summary = self._summarize_api_error(api_error)
                         if is_rate_limited:
-                            self._emit_status(f"❌ Rate limited after {max_retries} retries — {_final_summary}")
+                            self._emit_status(
+                                f"❌ Rate limited after {max_retries} retries — {_final_summary}"
+                            )
                         else:
-                            self._emit_status(f"❌ API failed after {max_retries} retries — {_final_summary}")
-                        self._vprint(f"{self.log_prefix}   💀 Final error: {_final_summary}", force=True)
+                            self._emit_status(
+                                f"❌ API failed after {max_retries} retries — {_final_summary}"
+                            )
+                        self._vprint(
+                            f"{self.log_prefix}   💀 Final error: {_final_summary}",
+                            force=True,
+                        )
 
                         # Detect SSE stream-drop pattern (e.g. "Network
                         # connection lost") and surface actionable guidance.
                         # This typically happens when the model generates a
                         # very large tool call (write_file with huge content)
                         # and the proxy/CDN drops the stream mid-response.
-                        _is_stream_drop = (
-                            not getattr(api_error, "status_code", None)
-                            and any(p in error_msg for p in (
-                                "connection lost", "connection reset",
-                                "connection closed", "network connection",
-                                "network error", "terminated",
-                            ))
+                        _is_stream_drop = not getattr(
+                            api_error, "status_code", None
+                        ) and any(
+                            p in error_msg
+                            for p in (
+                                "connection lost",
+                                "connection reset",
+                                "connection closed",
+                                "network connection",
+                                "network error",
+                                "terminated",
+                            )
                         )
                         if _is_stream_drop:
                             self._vprint(
@@ -9664,12 +11205,19 @@ class AIAgent:
 
                         logging.error(
                             "%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
-                            self.log_prefix, max_retries, _final_summary,
-                            _provider, _model, len(api_messages), f"{approx_tokens:,}",
+                            self.log_prefix,
+                            max_retries,
+                            _final_summary,
+                            _provider,
+                            _model,
+                            len(api_messages),
+                            f"{approx_tokens:,}",
                         )
                         if api_kwargs is not None:
                             self._dump_api_request_debug(
-                                api_kwargs, reason="max_retries_exhausted", error=api_error,
+                                api_kwargs,
+                                reason="max_retries_exhausted",
+                                error=api_error,
                             )
                         self._persist_session(messages, conversation_history)
                         _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
@@ -9694,19 +11242,35 @@ class AIAgent:
                     # For rate limits, respect the Retry-After header if present
                     _retry_after = None
                     if is_rate_limited:
-                        _resp_headers = getattr(getattr(api_error, "response", None), "headers", None)
+                        _resp_headers = getattr(
+                            getattr(api_error, "response", None), "headers", None
+                        )
                         if _resp_headers and hasattr(_resp_headers, "get"):
-                            _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
+                            _ra_raw = _resp_headers.get(
+                                "retry-after"
+                            ) or _resp_headers.get("Retry-After")
                             if _ra_raw:
                                 try:
-                                    _retry_after = min(int(_ra_raw), 120)  # Cap at 2 minutes
+                                    _retry_after = min(
+                                        int(_ra_raw), 120
+                                    )  # Cap at 2 minutes
                                 except (TypeError, ValueError):
                                     pass
-                    wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
+                    wait_time = (
+                        _retry_after
+                        if _retry_after
+                        else jittered_backoff(
+                            retry_count, base_delay=2.0, max_delay=60.0
+                        )
+                    )
                     if is_rate_limited:
-                        self._emit_status(f"⏱️ Rate limit reached. Waiting {wait_time}s before retry (attempt {retry_count + 1}/{max_retries})...")
+                        self._emit_status(
+                            f"⏱️ Rate limit reached. Waiting {wait_time}s before retry (attempt {retry_count + 1}/{max_retries})..."
+                        )
                     else:
-                        self._emit_status(f"⏳ Retrying in {wait_time}s (attempt {retry_count}/{max_retries})...")
+                        self._emit_status(
+                            f"⏳ Retrying in {wait_time}s (attempt {retry_count}/{max_retries})..."
+                        )
                     logger.warning(
                         "Retrying API call in %ss (attempt %s/%s) %s error=%s",
                         wait_time,
@@ -9721,7 +11285,10 @@ class AIAgent:
                     _backoff_touch_counter = 0
                     while time.time() < sleep_end:
                         if self._interrupt_requested:
-                            self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
+                            self._vprint(
+                                f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.",
+                                force=True,
+                            )
                             self._persist_session(messages, conversation_history)
                             self.clear_interrupt()
                             return {
@@ -9740,7 +11307,7 @@ class AIAgent:
                                 f"error retry backoff ({retry_count}/{max_retries}), "
                                 f"{int(sleep_end - time.time())}s remaining"
                             )
-            
+
             # If the API call was interrupted, skip response processing
             if interrupted:
                 _turn_exit_reason = "interrupted_during_api_call"
@@ -9764,28 +11331,39 @@ class AIAgent:
             # the `response` variable is still None. Break out cleanly.
             if response is None:
                 _turn_exit_reason = "all_retries_exhausted_no_response"
-                print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.")
+                print(
+                    f"{self.log_prefix}❌ All API retries exhausted with no successful response."
+                )
                 self._persist_session(messages, conversation_history)
                 break
 
             try:
                 if self.api_mode == "codex_responses":
-                    assistant_message, finish_reason = self._normalize_codex_response(response)
+                    assistant_message, finish_reason = self._normalize_codex_response(
+                        response
+                    )
                 elif self.api_mode == "anthropic_messages":
                     from agent.anthropic_adapter import normalize_anthropic_response
+
                     assistant_message, finish_reason = normalize_anthropic_response(
                         response, strip_tool_prefix=self._is_anthropic_oauth
                     )
                 else:
                     assistant_message = response.choices[0].message
-                
+
                 # Normalize content to string — some OpenAI-compatible servers
                 # (llama-server, etc.) return content as a dict or list instead
                 # of a plain string, which crashes downstream .strip() calls.
-                if assistant_message.content is not None and not isinstance(assistant_message.content, str):
+                if assistant_message.content is not None and not isinstance(
+                    assistant_message.content, str
+                ):
                     raw = assistant_message.content
                     if isinstance(raw, dict):
-                        assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
+                        assistant_message.content = (
+                            raw.get("text", "")
+                            or raw.get("content", "")
+                            or json.dumps(raw)
+                        )
                     elif isinstance(raw, list):
                         # Multimodal content list — extract text parts
                         parts = []
@@ -9802,7 +11380,10 @@ class AIAgent:
 
                 try:
                     from hermes_cli.plugins import invoke_hook as _invoke_hook
-                    _assistant_tool_calls = getattr(assistant_message, "tool_calls", None) or []
+
+                    _assistant_tool_calls = (
+                        getattr(assistant_message, "tool_calls", None) or []
+                    )
                     _assistant_text = assistant_message.content or ""
                     _invoke_hook(
                         "post_api_request",
@@ -9828,87 +11409,125 @@ class AIAgent:
                 # Handle assistant response
                 if assistant_message.content and not self.quiet_mode:
                     if self.verbose_logging:
-                        self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content}")
+                        self._vprint(
+                            f"{self.log_prefix}🤖 Assistant: {assistant_message.content}"
+                        )
                     else:
-                        self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
+                        self._vprint(
+                            f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}"
+                        )
 
                 # Notify progress callback of model's thinking (used by subagent
                 # delegation to relay the child's reasoning to the parent display).
-                if (assistant_message.content and self.tool_progress_callback):
+                if assistant_message.content and self.tool_progress_callback:
                     _think_text = assistant_message.content.strip()
                     # Strip reasoning XML tags that shouldn't leak to parent display
                     _think_text = re.sub(
-                        r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
+                        r"</?(?:REASONING_SCRATCHPAD|think|reasoning)>", "", _think_text
                     ).strip()
                     # For subagents: relay first line to parent display (existing behaviour).
                     # For all agents with a structured callback: emit reasoning.available event.
-                    first_line = _think_text.split('\n')[0][:80] if _think_text else ""
-                    if first_line and getattr(self, '_delegate_depth', 0) > 0:
+                    first_line = _think_text.split("\n")[0][:80] if _think_text else ""
+                    if first_line and getattr(self, "_delegate_depth", 0) > 0:
                         try:
                             self.tool_progress_callback("_thinking", first_line)
                         except Exception:
                             pass
                     elif _think_text:
                         try:
-                            self.tool_progress_callback("reasoning.available", "_thinking", _think_text[:500], None)
+                            self.tool_progress_callback(
+                                "reasoning.available",
+                                "_thinking",
+                                _think_text[:500],
+                                None,
+                            )
                         except Exception:
                             pass
-                
+
                 # Check for incomplete <REASONING_SCRATCHPAD> (opened but never closed)
                 # This means the model ran out of output tokens mid-reasoning — retry up to 2 times
                 if has_incomplete_scratchpad(assistant_message.content or ""):
                     self._incomplete_scratchpad_retries += 1
-                    
-                    self._vprint(f"{self.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
-                    
+
+                    self._vprint(
+                        f"{self.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)"
+                    )
+
                     if self._incomplete_scratchpad_retries <= 2:
-                        self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
+                        self._vprint(
+                            f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)..."
+                        )
                         # Don't add the broken message, just retry
                         continue
                     else:
                         # Max retries - discard this turn and save as partial
-                        self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.", force=True)
+                        self._vprint(
+                            f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.",
+                            force=True,
+                        )
                         self._incomplete_scratchpad_retries = 0
-                        
-                        rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
+
+                        rolled_back_messages = self._get_messages_up_to_last_assistant(
+                            messages
+                        )
                         self._cleanup_task_resources(effective_task_id)
                         self._persist_session(messages, conversation_history)
-                        
+
                         return {
                             "final_response": None,
                             "messages": rolled_back_messages,
                             "api_calls": api_call_count,
                             "completed": False,
                             "partial": True,
-                            "error": "Incomplete REASONING_SCRATCHPAD after 2 retries"
+                            "error": "Incomplete REASONING_SCRATCHPAD after 2 retries",
                         }
-                
+
                 # Reset incomplete scratchpad counter on clean response
                 self._incomplete_scratchpad_retries = 0
 
                 if self.api_mode == "codex_responses" and finish_reason == "incomplete":
                     self._codex_incomplete_retries += 1
 
-                    interim_msg = self._build_assistant_message(assistant_message, finish_reason)
-                    interim_has_content = bool((interim_msg.get("content") or "").strip())
-                    interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
-                    interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items"))
+                    interim_msg = self._build_assistant_message(
+                        assistant_message, finish_reason
+                    )
+                    interim_has_content = bool(
+                        (interim_msg.get("content") or "").strip()
+                    )
+                    interim_has_reasoning = (
+                        bool(interim_msg.get("reasoning", "").strip())
+                        if isinstance(interim_msg.get("reasoning"), str)
+                        else False
+                    )
+                    interim_has_codex_reasoning = bool(
+                        interim_msg.get("codex_reasoning_items")
+                    )
 
-                    if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning:
+                    if (
+                        interim_has_content
+                        or interim_has_reasoning
+                        or interim_has_codex_reasoning
+                    ):
                         last_msg = messages[-1] if messages else None
                         # Duplicate detection: two consecutive incomplete assistant
                         # messages with identical content AND reasoning are collapsed.
                         # For reasoning-only messages (codex_reasoning_items differ but
                         # visible content/reasoning are both empty), we also compare
                         # the encrypted items to avoid silently dropping new state.
-                        last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None
+                        last_codex_items = (
+                            last_msg.get("codex_reasoning_items")
+                            if isinstance(last_msg, dict)
+                            else None
+                        )
                         interim_codex_items = interim_msg.get("codex_reasoning_items")
                         duplicate_interim = (
                             isinstance(last_msg, dict)
                             and last_msg.get("role") == "assistant"
                             and last_msg.get("finish_reason") == "incomplete"
-                            and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
-                            and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
+                            and (last_msg.get("content") or "")
+                            == (interim_msg.get("content") or "")
+                            and (last_msg.get("reasoning") or "")
+                            == (interim_msg.get("reasoning") or "")
                             and last_codex_items == interim_codex_items
                         )
                         if not duplicate_interim:
@@ -9917,7 +11536,9 @@ class AIAgent:
 
                     if self._codex_incomplete_retries < 3:
                         if not self.quiet_mode:
-                            self._vprint(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
+                            self._vprint(
+                                f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)"
+                            )
                         self._session_messages = messages
                         self._save_session_log(messages)
                         continue
@@ -9934,26 +11555,33 @@ class AIAgent:
                     }
                 elif hasattr(self, "_codex_incomplete_retries"):
                     self._codex_incomplete_retries = 0
-                
+
                 # Check for tool calls
                 if assistant_message.tool_calls:
                     if not self.quiet_mode:
-                        self._vprint(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
-                    
+                        self._vprint(
+                            f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)..."
+                        )
+
                     if self.verbose_logging:
                         for tc in assistant_message.tool_calls:
-                            logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...")
-                    
+                            logging.debug(
+                                f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}..."
+                            )
+
                     # Validate tool call names - detect model hallucinations
                     # Repair mismatched tool names before validating
                     for tc in assistant_message.tool_calls:
                         if tc.function.name not in self.valid_tool_names:
                             repaired = self._repair_tool_call(tc.function.name)
                             if repaired:
-                                print(f"{self.log_prefix}🔧 Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'")
+                                print(
+                                    f"{self.log_prefix}🔧 Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'"
+                                )
                                 tc.function.name = repaired
                     invalid_tool_calls = [
-                        tc.function.name for tc in assistant_message.tool_calls
+                        tc.function.name
+                        for tc in assistant_message.tool_calls
                         if tc.function.name not in self.valid_tool_names
                     ]
                     if invalid_tool_calls:
@@ -9963,11 +11591,20 @@ class AIAgent:
                         # Return helpful error to model — model can self-correct next turn
                         available = ", ".join(sorted(self.valid_tool_names))
                         invalid_name = invalid_tool_calls[0]
-                        invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
-                        self._vprint(f"{self.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)")
+                        invalid_preview = (
+                            invalid_name[:80] + "..."
+                            if len(invalid_name) > 80
+                            else invalid_name
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)"
+                        )
 
                         if self._invalid_tool_retries >= 3:
-                            self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", force=True)
+                            self._vprint(
+                                f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.",
+                                force=True,
+                            )
                             self._invalid_tool_retries = 0
                             self._persist_session(messages, conversation_history)
                             return {
@@ -9976,25 +11613,29 @@ class AIAgent:
                                 "api_calls": api_call_count,
                                 "completed": False,
                                 "partial": True,
-                                "error": f"Model generated invalid tool call: {invalid_preview}"
+                                "error": f"Model generated invalid tool call: {invalid_preview}",
                             }
 
-                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
+                        assistant_msg = self._build_assistant_message(
+                            assistant_message, finish_reason
+                        )
                         messages.append(assistant_msg)
                         for tc in assistant_message.tool_calls:
                             if tc.function.name not in self.valid_tool_names:
                                 content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
                             else:
                                 content = "Skipped: another tool call in this turn used an invalid name. Please retry this tool call."
-                            messages.append({
-                                "role": "tool",
-                                "tool_call_id": tc.id,
-                                "content": content,
-                            })
+                            messages.append(
+                                {
+                                    "role": "tool",
+                                    "tool_call_id": tc.id,
+                                    "content": content,
+                                }
+                            )
                         continue
                     # Reset retry counter on successful tool call validation
                     self._invalid_tool_retries = 0
-                    
+
                     # Validate tool call arguments are valid JSON
                     # Handle empty strings as empty objects (common model quirk)
                     invalid_json_args = []
@@ -10014,7 +11655,7 @@ class AIAgent:
                             json.loads(args)
                         except json.JSONDecodeError as e:
                             invalid_json_args.append((tc.function.name, str(e)))
-                    
+
                     if invalid_json_args:
                         # Check if the invalid JSON is due to truncation rather
                         # than a model formatting mistake.  Routers sometimes
@@ -10023,7 +11664,9 @@ class AIAgent:
                         # Detect truncation: args that don't end with } or ]
                         # (after stripping whitespace) are cut off mid-stream.
                         _truncated = any(
-                            not (tc.function.arguments or "").rstrip().endswith(("}", "]"))
+                            not (tc.function.arguments or "")
+                            .rstrip()
+                            .endswith(("}", "]"))
                             for tc in assistant_message.tool_calls
                             if tc.function.name in {n for n, _ in invalid_json_args}
                         )
@@ -10049,27 +11692,39 @@ class AIAgent:
                         self._invalid_json_retries += 1
 
                         tool_name, error_msg = invalid_json_args[0]
-                        self._vprint(f"{self.log_prefix}⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
+                        self._vprint(
+                            f"{self.log_prefix}⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}"
+                        )
 
                         if self._invalid_json_retries < 3:
-                            self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
+                            self._vprint(
+                                f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)..."
+                            )
                             # Don't add anything to messages, just retry the API call
                             continue
                         else:
                             # Instead of returning partial, inject tool error results so the model can recover.
                             # Using tool results (not user messages) preserves role alternation.
-                            self._vprint(f"{self.log_prefix}⚠️  Injecting recovery tool results for invalid JSON...")
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Injecting recovery tool results for invalid JSON..."
+                            )
                             self._invalid_json_retries = 0  # Reset for next attempt
-                            
+
                             # Append the assistant message with its (broken) tool_calls
-                            recovery_assistant = self._build_assistant_message(assistant_message, finish_reason)
+                            recovery_assistant = self._build_assistant_message(
+                                assistant_message, finish_reason
+                            )
                             messages.append(recovery_assistant)
-                            
+
                             # Respond with tool error results for each tool call
                             invalid_names = {name for name, _ in invalid_json_args}
                             for tc in assistant_message.tool_calls:
                                 if tc.function.name in invalid_names:
-                                    err = next(e for n, e in invalid_json_args if n == tc.function.name)
+                                    err = next(
+                                        e
+                                        for n, e in invalid_json_args
+                                        if n == tc.function.name
+                                    )
                                     tool_result = (
                                         f"Error: Invalid JSON arguments. {err}. "
                                         f"For tools with no required parameters, use an empty object: {{}}. "
@@ -10077,13 +11732,15 @@ class AIAgent:
                                     )
                                 else:
                                     tool_result = "Skipped: other tool call in this response had invalid JSON."
-                                messages.append({
-                                    "role": "tool",
-                                    "tool_call_id": tc.id,
-                                    "content": tool_result,
-                                })
+                                messages.append(
+                                    {
+                                        "role": "tool",
+                                        "tool_call_id": tc.id,
+                                        "content": tool_result,
+                                    }
+                                )
                             continue
-                    
+
                     # Reset retry counter on successful JSON validation
                     self._invalid_json_retries = 0
 
@@ -10095,23 +11752,32 @@ class AIAgent:
                         assistant_message.tool_calls
                     )
 
-                    assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
-                    
+                    assistant_msg = self._build_assistant_message(
+                        assistant_message, finish_reason
+                    )
+
                     # If this turn has both content AND tool_calls, capture the content
                     # as a fallback final response. Common pattern: model delivers its
                     # answer and calls memory/skill tools as a side-effect in the same
                     # turn. If the follow-up turn after tools is empty, we use this.
                     turn_content = assistant_message.content or ""
-                    if turn_content and self._has_content_after_think_block(turn_content):
+                    if turn_content and self._has_content_after_think_block(
+                        turn_content
+                    ):
                         self._last_content_with_tools = turn_content
                         # Only mute subsequent output when EVERY tool call in
                         # this turn is post-response housekeeping (memory, todo,
                         # skill_manage, etc.).  If any substantive tool is present
                         # (search_files, read_file, write_file, terminal, ...),
                         # keep output visible so the user sees progress.
-                        _HOUSEKEEPING_TOOLS = frozenset({
-                            "memory", "todo", "skill_manage", "session_search",
-                        })
+                        _HOUSEKEEPING_TOOLS = frozenset(
+                            {
+                                "memory",
+                                "todo",
+                                "skill_manage",
+                                "session_search",
+                            }
+                        )
                         _all_housekeeping = all(
                             tc.function.name in _HOUSEKEEPING_TOOLS
                             for tc in assistant_message.tool_calls
@@ -10122,7 +11788,7 @@ class AIAgent:
                             clean = self._strip_think_blocks(turn_content).strip()
                             if clean:
                                 self._vprint(f"  ┊ 💬 {clean}")
-                    
+
                     # Pop thinking-only prefill message(s) before appending
                     # (tool-call path — same rationale as the final-response path).
                     _had_prefill = False
@@ -10164,7 +11830,9 @@ class AIAgent:
                         except Exception:
                             pass
 
-                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
+                    self._execute_tool_calls(
+                        assistant_message, messages, effective_task_id, api_call_count
+                    )
 
                     # Reset per-turn retry counters after successful tool
                     # execution so a single truncation doesn't poison the
@@ -10182,10 +11850,12 @@ class AIAgent:
                     # Refund the iteration if the ONLY tool(s) called were
                     # execute_code (programmatic tool calling).  These are
                     # cheap RPC-style calls that shouldn't eat the budget.
-                    _tc_names = {tc.function.name for tc in assistant_message.tool_calls}
+                    _tc_names = {
+                        tc.function.name for tc in assistant_message.tool_calls
+                    }
                     if _tc_names == {"execute_code"}:
                         self.iteration_budget.refund()
-                    
+
                     # Use real token counts from the API response to decide
                     # compression.  prompt_tokens + completion_tokens is the
                     # actual context size the provider reported plus the
@@ -10217,7 +11887,9 @@ class AIAgent:
                     # and fires status_callback for gateway platforms.
                     # Tiered: 85% (orange) and 95% (red/critical).
                     if _compressor.threshold_tokens > 0:
-                        _compaction_progress = _real_tokens / _compressor.threshold_tokens
+                        _compaction_progress = (
+                            _real_tokens / _compressor.threshold_tokens
+                        )
                         # Determine the warning tier for this progress level
                         _warn_tier = 0.0
                         if _compaction_progress >= 0.95:
@@ -10230,21 +11902,34 @@ class AIAgent:
                             _sid = self.session_id or "default"
                             _last = AIAgent._context_pressure_last_warned.get(_sid)
                             _now = time.time()
-                            if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN:
+                            if (
+                                _last is None
+                                or _last[0] < _warn_tier
+                                or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN
+                            ):
                                 self._context_pressure_warned_at = _warn_tier
-                                AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now)
-                                self._emit_context_pressure(_compaction_progress, _compressor)
+                                AIAgent._context_pressure_last_warned[_sid] = (
+                                    _warn_tier,
+                                    _now,
+                                )
+                                self._emit_context_pressure(
+                                    _compaction_progress, _compressor
+                                )
                                 # Evict stale entries (older than 2x cooldown)
                                 _cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2
                                 AIAgent._context_pressure_last_warned = {
-                                    k: v for k, v in AIAgent._context_pressure_last_warned.items()
+                                    k: v
+                                    for k, v in AIAgent._context_pressure_last_warned.items()
                                     if v[1] > _cutoff
                                 }
 
-                    if self.compression_enabled and _compressor.should_compress(_real_tokens):
+                    if self.compression_enabled and _compressor.should_compress(
+                        _real_tokens
+                    ):
                         self._safe_print("  ⟳ compacting context…")
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message,
+                            messages,
+                            system_message,
                             approx_tokens=self.context_compressor.last_prompt_tokens,
                             task_id=effective_task_id,
                         )
@@ -10252,25 +11937,25 @@ class AIAgent:
                         # _flush_messages_to_session_db writes compressed messages
                         # to the new session (see preflight compression comment).
                         conversation_history = None
-                    
+
                     # Save session log incrementally (so progress is visible even if interrupted)
                     self._session_messages = messages
                     self._save_session_log(messages)
-                    
+
                     # Continue loop for next response
                     continue
-                
+
                 else:
                     # No tool calls - this is the final response
                     final_response = assistant_message.content or ""
-                    
+
                     # Fix: unmute output when entering the no-tool-call branch
                     # so the user can see empty-response warnings and recovery
                     # status messages.  _mute_post_response was set during a
                     # prior housekeeping tool turn and should not silence the
                     # final response path.
                     self._mute_post_response = False
-                    
+
                     # Check if response only has think block with no actual content after it
                     if not self._has_content_after_think_block(final_response):
                         # ── Partial stream recovery ─────────────────────
@@ -10283,7 +11968,9 @@ class AIAgent:
                         )
                         if self._has_content_after_think_block(_partial_streamed):
                             _turn_exit_reason = "partial_stream_recovery"
-                            _recovered = self._strip_think_blocks(_partial_streamed).strip()
+                            _recovered = self._strip_think_blocks(
+                                _partial_streamed
+                            ).strip()
                             logger.info(
                                 "Partial stream content delivered (%d chars) "
                                 "— using as final response",
@@ -10301,11 +11988,15 @@ class AIAgent:
                         # tool calls (e.g. "You're welcome!" + memory save), the model
                         # has nothing more to say. Use the earlier content immediately
                         # instead of wasting API calls on retries that won't help.
-                        fallback = getattr(self, '_last_content_with_tools', None)
+                        fallback = getattr(self, "_last_content_with_tools", None)
                         if fallback:
                             _turn_exit_reason = "fallback_prior_turn_content"
-                            logger.info("Empty follow-up after tool calls — using prior turn content as final response")
-                            self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
+                            logger.info(
+                                "Empty follow-up after tool calls — using prior turn content as final response"
+                            )
+                            self._emit_status(
+                                "↻ Empty response after tool calls — using earlier content as final answer"
+                            )
                             self._last_content_with_tools = None
                             self._empty_content_retries = 0
                             # Do NOT modify the assistant message content — the
@@ -10328,9 +12019,8 @@ class AIAgent:
                             m.get("role") == "tool"
                             for m in messages[-5:]  # check recent messages
                         )
-                        if (
-                            _prior_was_tool
-                            and not getattr(self, "_post_tool_empty_retried", False)
+                        if _prior_was_tool and not getattr(
+                            self, "_post_tool_empty_retried", False
                         ):
                             self._post_tool_empty_retried = True
                             logger.info(
@@ -10348,14 +12038,16 @@ class AIAgent:
                             # APIs reject as an invalid sequence.
                             assistant_msg["content"] = "(empty)"
                             messages.append(assistant_msg)
-                            messages.append({
-                                "role": "user",
-                                "content": (
-                                    "You just executed tool calls but returned an "
-                                    "empty response. Please process the tool "
-                                    "results above and continue with the task."
-                                ),
-                            })
+                            messages.append(
+                                {
+                                    "role": "user",
+                                    "content": (
+                                        "You just executed tool calls but returned an "
+                                        "empty response. Please process the tool "
+                                        "results above and continue with the task."
+                                    ),
+                                }
+                            )
                             continue
 
                         # ── Thinking-only prefill continuation ──────────
@@ -10403,15 +12095,19 @@ class AIAgent:
                             final_response
                         ).strip()
                         _prefill_exhausted = (
-                            _has_structured
-                            and self._thinking_prefill_retries >= 2
+                            _has_structured and self._thinking_prefill_retries >= 2
                         )
-                        if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3:
+                        if (
+                            _truly_empty
+                            and (not _has_structured or _prefill_exhausted)
+                            and self._empty_content_retries < 3
+                        ):
                             self._empty_content_retries += 1
                             logger.warning(
                                 "Empty response (no content or reasoning) — "
                                 "retry %d/3 (model=%s)",
-                                self._empty_content_retries, self.model,
+                                self._empty_content_retries,
+                                self.model,
                             )
                             self._emit_status(
                                 f"⚠️ Empty response from model — retrying "
@@ -10429,7 +12125,8 @@ class AIAgent:
                             logger.warning(
                                 "Empty response after %d retries — "
                                 "attempting fallback (model=%s, provider=%s)",
-                                self._empty_content_retries, self.model,
+                                self._empty_content_retries,
+                                self.model,
                                 self.provider,
                             )
                             self._emit_status(
@@ -10445,7 +12142,8 @@ class AIAgent:
                                 logger.info(
                                     "Fallback activated after empty responses: "
                                     "now using %s on %s",
-                                    self.model, self.provider,
+                                    self.model,
+                                    self.provider,
                                 )
                                 continue
 
@@ -10454,16 +12152,23 @@ class AIAgent:
                         # "(empty)" terminal.
                         _turn_exit_reason = "empty_response_exhausted"
                         reasoning_text = self._extract_reasoning(assistant_message)
-                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
+                        assistant_msg = self._build_assistant_message(
+                            assistant_message, finish_reason
+                        )
                         assistant_msg["content"] = "(empty)"
                         messages.append(assistant_msg)
 
                         if reasoning_text:
-                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
+                            reasoning_preview = (
+                                reasoning_text[:500] + "..."
+                                if len(reasoning_text) > 500
+                                else reasoning_text
+                            )
                             logger.warning(
                                 "Reasoning-only response (no visible content) "
                                 "after exhausting retries and fallback. "
-                                "Reasoning: %s", reasoning_preview,
+                                "Reasoning: %s",
+                                reasoning_preview,
                             )
                             self._emit_status(
                                 "⚠️ Model produced reasoning but no visible "
@@ -10474,18 +12179,22 @@ class AIAgent:
                                 "Empty response (no content or reasoning) "
                                 "after %d retries. No fallback available. "
                                 "model=%s provider=%s",
-                                self._empty_content_retries, self.model,
+                                self._empty_content_retries,
+                                self.model,
                                 self.provider,
                             )
                             self._emit_status(
                                 "❌ Model returned no content after all retries"
-                                + (" and fallback attempts." if self._fallback_chain else
-                                   ". No fallback providers configured.")
+                                + (
+                                    " and fallback attempts."
+                                    if self._fallback_chain
+                                    else ". No fallback providers configured."
+                                )
                             )
 
                         final_response = "(empty)"
                         break
-                    
+
                     # Reset retry counter/signature on successful content
                     self._empty_content_retries = 0
                     self._thinking_prefill_retries = 0
@@ -10501,7 +12210,9 @@ class AIAgent:
                         )
                     ):
                         codex_ack_continuations += 1
-                        interim_msg = self._build_assistant_message(assistant_message, "incomplete")
+                        interim_msg = self._build_assistant_message(
+                            assistant_message, "incomplete"
+                        )
                         messages.append(interim_msg)
                         self._emit_interim_assistant_message(interim_msg)
 
@@ -10523,11 +12234,13 @@ class AIAgent:
                         final_response = truncated_response_prefix + final_response
                         truncated_response_prefix = ""
                         length_continue_retries = 0
-                    
+
                     # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
                     final_response = self._strip_think_blocks(final_response).strip()
-                    
-                    final_msg = self._build_assistant_message(assistant_message, finish_reason)
+
+                    final_msg = self._build_assistant_message(
+                        assistant_message, finish_reason
+                    )
 
                     # Pop thinking-only prefill message(s) before appending
                     # the final response.  This avoids consecutive assistant
@@ -10541,21 +12254,25 @@ class AIAgent:
                         messages.pop()
 
                     messages.append(final_msg)
-                    
+
                     _turn_exit_reason = f"text_response(finish_reason={finish_reason})"
                     if not self.quiet_mode:
-                        self._safe_print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
+                        self._safe_print(
+                            f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)"
+                        )
                     break
-                
+
             except Exception as e:
                 error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
                 try:
                     print(f"❌ {error_msg}")
                 except (OSError, ValueError):
                     logger.error(error_msg)
-                
-                logger.debug("Outer loop error in API call #%d", api_call_count, exc_info=True)
-                
+
+                logger.debug(
+                    "Outer loop error in API call #%d", api_call_count, exc_info=True
+                )
+
                 # If an assistant message with tool_calls was already appended,
                 # the API expects a role="tool" result for every tool_call_id.
                 # Fill in error results for any that weren't answered yet.
@@ -10568,11 +12285,12 @@ class AIAgent:
                     if msg.get("role") == "assistant" and msg.get("tool_calls"):
                         answered_ids = {
                             m["tool_call_id"]
-                            for m in messages[idx + 1:]
+                            for m in messages[idx + 1 :]
                             if isinstance(m, dict) and m.get("role") == "tool"
                         }
                         for tc in msg["tool_calls"]:
-                            if not tc or not isinstance(tc, dict): continue
+                            if not tc or not isinstance(tc, dict):
+                                continue
                             if tc["id"] not in answered_ids:
                                 err_msg = {
                                     "role": "tool",
@@ -10581,7 +12299,7 @@ class AIAgent:
                                 }
                                 messages.append(err_msg)
                     break
-                
+
                 # Non-tool errors don't need a synthetic message injected.
                 # The error is already printed to the user (line above), and
                 # the retry loop continues.  Injecting a fake user/assistant
@@ -10591,12 +12309,14 @@ class AIAgent:
                 # If we're near the limit, break to avoid infinite loops
                 if api_call_count >= self.max_iterations - 1:
                     _turn_exit_reason = f"error_near_max_iterations({error_msg[:80]})"
-                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
+                    final_response = (
+                        f"I apologize, but I encountered repeated errors: {error_msg}"
+                    )
                     # Append as assistant so the history stays valid for
                     # session resume (avoids consecutive user messages).
                     messages.append({"role": "assistant", "content": final_response})
                     break
-        
+
         if final_response is None and (
             api_call_count >= self.max_iterations
             or self.iteration_budget.remaining <= 0
@@ -10604,7 +12324,9 @@ class AIAgent:
             # Budget exhausted — ask the model for a summary via one extra
             # API call with tools stripped.  _handle_max_iterations injects a
             # user message and makes a single toolless request.
-            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
+            _turn_exit_reason = (
+                f"max_iterations_reached({api_call_count}/{self.max_iterations})"
+            )
             self._emit_status(
                 f"⚠️ Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                 "— asking model to summarise"
@@ -10615,7 +12337,7 @@ class AIAgent:
                     "— requesting summary..."
                 )
             final_response = self._handle_max_iterations(messages, api_call_count)
-        
+
         # Determine if conversation completed successfully
         completed = final_response is not None and api_call_count < self.max_iterations
 
@@ -10644,8 +12366,11 @@ class AIAgent:
                     break
 
         _turn_tool_count = sum(
-            1 for m in messages
-            if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls")
+            1
+            for m in messages
+            if isinstance(m, dict)
+            and m.get("role") == "assistant"
+            and m.get("tool_calls")
         )
         _resp_len = len(final_response) if final_response else 0
         _budget_used = self.iteration_budget.used if self.iteration_budget else 0
@@ -10656,9 +12381,15 @@ class AIAgent:
             "tool_turns=%d last_msg_role=%s response_len=%d session=%s"
         )
         _diag_args = (
-            _turn_exit_reason, self.model, api_call_count, self.max_iterations,
-            _budget_used, _budget_max,
-            _turn_tool_count, _last_msg_role, _resp_len,
+            _turn_exit_reason,
+            self.model,
+            api_call_count,
+            self.max_iterations,
+            _budget_used,
+            _budget_max,
+            _turn_tool_count,
+            _last_msg_role,
+            _resp_len,
             self.session_id or "none",
         )
 
@@ -10666,8 +12397,10 @@ class AIAgent:
             # Agent was mid-work — this is the "just stops" case.
             logger.warning(
                 "Turn ended with pending tool result (agent may appear stuck). "
-                + _diag_msg + " last_tool=%s",
-                *_diag_args, _last_tool_name,
+                + _diag_msg
+                + " last_tool=%s",
+                *_diag_args,
+                _last_tool_name,
             )
         else:
             logger.info(_diag_msg, *_diag_args)
@@ -10679,6 +12412,7 @@ class AIAgent:
         if final_response and not interrupted:
             try:
                 from hermes_cli.plugins import invoke_hook as _invoke_hook
+
                 _invoke_hook(
                     "post_llm_call",
                     session_id=self.session_id,
@@ -10719,17 +12453,20 @@ class AIAgent:
             "prompt_tokens": self.session_prompt_tokens,
             "completion_tokens": self.session_completion_tokens,
             "total_tokens": self.session_total_tokens,
-            "last_prompt_tokens": getattr(self.context_compressor, "last_prompt_tokens", 0) or 0,
+            "last_prompt_tokens": getattr(
+                self.context_compressor, "last_prompt_tokens", 0
+            )
+            or 0,
             "estimated_cost_usd": self.session_estimated_cost_usd,
             "cost_status": self.session_cost_status,
             "cost_source": self.session_cost_source,
         }
         self._response_was_previewed = False
-        
+
         # Include interrupt message if one triggered the interrupt
         if interrupted and self._interrupt_message:
             result["interrupt_message"] = self._interrupt_message
-        
+
         # Clear interrupt state after handling
         self.clear_interrupt()
 
@@ -10738,9 +12475,11 @@ class AIAgent:
 
         # Check skill trigger NOW — based on how many tool iterations THIS turn used.
         _should_review_skills = False
-        if (self._skill_nudge_interval > 0
-                and self._iters_since_skill >= self._skill_nudge_interval
-                and "skill_manage" in self.valid_tool_names):
+        if (
+            self._skill_nudge_interval > 0
+            and self._iters_since_skill >= self._skill_nudge_interval
+            and "skill_manage" in self.valid_tool_names
+        ):
             _should_review_skills = True
             self._iters_since_skill = 0
 
@@ -10756,7 +12495,11 @@ class AIAgent:
 
         # Background memory/skill review — runs AFTER the response is delivered
         # so it never competes with the user's task for model attention.
-        if final_response and not interrupted and (_should_review_memory or _should_review_skills):
+        if (
+            final_response
+            and not interrupted
+            and (_should_review_memory or _should_review_skills)
+        ):
             try:
                 self._spawn_background_review(
                     messages_snapshot=list(messages),
@@ -10778,6 +12521,7 @@ class AIAgent:
         # Plugins can use this for cleanup, flushing buffers, etc.
         try:
             from hermes_cli.plugins import invoke_hook as _invoke_hook
+
             _invoke_hook(
                 "on_session_end",
                 session_id=self.session_id,
@@ -10818,7 +12562,7 @@ def main(
     save_trajectories: bool = False,
     save_sample: bool = False,
     verbose: bool = False,
-    log_prefix_chars: int = 20
+    log_prefix_chars: int = 20,
 ):
     """
     Main function for running the agent directly.
@@ -10844,58 +12588,69 @@ def main(
     """
     print("🤖 AI Agent with Tool Calling")
     print("=" * 50)
-    
+
     # Handle tool listing
     if list_tools:
-        from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
+        from model_tools import (
+            get_all_tool_names,
+            get_toolset_for_tool,
+            get_available_toolsets,
+        )
         from toolsets import get_all_toolsets, get_toolset_info
-        
+
         print("📋 Available Tools & Toolsets:")
         print("-" * 50)
-        
+
         # Show new toolsets system
         print("\n🎯 Predefined Toolsets (New System):")
         print("-" * 40)
         all_toolsets = get_all_toolsets()
-        
+
         # Group by category
         basic_toolsets = []
         composite_toolsets = []
         scenario_toolsets = []
-        
+
         for name, toolset in all_toolsets.items():
             info = get_toolset_info(name)
             if info:
                 entry = (name, info)
                 if name in ["web", "terminal", "vision", "creative", "reasoning"]:
                     basic_toolsets.append(entry)
-                elif name in ["research", "development", "analysis", "content_creation", "full_stack"]:
+                elif name in [
+                    "research",
+                    "development",
+                    "analysis",
+                    "content_creation",
+                    "full_stack",
+                ]:
                     composite_toolsets.append(entry)
                 else:
                     scenario_toolsets.append(entry)
-        
+
         # Print basic toolsets
         print("\n📌 Basic Toolsets:")
         for name, info in basic_toolsets:
-            tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none'
+            tools_str = (
+                ", ".join(info["resolved_tools"]) if info["resolved_tools"] else "none"
+            )
             print(f"  • {name:15} - {info['description']}")
             print(f"    Tools: {tools_str}")
-        
+
         # Print composite toolsets
         print("\n📂 Composite Toolsets (built from other toolsets):")
         for name, info in composite_toolsets:
-            includes_str = ', '.join(info['includes']) if info['includes'] else 'none'
+            includes_str = ", ".join(info["includes"]) if info["includes"] else "none"
             print(f"  • {name:15} - {info['description']}")
             print(f"    Includes: {includes_str}")
             print(f"    Total tools: {info['tool_count']}")
-        
+
         # Print scenario-specific toolsets
         print("\n🎭 Scenario-Specific Toolsets:")
         for name, info in scenario_toolsets:
             print(f"  • {name:20} - {info['description']}")
             print(f"    Total tools: {info['tool_count']}")
-        
-        
+
         # Show legacy toolset compatibility
         print("\n📦 Legacy Toolsets (for backward compatibility):")
         legacy_toolsets = get_available_toolsets()
@@ -10904,47 +12659,57 @@ def main(
             print(f"  {status} {name}: {info['description']}")
             if not info["available"]:
                 print(f"    Requirements: {', '.join(info['requirements'])}")
-        
+
         # Show individual tools
         all_tools = get_all_tool_names()
         print(f"\n🔧 Individual Tools ({len(all_tools)} available):")
         for tool_name in sorted(all_tools):
             toolset = get_toolset_for_tool(tool_name)
             print(f"  📌 {tool_name} (from {toolset})")
-        
+
         print("\n💡 Usage Examples:")
         print("  # Use predefined toolsets")
-        print("  python run_agent.py --enabled_toolsets=research --query='search for Python news'")
-        print("  python run_agent.py --enabled_toolsets=development --query='debug this code'")
-        print("  python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'")
+        print(
+            "  python run_agent.py --enabled_toolsets=research --query='search for Python news'"
+        )
+        print(
+            "  python run_agent.py --enabled_toolsets=development --query='debug this code'"
+        )
+        print(
+            "  python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'"
+        )
         print("  ")
         print("  # Combine multiple toolsets")
-        print("  python run_agent.py --enabled_toolsets=web,vision --query='analyze website'")
+        print(
+            "  python run_agent.py --enabled_toolsets=web,vision --query='analyze website'"
+        )
         print("  ")
         print("  # Disable toolsets")
-        print("  python run_agent.py --disabled_toolsets=terminal --query='no command execution'")
+        print(
+            "  python run_agent.py --disabled_toolsets=terminal --query='no command execution'"
+        )
         print("  ")
         print("  # Run with trajectory saving enabled")
         print("  python run_agent.py --save_trajectories --query='your question here'")
         return
-    
+
     # Parse toolset selection arguments
     enabled_toolsets_list = None
     disabled_toolsets_list = None
-    
+
     if enabled_toolsets:
         enabled_toolsets_list = [t.strip() for t in enabled_toolsets.split(",")]
         print(f"🎯 Enabled toolsets: {enabled_toolsets_list}")
-    
+
     if disabled_toolsets:
         disabled_toolsets_list = [t.strip() for t in disabled_toolsets.split(",")]
         print(f"🚫 Disabled toolsets: {disabled_toolsets_list}")
-    
+
     if save_trajectories:
         print("💾 Trajectory saving: ENABLED")
         print("   - Successful conversations → trajectory_samples.jsonl")
         print("   - Failed conversations → failed_trajectories.jsonl")
-    
+
     # Initialize agent with provided parameters
     try:
         agent = AIAgent(
@@ -10956,12 +12721,12 @@ def main(
             disabled_toolsets=disabled_toolsets_list,
             save_trajectories=save_trajectories,
             verbose_logging=verbose,
-            log_prefix_chars=log_prefix_chars
+            log_prefix_chars=log_prefix_chars,
         )
     except RuntimeError as e:
         print(f"❌ Failed to initialize agent: {e}")
         return
-    
+
     # Use provided query or default to Python 3.13 example
     if query is None:
         user_query = (
@@ -10970,45 +12735,43 @@ def main(
         )
     else:
         user_query = query
-    
+
     print(f"\n📝 User Query: {user_query}")
     print("\n" + "=" * 50)
-    
+
     # Run conversation
     result = agent.run_conversation(user_query)
-    
+
     print("\n" + "=" * 50)
     print("📋 CONVERSATION SUMMARY")
     print("=" * 50)
     print(f"✅ Completed: {result['completed']}")
     print(f"📞 API Calls: {result['api_calls']}")
     print(f"💬 Messages: {len(result['messages'])}")
-    
-    if result['final_response']:
+
+    if result["final_response"]:
         print("\n🎯 FINAL RESPONSE:")
         print("-" * 30)
-        print(result['final_response'])
-    
+        print(result["final_response"])
+
     # Save sample trajectory to UUID-named file if requested
     if save_sample:
         sample_id = str(uuid.uuid4())[:8]
         sample_filename = f"sample_{sample_id}.json"
-        
+
         # Convert messages to trajectory format (same as batch_runner)
         trajectory = agent._convert_to_trajectory_format(
-            result['messages'], 
-            user_query, 
-            result['completed']
+            result["messages"], user_query, result["completed"]
         )
-        
+
         entry = {
             "conversations": trajectory,
             "timestamp": datetime.now().isoformat(),
             "model": model,
-            "completed": result['completed'],
-            "query": user_query
+            "completed": result["completed"],
+            "query": user_query,
         }
-        
+
         try:
             with open(sample_filename, "w", encoding="utf-8") as f:
                 # Pretty-print JSON with indent for readability
@@ -11016,7 +12779,7 @@ def main(
             print(f"\n💾 Sample trajectory saved to: {sample_filename}")
         except Exception as e:
             print(f"\n⚠️ Failed to save sample: {e}")
-    
+
     print("\n👋 Agent execution completed!")
 
 
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index ae78888d86f..76620be5add 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -120,13 +120,17 @@ class TestReadClaudeCodeCredentials:
     def test_reads_valid_credentials(self, tmp_path, monkeypatch):
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "sk-ant-oat01-token",
-                "refreshToken": "sk-ant-oat01-refresh",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "sk-ant-oat01-token",
+                        "refreshToken": "sk-ant-oat01-refresh",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
         creds = read_claude_code_credentials()
         assert creds is not None
@@ -134,7 +138,9 @@ class TestReadClaudeCodeCredentials:
         assert creds["refreshToken"] == "sk-ant-oat01-refresh"
         assert creds["source"] == "claude_code_credentials_file"
 
-    def test_ignores_primary_api_key_for_native_anthropic_resolution(self, tmp_path, monkeypatch):
+    def test_ignores_primary_api_key_for_native_anthropic_resolution(
+        self, tmp_path, monkeypatch
+    ):
         claude_json = tmp_path / ".claude.json"
         claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
@@ -156,9 +162,9 @@ class TestReadClaudeCodeCredentials:
     def test_returns_none_for_empty_access_token(self, tmp_path, monkeypatch):
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {"accessToken": "", "refreshToken": "x"}
-        }))
+        cred_file.write_text(
+            json.dumps({"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}})
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
         assert read_claude_code_credentials() is None
 
@@ -185,16 +191,22 @@ class TestResolveAnthropicToken:
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
         assert resolve_anthropic_token() == "sk-ant-oat01-mytoken"
 
-    def test_does_not_resolve_primary_api_key_as_native_anthropic_token(self, monkeypatch, tmp_path):
+    def test_does_not_resolve_primary_api_key_as_native_anthropic_token(
+        self, monkeypatch, tmp_path
+    ):
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
         monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
         monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
-        (tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
+        (tmp_path / ".claude.json").write_text(
+            json.dumps({"primaryApiKey": "sk-ant-api03-primary"})
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
 
         assert resolve_anthropic_token() is None
 
-    def test_falls_back_to_api_key_when_no_oauth_sources_exist(self, monkeypatch, tmp_path):
+    def test_falls_back_to_api_key_when_no_oauth_sources_exist(
+        self, monkeypatch, tmp_path
+    ):
         monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-mykey")
         monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
         monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
@@ -228,39 +240,53 @@ class TestResolveAnthropicToken:
         monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "cc-auto-token",
-                "refreshToken": "refresh",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "cc-auto-token",
+                        "refreshToken": "refresh",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
         assert resolve_anthropic_token() == "cc-auto-token"
 
-    def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(self, monkeypatch, tmp_path):
+    def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(
+        self, monkeypatch, tmp_path
+    ):
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
         monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
         monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "cc-auto-token",
-                "refreshToken": "refresh-token",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "cc-auto-token",
+                        "refreshToken": "refresh-token",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
 
         assert resolve_anthropic_token() == "cc-auto-token"
 
-    def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(self, monkeypatch, tmp_path):
+    def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(
+        self, monkeypatch, tmp_path
+    ):
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
         monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
         monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
         claude_json = tmp_path / ".claude.json"
-        claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"}))
+        claude_json.write_text(
+            json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"})
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
 
         assert resolve_anthropic_token() == "sk-ant-oat01-static-token"
@@ -280,17 +306,19 @@ class TestRefreshOauthToken:
             "expiresAt": int(time.time() * 1000) - 3600_000,
         }
 
-        mock_response = json.dumps({
-            "access_token": "new-token-abc",
-            "refresh_token": "new-refresh-456",
-            "expires_in": 7200,
-        }).encode()
+        mock_response = json.dumps(
+            {
+                "access_token": "new-token-abc",
+                "refresh_token": "new-refresh-456",
+                "expires_in": 7200,
+            }
+        ).encode()
 
         with patch("urllib.request.urlopen") as mock_urlopen:
             mock_ctx = MagicMock()
-            mock_ctx.__enter__ = MagicMock(return_value=MagicMock(
-                read=MagicMock(return_value=mock_response)
-            ))
+            mock_ctx.__enter__ = MagicMock(
+                return_value=MagicMock(read=MagicMock(return_value=mock_response))
+            )
             mock_ctx.__exit__ = MagicMock(return_value=False)
             mock_urlopen.return_value = mock_ctx
 
@@ -348,38 +376,54 @@ class TestResolveWithRefresh:
         # Set up expired creds with a refresh token
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "expired-tok",
-                "refreshToken": "valid-refresh",
-                "expiresAt": int(time.time() * 1000) - 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "expired-tok",
+                        "refreshToken": "valid-refresh",
+                        "expiresAt": int(time.time() * 1000) - 3600_000,
+                    }
+                }
+            )
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
 
         # Mock refresh to succeed
-        with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
+        with patch(
+            "agent.anthropic_adapter._refresh_oauth_token",
+            return_value="refreshed-token",
+        ):
             result = resolve_anthropic_token()
 
         assert result == "refreshed-token"
 
-    def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(self, monkeypatch, tmp_path):
+    def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(
+        self, monkeypatch, tmp_path
+    ):
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
         monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-expired-env-token")
         monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
 
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "expired-claude-creds-token",
-                "refreshToken": "valid-refresh",
-                "expiresAt": int(time.time() * 1000) - 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "expired-claude-creds-token",
+                        "refreshToken": "valid-refresh",
+                        "expiresAt": int(time.time() * 1000) - 3600_000,
+                    }
+                }
+            )
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
 
-        with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
+        with patch(
+            "agent.anthropic_adapter._refresh_oauth_token",
+            return_value="refreshed-token",
+        ):
             result = resolve_anthropic_token()
 
         assert result == "refreshed-token"
@@ -400,13 +444,17 @@ class TestRunOauthSetupToken:
         # Pre-create credential files that will be found after subprocess
         cred_file = tmp_path / ".claude" / ".credentials.json"
         cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "from-cred-file",
-                "refreshToken": "refresh",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "from-cred-file",
+                        "refreshToken": "refresh",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
         monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
 
         with patch("subprocess.run") as mock_run:
@@ -459,27 +507,45 @@ class TestRunOauthSetupToken:
 
 class TestNormalizeModelName:
     def test_strips_anthropic_prefix(self):
-        assert normalize_model_name("anthropic/claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
+        assert (
+            normalize_model_name("anthropic/claude-sonnet-4-20250514")
+            == "claude-sonnet-4-20250514"
+        )
 
     def test_leaves_bare_name(self):
-        assert normalize_model_name("claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
+        assert (
+            normalize_model_name("claude-sonnet-4-20250514")
+            == "claude-sonnet-4-20250514"
+        )
 
     def test_converts_dots_to_hyphens(self):
         """OpenRouter uses dots (4.6), Anthropic uses hyphens (4-6)."""
         assert normalize_model_name("anthropic/claude-opus-4.6") == "claude-opus-4-6"
-        assert normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5"
+        assert (
+            normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5"
+        )
         assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6"
 
     def test_already_hyphenated_unchanged(self):
         """Names already in Anthropic format should pass through."""
         assert normalize_model_name("claude-opus-4-6") == "claude-opus-4-6"
-        assert normalize_model_name("claude-opus-4-5-20251101") == "claude-opus-4-5-20251101"
+        assert (
+            normalize_model_name("claude-opus-4-5-20251101")
+            == "claude-opus-4-5-20251101"
+        )
 
     def test_preserve_dots_for_alibaba_dashscope(self):
         """Alibaba/DashScope use dots in model names (e.g. qwen3.5-plus). Fixes #1739."""
-        assert normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
-        assert normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
-        assert normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash"
+        assert (
+            normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
+        )
+        assert (
+            normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True)
+            == "qwen3.5-plus"
+        )
+        assert (
+            normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash"
+        )
 
 
 # ---------------------------------------------------------------------------
@@ -536,7 +602,10 @@ class TestConvertMessages:
                 "role": "user",
                 "content": [
                     {"type": "text", "text": "Can you see this?"},
-                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/cat.png"},
+                    },
                 ],
             }
         ]
@@ -548,7 +617,10 @@ class TestConvertMessages:
                 "role": "user",
                 "content": [
                     {"type": "text", "text": "Can you see this?"},
-                    {"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}},
+                    {
+                        "type": "image",
+                        "source": {"type": "url", "url": "https://example.com/cat.png"},
+                    },
                 ],
             }
         ]
@@ -613,7 +685,10 @@ class TestConvertMessages:
                 "role": "assistant",
                 "content": "",
                 "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
+                    {
+                        "id": "tc_1",
+                        "function": {"name": "test_tool", "arguments": "{}"},
+                    },
                 ],
             },
             {"role": "tool", "tool_call_id": "tc_1", "content": "result data"},
@@ -678,10 +753,9 @@ class TestConvertMessages:
         # tc_gone has no matching tool_use — its tool_result should be stripped
         for m in result:
             if m["role"] == "user" and isinstance(m["content"], list):
-                assert all(
-                    b.get("type") != "tool_result"
-                    for b in m["content"]
-                ), "Orphaned tool_result should have been stripped"
+                assert all(b.get("type") != "tool_result" for b in m["content"]), (
+                    "Orphaned tool_result should have been stripped"
+                )
 
     def test_strips_orphaned_tool_result_preserves_valid(self):
         """Orphaned tool_results are stripped while valid ones survive."""
@@ -690,7 +764,10 @@ class TestConvertMessages:
                 "role": "assistant",
                 "content": "",
                 "tool_calls": [
-                    {"id": "tc_valid", "function": {"name": "search", "arguments": "{}"}},
+                    {
+                        "id": "tc_valid",
+                        "function": {"name": "search", "arguments": "{}"},
+                    },
                 ],
             },
             {"role": "tool", "tool_call_id": "tc_valid", "content": "good result"},
@@ -709,7 +786,11 @@ class TestConvertMessages:
             {
                 "role": "system",
                 "content": [
-                    {"type": "text", "text": "System prompt", "cache_control": {"type": "ephemeral"}},
+                    {
+                        "type": "text",
+                        "text": "System prompt",
+                        "cache_control": {"type": "ephemeral"},
+                    },
                 ],
             },
             {"role": "user", "content": "Hi"},
@@ -720,10 +801,12 @@ class TestConvertMessages:
         assert system[0]["cache_control"] == {"type": "ephemeral"}
 
     def test_assistant_cache_control_blocks_are_preserved(self):
-        messages = apply_anthropic_cache_control([
-            {"role": "system", "content": "System prompt"},
-            {"role": "assistant", "content": "Hello from assistant"},
-        ])
+        messages = apply_anthropic_cache_control(
+            [
+                {"role": "system", "content": "System prompt"},
+                {"role": "assistant", "content": "Hello from assistant"},
+            ]
+        )
 
         _, result = convert_messages_to_anthropic(messages)
         assistant_blocks = result[0]["content"]
@@ -733,17 +816,23 @@ class TestConvertMessages:
         assert assistant_blocks[0]["cache_control"] == {"type": "ephemeral"}
 
     def test_tool_cache_control_is_preserved_on_tool_result_block(self):
-        messages = apply_anthropic_cache_control([
-            {"role": "system", "content": "System prompt"},
-            {
-                "role": "assistant",
-                "content": "",
-                "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
-                ],
-            },
-            {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
-        ], native_anthropic=True)
+        messages = apply_anthropic_cache_control(
+            [
+                {"role": "system", "content": "System prompt"},
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "tc_1",
+                            "function": {"name": "test_tool", "arguments": "{}"},
+                        },
+                    ],
+                },
+                {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+            ],
+            native_anthropic=True,
+        )
 
         _, result = convert_messages_to_anthropic(messages)
         user_msg = [m for m in result if m["role"] == "user"][0]
@@ -760,7 +849,10 @@ class TestConvertMessages:
                 "role": "assistant",
                 "content": "",
                 "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
+                    {
+                        "id": "tc_1",
+                        "function": {"name": "test_tool", "arguments": "{}"},
+                    },
                 ],
                 "reasoning_details": [
                     {
@@ -774,10 +866,14 @@ class TestConvertMessages:
         ]
 
         _, result = convert_messages_to_anthropic(messages)
-        assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")["content"]
+        assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")[
+            "content"
+        ]
 
         assert assistant_blocks[0]["type"] == "thinking"
-        assert assistant_blocks[0]["thinking"] == "Need to inspect the tool result first."
+        assert (
+            assistant_blocks[0]["thinking"] == "Need to inspect the tool result first."
+        )
         assert assistant_blocks[0]["signature"] == "sig_123"
         assert assistant_blocks[1]["type"] == "tool_use"
 
@@ -832,25 +928,33 @@ class TestConvertMessages:
         }
 
     def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self):
-        messages = apply_anthropic_cache_control([
-            {"role": "system", "content": "System prompt"},
-            {"role": "user", "content": "Find the skill"},
-            {
-                "role": "assistant",
-                "content": "",
-                "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "skill_view", "arguments": "{}"}},
-                ],
-            },
-            {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
-        ])
+        messages = apply_anthropic_cache_control(
+            [
+                {"role": "system", "content": "System prompt"},
+                {"role": "user", "content": "Find the skill"},
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "tc_1",
+                            "function": {"name": "skill_view", "arguments": "{}"},
+                        },
+                    ],
+                },
+                {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+            ]
+        )
 
         _, result = convert_messages_to_anthropic(messages)
 
         assistant_turn = next(msg for msg in result if msg["role"] == "assistant")
         assistant_blocks = assistant_turn["content"]
 
-        assert all(not (b.get("type") == "text" and b.get("text") == "") for b in assistant_blocks)
+        assert all(
+            not (b.get("type") == "text" and b.get("text") == "")
+            for b in assistant_blocks
+        )
         assert any(b.get("type") == "tool_use" for b in assistant_blocks)
 
     def test_empty_user_message_string_gets_placeholder(self):
@@ -888,7 +992,13 @@ class TestConvertMessages:
     def test_user_message_with_empty_text_blocks_gets_placeholder(self):
         """User message with only empty text blocks should get placeholder."""
         messages = [
-            {"role": "user", "content": [{"type": "text", "text": ""}, {"type": "text", "text": "  "}]},
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": ""},
+                    {"type": "text", "text": "  "},
+                ],
+            },
         ]
         _, result = convert_messages_to_anthropic(messages)
         assert result[0]["role"] == "user"
@@ -1085,35 +1195,43 @@ class TestBuildAnthropicKwargs:
 class TestGetAnthropicMaxOutput:
     def test_opus_4_6(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-opus-4-6") == 128_000
 
     def test_opus_4_6_variant(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-opus-4-6:1m:fast") == 128_000
 
     def test_sonnet_4_6(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000
 
     def test_sonnet_4_date_stamped(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-sonnet-4-20250514") == 64_000
 
     def test_claude_3_5_sonnet(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192
 
     def test_claude_3_opus(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-3-opus-20240229") == 4_096
 
     def test_unknown_future_model(self):
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         assert _get_anthropic_max_output("claude-ultra-5-20260101") == 128_000
 
     def test_longest_prefix_wins(self):
         """'claude-3-5-sonnet' should match before 'claude-3-5'."""
         from agent.anthropic_adapter import _get_anthropic_max_output
+
         # claude-3-5-sonnet (8192) should win over a hypothetical shorter match
         assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192
 
@@ -1218,7 +1336,9 @@ class TestNormalizeResponse:
         msg, reason = normalize_anthropic_response(self._make_response(blocks))
         assert msg.content == "The answer is 42."
         assert msg.reasoning == "Let me reason about this..."
-        assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}]
+        assert msg.reasoning_details == [
+            {"type": "thinking", "thinking": "Let me reason about this..."}
+        ]
 
     def test_thinking_response_preserves_signature(self):
         blocks = [
@@ -1235,15 +1355,9 @@ class TestNormalizeResponse:
 
     def test_stop_reason_mapping(self):
         block = SimpleNamespace(type="text", text="x")
-        _, r1 = normalize_anthropic_response(
-            self._make_response([block], "end_turn")
-        )
-        _, r2 = normalize_anthropic_response(
-            self._make_response([block], "tool_use")
-        )
-        _, r3 = normalize_anthropic_response(
-            self._make_response([block], "max_tokens")
-        )
+        _, r1 = normalize_anthropic_response(self._make_response([block], "end_turn"))
+        _, r2 = normalize_anthropic_response(self._make_response([block], "tool_use"))
+        _, r3 = normalize_anthropic_response(self._make_response([block], "max_tokens"))
         assert r1 == "stop"
         assert r2 == "tool_calls"
         assert r3 == "length"
@@ -1306,7 +1420,11 @@ class TestThinkingBlockSignatureManagement:
                     {"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}},
                 ],
                 "reasoning_details": [
-                    {"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Old reasoning.",
+                        "signature": "sig_old",
+                    },
                 ],
             },
             {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
@@ -1317,7 +1435,11 @@ class TestThinkingBlockSignatureManagement:
                     {"id": "tc_2", "function": {"name": "tool2", "arguments": "{}"}},
                 ],
                 "reasoning_details": [
-                    {"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Latest reasoning.",
+                        "signature": "sig_new",
+                    },
                 ],
             },
             {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
@@ -1348,7 +1470,11 @@ class TestThinkingBlockSignatureManagement:
                 "role": "assistant",
                 "content": "The answer is 42.",
                 "reasoning_details": [
-                    {"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Deep thought.",
+                        "signature": "sig_valid",
+                    },
                 ],
             },
         ]
@@ -1445,14 +1571,22 @@ class TestThinkingBlockSignatureManagement:
                 "role": "assistant",
                 "content": "First response.",
                 "reasoning_details": [
-                    {"type": "thinking", "thinking": "First thought.", "signature": "sig_1"},
+                    {
+                        "type": "thinking",
+                        "thinking": "First thought.",
+                        "signature": "sig_1",
+                    },
                 ],
             },
             {
                 "role": "assistant",
                 "content": "Second response.",
                 "reasoning_details": [
-                    {"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Second thought.",
+                        "signature": "sig_2",
+                    },
                 ],
             },
         ]
@@ -1532,12 +1666,57 @@ class TestThinkingBlockSignatureManagement:
 
         # Last one: thinking preserved
         last_thinking = [
-            b for b in assistants[2]["content"]
+            b
+            for b in assistants[2]["content"]
             if isinstance(b, dict) and b.get("type") == "thinking"
         ]
         assert len(last_thinking) == 1
         assert last_thinking[0]["signature"] == "sig_3"
 
+    def test_third_party_downgrades_thinking_to_text(self):
+        """Third-party Anthropic-compatible endpoints get plain text thinking."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "Visible answer.",
+                "reasoning_details": [
+                    {
+                        "type": "thinking",
+                        "thinking": "Third-party-safe reasoning.",
+                        "signature": "sig",
+                    },
+                    {"type": "redacted_thinking", "data": "opaque"},
+                ],
+            }
+        ]
+        _, result = convert_messages_to_anthropic(
+            messages,
+            base_url="https://api.z.ai/api/paas/v4",
+        )
+        blocks = result[0]["content"]
+        assert not any(b.get("type") == "thinking" for b in blocks)
+        assert not any(b.get("type") == "redacted_thinking" for b in blocks)
+        text_blocks = [b.get("text", "") for b in blocks if b.get("type") == "text"]
+        assert "Third-party-safe reasoning." in text_blocks
+        assert "Visible answer." in text_blocks
+
+    def test_third_party_thinking_only_content_gets_placeholder(self):
+        """If third-party turn only has redacted_thinking, use placeholder text."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning_details": [
+                    {"type": "redacted_thinking", "data": "opaque"},
+                ],
+            }
+        ]
+        _, result = convert_messages_to_anthropic(
+            messages,
+            base_url="https://api.minimax.io/anthropic",
+        )
+        assert result[0]["content"] == [{"type": "text", "text": "(thinking elided)"}]
+
 
 # ---------------------------------------------------------------------------
 # Tool choice
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index d71e6a62554..d013ca65109 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -124,7 +124,8 @@ def test_aiagent_reuses_existing_errors_log_handler():
             )
 
         matching_handlers = [
-            handler for handler in root_logger.handlers
+            handler
+            for handler in root_logger.handlers
             if isinstance(handler, RotatingFileHandler)
             and error_log_path == Path(handler.baseFilename).resolve()
         ]
@@ -142,7 +143,8 @@ class TestProviderModelNormalization:
     def test_aiagent_strips_matching_native_provider_prefix(self):
         with (
             patch(
-                "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
             ),
             patch("run_agent.check_toolset_requirements", return_value={}),
             patch("run_agent.OpenAI"),
@@ -162,7 +164,8 @@ class TestProviderModelNormalization:
     def test_aiagent_keeps_aggregator_vendor_slug(self):
         with (
             patch(
-                "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
             ),
             patch("run_agent.check_toolset_requirements", return_value={}),
             patch("run_agent.OpenAI"),
@@ -304,7 +307,9 @@ class TestStripThinkBlocks:
 
     def test_thought_block_removed(self, agent):
         """Gemma 4 uses <thought> tags for inline reasoning."""
-        result = agent._strip_think_blocks("<thought>internal reasoning</thought> answer")
+        result = agent._strip_think_blocks(
+            "<thought>internal reasoning</thought> answer"
+        )
         assert "internal reasoning" not in result
         assert "<thought>" not in result
         assert "answer" in result
@@ -669,12 +674,18 @@ class TestBuildSystemPrompt:
         assert "Conversation started:" in prompt
 
     def test_includes_nous_subscription_prompt(self, agent, monkeypatch):
-        monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK")
+        monkeypatch.setattr(
+            run_agent,
+            "build_nous_subscription_prompt",
+            lambda tool_names: "NOUS SUBSCRIPTION BLOCK",
+        )
         prompt = agent._build_system_prompt()
         assert "NOUS SUBSCRIPTION BLOCK" in prompt
 
     def test_skills_prompt_derives_available_toolsets_from_loaded_tools(self):
-        tools = _make_tool_defs("web_search", "skills_list", "skill_view", "skill_manage")
+        tools = _make_tool_defs(
+            "web_search", "skills_list", "skill_view", "skill_manage"
+        )
         toolset_map = {
             "web_search": "web",
             "skills_list": "skills",
@@ -688,8 +699,14 @@ class TestBuildSystemPrompt:
                 "run_agent.check_toolset_requirements",
                 side_effect=AssertionError("should not re-check toolset requirements"),
             ),
-            patch("run_agent.get_toolset_for_tool", create=True, side_effect=toolset_map.get),
-            patch("run_agent.build_skills_system_prompt", return_value="SKILLS_PROMPT") as mock_skills,
+            patch(
+                "run_agent.get_toolset_for_tool",
+                create=True,
+                side_effect=toolset_map.get,
+            ),
+            patch(
+                "run_agent.build_skills_system_prompt", return_value="SKILLS_PROMPT"
+            ) as mock_skills,
             patch("run_agent.OpenAI"),
         ):
             agent = AIAgent(
@@ -735,54 +752,71 @@ class TestToolUseEnforcementConfig:
 
     def test_auto_injects_for_gpt(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="auto")
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
     def test_auto_injects_for_codex(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(model="openai/codex-mini", tool_use_enforcement="auto")
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
     def test_auto_skips_for_claude(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
-        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="auto")
+
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4", tool_use_enforcement="auto"
+        )
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
 
     def test_true_forces_for_all_models(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
-        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
+
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4", tool_use_enforcement=True
+        )
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
     def test_string_true_forces_for_all_models(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
-        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="true")
+
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4", tool_use_enforcement="true"
+        )
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
     def test_always_forces_for_all_models(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
-        agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="always")
+
+        agent = self._make_agent(
+            model="deepseek/deepseek-r1", tool_use_enforcement="always"
+        )
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
     def test_false_disables_for_gpt(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement=False)
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
 
     def test_string_false_disables(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="off")
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
 
     def test_custom_list_matches(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(
             model="deepseek/deepseek-r1",
             tool_use_enforcement=["deepseek", "gemini"],
@@ -792,6 +826,7 @@ class TestToolUseEnforcementConfig:
 
     def test_custom_list_no_match(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(
             model="anthropic/claude-sonnet-4",
             tool_use_enforcement=["deepseek", "gemini"],
@@ -801,6 +836,7 @@ class TestToolUseEnforcementConfig:
 
     def test_custom_list_case_insensitive(self):
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         agent = self._make_agent(
             model="openai/GPT-4.1",
             tool_use_enforcement=["GPT", "Codex"],
@@ -811,6 +847,7 @@ class TestToolUseEnforcementConfig:
     def test_no_tools_never_injects(self):
         """Even with enforcement=true, no injection when agent has no tools."""
         from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+
         with (
             patch("run_agent.get_tool_definitions", return_value=[]),
             patch("run_agent.check_toolset_requirements", return_value={}),
@@ -941,7 +978,9 @@ class TestBuildApiKwargs:
         assert kwargs["metadata"]["sessionId"] == "sess-123"
         assert kwargs["extra_body"]["vl_high_resolution_images"] is True
         assert isinstance(kwargs["messages"][0]["content"], list)
-        assert kwargs["messages"][0]["content"][0]["cache_control"] == {"type": "ephemeral"}
+        assert kwargs["messages"][0]["content"][0]["cache_control"] == {
+            "type": "ephemeral"
+        }
         assert kwargs["messages"][2]["content"][0]["text"] == "hi"
 
     def test_qwen_portal_normalizes_bare_string_content_parts(self, agent):
@@ -970,7 +1009,10 @@ class TestBuildApiKwargs:
         agent.base_url = "https://portal.qwen.ai/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.max_tokens = 4096
-        messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hi"},
+        ]
         kwargs = agent._build_api_kwargs(messages)
         assert kwargs["max_tokens"] == 4096
 
@@ -980,7 +1022,10 @@ class TestBuildApiKwargs:
         agent.base_url = "https://portal.qwen.ai/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.max_tokens = None
-        messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
+        messages = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "hi"},
+        ]
         kwargs = agent._build_api_kwargs(messages)
         assert kwargs["max_tokens"] == 65536
 
@@ -1125,7 +1170,10 @@ class TestExecuteToolCalls:
             agent._execute_tool_calls(mock_msg, messages, "task-1")
         # Content should be replaced with persisted-output or truncation
         assert len(messages[0]["content"]) < 150_000
-        assert ("Truncated" in messages[0]["content"] or "<persisted-output>" in messages[0]["content"])
+        assert (
+            "Truncated" in messages[0]["content"]
+            or "<persisted-output>" in messages[0]["content"]
+        )
 
     def test_quiet_tool_output_suppressed_when_progress_callback_present(self, agent):
         tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
@@ -1133,8 +1181,10 @@ class TestExecuteToolCalls:
         messages = []
         agent.tool_progress_callback = lambda *args, **kwargs: None
 
-        with patch("run_agent.handle_function_call", return_value="search result"), \
-             patch.object(agent, "_safe_print") as mock_print:
+        with (
+            patch("run_agent.handle_function_call", return_value="search result"),
+            patch.object(agent, "_safe_print") as mock_print,
+        ):
             agent._execute_tool_calls(mock_msg, messages, "task-1")
 
         mock_print.assert_not_called()
@@ -1147,8 +1197,10 @@ class TestExecuteToolCalls:
         messages = []
         agent.tool_progress_callback = None
 
-        with patch("run_agent.handle_function_call", return_value="search result"), \
-             patch.object(agent, "_safe_print") as mock_print:
+        with (
+            patch("run_agent.handle_function_call", return_value="search result"),
+            patch.object(agent, "_safe_print") as mock_print,
+        ):
             agent._execute_tool_calls(mock_msg, messages, "task-1")
 
         mock_print.assert_called_once()
@@ -1165,7 +1217,9 @@ class TestExecuteToolCalls:
 
         mock_print.assert_not_called()
 
-    def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode(self, agent):
+    def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode(
+        self, agent
+    ):
         class _RateLimitError(Exception):
             status_code = 429
 
@@ -1215,8 +1269,10 @@ class TestConcurrentToolExecution:
 
     def test_clarify_forces_sequential(self, agent):
         """Batch containing clarify should use sequential path."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="clarify", arguments='{"question":"ok?"}', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(
+            name="clarify", arguments='{"question":"ok?"}', call_id="c2"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
@@ -1227,8 +1283,10 @@ class TestConcurrentToolExecution:
 
     def test_multiple_tools_uses_concurrent_path(self, agent):
         """Multiple read-only tools should use concurrent path."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(
+            name="read_file", arguments='{"path":"x.py"}', call_id="c2"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
@@ -1239,8 +1297,10 @@ class TestConcurrentToolExecution:
 
     def test_terminal_batch_forces_sequential(self, agent):
         """Stateful tools should not share the concurrent execution path."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="terminal", arguments='{"command":"pwd"}', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(
+            name="terminal", arguments='{"command":"pwd"}', call_id="c2"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
@@ -1251,8 +1311,14 @@ class TestConcurrentToolExecution:
 
     def test_write_batch_forces_sequential(self, agent):
         """File mutations should stay ordered within a turn."""
-        tc1 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c1")
-        tc2 = _mock_tool_call(name="write_file", arguments='{"path":"x.py","content":"print(1)"}', call_id="c2")
+        tc1 = _mock_tool_call(
+            name="read_file", arguments='{"path":"x.py"}', call_id="c1"
+        )
+        tc2 = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"x.py","content":"print(1)"}',
+            call_id="c2",
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
@@ -1303,7 +1369,7 @@ class TestConcurrentToolExecution:
 
     def test_malformed_json_args_forces_sequential(self, agent):
         """Unparseable tool arguments should fall back to sequential."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
         tc2 = _mock_tool_call(name="web_search", arguments="NOT JSON {{{", call_id="c2")
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
@@ -1315,8 +1381,10 @@ class TestConcurrentToolExecution:
 
     def test_non_dict_args_forces_sequential(self, agent):
         """Tool arguments that parse to a non-dict type should fall back to sequential."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="web_search", arguments='"just a string"', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(
+            name="web_search", arguments='"just a string"', call_id="c2"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
@@ -1327,9 +1395,13 @@ class TestConcurrentToolExecution:
 
     def test_concurrent_executes_all_tools(self, agent):
         """Concurrent path should execute all tools and append results in order."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{"q":"alpha"}', call_id="c1")
+        tc1 = _mock_tool_call(
+            name="web_search", arguments='{"q":"alpha"}', call_id="c1"
+        )
         tc2 = _mock_tool_call(name="web_search", arguments='{"q":"beta"}', call_id="c2")
-        tc3 = _mock_tool_call(name="web_search", arguments='{"q":"gamma"}', call_id="c3")
+        tc3 = _mock_tool_call(
+            name="web_search", arguments='{"q":"gamma"}', call_id="c3"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2, tc3])
         messages = []
 
@@ -1379,12 +1451,13 @@ class TestConcurrentToolExecution:
 
     def test_concurrent_handles_tool_error(self, agent):
         """If one tool raises, others should still complete."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(name="web_search", arguments="{}", call_id="c2")
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
 
         call_count = [0]
+
         def fake_handle(name, args, task_id, **kwargs):
             call_count[0] += 1
             if call_count[0] == 1:
@@ -1402,8 +1475,8 @@ class TestConcurrentToolExecution:
 
     def test_concurrent_interrupt_before_start(self, agent):
         """If interrupt is requested before concurrent execution, all tools are skipped."""
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="read_file", arguments='{}', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(name="read_file", arguments="{}", call_id="c2")
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
 
@@ -1412,15 +1485,21 @@ class TestConcurrentToolExecution:
 
         agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
         assert len(messages) == 2
-        assert "cancelled" in messages[0]["content"].lower() or "skipped" in messages[0]["content"].lower()
-        assert "cancelled" in messages[1]["content"].lower() or "skipped" in messages[1]["content"].lower()
+        assert (
+            "cancelled" in messages[0]["content"].lower()
+            or "skipped" in messages[0]["content"].lower()
+        )
+        assert (
+            "cancelled" in messages[1]["content"].lower()
+            or "skipped" in messages[1]["content"].lower()
+        )
 
     def test_concurrent_truncates_large_results(self, agent, tmp_path, monkeypatch):
         """Concurrent path should save oversized results to file."""
         monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
         (tmp_path / ".hermes").mkdir()
-        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
-        tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2")
+        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
+        tc2 = _mock_tool_call(name="web_search", arguments="{}", call_id="c2")
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         big_result = "x" * 150_000
@@ -1431,14 +1510,16 @@ class TestConcurrentToolExecution:
         assert len(messages) == 2
         for m in messages:
             assert len(m["content"]) < 150_000
-            assert ("Truncated" in m["content"] or "<persisted-output>" in m["content"])
+            assert "Truncated" in m["content"] or "<persisted-output>" in m["content"]
 
     def test_invoke_tool_dispatches_to_handle_function_call(self, agent):
         """_invoke_tool should route regular tools through handle_function_call."""
         with patch("run_agent.handle_function_call", return_value="result") as mock_hfc:
             result = agent._invoke_tool("web_search", {"q": "test"}, "task-1")
             mock_hfc.assert_called_once_with(
-                "web_search", {"q": "test"}, "task-1",
+                "web_search",
+                {"q": "test"},
+                "task-1",
                 tool_call_id=None,
                 session_id=agent.session_id,
                 enabled_tools=list(agent.valid_tool_names),
@@ -1447,31 +1528,57 @@ class TestConcurrentToolExecution:
             assert result == "result"
 
     def test_sequential_tool_callbacks_fire_in_order(self, agent):
-        tool_call = _mock_tool_call(name="web_search", arguments='{"query":"hello"}', call_id="c1")
+        tool_call = _mock_tool_call(
+            name="web_search", arguments='{"query":"hello"}', call_id="c1"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call])
         messages = []
         starts = []
         completes = []
-        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args))
-        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result))
+        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: (
+            starts.append((tool_call_id, function_name, function_args))
+        )
+        agent.tool_complete_callback = (
+            lambda tool_call_id, function_name, function_args, function_result: (
+                completes.append(
+                    (tool_call_id, function_name, function_args, function_result)
+                )
+            )
+        )
 
         with patch("run_agent.handle_function_call", return_value='{"success": true}'):
             agent._execute_tool_calls_sequential(mock_msg, messages, "task-1")
 
         assert starts == [("c1", "web_search", {"query": "hello"})]
-        assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')]
+        assert completes == [
+            ("c1", "web_search", {"query": "hello"}, '{"success": true}')
+        ]
 
     def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent):
-        tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1")
-        tc2 = _mock_tool_call(name="web_search", arguments='{"query":"two"}', call_id="c2")
+        tc1 = _mock_tool_call(
+            name="web_search", arguments='{"query":"one"}', call_id="c1"
+        )
+        tc2 = _mock_tool_call(
+            name="web_search", arguments='{"query":"two"}', call_id="c2"
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
         messages = []
         starts = []
         completes = []
-        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args))
-        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result))
+        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: (
+            starts.append((tool_call_id, function_name, function_args))
+        )
+        agent.tool_complete_callback = (
+            lambda tool_call_id, function_name, function_args, function_result: (
+                completes.append(
+                    (tool_call_id, function_name, function_args, function_result)
+                )
+            )
+        )
 
-        with patch("run_agent.handle_function_call", side_effect=['{"id":1}', '{"id":2}']):
+        with patch(
+            "run_agent.handle_function_call", side_effect=['{"id":1}', '{"id":2}']
+        ):
             agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
 
         assert starts == [
@@ -1484,18 +1591,24 @@ class TestConcurrentToolExecution:
 
     def test_invoke_tool_handles_agent_level_tools(self, agent):
         """_invoke_tool should handle todo tool directly."""
-        with patch("tools.todo_tool.todo_tool", return_value='{"ok":true}') as mock_todo:
+        with patch(
+            "tools.todo_tool.todo_tool", return_value='{"ok":true}'
+        ) as mock_todo:
             result = agent._invoke_tool("todo", {"todos": []}, "task-1")
             mock_todo.assert_called_once()
         assert "ok" in result
 
-    def test_invoke_tool_blocked_returns_error_and_skips_execution(self, agent, monkeypatch):
+    def test_invoke_tool_blocked_returns_error_and_skips_execution(
+        self, agent, monkeypatch
+    ):
         """_invoke_tool should return error JSON when a plugin blocks the tool."""
         monkeypatch.setattr(
             "hermes_cli.plugins.get_pre_tool_call_block_message",
             lambda *args, **kwargs: "Blocked by test policy",
         )
-        with patch("tools.todo_tool.todo_tool", side_effect=AssertionError("should not run")) as mock_todo:
+        with patch(
+            "tools.todo_tool.todo_tool", side_effect=AssertionError("should not run")
+        ) as mock_todo:
             result = agent._invoke_tool("todo", {"todos": []}, "task-1")
 
         assert json.loads(result) == {"error": "Blocked by test policy"}
@@ -1507,16 +1620,23 @@ class TestConcurrentToolExecution:
             "hermes_cli.plugins.get_pre_tool_call_block_message",
             lambda *args, **kwargs: "Blocked",
         )
-        with patch("run_agent.handle_function_call", side_effect=AssertionError("should not run")):
+        with patch(
+            "run_agent.handle_function_call",
+            side_effect=AssertionError("should not run"),
+        ):
             result = agent._invoke_tool("web_search", {"q": "test"}, "task-1")
 
         assert json.loads(result) == {"error": "Blocked"}
 
-    def test_sequential_blocked_tool_skips_checkpoints_and_callbacks(self, agent, monkeypatch):
+    def test_sequential_blocked_tool_skips_checkpoints_and_callbacks(
+        self, agent, monkeypatch
+    ):
         """Sequential path: blocked tool should not trigger checkpoints or start callbacks."""
-        tool_call = _mock_tool_call(name="write_file",
-                                    arguments='{"path":"test.txt","content":"hello"}',
-                                    call_id="c1")
+        tool_call = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"test.txt","content":"hello"}',
+            call_id="c1",
+        )
         mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call])
         messages = []
 
@@ -1532,7 +1652,10 @@ class TestConcurrentToolExecution:
         starts = []
         agent.tool_start_callback = lambda *a: starts.append(a)
 
-        with patch("run_agent.handle_function_call", side_effect=AssertionError("should not run")):
+        with patch(
+            "run_agent.handle_function_call",
+            side_effect=AssertionError("should not run"),
+        ):
             agent._execute_tool_calls_sequential(mock_msg, messages, "task-1")
 
         agent._checkpoint_mgr.ensure_checkpoint.assert_not_called()
@@ -1548,9 +1671,14 @@ class TestConcurrentToolExecution:
             "hermes_cli.plugins.get_pre_tool_call_block_message",
             lambda *args, **kwargs: "Blocked",
         )
-        with patch("tools.memory_tool.memory_tool", side_effect=AssertionError("should not run")):
+        with patch(
+            "tools.memory_tool.memory_tool",
+            side_effect=AssertionError("should not run"),
+        ):
             result = agent._invoke_tool(
-                "memory", {"action": "add", "target": "memory", "content": "x"}, "task-1",
+                "memory",
+                {"action": "add", "target": "memory", "content": "x"},
+                "task-1",
             )
 
         assert json.loads(result) == {"error": "Blocked"}
@@ -1562,36 +1690,45 @@ class TestPathsOverlap:
 
     def test_same_path_overlaps(self):
         from run_agent import _paths_overlap
+
         assert _paths_overlap(Path("src/a.py"), Path("src/a.py"))
 
     def test_siblings_do_not_overlap(self):
         from run_agent import _paths_overlap
+
         assert not _paths_overlap(Path("src/a.py"), Path("src/b.py"))
 
     def test_parent_child_overlap(self):
         from run_agent import _paths_overlap
+
         assert _paths_overlap(Path("src"), Path("src/sub/a.py"))
 
     def test_different_roots_do_not_overlap(self):
         from run_agent import _paths_overlap
+
         assert not _paths_overlap(Path("src/a.py"), Path("other/a.py"))
 
     def test_nested_vs_flat_do_not_overlap(self):
         from run_agent import _paths_overlap
+
         assert not _paths_overlap(Path("src/sub/a.py"), Path("src/a.py"))
 
     def test_empty_paths_do_not_overlap(self):
         from run_agent import _paths_overlap
+
         assert not _paths_overlap(Path(""), Path(""))
 
     def test_one_empty_path_does_not_overlap(self):
         from run_agent import _paths_overlap
+
         assert not _paths_overlap(Path(""), Path("src/a.py"))
         assert not _paths_overlap(Path("src/a.py"), Path(""))
 
 
 class TestParallelScopePathNormalization:
-    def test_extract_parallel_scope_path_normalizes_relative_to_cwd(self, tmp_path, monkeypatch):
+    def test_extract_parallel_scope_path_normalizes_relative_to_cwd(
+        self, tmp_path, monkeypatch
+    ):
         from run_agent import _extract_parallel_scope_path
 
         monkeypatch.chdir(tmp_path)
@@ -1600,7 +1737,9 @@ class TestParallelScopePathNormalization:
 
         assert scoped == tmp_path / "notes.txt"
 
-    def test_extract_parallel_scope_path_treats_relative_and_absolute_same_file_as_same_scope(self, tmp_path, monkeypatch):
+    def test_extract_parallel_scope_path_treats_relative_and_absolute_same_file_as_same_scope(
+        self, tmp_path, monkeypatch
+    ):
         from run_agent import _extract_parallel_scope_path, _paths_overlap
 
         monkeypatch.chdir(tmp_path)
@@ -1612,12 +1751,22 @@ class TestParallelScopePathNormalization:
         assert rel_scoped == abs_scoped
         assert _paths_overlap(rel_scoped, abs_scoped)
 
-    def test_should_parallelize_tool_batch_rejects_same_file_with_mixed_path_spellings(self, tmp_path, monkeypatch):
+    def test_should_parallelize_tool_batch_rejects_same_file_with_mixed_path_spellings(
+        self, tmp_path, monkeypatch
+    ):
         from run_agent import _should_parallelize_tool_batch
 
         monkeypatch.chdir(tmp_path)
-        tc1 = _mock_tool_call(name="write_file", arguments='{"path":"notes.txt","content":"one"}', call_id="c1")
-        tc2 = _mock_tool_call(name="write_file", arguments=f'{{"path":"{tmp_path / "notes.txt"}","content":"two"}}', call_id="c2")
+        tc1 = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"notes.txt","content":"one"}',
+            call_id="c1",
+        )
+        tc2 = _mock_tool_call(
+            name="write_file",
+            arguments=f'{{"path":"{tmp_path / "notes.txt"}","content":"two"}}',
+            call_id="c2",
+        )
 
         assert not _should_parallelize_tool_batch([tc1, tc2])
 
@@ -1692,7 +1841,9 @@ class TestRunConversation:
         resp2 = _mock_response(content="Done searching", finish_reason="stop")
         agent.client.chat.completions.create.side_effect = [resp1, resp2]
         with (
-            patch("run_agent.handle_function_call", return_value="search result") as mock_handle_function_call,
+            patch(
+                "run_agent.handle_function_call", return_value="search result"
+            ) as mock_handle_function_call,
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
             patch.object(agent, "_cleanup_task_resources"),
@@ -1701,7 +1852,9 @@ class TestRunConversation:
         assert result["final_response"] == "Done searching"
         assert result["api_calls"] == 2
         assert mock_handle_function_call.call_args.kwargs["tool_call_id"] == "c1"
-        assert mock_handle_function_call.call_args.kwargs["session_id"] == agent.session_id
+        assert (
+            mock_handle_function_call.call_args.kwargs["session_id"] == agent.session_id
+        )
 
     def test_request_scoped_api_hooks_fire_for_each_api_call(self, agent):
         self._setup_agent(agent)
@@ -1727,13 +1880,17 @@ class TestRunConversation:
 
         assert result["final_response"] == "Done searching"
         pre_request_calls = [kw for name, kw in hook_calls if name == "pre_api_request"]
-        post_request_calls = [kw for name, kw in hook_calls if name == "post_api_request"]
+        post_request_calls = [
+            kw for name, kw in hook_calls if name == "post_api_request"
+        ]
         assert len(pre_request_calls) == 2
         assert len(post_request_calls) == 2
         assert [call["api_call_count"] for call in pre_request_calls] == [1, 2]
         assert [call["api_call_count"] for call in post_request_calls] == [1, 2]
         assert all(call["session_id"] == agent.session_id for call in pre_request_calls)
-        assert all("message_count" in c and "messages" not in c for c in pre_request_calls)
+        assert all(
+            "message_count" in c and "messages" not in c for c in pre_request_calls
+        )
         assert all("usage" in c and "response" not in c for c in post_request_calls)
 
     def test_interrupt_breaks_loop(self, agent):
@@ -1791,7 +1948,9 @@ class TestRunConversation:
 
         # 6 responses: original + 2 prefill + 3 retries after prefill exhaustion
         with (
-            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6),
+            patch.object(
+                agent, "_interruptible_api_call", side_effect=[empty_resp] * 6
+            ),
             patch.object(agent, "_compress_context") as mock_compress,
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
@@ -1859,7 +2018,10 @@ class TestRunConversation:
         empty_resp = _mock_response(content=None, finish_reason="stop")
         # 4 responses: 1 original + 3 nudge retries, all empty
         agent.client.chat.completions.create.side_effect = [
-            empty_resp, empty_resp, empty_resp, empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,
         ]
         with (
             patch.object(agent, "_persist_session"),
@@ -1897,7 +2059,9 @@ class TestRunConversation:
         self._setup_agent(agent)
         agent.base_url = "http://127.0.0.1:1234/v1"
         # Configure a fallback chain
-        agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}]
+        agent._fallback_chain = [
+            {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
+        ]
         agent._fallback_index = 0
         agent._fallback_activated = False
 
@@ -1905,7 +2069,11 @@ class TestRunConversation:
         content_resp = _mock_response(content="Fallback answer.", finish_reason="stop")
         # 4 empty (1 orig + 3 retries), then fallback model answers
         agent.client.chat.completions.create.side_effect = [
-            empty_resp, empty_resp, empty_resp, empty_resp, content_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,
+            content_resp,
         ]
 
         fallback_called = {"called": False}
@@ -1935,7 +2103,9 @@ class TestRunConversation:
         """If fallback also returns empty, final response is (empty)."""
         self._setup_agent(agent)
         agent.base_url = "http://127.0.0.1:1234/v1"
-        agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}]
+        agent._fallback_chain = [
+            {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
+        ]
         agent._fallback_index = 0
         agent._fallback_activated = False
 
@@ -1943,8 +2113,14 @@ class TestRunConversation:
         # 4 empty from primary (1 + 3 retries), fallback activated,
         # then 4 more empty from fallback (1 + 3 retries), no more fallbacks
         agent.client.chat.completions.create.side_effect = [
-            empty_resp, empty_resp, empty_resp, empty_resp,  # primary exhausted
-            empty_resp, empty_resp, empty_resp, empty_resp,  # fallback exhausted
+            empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,  # primary exhausted
+            empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,  # fallback exhausted
         ]
 
         def _mock_fallback():
@@ -1974,7 +2150,10 @@ class TestRunConversation:
         empty_resp = _mock_response(content=None, finish_reason="stop")
         # 4 empty: 1 original + 3 retries, all empty, no fallback
         agent.client.chat.completions.create.side_effect = [
-            empty_resp, empty_resp, empty_resp, empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,
+            empty_resp,
         ]
 
         status_messages = []
@@ -1993,9 +2172,17 @@ class TestRunConversation:
         assert result["final_response"] == "(empty)"
         # Should have emitted retry statuses (3 retries) + final failure
         retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
-        assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"
-        failure_msgs = [m for m in status_messages if "no content" in m.lower() or "no fallback" in m.lower()]
-        assert len(failure_msgs) >= 1, f"Expected at least 1 failure status, got: {status_messages}"
+        assert len(retry_msgs) == 3, (
+            f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"
+        )
+        failure_msgs = [
+            m
+            for m in status_messages
+            if "no content" in m.lower() or "no fallback" in m.lower()
+        ]
+        assert len(failure_msgs) >= 1, (
+            f"Expected at least 1 failure status, got: {status_messages}"
+        )
 
     def test_partial_stream_recovery_uses_streamed_content(self, agent):
         """When streaming fails after partial delivery, recovered partial content becomes final response."""
@@ -2007,7 +2194,9 @@ class TestRunConversation:
         )
         agent.client.chat.completions.create.return_value = partial_resp
         # Simulate that streaming had already delivered this text
-        agent._current_streamed_assistant_text = "Here is the partial answer that was stream"
+        agent._current_streamed_assistant_text = (
+            "Here is the partial answer that was stream"
+        )
         with (
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
@@ -2028,7 +2217,9 @@ class TestRunConversation:
         def _fake_api_call(api_kwargs):
             # Simulate what streaming does: accumulate text before returning
             # a stub with no content (connection died mid-stream)
-            agent._current_streamed_assistant_text = "The answer to your question is that"
+            agent._current_streamed_assistant_text = (
+                "The answer to your question is that"
+            )
             return empty_stub
 
         status_messages = []
@@ -2049,11 +2240,17 @@ class TestRunConversation:
         assert result["final_response"] == "The answer to your question is that"
         assert result["api_calls"] == 1  # No wasted retries
         # Should emit the stream-interrupted status, NOT the empty-retry status
-        recovery_msgs = [m for m in status_messages if "stream interrupted" in m.lower()]
-        assert len(recovery_msgs) >= 1, f"Expected stream recovery status, got: {status_messages}"
+        recovery_msgs = [
+            m for m in status_messages if "stream interrupted" in m.lower()
+        ]
+        assert len(recovery_msgs) >= 1, (
+            f"Expected stream recovery status, got: {status_messages}"
+        )
         # Should NOT have retry statuses
         retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
-        assert len(retry_msgs) == 0, f"Should not retry when stream content exists: {status_messages}"
+        assert len(retry_msgs) == 0, (
+            f"Should not retry when stream content exists: {status_messages}"
+        )
 
     def test_partial_stream_recovery_preempts_prior_turn_fallback(self, agent):
         """Partial streamed content takes priority over _last_content_with_tools fallback."""
@@ -2065,7 +2262,9 @@ class TestRunConversation:
 
         def _fake_api_call(api_kwargs):
             # Simulate partial streaming before connection death
-            agent._current_streamed_assistant_text = "Fresh partial content from this turn"
+            agent._current_streamed_assistant_text = (
+                "Fresh partial content from this turn"
+            )
             return empty_stub
 
         with (
@@ -2157,7 +2356,9 @@ class TestRunConversation:
             "Error code: 400 - {'error': {'code': '1261', 'message': 'Prompt exceeds max length'}}"
         )
         err_400.status_code = 400
-        ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
+        ok_resp = _mock_response(
+            content="Recovered after compression", finish_reason="stop"
+        )
         agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
         prefill = [
             {"role": "user", "content": "previous question"},
@@ -2198,9 +2399,14 @@ class TestRunConversation:
         assert result["api_calls"] == 2
         assert result["final_response"] == "Part 1 Part 2"
 
-        second_call_messages = agent.client.chat.completions.create.call_args_list[1].kwargs["messages"]
+        second_call_messages = agent.client.chat.completions.create.call_args_list[
+            1
+        ].kwargs["messages"]
         assert second_call_messages[-1]["role"] == "user"
-        assert "truncated by the output length limit" in second_call_messages[-1]["content"]
+        assert (
+            "truncated by the output length limit"
+            in second_call_messages[-1]["content"]
+        )
 
     def test_length_thinking_exhausted_skips_continuation(self, agent):
         """When finish_reason='length' but content is only thinking, skip retries."""
@@ -2247,7 +2453,9 @@ class TestRunConversation:
         assert result["api_calls"] == 3
         assert result["completed"] is False
 
-    def test_length_with_tool_calls_returns_partial_without_executing_tools(self, agent):
+    def test_length_with_tool_calls_returns_partial_without_executing_tools(
+        self, agent
+    ):
         self._setup_agent(agent)
         bad_tc = _mock_tool_call(
             name="write_file",
@@ -2281,7 +2489,9 @@ class TestRunConversation:
             call_id="c1",
         )
         truncated_resp = _mock_response(
-            content="", finish_reason="length", tool_calls=[bad_tc],
+            content="",
+            finish_reason="length",
+            tool_calls=[bad_tc],
         )
         good_tc = _mock_tool_call(
             name="write_file",
@@ -2289,10 +2499,14 @@ class TestRunConversation:
             call_id="c2",
         )
         good_resp = _mock_response(
-            content="", finish_reason="stop", tool_calls=[good_tc],
+            content="",
+            finish_reason="stop",
+            tool_calls=[good_tc],
         )
         with (
-            patch("run_agent.handle_function_call", return_value='{"success":true}') as mock_hfc,
+            patch(
+                "run_agent.handle_function_call", return_value='{"success":true}'
+            ) as mock_hfc,
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
             patch.object(agent, "_cleanup_task_resources"),
@@ -2301,7 +2515,9 @@ class TestRunConversation:
             # Third: final text response.
             final_resp = _mock_response(content="Done!", finish_reason="stop")
             agent.client.chat.completions.create.side_effect = [
-                truncated_resp, good_resp, final_resp,
+                truncated_resp,
+                good_resp,
+                final_resp,
             ]
             result = agent.run_conversation("write the report")
 
@@ -2321,7 +2537,9 @@ class TestRunConversation:
             call_id="c1",
         )
         resp = _mock_response(
-            content="", finish_reason="tool_calls", tool_calls=[bad_tc],
+            content="",
+            finish_reason="tool_calls",
+            tool_calls=[bad_tc],
         )
         agent.client.chat.completions.create.return_value = resp
 
@@ -2417,7 +2635,9 @@ class TestRetryExhaustion:
         """
         self._setup_agent(agent)
         with (
-            patch.object(agent, "_build_api_kwargs", side_effect=ValueError("bad messages")),
+            patch.object(
+                agent, "_build_api_kwargs", side_effect=ValueError("bad messages")
+            ),
             patch.object(agent, "_persist_session"),
             patch.object(agent, "_save_trajectory"),
             patch.object(agent, "_cleanup_task_resources"),
@@ -2461,7 +2681,9 @@ class TestFlushSentinelNotLeaked:
         agent.client.chat.completions.create.return_value = mock_response
 
         # Bypass auxiliary client so flush uses agent.client directly
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
+        with patch(
+            "agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")
+        ):
             agent.flush_memories(messages, min_turns=0)
 
         # Check what was actually sent to the API
@@ -2591,7 +2813,9 @@ class TestCredentialPoolRecovery:
         assert retry_same is False
         agent._swap_credential.assert_called_once_with(next_entry)
 
-    def test_recover_with_pool_rotates_on_billing_reason_even_with_http_400(self, agent):
+    def test_recover_with_pool_rotates_on_billing_reason_even_with_http_400(
+        self, agent
+    ):
         next_entry = SimpleNamespace(label="secondary")
 
         class _Pool:
@@ -2645,7 +2869,6 @@ class TestCredentialPoolRecovery:
         assert retry_same is False
         agent._swap_credential.assert_called_once_with(next_entry)
 
-
     def test_recover_with_pool_refreshes_on_401(self, agent):
         """401 with successful refresh should swap to refreshed credential."""
         refreshed_entry = SimpleNamespace(label="refreshed-primary", id="abc")
@@ -2750,7 +2973,10 @@ class TestCredentialPoolRecovery:
         recovered, retry_same = agent._recover_with_credential_pool(
             status_code=429,
             has_retried_429=True,
-            error_context={"reason": "device_code_exhausted", "reset_at": "2026-04-12T10:30:00Z"},
+            error_context={
+                "reason": "device_code_exhausted",
+                "reset_at": "2026-04-12T10:30:00Z",
+            },
         )
 
         assert recovered is True
@@ -2787,6 +3013,7 @@ class TestMaxTokensParam:
 # System prompt stability for prompt caching
 # ---------------------------------------------------------------------------
 
+
 class TestSystemPromptStability:
     """Verify that the system prompt stays stable across turns for cache hits."""
 
@@ -2882,6 +3109,7 @@ class TestSystemPromptStability:
         # Empty string is falsy, so should fall through to fresh build
         assert "Hermes Agent" in agent._cached_system_prompt
 
+
 class TestBudgetPressure:
     """Budget exhaustion grace call system."""
 
@@ -2898,6 +3126,7 @@ class TestSafeWriter:
         """When stdout is healthy, _SafeWriter is transparent."""
         from run_agent import _SafeWriter
         from io import StringIO
+
         inner = StringIO()
         writer = _SafeWriter(inner)
         writer.write("hello")
@@ -2907,6 +3136,7 @@ class TestSafeWriter:
         """OSError on write is silently caught, returns len(data)."""
         from run_agent import _SafeWriter
         from unittest.mock import MagicMock
+
         inner = MagicMock()
         inner.write.side_effect = OSError(5, "Input/output error")
         writer = _SafeWriter(inner)
@@ -2917,6 +3147,7 @@ class TestSafeWriter:
         """OSError on flush is silently caught."""
         from run_agent import _SafeWriter
         from unittest.mock import MagicMock
+
         inner = MagicMock()
         inner.flush.side_effect = OSError(5, "Input/output error")
         writer = _SafeWriter(inner)
@@ -2927,6 +3158,7 @@ class TestSafeWriter:
         import sys
         from run_agent import _SafeWriter
         from unittest.mock import MagicMock
+
         broken = MagicMock()
         broken.write.side_effect = OSError(5, "Input/output error")
         original = sys.stdout
@@ -2940,6 +3172,7 @@ class TestSafeWriter:
         """run_conversation installs _SafeWriter on stdio."""
         import sys
         from run_agent import _SafeWriter
+
         resp = _mock_response(content="Done", finish_reason="stop")
         agent.client.chat.completions.create.return_value = resp
         original_stdout = sys.stdout
@@ -2965,6 +3198,7 @@ class TestSafeWriter:
         import sys
         from run_agent import _SafeWriter
         from io import StringIO
+
         inner = StringIO()
         wrapped = _SafeWriter(inner)
         # isinstance check should prevent double-wrapping
@@ -3009,15 +3243,30 @@ class TestBuildApiKwargsAnthropicMaxTokens:
         agent.reasoning_config = None
 
         with patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build:
-            mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
+            mock_build.return_value = {
+                "model": "claude-sonnet-4-20250514",
+                "messages": [],
+                "max_tokens": 4096,
+            }
             agent._build_api_kwargs([{"role": "user", "content": "test"}])
             _, kwargs = mock_build.call_args
             if not kwargs:
-                kwargs = dict(zip(
-                    ["model", "messages", "tools", "max_tokens", "reasoning_config"],
-                    mock_build.call_args[0],
-                ))
-            assert kwargs.get("max_tokens") == 4096 or mock_build.call_args[1].get("max_tokens") == 4096
+                kwargs = dict(
+                    zip(
+                        [
+                            "model",
+                            "messages",
+                            "tools",
+                            "max_tokens",
+                            "reasoning_config",
+                        ],
+                        mock_build.call_args[0],
+                    )
+                )
+            assert (
+                kwargs.get("max_tokens") == 4096
+                or mock_build.call_args[1].get("max_tokens") == 4096
+            )
 
     def test_max_tokens_none_when_unset(self, agent):
         agent.api_mode = "anthropic_messages"
@@ -3025,7 +3274,11 @@ class TestBuildApiKwargsAnthropicMaxTokens:
         agent.reasoning_config = None
 
         with patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build:
-            mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 16384}
+            mock_build.return_value = {
+                "model": "claude-sonnet-4-20250514",
+                "messages": [],
+                "max_tokens": 16384,
+            }
             agent._build_api_kwargs([{"role": "user", "content": "test"}])
             call_args = mock_build.call_args
             # max_tokens should be None (let adapter use its default)
@@ -3040,32 +3293,55 @@ class TestAnthropicImageFallback:
         agent.api_mode = "anthropic_messages"
         agent.reasoning_config = None
 
-        api_messages = [{
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "Can you see this now?"},
-                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
-            ],
-        }]
+        api_messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Can you see this now?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/cat.png"},
+                    },
+                ],
+            }
+        ]
 
         with (
-            patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=json.dumps({"success": True, "analysis": "A cat sitting on a chair."}))),
+            patch(
+                "tools.vision_tools.vision_analyze_tool",
+                new=AsyncMock(
+                    return_value=json.dumps(
+                        {"success": True, "analysis": "A cat sitting on a chair."}
+                    )
+                ),
+            ),
             patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build,
         ):
-            mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
+            mock_build.return_value = {
+                "model": "claude-sonnet-4-20250514",
+                "messages": [],
+                "max_tokens": 4096,
+            }
             agent._build_api_kwargs(api_messages)
 
-        kwargs = mock_build.call_args.kwargs or dict(zip(
-            ["model", "messages", "tools", "max_tokens", "reasoning_config"],
-            mock_build.call_args.args,
-        ))
+        kwargs = mock_build.call_args.kwargs or dict(
+            zip(
+                ["model", "messages", "tools", "max_tokens", "reasoning_config"],
+                mock_build.call_args.args,
+            )
+        )
         transformed = kwargs["messages"]
         assert isinstance(transformed[0]["content"], str)
         assert "A cat sitting on a chair." in transformed[0]["content"]
         assert "Can you see this now?" in transformed[0]["content"]
-        assert "vision_analyze with image_url: https://example.com/cat.png" in transformed[0]["content"]
+        assert (
+            "vision_analyze with image_url: https://example.com/cat.png"
+            in transformed[0]["content"]
+        )
 
-    def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images(self, agent):
+    def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images(
+        self, agent
+    ):
         agent.api_mode = "anthropic_messages"
         agent.reasoning_config = None
         data_url = "data:image/png;base64,QUFBQQ=="
@@ -3087,12 +3363,20 @@ class TestAnthropicImageFallback:
             },
         ]
 
-        mock_vision = AsyncMock(return_value=json.dumps({"success": True, "analysis": "A small test image."}))
+        mock_vision = AsyncMock(
+            return_value=json.dumps(
+                {"success": True, "analysis": "A small test image."}
+            )
+        )
         with (
             patch("tools.vision_tools.vision_analyze_tool", new=mock_vision),
             patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build,
         ):
-            mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
+            mock_build.return_value = {
+                "model": "claude-sonnet-4-20250514",
+                "messages": [],
+                "max_tokens": 4096,
+            }
             agent._build_api_kwargs(api_messages)
 
         assert mock_vision.await_count == 1
@@ -3103,7 +3387,10 @@ class TestFallbackAnthropicProvider:
 
     def test_fallback_to_anthropic_sets_api_mode(self, agent):
         agent._fallback_activated = False
-        agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}
+        agent._fallback_model = {
+            "provider": "anthropic",
+            "model": "claude-sonnet-4-20250514",
+        }
         agent._fallback_chain = [agent._fallback_model]
         agent._fallback_index = 0
 
@@ -3112,7 +3399,10 @@ class TestFallbackAnthropicProvider:
         mock_client.api_key = "sk-ant-api03-test"
 
         with (
-            patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(mock_client, None),
+            ),
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
             patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None),
         ):
@@ -3126,7 +3416,10 @@ class TestFallbackAnthropicProvider:
 
     def test_fallback_to_anthropic_enables_prompt_caching(self, agent):
         agent._fallback_activated = False
-        agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}
+        agent._fallback_model = {
+            "provider": "anthropic",
+            "model": "claude-sonnet-4-20250514",
+        }
         agent._fallback_chain = [agent._fallback_model]
         agent._fallback_index = 0
 
@@ -3135,8 +3428,14 @@ class TestFallbackAnthropicProvider:
         mock_client.api_key = "sk-ant-api03-test"
 
         with (
-            patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(mock_client, None),
+            ),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
             patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None),
         ):
             agent._try_activate_fallback()
@@ -3145,7 +3444,10 @@ class TestFallbackAnthropicProvider:
 
     def test_fallback_to_openrouter_uses_openai_client(self, agent):
         agent._fallback_activated = False
-        agent._fallback_model = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
+        agent._fallback_model = {
+            "provider": "openrouter",
+            "model": "anthropic/claude-sonnet-4",
+        }
         agent._fallback_chain = [agent._fallback_model]
         agent._fallback_index = 0
 
@@ -3153,7 +3455,10 @@ class TestFallbackAnthropicProvider:
         mock_client.base_url = "https://openrouter.ai/api/v1"
         mock_client.api_key = "sk-or-test"
 
-        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
+        with patch(
+            "agent.auxiliary_client.resolve_provider_client",
+            return_value=(mock_client, None),
+        ):
             result = agent._try_activate_fallback()
 
         assert result is True
@@ -3163,7 +3468,9 @@ class TestFallbackAnthropicProvider:
 
 def test_aiagent_uses_copilot_acp_client():
     with (
-        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+        patch(
+            "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")
+        ),
         patch("run_agent.check_toolset_requirements", return_value={}),
         patch("run_agent.OpenAI") as mock_openai,
         patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client,
@@ -3250,8 +3557,13 @@ def test_is_openai_client_closed_falls_back_to_http_client():
         def __init__(self, http_closed: bool):
             self._client = SimpleNamespace(is_closed=http_closed)
 
-    assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=False)) is False
-    assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=True)) is True
+    assert (
+        AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=False))
+        is False
+    )
+    assert (
+        AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=True)) is True
+    )
 
 
 class TestAnthropicBaseUrlPassthrough:
@@ -3259,7 +3571,10 @@ class TestAnthropicBaseUrlPassthrough:
 
     def test_custom_proxy_base_url_passed_through(self):
         with (
-            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
+            ),
             patch("run_agent.check_toolset_requirements", return_value={}),
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
         ):
@@ -3278,7 +3593,10 @@ class TestAnthropicBaseUrlPassthrough:
 
     def test_none_base_url_passed_as_none(self):
         with (
-            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
+            ),
             patch("run_agent.check_toolset_requirements", return_value={}),
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
         ):
@@ -3299,7 +3617,10 @@ class TestAnthropicBaseUrlPassthrough:
 class TestAnthropicCredentialRefresh:
     def test_try_refresh_anthropic_client_credentials_rebuilds_client(self):
         with (
-            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
+            ),
             patch("run_agent.check_toolset_requirements", return_value={}),
             patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
         ):
@@ -3320,21 +3641,37 @@ class TestAnthropicCredentialRefresh:
         agent.provider = "anthropic"
 
         with (
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-fresh-token"),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=new_client) as rebuild,
+            patch(
+                "agent.anthropic_adapter.resolve_anthropic_token",
+                return_value="sk-ant-oat01-fresh-token",
+            ),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=new_client,
+            ) as rebuild,
         ):
             assert agent._try_refresh_anthropic_client_credentials() is True
 
         old_client.close.assert_called_once()
-        rebuild.assert_called_once_with("sk-ant-oat01-fresh-token", "https://api.anthropic.com")
+        rebuild.assert_called_once_with(
+            "sk-ant-oat01-fresh-token", "https://api.anthropic.com"
+        )
         assert agent._anthropic_client is new_client
         assert agent._anthropic_api_key == "sk-ant-oat01-fresh-token"
 
-    def test_try_refresh_anthropic_client_credentials_returns_false_when_token_unchanged(self):
+    def test_try_refresh_anthropic_client_credentials_returns_false_when_token_unchanged(
+        self,
+    ):
         with (
-            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
+            ),
             patch("run_agent.check_toolset_requirements", return_value={}),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
         ):
             agent = AIAgent(
                 api_key="sk-ant-oat01-same-token",
@@ -3349,7 +3686,10 @@ class TestAnthropicCredentialRefresh:
         agent._anthropic_api_key = "sk-ant-oat01-same-token"
 
         with (
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-same-token"),
+            patch(
+                "agent.anthropic_adapter.resolve_anthropic_token",
+                return_value="sk-ant-oat01-same-token",
+            ),
             patch("agent.anthropic_adapter.build_anthropic_client") as rebuild,
         ):
             assert agent._try_refresh_anthropic_client_credentials() is False
@@ -3359,9 +3699,15 @@ class TestAnthropicCredentialRefresh:
 
     def test_anthropic_messages_create_preflights_refresh(self):
         with (
-            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("web_search"),
+            ),
             patch("run_agent.check_toolset_requirements", return_value={}),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
         ):
             agent = AIAgent(
                 api_key="sk-ant-oat01-current-token",
@@ -3375,11 +3721,17 @@ class TestAnthropicCredentialRefresh:
         agent._anthropic_client = MagicMock()
         agent._anthropic_client.messages.create.return_value = response
 
-        with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=True) as refresh:
-            result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
+        with patch.object(
+            agent, "_try_refresh_anthropic_client_credentials", return_value=True
+        ) as refresh:
+            result = agent._anthropic_messages_create(
+                {"model": "claude-sonnet-4-20250514"}
+            )
 
         refresh.assert_called_once_with()
-        agent._anthropic_client.messages.create.assert_called_once_with(model="claude-sonnet-4-20250514")
+        agent._anthropic_client.messages.create.assert_called_once_with(
+            model="claude-sonnet-4-20250514"
+        )
         assert result is response
 
 
@@ -3387,6 +3739,7 @@ class TestAnthropicCredentialRefresh:
 # _streaming_api_call tests
 # ===================================================================
 
+
 def _make_chunk(content=None, tool_calls=None, finish_reason=None, model="test/model"):
     """Build a SimpleNamespace mimicking an OpenAI streaming chunk."""
     delta = SimpleNamespace(content=content, tool_calls=tool_calls)
@@ -3441,8 +3794,8 @@ class TestStreamingApiCall:
 
     def test_multiple_tool_calls(self, agent):
         chunks = [
-            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{}')]),
-            _make_chunk(tool_calls=[_make_tc_delta(1, "call_b", "read", '{}')]),
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", "{}")]),
+            _make_chunk(tool_calls=[_make_tc_delta(1, "call_b", "read", "{}")]),
             _make_chunk(finish_reason="tool_calls"),
         ]
         agent.client.chat.completions.create.return_value = iter(chunks)
@@ -3456,7 +3809,13 @@ class TestStreamingApiCall:
 
     def test_truncated_tool_call_args_upgrade_finish_reason_to_length(self, agent):
         chunks = [
-            _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "write_file", '{"path":"x.txt","content":"hel')]),
+            _make_chunk(
+                tool_calls=[
+                    _make_tc_delta(
+                        0, "call_1", "write_file", '{"path":"x.txt","content":"hel'
+                    )
+                ]
+            ),
         ]
         agent.client.chat.completions.create.return_value = iter(chunks)
 
@@ -3474,9 +3833,13 @@ class TestStreamingApiCall:
         Without the fix, names and arguments get concatenated into one slot.
         """
         chunks = [
-            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]),
+            _make_chunk(
+                tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]
+            ),
             # Second tool call at the SAME index 0, but different id
-            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]),
+            _make_chunk(
+                tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]
+            ),
             _make_chunk(finish_reason="tool_calls"),
         ]
         agent.client.chat.completions.create.return_value = iter(chunks)
@@ -3484,7 +3847,9 @@ class TestStreamingApiCall:
         resp = agent._interruptible_streaming_api_call({"messages": []})
 
         tc = resp.choices[0].message.tool_calls
-        assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
+        assert len(tc) == 2, (
+            f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
+        )
         assert tc[0].function.name == "search"
         assert tc[0].function.arguments == '{"q":"hello"}'
         assert tc[0].id == "call_a"
@@ -3498,7 +3863,7 @@ class TestStreamingApiCall:
             _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]),
             _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]),
             # New tool call, same index 0
-            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]),
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", "{}")]),
             _make_chunk(finish_reason="tool_calls"),
         ]
         agent.client.chat.completions.create.return_value = iter(chunks)
@@ -3510,12 +3875,12 @@ class TestStreamingApiCall:
         assert tc[0].function.name == "search"
         assert tc[0].function.arguments == '{"q":"hello"}'
         assert tc[1].function.name == "read"
-        assert tc[1].function.arguments == '{}'
+        assert tc[1].function.arguments == "{}"
 
     def test_content_and_tool_calls_together(self, agent):
         chunks = [
             _make_chunk(content="I'll search"),
-            _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "search", '{}')]),
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "search", "{}")]),
             _make_chunk(finish_reason="tool_calls"),
         ]
         agent.client.chat.completions.create.return_value = iter(chunks)
@@ -3565,7 +3930,10 @@ class TestStreamingApiCall:
         agent._interruptible_streaming_api_call({"messages": [], "model": "test"})
 
         call_kwargs = agent.client.chat.completions.create.call_args
-        assert call_kwargs[1].get("stream") is True or call_kwargs.kwargs.get("stream") is True
+        assert (
+            call_kwargs[1].get("stream") is True
+            or call_kwargs.kwargs.get("stream") is True
+        )
 
     def test_api_exception_propagates_no_non_streaming_fallback(self, agent):
         """When streaming fails before any deltas, error propagates to the main retry loop."""
@@ -3611,6 +3979,7 @@ class TestInterruptVprintForceTrue:
     def test_all_interrupt_vprint_have_force_true(self):
         """Scan source for _vprint calls containing 'Interrupt' — each must have force=True."""
         import inspect
+
         source = inspect.getsource(AIAgent)
         lines = source.split("\n")
         violations = []
@@ -3620,8 +3989,7 @@ class TestInterruptVprintForceTrue:
                 if "force=True" not in stripped:
                     violations.append(f"line {i}: {stripped}")
         assert not violations, (
-            f"Interrupt _vprint calls missing force=True:\n"
-            + "\n".join(violations)
+            f"Interrupt _vprint calls missing force=True:\n" + "\n".join(violations)
         )
 
 
@@ -3636,23 +4004,29 @@ class TestAnthropicInterruptHandler:
     def test_interruptible_has_anthropic_branch(self):
         """The interrupt handler must check api_mode == 'anthropic_messages'."""
         import inspect
+
         source = inspect.getsource(AIAgent._interruptible_api_call)
-        assert "anthropic_messages" in source, \
+        assert "anthropic_messages" in source, (
             "_interruptible_api_call must handle Anthropic interrupt (api_mode check)"
+        )
 
     def test_interruptible_rebuilds_anthropic_client(self):
         """After interrupting, the Anthropic client should be rebuilt."""
         import inspect
+
         source = inspect.getsource(AIAgent._interruptible_api_call)
-        assert "build_anthropic_client" in source, \
+        assert "build_anthropic_client" in source, (
             "_interruptible_api_call must rebuild Anthropic client after interrupt"
+        )
 
     def test_streaming_has_anthropic_branch(self):
         """_streaming_api_call must also handle Anthropic interrupt."""
         import inspect
+
         source = inspect.getsource(AIAgent._interruptible_streaming_api_call)
-        assert "anthropic_messages" in source, \
+        assert "anthropic_messages" in source, (
             "_streaming_api_call must handle Anthropic interrupt"
+        )
 
 
 # ---------------------------------------------------------------------------
@@ -3668,11 +4042,18 @@ class TestStreamCallbackNonStreamingProvider:
         """For chat_completions-shaped responses, callback gets content."""
         agent.api_mode = "anthropic_messages"
         mock_response = SimpleNamespace(
-            choices=[SimpleNamespace(
-                message=SimpleNamespace(content="Hello", tool_calls=None, reasoning_content=None),
-                finish_reason="stop", index=0,
-            )],
-            usage=None, model="test", id="test-id",
+            choices=[
+                SimpleNamespace(
+                    message=SimpleNamespace(
+                        content="Hello", tool_calls=None, reasoning_content=None
+                    ),
+                    finish_reason="stop",
+                    index=0,
+                )
+            ],
+            usage=None,
+            model="test",
+            id="test-id",
         )
         agent._interruptible_api_call = MagicMock(return_value=mock_response)
 
@@ -3686,8 +4067,10 @@ class TestStreamCallbackNonStreamingProvider:
             try:
                 if agent.api_mode == "anthropic_messages":
                     text_parts = [
-                        block.text for block in getattr(response, "content", [])
-                        if getattr(block, "type", None) == "text" and getattr(block, "text", None)
+                        block.text
+                        for block in getattr(response, "content", [])
+                        if getattr(block, "type", None) == "text"
+                        and getattr(block, "text", None)
                     ]
                     content = " ".join(text_parts) if text_parts else None
                 else:
@@ -3729,8 +4112,10 @@ class TestStreamCallbackNonStreamingProvider:
             try:
                 if agent.api_mode == "anthropic_messages":
                     text_parts = [
-                        block.text for block in getattr(mock_response, "content", [])
-                        if getattr(block, "type", None) == "text" and getattr(block, "text", None)
+                        block.text
+                        for block in getattr(mock_response, "content", [])
+                        if getattr(block, "type", None) == "text"
+                        and getattr(block, "text", None)
                     ]
                     content = " ".join(text_parts) if text_parts else None
                 else:
@@ -3879,10 +4264,14 @@ class TestOAuthFlagAfterCredentialRefresh:
         agent._is_anthropic_oauth = False
 
         with (
-            patch("agent.anthropic_adapter.resolve_anthropic_token",
-                  return_value="sk-ant-setup-oauth-token"),
-            patch("agent.anthropic_adapter.build_anthropic_client",
-                  return_value=MagicMock()),
+            patch(
+                "agent.anthropic_adapter.resolve_anthropic_token",
+                return_value="sk-ant-setup-oauth-token",
+            ),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
         ):
             result = agent._try_refresh_anthropic_client_credentials()
 
@@ -3898,10 +4287,14 @@ class TestOAuthFlagAfterCredentialRefresh:
         agent._is_anthropic_oauth = True
 
         with (
-            patch("agent.anthropic_adapter.resolve_anthropic_token",
-                  return_value="sk-ant-api03-new-key"),
-            patch("agent.anthropic_adapter.build_anthropic_client",
-                  return_value=MagicMock()),
+            patch(
+                "agent.anthropic_adapter.resolve_anthropic_token",
+                return_value="sk-ant-api03-new-key",
+            ),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
         ):
             result = agent._try_refresh_anthropic_client_credentials()
 
@@ -3923,12 +4316,15 @@ class TestFallbackSetsOAuthFlag:
         mock_client.api_key = "sk-ant-setup-oauth-token"
 
         with (
-            patch("agent.auxiliary_client.resolve_provider_client",
-                  return_value=(mock_client, None)),
-            patch("agent.anthropic_adapter.build_anthropic_client",
-                  return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token",
-                  return_value=None),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(mock_client, None),
+            ),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None),
         ):
             result = agent._try_activate_fallback()
 
@@ -3946,12 +4342,15 @@ class TestFallbackSetsOAuthFlag:
         mock_client.api_key = "sk-ant-api03-regular-key"
 
         with (
-            patch("agent.auxiliary_client.resolve_provider_client",
-                  return_value=(mock_client, None)),
-            patch("agent.anthropic_adapter.build_anthropic_client",
-                  return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token",
-                  return_value=None),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(mock_client, None),
+            ),
+            patch(
+                "agent.anthropic_adapter.build_anthropic_client",
+                return_value=MagicMock(),
+            ),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None),
         ):
             result = agent._try_activate_fallback()
 
@@ -3966,8 +4365,11 @@ class TestMemoryNudgeCounterPersistence:
         """Counters must exist on the agent after __init__."""
         with patch("run_agent.get_tool_definitions", return_value=[]):
             a = AIAgent(
-                model="test", api_key="test-key", provider="openrouter",
-                skip_context_files=True, skip_memory=True,
+                model="test",
+                api_key="test-key",
+                provider="openrouter",
+                skip_context_files=True,
+                skip_memory=True,
             )
         assert hasattr(a, "_turns_since_memory")
         assert hasattr(a, "_iters_since_skill")
@@ -3977,6 +4379,7 @@ class TestMemoryNudgeCounterPersistence:
     def test_counters_not_reset_in_preamble(self):
         """The run_conversation preamble must not zero the nudge counters."""
         import inspect
+
         src = inspect.getsource(AIAgent.run_conversation)
         # The preamble resets many fields (retry counts, budget, etc.)
         # before the main loop. Find that reset block and verify our
@@ -3992,6 +4395,7 @@ class TestDeadRetryCode:
 
     def test_no_unreachable_max_retries_after_backoff(self):
         import inspect
+
         source = inspect.getsource(AIAgent.run_conversation)
         occurrences = source.count("if retry_count >= max_retries:")
         assert occurrences == 2, (