diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index b85f77a9d23..a3f3d2261ba 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -42,26 +42,26 @@ ADAPTIVE_EFFORT_MAP = { # starves thinking-enabled models (thinking tokens count toward the limit). _ANTHROPIC_OUTPUT_LIMITS = { # Claude 4.6 - "claude-opus-4-6": 128_000, - "claude-sonnet-4-6": 64_000, + "claude-opus-4-6": 128_000, + "claude-sonnet-4-6": 64_000, # Claude 4.5 - "claude-opus-4-5": 64_000, - "claude-sonnet-4-5": 64_000, - "claude-haiku-4-5": 64_000, + "claude-opus-4-5": 64_000, + "claude-sonnet-4-5": 64_000, + "claude-haiku-4-5": 64_000, # Claude 4 - "claude-opus-4": 32_000, - "claude-sonnet-4": 64_000, + "claude-opus-4": 32_000, + "claude-sonnet-4": 64_000, # Claude 3.7 "claude-3-7-sonnet": 128_000, # Claude 3.5 - "claude-3-5-sonnet": 8_192, - "claude-3-5-haiku": 8_192, + "claude-3-5-sonnet": 8_192, + "claude-3-5-haiku": 8_192, # Claude 3 - "claude-3-opus": 4_096, - "claude-3-sonnet": 4_096, - "claude-3-haiku": 4_096, + "claude-3-opus": 4_096, + "claude-3-sonnet": 4_096, + "claude-3-haiku": 4_096, # Third-party Anthropic-compatible providers - "minimax": 131_072, + "minimax": 131_072, } # For any model not in the table, assume the highest current limit. @@ -138,7 +138,9 @@ def _detect_claude_code_version() -> str: try: result = _sp.run( [cmd, "--version"], - capture_output=True, text=True, timeout=5, + capture_output=True, + text=True, + timeout=5, ) if result.returncode == 0 and result.stdout.strip(): # Output is like "2.1.74 (Claude Code)" or just "2.1.74" @@ -224,7 +226,9 @@ def _requires_bearer_auth(base_url: str | None) -> bool: if not normalized: return False normalized = normalized.rstrip("/").lower() - return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")) + return normalized.startswith( + ("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic") + ) def _common_betas_for_base_url(base_url: str | None) -> list[str]: @@ -357,7 +361,9 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool: return now_ms < (expires_at - 60_000) -def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]: +def refresh_anthropic_oauth_pure( + refresh_token: str, *, use_json: bool = False +) -> Dict[str, Any]: """Refresh an Anthropic OAuth token without mutating local credential files.""" import time import urllib.parse @@ -368,18 +374,22 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" if use_json: - data = json.dumps({ - "grant_type": "refresh_token", - "refresh_token": refresh_token, - "client_id": client_id, - }).encode() + data = json.dumps( + { + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + } + ).encode() content_type = "application/json" else: - data = urllib.parse.urlencode({ - "grant_type": "refresh_token", - "refresh_token": refresh_token, - "client_id": client_id, - }).encode() + data = urllib.parse.urlencode( + { + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + } + ).encode() content_type = "application/x-www-form-urlencoded" token_endpoints = [ @@ -485,7 +495,9 @@ def _write_claude_code_credentials( logger.debug("Failed to write refreshed credentials: %s", e) -def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]: +def _resolve_claude_code_token_from_credentials( + creds: Optional[Dict[str, Any]] = None, +) -> Optional[str]: """Resolve a token from Claude Code credential files, refreshing if needed.""" creds = creds or read_claude_code_credentials() if creds and is_claude_code_token_valid(creds): @@ -496,11 +508,15 @@ def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] refreshed = _refresh_oauth_token(creds) if refreshed: return refreshed - logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate") + logger.debug( + "Token refresh failed — re-run 'claude setup-token' to reauthenticate" + ) return None -def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]: +def _prefer_refreshable_claude_code_token( + env_token: str, creds: Optional[Dict[str, Any]] +) -> Optional[str]: """Prefer Claude Code creds when a persisted env OAuth token would shadow refresh. Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes @@ -624,9 +640,11 @@ def _generate_pkce() -> tuple: import secrets verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode() - challenge = base64.urlsafe_b64encode( - hashlib.sha256(verifier.encode()).digest() - ).rstrip(b"=").decode() + challenge = ( + base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()) + .rstrip(b"=") + .decode() + ) return verifier, challenge @@ -687,14 +705,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]: try: import urllib.request - exchange_data = json.dumps({ - "grant_type": "authorization_code", - "client_id": _OAUTH_CLIENT_ID, - "code": code, - "state": state, - "redirect_uri": _OAUTH_REDIRECT_URI, - "code_verifier": verifier, - }).encode() + exchange_data = json.dumps( + { + "grant_type": "authorization_code", + "client_id": _OAUTH_CLIENT_ID, + "code": code, + "state": state, + "redirect_uri": _OAUTH_REDIRECT_URI, + "code_verifier": verifier, + } + ).encode() req = urllib.request.Request( _OAUTH_TOKEN_URL, @@ -755,7 +775,7 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str: """ lower = model.lower() if lower.startswith("anthropic/"): - model = model[len("anthropic/"):] + model = model[len("anthropic/") :] if not preserve_dots: # OpenRouter uses dots for version separators (claude-opus-4.6), # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens. @@ -770,6 +790,7 @@ def _sanitize_tool_id(tool_id: str) -> str: characters with underscores and ensure non-empty. """ import re + if not tool_id: return "tool_0" sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id) @@ -783,11 +804,15 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]: result = [] for t in tools: fn = t.get("function", {}) - result.append({ - "name": fn.get("name", ""), - "description": fn.get("description", ""), - "input_schema": fn.get("parameters", {"type": "object", "properties": {}}), - }) + result.append( + { + "name": fn.get("name", ""), + "description": fn.get("description", ""), + "input_schema": fn.get( + "parameters", {"type": "object", "properties": {}} + ), + } + ) return result @@ -801,7 +826,7 @@ def _image_source_from_openai_url(url: str) -> Dict[str, str]: header, _, data = url.partition(",") media_type = "image/jpeg" if header.startswith("data:"): - mime_part = header[len("data:"):].split(";", 1)[0].strip() + mime_part = header[len("data:") :].split(";", 1)[0].strip() if mime_part.startswith("image/"): media_type = mime_part return { @@ -828,7 +853,11 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]: block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")} elif ptype in {"image_url", "input_image"}: image_value = part.get("image_url", {}) - url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "") + url = ( + image_value.get("url", "") + if isinstance(image_value, dict) + else str(image_value or "") + ) block = {"type": "image", "source": _image_source_from_openai_url(url)} else: block = dict(part) @@ -864,7 +893,10 @@ def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) return result if isinstance(value, dict): _path.add(obj_id) - result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()} + result = { + k: _to_plain_data(v, _depth=_depth + 1, _path=_path) + for k, v in value.items() + } _path.discard(obj_id) return result if isinstance(value, (list, tuple)): @@ -925,9 +957,10 @@ def convert_messages_to_anthropic( system_prompt is a string or list of content blocks (when cache_control present). When *base_url* is provided and points to a third-party Anthropic-compatible - endpoint, all thinking block signatures are stripped. Signatures are - Anthropic-proprietary — third-party endpoints cannot validate them and will - reject them with HTTP 400 "Invalid signature in thinking block". + endpoint, Anthropic thinking signatures are removed. Signed thinking blocks + are downgraded to plain text to preserve useful reasoning context, while + redacted_thinking blocks are dropped. Third-party endpoints cannot validate + Anthropic signatures and may reject them with HTTP 400. """ system = None result = [] @@ -970,12 +1003,14 @@ def convert_messages_to_anthropic( parsed_args = json.loads(args) if isinstance(args, str) else args except (json.JSONDecodeError, ValueError): parsed_args = {} - blocks.append({ - "type": "tool_use", - "id": _sanitize_tool_id(tc.get("id", "")), - "name": fn.get("name", ""), - "input": parsed_args, - }) + blocks.append( + { + "type": "tool_use", + "id": _sanitize_tool_id(tc.get("id", "")), + "name": fn.get("name", ""), + "input": parsed_args, + } + ) # Anthropic rejects empty assistant content effective = blocks or content if not effective or effective == "": @@ -985,7 +1020,9 @@ def convert_messages_to_anthropic( if role == "tool": # Sanitize tool_use_id and ensure non-empty content - result_content = content if isinstance(content, str) else json.dumps(content) + result_content = ( + content if isinstance(content, str) else json.dumps(content) + ) if not result_content: result_content = "(no output)" tool_result = { @@ -1057,7 +1094,8 @@ def convert_messages_to_anthropic( m["content"] = [ b for b in m["content"] - if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids + if b.get("type") != "tool_result" + or b.get("tool_use_id") in tool_use_ids ] if not m["content"]: m["content"] = [{"type": "text", "text": "(tool result removed)"}] @@ -1088,8 +1126,12 @@ def convert_messages_to_anthropic( # and becomes invalid once merged. if isinstance(m["content"], list): m["content"] = [ - b for b in m["content"] - if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")) + b + for b in m["content"] + if not ( + isinstance(b, dict) + and b.get("type") in ("thinking", "redacted_thinking") + ) ] prev_blocks = fixed[-1]["content"] curr_blocks = m["content"] @@ -1117,9 +1159,8 @@ def convert_messages_to_anthropic( # Signatures are Anthropic-proprietary. Third-party endpoints # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate # them and will reject them outright. When targeting a third-party - # endpoint, strip ALL thinking/redacted_thinking blocks from every - # assistant message — the third-party will generate its own - # thinking blocks if it supports extended thinking. + # endpoint, downgrade thinking blocks to plain text and drop + # redacted_thinking blocks. # # For direct Anthropic (strategy following clawdbot/OpenClaw): # 1. Strip thinking/redacted_thinking from all assistant messages @@ -1142,12 +1183,33 @@ def convert_messages_to_anthropic( if m.get("role") != "assistant" or not isinstance(m.get("content"), list): continue - if _is_third_party or idx != last_assistant_idx: - # Third-party endpoint: strip ALL thinking blocks from every - # assistant message — signatures are Anthropic-proprietary. - # Direct Anthropic: strip from non-latest assistant messages only. + if _is_third_party: + # Third-party endpoint: Anthropic signatures are proprietary + # and will be rejected. Downgrade thinking blocks to plain + # text so the model retains reasoning context across turns. + # (Direct Anthropic would validate signatures; third-party + # endpoints like z.ai / GLM-5.1 don't use signatures at all.) + _tp_content = [] + for b in m["content"]: + if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: + _tp_content.append(b) + continue + # redacted_thinking carries opaque data — drop it. + if b.get("type") == "redacted_thinking": + continue + # Regular thinking → plain text preserves reasoning for next turn. + thinking_text = b.get("thinking", "") + if thinking_text: + _tp_content.append({"type": "text", "text": thinking_text}) + m["content"] = _tp_content or [ + {"type": "text", "text": "(thinking elided)"} + ] + elif idx != last_assistant_idx: + # Direct Anthropic: strip thinking from non-latest assistant + # messages to avoid stale-signature 400s. stripped = [ - b for b in m["content"] + b + for b in m["content"] if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES) ] m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}] @@ -1235,7 +1297,9 @@ def build_anthropic_kwargs( Currently only supported on native Anthropic endpoints (not third-party compatible ones). """ - system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url) + system, anthropic_messages = convert_messages_to_anthropic( + messages, base_url=base_url + ) anthropic_tools = convert_tools_to_anthropic(tools) if tools else [] model = normalize_model_name(model, preserve_dots=preserve_dots) @@ -1287,7 +1351,10 @@ def build_anthropic_kwargs( if block.get("type") == "tool_use" and "name" in block: if not block["name"].startswith(_MCP_TOOL_PREFIX): block["name"] = _MCP_TOOL_PREFIX + block["name"] - elif block.get("type") == "tool_result" and "tool_use_id" in block: + elif ( + block.get("type") == "tool_result" + and "tool_use_id" in block + ): pass # tool_result uses ID, not name kwargs: Dict[str, Any] = { @@ -1319,7 +1386,10 @@ def build_anthropic_kwargs( # MiniMax Anthropic-compat endpoints support thinking (manual mode only, # not adaptive). Haiku does NOT support extended thinking — skip entirely. if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): + if ( + reasoning_config.get("enabled") is not False + and "haiku" not in model.lower() + ): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) if _supports_adaptive_thinking(model): @@ -1378,7 +1448,7 @@ def normalize_anthropic_response( elif block.type == "tool_use": name = block.name if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX): - name = name[len(_MCP_TOOL_PREFIX):] + name = name[len(_MCP_TOOL_PREFIX) :] tool_calls.append( SimpleNamespace( id=block.id, diff --git a/run_agent.py b/run_agent.py index efaeba82945..bc917884e75 100644 --- a/run_agent.py +++ b/run_agent.py @@ -15,7 +15,7 @@ Features: Usage: from run_agent import AIAgent - + agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514") response = agent.run_conversation("Tell me about the latest Python updates") """ @@ -27,6 +27,7 @@ import copy import hashlib import json import logging + logger = logging.getLogger(__name__) import os import random @@ -50,8 +51,10 @@ from hermes_constants import get_hermes_home from hermes_cli.env_loader import load_hermes_dotenv _hermes_home = get_hermes_home() -_project_env = Path(__file__).parent / '.env' -_loaded_env_paths = load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) +_project_env = Path(__file__).parent / ".env" +_loaded_env_paths = load_hermes_dotenv( + hermes_home=_hermes_home, project_env=_project_env +) if _loaded_env_paths: for _env_path in _loaded_env_paths: logger.info("Loaded environment variables from %s", _env_path) @@ -79,37 +82,55 @@ from agent.memory_manager import build_memory_context_block from agent.retry_utils import jittered_backoff from agent.error_classifier import classify_api_error, FailoverReason from agent.prompt_builder import ( - DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS, - MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE, + DEFAULT_AGENT_IDENTITY, + PLATFORM_HINTS, + MEMORY_GUIDANCE, + SESSION_SEARCH_GUIDANCE, + SKILLS_GUIDANCE, build_nous_subscription_prompt, ) from agent.model_metadata import ( fetch_model_metadata, - estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough, - get_next_probe_tier, parse_context_limit_from_error, + estimate_tokens_rough, + estimate_messages_tokens_rough, + estimate_request_tokens_rough, + get_next_probe_tier, + parse_context_limit_from_error, parse_available_output_tokens_from_error, - save_context_length, is_local_endpoint, + save_context_length, + is_local_endpoint, query_ollama_num_ctx, ) from agent.context_compressor import ContextCompressor from agent.subdirectory_hints import SubdirectoryHintTracker from agent.prompt_caching import apply_anthropic_cache_control -from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE +from agent.prompt_builder import ( + build_skills_system_prompt, + build_context_files_prompt, + build_environment_hints, + load_soul_md, + TOOL_USE_ENFORCEMENT_GUIDANCE, + TOOL_USE_ENFORCEMENT_MODELS, + DEVELOPER_ROLE_MODELS, + GOOGLE_MODEL_OPERATIONAL_GUIDANCE, + OPENAI_MODEL_EXECUTION_GUIDANCE, +) from agent.usage_pricing import estimate_usage_cost, normalize_usage from agent.display import ( - KawaiiSpinner, build_tool_preview as _build_tool_preview, + KawaiiSpinner, + build_tool_preview as _build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, _detect_tool_failure, get_tool_emoji as _get_tool_emoji, ) from agent.trajectory import ( - convert_scratchpad_to_think, has_incomplete_scratchpad, + convert_scratchpad_to_think, + has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) from utils import atomic_json_write, env_var_enabled - class _SafeWriter: """Transparent stdio wrapper that catches OSError/ValueError from broken pipes. @@ -216,19 +237,21 @@ class IterationBudget: _NEVER_PARALLEL_TOOLS = frozenset({"clarify"}) # Read-only tools with no shared mutable session state. -_PARALLEL_SAFE_TOOLS = frozenset({ - "ha_get_state", - "ha_list_entities", - "ha_list_services", - "read_file", - "search_files", - "session_search", - "skill_view", - "skills_list", - "vision_analyze", - "web_extract", - "web_search", -}) +_PARALLEL_SAFE_TOOLS = frozenset( + { + "ha_get_state", + "ha_list_entities", + "ha_list_services", + "read_file", + "search_files", + "session_search", + "skill_view", + "skills_list", + "vision_analyze", + "web_extract", + "web_search", + } +) # File tools can run concurrently when they target independent paths. _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"}) @@ -250,7 +273,7 @@ _DESTRUCTIVE_PATTERNS = re.compile( re.VERBOSE, ) # Output redirects that overwrite files (> but not >>) -_REDIRECT_OVERWRITE = re.compile(r'[^>]>[^>]|^>[^>]') +_REDIRECT_OVERWRITE = re.compile(r"[^>]>[^>]|^>[^>]") def _is_destructive_command(cmd: str) -> bool: @@ -297,7 +320,9 @@ def _should_parallelize_tool_batch(tool_calls) -> bool: scoped_path = _extract_parallel_scope_path(tool_name, function_args) if scoped_path is None: return False - if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths): + if any( + _paths_overlap(scoped_path, existing) for existing in reserved_paths + ): return False reserved_paths.append(scoped_path) continue @@ -336,10 +361,7 @@ def _paths_overlap(left: Path, right: Path) -> bool: return left_parts[:common_len] == right_parts[:common_len] - -_SURROGATE_RE = re.compile(r'[\ud800-\udfff]') - - +_SURROGATE_RE = re.compile(r"[\ud800-\udfff]") def _sanitize_surrogates(text: str) -> str: @@ -349,7 +371,7 @@ def _sanitize_surrogates(text: str) -> str: OpenAI SDK. This is a fast no-op when the text contains no surrogates. """ if _SURROGATE_RE.search(text): - return _SURROGATE_RE.sub('\ufffd', text) + return _SURROGATE_RE.sub("\ufffd", text) return text @@ -366,18 +388,18 @@ def _sanitize_messages_surrogates(messages: list) -> bool: continue content = msg.get("content") if isinstance(content, str) and _SURROGATE_RE.search(content): - msg["content"] = _SURROGATE_RE.sub('\ufffd', content) + msg["content"] = _SURROGATE_RE.sub("\ufffd", content) found = True elif isinstance(content, list): for part in content: if isinstance(part, dict): text = part.get("text") if isinstance(text, str) and _SURROGATE_RE.search(text): - part["text"] = _SURROGATE_RE.sub('\ufffd', text) + part["text"] = _SURROGATE_RE.sub("\ufffd", text) found = True name = msg.get("name") if isinstance(name, str) and _SURROGATE_RE.search(name): - msg["name"] = _SURROGATE_RE.sub('\ufffd', name) + msg["name"] = _SURROGATE_RE.sub("\ufffd", name) found = True tool_calls = msg.get("tool_calls") if isinstance(tool_calls, list): @@ -386,17 +408,17 @@ def _sanitize_messages_surrogates(messages: list) -> bool: continue tc_id = tc.get("id") if isinstance(tc_id, str) and _SURROGATE_RE.search(tc_id): - tc["id"] = _SURROGATE_RE.sub('\ufffd', tc_id) + tc["id"] = _SURROGATE_RE.sub("\ufffd", tc_id) found = True fn = tc.get("function") if isinstance(fn, dict): fn_name = fn.get("name") if isinstance(fn_name, str) and _SURROGATE_RE.search(fn_name): - fn["name"] = _SURROGATE_RE.sub('\ufffd', fn_name) + fn["name"] = _SURROGATE_RE.sub("\ufffd", fn_name) found = True fn_args = fn.get("arguments") if isinstance(fn_args, str) and _SURROGATE_RE.search(fn_args): - fn["arguments"] = _SURROGATE_RE.sub('\ufffd', fn_args) + fn["arguments"] = _SURROGATE_RE.sub("\ufffd", fn_args) found = True return found @@ -407,7 +429,7 @@ def _strip_non_ascii(text: str) -> str: Used as a last resort when the system encoding is ASCII and can't handle any non-ASCII characters (e.g. LANG=C on Chromebooks). """ - return text.encode('ascii', errors='ignore').decode('ascii') + return text.encode("ascii", errors="ignore").decode("ascii") def _sanitize_messages_non_ascii(messages: list) -> bool: @@ -494,9 +516,6 @@ def _sanitize_structure_non_ascii(payload: Any) -> bool: return found - - - # ========================================================================= # Large tool result handler — save oversized output to temp file # ========================================================================= @@ -663,7 +682,9 @@ class AIAgent: # instead of going directly to stdout where patch_stdout's StdoutProxy # would mangle the escape sequences. None = use builtins.print. self._print_fn = None - self.background_review_callback = None # Optional sync callback for gateway delivery + self.background_review_callback = ( + None # Optional sync callback for gateway delivery + ) self.skip_context_files = skip_context_files self.pass_session_id = pass_session_id self.persist_session = persist_session @@ -672,7 +693,11 @@ class AIAgent: self.log_prefix = f"{log_prefix} " if log_prefix else "" # Store effective base URL for feature detection (prompt caching, reasoning, etc.) self.base_url = base_url or "" - provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None + provider_name = ( + provider.strip().lower() + if isinstance(provider, str) and provider.strip() + else None + ) self.provider = provider_name or "" self.acp_command = acp_command or command self.acp_args = list(acp_args or args or []) @@ -680,10 +705,14 @@ class AIAgent: self.api_mode = api_mode elif self.provider == "openai-codex": self.api_mode = "codex_responses" - elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower: + elif ( + provider_name is None + ) and "chatgpt.com/backend-api/codex" in self._base_url_lower: self.api_mode = "codex_responses" self.provider = "openai-codex" - elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): + elif self.provider == "anthropic" or ( + provider_name is None and "api.anthropic.com" in self._base_url_lower + ): self.api_mode = "anthropic_messages" self.provider = "anthropic" elif self._base_url_lower.rstrip("/").endswith("/anthropic"): @@ -745,7 +774,6 @@ class AIAgent: self.status_callback = status_callback self.tool_gen_callback = tool_gen_callback - # Tool execution state — allows _vprint during tool execution # even when stream consumers are registered (no tokens streaming then) self._executing_tools = False @@ -755,12 +783,12 @@ class AIAgent: self._interrupt_message = None # Optional message that triggered interrupt self._execution_thread_id: int | None = None # Set at run_conversation() start self._client_lock = threading.RLock() - + # Subagent delegation state - self._delegate_depth = 0 # 0 = top-level agent, incremented for children - self._active_children = [] # Running child AIAgents (for interrupt propagation) + self._delegate_depth = 0 # 0 = top-level agent, incremented for children + self._active_children = [] # Running child AIAgents (for interrupt propagation) self._active_children_lock = threading.Lock() - + # Store OpenRouter provider preferences self.providers_allowed = providers_allowed self.providers_ignored = providers_ignored @@ -772,24 +800,28 @@ class AIAgent: # Store toolset filtering options self.enabled_toolsets = enabled_toolsets self.disabled_toolsets = disabled_toolsets - + # Model response configuration self.max_tokens = max_tokens # None = use model default - self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter) + self.reasoning_config = ( + reasoning_config # None = use default (medium for OpenRouter) + ) self.service_tier = service_tier self.request_overrides = dict(request_overrides or {}) self.prefill_messages = prefill_messages or [] # Prefilled conversation turns self._force_ascii_payload = False - + # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. # Reduces input costs by ~75% on multi-turn conversations by caching the # conversation prefix. Uses system_and_3 strategy (4 breakpoints). is_openrouter = self._is_openrouter_url() is_claude = "claude" in self.model.lower() - is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic" + is_native_anthropic = ( + self.api_mode == "anthropic_messages" and self.provider == "anthropic" + ) self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) - + # Iteration budget: the LLM is only notified when it actually exhausts # the iteration budget (api_call_count >= max_iterations). At that # point we inject ONE message, allow one final API call, and if the @@ -822,6 +854,7 @@ class AIAgent: # both live under ~/.hermes/logs/. Idempotent, so gateway mode # (which creates a new AIAgent per message) won't duplicate handlers. from hermes_logging import setup_logging, setup_verbose_logging + setup_logging(hermes_home=_hermes_home) if self.verbose_logging: @@ -834,14 +867,14 @@ class AIAgent: # for status; logger INFO/WARNING messages just clutter it. # File handlers (agent.log, errors.log) still capture everything. for quiet_logger in [ - 'tools', # all tools.* (terminal, browser, web, file, etc.) - 'run_agent', # agent runner internals - 'trajectory_compressor', - 'cron', # scheduler (only relevant in daemon mode) - 'hermes_cli', # CLI helpers + "tools", # all tools.* (terminal, browser, web, file, etc.) + "run_agent", # agent runner internals + "trajectory_compressor", + "cron", # scheduler (only relevant in daemon mode) + "hermes_cli", # CLI helpers ]: logging.getLogger(quiet_logger).setLevel(logging.ERROR) - + # Internal stream callback (set during streaming TTS). # Initialized here so _vprint can reference it before run_conversation. self._stream_callback = None @@ -874,23 +907,34 @@ class AIAgent: self._is_anthropic_oauth = False if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token + from agent.anthropic_adapter import ( + build_anthropic_client, + resolve_anthropic_token, + ) + # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). _is_native_anthropic = self.provider == "anthropic" - effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") + effective_key = ( + (api_key or resolve_anthropic_token() or "") + if _is_native_anthropic + else (api_key or "") + ) self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = base_url from agent.anthropic_adapter import _is_oauth_token as _is_oat + self._is_anthropic_oauth = _is_oat(effective_key) self._anthropic_client = build_anthropic_client(effective_key, base_url) # No OpenAI client needed for Anthropic mode self.client = None self._client_kwargs = {} if not self.quiet_mode: - print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") + print( + f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)" + ) if effective_key and len(effective_key) > 12: print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}") else: @@ -921,16 +965,23 @@ class AIAgent: else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client + _routed_client, _ = resolve_provider_client( - self.provider or "auto", model=self.model, raw_codex=True) + self.provider or "auto", model=self.model, raw_codex=True + ) if _routed_client is not None: client_kwargs = { "api_key": _routed_client.api_key, "base_url": str(_routed_client.base_url), } # Preserve any default_headers the router set - if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers: - client_kwargs["default_headers"] = dict(_routed_client._default_headers) + if ( + hasattr(_routed_client, "_default_headers") + and _routed_client._default_headers + ): + client_kwargs["default_headers"] = dict( + _routed_client._default_headers + ) else: # When the user explicitly chose a non-OpenRouter provider # but no credentials were found, fail fast with a clear @@ -952,7 +1003,7 @@ class AIAgent: "X-OpenRouter-Categories": "productivity,cli-agent", }, } - + self._client_kwargs = client_kwargs # stored for rebuilding after interrupt # Enable fine-grained tool streaming for Claude on OpenRouter. @@ -962,7 +1013,10 @@ class AIAgent: # stream tool call arguments token-by-token, keeping the # connection alive. _effective_base = str(client_kwargs.get("base_url", "")).lower() - if "openrouter" in _effective_base and "claude" in (self.model or "").lower(): + if ( + "openrouter" in _effective_base + and "claude" in (self.model or "").lower() + ): headers = client_kwargs.get("default_headers") or {} existing_beta = headers.get("x-anthropic-beta", "") _FINE_GRAINED = "fine-grained-tool-streaming-2025-05-14" @@ -976,7 +1030,9 @@ class AIAgent: self.api_key = client_kwargs.get("api_key", "") self.base_url = client_kwargs.get("base_url", self.base_url) try: - self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True) + self.client = self._create_openai_client( + client_kwargs, reason="agent_init", shared=True + ) if not self.quiet_mode: print(f"🤖 AI Agent initialized with model: {self.model}") if base_url: @@ -986,20 +1042,27 @@ class AIAgent: if key_used and key_used != "dummy-key" and len(key_used) > 12: print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}") else: - print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')") + print( + f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')" + ) except Exception as e: raise RuntimeError(f"Failed to initialize OpenAI client: {e}") - + # Provider fallback chain — ordered list of backup providers tried # when the primary is exhausted (rate-limit, overload, connection # failure). Supports both legacy single-dict ``fallback_model`` and # new list ``fallback_providers`` format. if isinstance(fallback_model, list): self._fallback_chain = [ - f for f in fallback_model + f + for f in fallback_model if isinstance(f, dict) and f.get("provider") and f.get("model") ] - elif isinstance(fallback_model, dict) and fallback_model.get("provider") and fallback_model.get("model"): + elif ( + isinstance(fallback_model, dict) + and fallback_model.get("provider") + and fallback_model.get("model") + ): self._fallback_chain = [fallback_model] else: self._fallback_chain = [] @@ -1012,8 +1075,12 @@ class AIAgent: fb = self._fallback_chain[0] print(f"🔄 Fallback model: {fb['model']} ({fb['provider']})") else: - print(f"🔄 Fallback chain ({len(self._fallback_chain)} providers): " + - " → ".join(f"{f['model']} ({f['provider']})" for f in self._fallback_chain)) + print( + f"🔄 Fallback chain ({len(self._fallback_chain)} providers): " + + " → ".join( + f"{f['model']} ({f['provider']})" for f in self._fallback_chain + ) + ) # Get available tools with filtering self.tools = get_tool_definitions( @@ -1021,7 +1088,7 @@ class AIAgent: disabled_toolsets=disabled_toolsets, quiet_mode=self.quiet_mode, ) - + # Show tool configuration and store valid tool names for validation self.valid_tool_names = set() if self.tools: @@ -1029,7 +1096,7 @@ class AIAgent: tool_names = sorted(self.valid_tool_names) if not self.quiet_mode: print(f"🛠️ Loaded {len(self.tools)} tools: {', '.join(tool_names)}") - + # Show filtering info if applied if enabled_toolsets: print(f" ✅ Enabled toolsets: {', '.join(enabled_toolsets)}") @@ -1037,28 +1104,40 @@ class AIAgent: print(f" ❌ Disabled toolsets: {', '.join(disabled_toolsets)}") elif not self.quiet_mode: print("🛠️ No tools loaded (all tools filtered out or unavailable)") - + # Check tool requirements if self.tools and not self.quiet_mode: requirements = check_toolset_requirements() - missing_reqs = [name for name, available in requirements.items() if not available] + missing_reqs = [ + name for name, available in requirements.items() if not available + ] if missing_reqs: - print(f"⚠️ Some tools may not work due to missing requirements: {missing_reqs}") - + print( + f"⚠️ Some tools may not work due to missing requirements: {missing_reqs}" + ) + # Show trajectory saving status if self.save_trajectories and not self.quiet_mode: print("📝 Trajectory saving enabled") - + # Show ephemeral system prompt status if self.ephemeral_system_prompt and not self.quiet_mode: - prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt - print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)") - + prompt_preview = ( + self.ephemeral_system_prompt[:60] + "..." + if len(self.ephemeral_system_prompt) > 60 + else self.ephemeral_system_prompt + ) + print( + f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)" + ) + # Show prompt caching status if self._use_prompt_caching and not self.quiet_mode: - source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter" + source = ( + "native Anthropic" if is_native_anthropic else "Claude via OpenRouter" + ) print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)") - + # Session logging setup - auto-save conversation trajectories for debugging self.session_start = datetime.now() if session_id: @@ -1069,35 +1148,39 @@ class AIAgent: timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S") short_uuid = uuid.uuid4().hex[:6] self.session_id = f"{timestamp_str}_{short_uuid}" - + # Session logs go into ~/.hermes/sessions/ alongside gateway sessions hermes_home = get_hermes_home() self.logs_dir = hermes_home / "sessions" self.logs_dir.mkdir(parents=True, exist_ok=True) self.session_log_file = self.logs_dir / f"session_{self.session_id}.json" - + # Track conversation messages for session logging self._session_messages: List[Dict[str, Any]] = [] - + # Cached system prompt -- built once per session, only rebuilt on compression self._cached_system_prompt: Optional[str] = None - + # Filesystem checkpoint manager (transparent — not a tool) from tools.checkpoint_manager import CheckpointManager + self._checkpoint_mgr = CheckpointManager( enabled=checkpoints_enabled, max_snapshots=checkpoint_max_snapshots, ) - + # SQLite session store (optional -- provided by CLI or gateway) self._session_db = session_db self._parent_session_id = parent_session_id - self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes + self._last_flushed_db_idx = ( + 0 # tracks DB-write cursor to prevent duplicate writes + ) if self._session_db: try: self._session_db.create_session( session_id=self.session_id, - source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"), + source=self.platform + or os.environ.get("HERMES_SESSION_SOURCE", "cli"), model=self.model, model_config={ "max_iterations": self.max_iterations, @@ -1115,16 +1198,19 @@ class AIAgent: # lock clears. The session row may be missing from the index # for this run, but that is recoverable (flushes upsert rows). logger.warning( - "Session DB create_session failed (session_search still available): %s", e + "Session DB create_session failed (session_search still available): %s", + e, ) - + # In-memory todo list for task planning (one per agent/session) from tools.todo_tool import TodoStore + self._todo_store = TodoStore() - + # Load config once for memory, skills, and compression sections try: from hermes_cli.config import load_config as _load_agent_config + _agent_cfg = _load_agent_config() except Exception: _agent_cfg = {} @@ -1141,11 +1227,14 @@ class AIAgent: try: mem_config = _agent_cfg.get("memory", {}) self._memory_enabled = mem_config.get("memory_enabled", False) - self._user_profile_enabled = mem_config.get("user_profile_enabled", False) + self._user_profile_enabled = mem_config.get( + "user_profile_enabled", False + ) self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10)) self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6)) if self._memory_enabled or self._user_profile_enabled: from tools.memory_tool import MemoryStore + self._memory_store = MemoryStore( memory_char_limit=mem_config.get("memory_char_limit", 2200), user_char_limit=mem_config.get("user_char_limit", 1375), @@ -1153,15 +1242,15 @@ class AIAgent: self._memory_store.load_from_disk() except Exception: pass # Memory is optional -- don't break agent init - - # Memory provider plugin (external — one at a time, alongside built-in) # Reads memory.provider from config to select which plugin to activate. self._memory_manager = None if not skip_memory: try: - _mem_provider_name = mem_config.get("provider", "") if mem_config else "" + _mem_provider_name = ( + mem_config.get("provider", "") if mem_config else "" + ) # Auto-migrate: if Honcho was actively configured (enabled + # credentials) but memory.provider is not set, activate the @@ -1170,20 +1259,29 @@ class AIAgent: # file may be from a different tool. if not _mem_provider_name: try: - from plugins.memory.honcho.client import HonchoClientConfig as _HCC + from plugins.memory.honcho.client import ( + HonchoClientConfig as _HCC, + ) + _hcfg = _HCC.from_global_config() if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url): _mem_provider_name = "honcho" # Persist so this only auto-migrates once try: - from hermes_cli.config import load_config as _lc, save_config as _sc + from hermes_cli.config import ( + load_config as _lc, + save_config as _sc, + ) + _cfg = _lc() _cfg.setdefault("memory", {})["provider"] = "honcho" _sc(_cfg) except Exception: pass if not self.quiet_mode: - print(" ✓ Auto-migrated Honcho to memory provider plugin.") + print( + " ✓ Auto-migrated Honcho to memory provider plugin." + ) print(" Your config and data are preserved.\n") except Exception: pass @@ -1191,12 +1289,14 @@ class AIAgent: if _mem_provider_name: from agent.memory_manager import MemoryManager as _MemoryManager from plugins.memory import load_memory_provider as _load_mem + self._memory_manager = _MemoryManager() _mp = _load_mem(_mem_provider_name) if _mp and _mp.is_available(): self._memory_manager.add_provider(_mp) if self._memory_manager.providers: from hermes_constants import get_hermes_home as _ghh + _init_kwargs = { "session_id": self.session_id, "platform": platform or "cli", @@ -1209,15 +1309,21 @@ class AIAgent: # Profile identity for per-profile provider scoping try: from hermes_cli.profiles import get_active_profile_name + _profile = get_active_profile_name() _init_kwargs["agent_identity"] = _profile _init_kwargs["agent_workspace"] = "hermes" except Exception: pass self._memory_manager.initialize_all(**_init_kwargs) - logger.info("Memory provider '%s' activated", _mem_provider_name) + logger.info( + "Memory provider '%s' activated", _mem_provider_name + ) else: - logger.debug("Memory provider '%s' not found or not available", _mem_provider_name) + logger.debug( + "Memory provider '%s' not found or not available", + _mem_provider_name, + ) self._memory_manager = None except Exception as _mpe: logger.warning("Memory provider plugin init failed: %s", _mpe) @@ -1236,7 +1342,9 @@ class AIAgent: self._skill_nudge_interval = 10 try: skills_config = _agent_cfg.get("skills", {}) - self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 10)) + self._skill_nudge_interval = int( + skills_config.get("creation_nudge_interval", 10) + ) except Exception: pass @@ -1254,7 +1362,11 @@ class AIAgent: if not isinstance(_compression_cfg, dict): _compression_cfg = {} compression_threshold = float(_compression_cfg.get("threshold", 0.50)) - compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes") + compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ( + "true", + "1", + "yes", + ) compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20)) compression_protect_last = int(_compression_cfg.get("protect_last_n", 20)) @@ -1275,6 +1387,7 @@ class AIAgent: _config_context_length, ) import sys + print( f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1290,6 +1403,7 @@ class AIAgent: if _config_context_length is None: try: from hermes_cli.config import get_compatible_custom_providers + _custom_providers = get_compatible_custom_providers(_agent_cfg) except Exception: _custom_providers = _agent_cfg.get("custom_providers") @@ -1314,9 +1428,11 @@ class AIAgent: "custom_providers: %r — must be a plain " "integer (e.g. 256000, not '256K'). " "Falling back to auto-detection.", - self.model, _cp_ctx, + self.model, + _cp_ctx, ) import sys + print( f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1324,7 +1440,7 @@ class AIAgent: file=sys.stderr, ) break - + # Select context engine: config-driven (like memory providers). # 1. Check config.yaml context.engine setting # 2. Check plugins/context_engine// directory (repo-shipped) @@ -1333,7 +1449,9 @@ class AIAgent: _selected_engine = None _engine_name = "compressor" # default try: - _ctx_cfg = _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {} + _ctx_cfg = ( + _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {} + ) _engine_name = _ctx_cfg.get("engine", "compressor") or "compressor" except Exception: pass @@ -1342,14 +1460,18 @@ class AIAgent: # Try loading from plugins/context_engine// try: from plugins.context_engine import load_context_engine + _selected_engine = load_context_engine(_engine_name) except Exception as _ce_load_err: - logger.debug("Context engine load from plugins/context_engine/: %s", _ce_load_err) + logger.debug( + "Context engine load from plugins/context_engine/: %s", _ce_load_err + ) # Try general plugin system as fallback if _selected_engine is None: try: from hermes_cli.plugins import get_plugin_context_engine + _candidate = get_plugin_context_engine() if _candidate and _candidate.name == _engine_name: _selected_engine = _candidate @@ -1367,6 +1489,7 @@ class AIAgent: self.context_compressor = _selected_engine # Resolve context_length for plugin engines — mirrors switch_model() path from agent.model_metadata import get_model_context_length + _plugin_ctx_len = get_model_context_length( self.model, base_url=self.base_url, @@ -1403,6 +1526,7 @@ class AIAgent: # Reject models whose context window is below the minimum required # for reliable tool-calling workflows (64K tokens). from agent.model_metadata import MINIMUM_CONTEXT_LENGTH + _ctx = getattr(self.context_compressor, "context_length", 0) if _ctx and _ctx < MINIMUM_CONTEXT_LENGTH: raise ValueError( @@ -1415,7 +1539,11 @@ class AIAgent: # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand) self._context_engine_tool_names: set = set() - if hasattr(self, "context_compressor") and self.context_compressor and self.tools is not None: + if ( + hasattr(self, "context_compressor") + and self.context_compressor + and self.tools is not None + ): for _schema in self.context_compressor.get_tool_schemas(): _wrapped = {"type": "function", "function": _schema} self.tools.append(_wrapped) @@ -1432,7 +1560,9 @@ class AIAgent: hermes_home=str(get_hermes_home()), platform=self.platform or "cli", model=self.model, - context_length=getattr(self.context_compressor, "context_length", 0), + context_length=getattr( + self.context_compressor, "context_length", 0 + ), ) except Exception as _ce_err: logger.debug("Context engine on_session_start: %s", _ce_err) @@ -1455,7 +1585,7 @@ class AIAgent: self.session_estimated_cost_usd = 0.0 self.session_cost_status = "unknown" self.session_cost_source = "none" - + # ── Ollama num_ctx injection ── # Ollama defaults to 2048 context regardless of the model's capabilities. # When running against an Ollama server, detect the model's max context @@ -1469,8 +1599,14 @@ class AIAgent: try: self._ollama_num_ctx = int(_ollama_num_ctx_override) except (TypeError, ValueError): - logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override) - if self._ollama_num_ctx is None and self.base_url and is_local_endpoint(self.base_url): + logger.debug( + "Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override + ) + if ( + self._ollama_num_ctx is None + and self.base_url + and is_local_endpoint(self.base_url) + ): try: _detected = query_ollama_num_ctx(self.model, self.base_url) if _detected and _detected > 0: @@ -1485,9 +1621,13 @@ class AIAgent: if not self.quiet_mode: if compression_enabled: - print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})") + print( + f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold * 100)}% = {self.context_compressor.threshold_tokens:,})" + ) else: - print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)") + print( + f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)" + ) # Check immediately so CLI users see the warning at startup. # Gateway status_callback is not yet wired, so any warning is stored @@ -1519,15 +1659,17 @@ class AIAgent: "compressor_threshold_tokens": _cc.threshold_tokens, } if self.api_mode == "anthropic_messages": - self._primary_runtime.update({ - "anthropic_api_key": self._anthropic_api_key, - "anthropic_base_url": self._anthropic_base_url, - "is_anthropic_oauth": self._is_anthropic_oauth, - }) + self._primary_runtime.update( + { + "anthropic_api_key": self._anthropic_api_key, + "anthropic_base_url": self._anthropic_base_url, + "is_anthropic_oauth": self._is_anthropic_oauth, + } + ) def reset_session_state(self): """Reset all session-scoped token counters to 0 for a fresh session. - + This method encapsulates the reset logic for all session-level metrics including: - Token usage counters (input, output, total, prompt, completion) @@ -1536,10 +1678,10 @@ class AIAgent: - Reasoning tokens - Estimated cost tracking - Context compressor internal counters - + The method safely handles optional attributes (e.g., context compressor) using ``hasattr`` checks. - + This keeps the counter reset logic DRY and maintainable in one place rather than scattering it across multiple methods. """ @@ -1556,15 +1698,17 @@ class AIAgent: self.session_estimated_cost_usd = 0.0 self.session_cost_status = "unknown" self.session_cost_source = "none" - + # Turn counter (added after reset_session_state was first written — #2635) self._user_turn_count = 0 # Context engine reset (works for both built-in compressor and plugins) if hasattr(self, "context_compressor") and self.context_compressor: self.context_compressor.on_session_reset() - - def switch_model(self, new_model, new_provider, api_key='', base_url='', api_mode=''): + + def switch_model( + self, new_model, new_provider, api_key="", base_url="", api_mode="" + ): """Switch the model/provider in-place for a live agent. Called by the /model command handlers (CLI and gateway) after @@ -1603,16 +1747,24 @@ class AIAgent: resolve_anthropic_token, _is_oauth_token, ) + # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own # API key — falling back would send Anthropic credentials to third-party endpoints. _is_native_anthropic = new_provider == "anthropic" - effective_key = (api_key or self.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or self.api_key or "") + effective_key = ( + (api_key or self.api_key or resolve_anthropic_token() or "") + if _is_native_anthropic + else (api_key or self.api_key or "") + ) self.api_key = effective_key self._anthropic_api_key = effective_key - self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None) + self._anthropic_base_url = base_url or getattr( + self, "_anthropic_base_url", None + ) self._anthropic_client = build_anthropic_client( - effective_key, self._anthropic_base_url, + effective_key, + self._anthropic_base_url, ) self._is_anthropic_oauth = _is_oauth_token(effective_key) self.client = None @@ -1631,15 +1783,18 @@ class AIAgent: ) # ── Re-evaluate prompt caching ── - is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower()) - or is_native_anthropic + is_native_anthropic = ( + api_mode == "anthropic_messages" and new_provider == "anthropic" ) + self._use_prompt_caching = ( + "openrouter" in (self.base_url or "").lower() + and "claude" in new_model.lower() + ) or is_native_anthropic # ── Update context compressor ── if hasattr(self, "context_compressor") and self.context_compressor: from agent.model_metadata import get_model_context_length + new_context_length = get_model_context_length( self.model, base_url=self.base_url, @@ -1660,7 +1815,11 @@ class AIAgent: self._cached_system_prompt = None # ── Update _primary_runtime so the change persists across turns ── - _cc = self.context_compressor if hasattr(self, "context_compressor") and self.context_compressor else None + _cc = ( + self.context_compressor + if hasattr(self, "context_compressor") and self.context_compressor + else None + ) self._primary_runtime = { "model": self.model, "provider": self.provider, @@ -1669,19 +1828,27 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, - "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model, - "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url, + "compressor_model": getattr(_cc, "model", self.model) + if _cc + else self.model, + "compressor_base_url": getattr(_cc, "base_url", self.base_url) + if _cc + else self.base_url, "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "", - "compressor_provider": getattr(_cc, "provider", self.provider) if _cc else self.provider, + "compressor_provider": getattr(_cc, "provider", self.provider) + if _cc + else self.provider, "compressor_context_length": _cc.context_length if _cc else 0, "compressor_threshold_tokens": _cc.threshold_tokens if _cc else 0, } if api_mode == "anthropic_messages": - self._primary_runtime.update({ - "anthropic_api_key": self._anthropic_api_key, - "anthropic_base_url": self._anthropic_base_url, - "is_anthropic_oauth": self._is_anthropic_oauth, - }) + self._primary_runtime.update( + { + "anthropic_api_key": self._anthropic_api_key, + "anthropic_base_url": self._anthropic_base_url, + "is_anthropic_oauth": self._is_anthropic_oauth, + } + ) # ── Reset fallback state ── self._fallback_activated = False @@ -1689,7 +1856,10 @@ class AIAgent: logging.info( "Model switched in-place: %s (%s) -> %s (%s)", - old_model, old_provider, new_model, new_provider, + old_model, + old_provider, + new_model, + new_provider, ) def _safe_print(self, *args, **kwargs): @@ -1844,7 +2014,9 @@ class AIAgent: # ignoring the explicit config value. Pass it as the highest- # priority hint so the configured value is always respected. _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {}) - _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None + _aux_context_config = ( + _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None + ) if _aux_context_config is not None: try: _aux_context_config = int(_aux_context_config) @@ -1862,7 +2034,9 @@ class AIAgent: if aux_context < threshold: # Suggest a threshold that would fit the aux model, # rounded down to a clean percentage. - safe_pct = int((aux_context / self.context_compressor.context_length) * 100) + safe_pct = int( + (aux_context / self.context_compressor.context_length) * 100 + ) msg = ( f"⚠ Compression model ({aux_model}) context " f"is {aux_context:,} tokens, but the main model's " @@ -1892,9 +2066,7 @@ class AIAgent: threshold, ) except Exception as exc: - logger.debug( - "Compression feasibility check failed (non-fatal): %s", exc - ) + logger.debug("Compression feasibility check failed (non-fatal): %s", exc) def _replay_compression_warning(self) -> None: """Re-send the compression warning through ``status_callback``. @@ -1939,7 +2111,7 @@ class AIAgent: def _max_tokens_param(self, value: int) -> dict: """Return the correct max tokens kwarg for the current provider. - + OpenAI's newer models (gpt-4o, o-series, gpt-5+) require 'max_completion_tokens'. OpenRouter, local models, and older OpenAI models use 'max_tokens'. @@ -1970,19 +2142,33 @@ class AIAgent: # Check if there's any non-whitespace content remaining return bool(cleaned.strip()) - + def _strip_think_blocks(self, content: str) -> str: """Remove reasoning/thinking blocks from content, returning only visible text.""" if not content: return "" # Strip all reasoning tag variants: , , , # , , (Gemma 4) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'\s*', '', content, flags=re.IGNORECASE) + content = re.sub(r".*?", "", content, flags=re.DOTALL) + content = re.sub( + r".*?", "", content, flags=re.DOTALL | re.IGNORECASE + ) + content = re.sub(r".*?", "", content, flags=re.DOTALL) + content = re.sub( + r".*?", + "", + content, + flags=re.DOTALL, + ) + content = re.sub( + r".*?", "", content, flags=re.DOTALL | re.IGNORECASE + ) + content = re.sub( + r"\s*", + "", + content, + flags=re.IGNORECASE, + ) return content def _looks_like_codex_intermediate_ack( @@ -1995,14 +2181,19 @@ class AIAgent: if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages): return False - assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower() + assistant_text = ( + self._strip_think_blocks(assistant_content or "").strip().lower() + ) if not assistant_text: return False if len(assistant_text) > 1200: return False has_future_ack = bool( - re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text) + re.search( + r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", + assistant_text, + ) ) if not has_future_ack: return False @@ -2050,51 +2241,60 @@ class AIAgent: or "~/" in user_text or "/" in user_text ) - assistant_mentions_action = any(marker in assistant_text for marker in action_markers) + assistant_mentions_action = any( + marker in assistant_text for marker in action_markers + ) assistant_targets_workspace = any( marker in assistant_text for marker in workspace_markers ) - return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action - - + return ( + user_targets_workspace or assistant_targets_workspace + ) and assistant_mentions_action + def _extract_reasoning(self, assistant_message) -> Optional[str]: """ Extract reasoning/thinking content from an assistant message. - + OpenRouter and various providers can return reasoning in multiple formats: 1. message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.) 2. message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.) 3. message.reasoning_details - Array of {type, summary, ...} objects (OpenRouter unified) - + Args: assistant_message: The assistant message object from the API response - + Returns: Combined reasoning text, or None if no reasoning found """ reasoning_parts = [] - + # Check direct reasoning field - if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning: + if hasattr(assistant_message, "reasoning") and assistant_message.reasoning: reasoning_parts.append(assistant_message.reasoning) - + # Check reasoning_content field (alternative name used by some providers) - if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content: + if ( + hasattr(assistant_message, "reasoning_content") + and assistant_message.reasoning_content + ): # Don't duplicate if same as reasoning if assistant_message.reasoning_content not in reasoning_parts: reasoning_parts.append(assistant_message.reasoning_content) - + # Check reasoning_details array (OpenRouter unified format) # Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...] - if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: + if ( + hasattr(assistant_message, "reasoning_details") + and assistant_message.reasoning_details + ): for detail in assistant_message.reasoning_details: if isinstance(detail, dict): # Extract summary from reasoning detail object summary = ( - detail.get('summary') - or detail.get('thinking') - or detail.get('content') - or detail.get('text') + detail.get("summary") + or detail.get("thinking") + or detail.get("content") + or detail.get("text") ) if summary and summary not in reasoning_parts: reasoning_parts.append(summary) @@ -2117,11 +2317,11 @@ class AIAgent: cleaned = block.strip() if cleaned and cleaned not in reasoning_parts: reasoning_parts.append(cleaned) - + # Combine all reasoning parts if reasoning_parts: return "\n\n".join(reasoning_parts) - + return None def _cleanup_task_resources(self, task_id: str) -> None: @@ -2217,11 +2417,14 @@ class AIAgent: def _run_review(): import contextlib, os as _os + review_agent = None try: - with open(_os.devnull, "w") as _devnull, \ - contextlib.redirect_stdout(_devnull), \ - contextlib.redirect_stderr(_devnull): + with ( + open(_os.devnull, "w") as _devnull, + contextlib.redirect_stdout(_devnull), + contextlib.redirect_stderr(_devnull), + ): review_agent = AIAgent( model=self.model, max_iterations=8, @@ -2258,14 +2461,34 @@ class AIAgent: actions.append(message) elif "updated" in message.lower(): actions.append(message) - elif "added" in message.lower() or (target and "add" in message.lower()): - label = "Memory" if target == "memory" else "User profile" if target == "user" else target + elif "added" in message.lower() or ( + target and "add" in message.lower() + ): + label = ( + "Memory" + if target == "memory" + else "User profile" + if target == "user" + else target + ) actions.append(f"{label} updated") elif "Entry added" in message: - label = "Memory" if target == "memory" else "User profile" if target == "user" else target + label = ( + "Memory" + if target == "memory" + else "User profile" + if target == "user" + else target + ) actions.append(f"{label} updated") elif "removed" in message.lower() or "replaced" in message.lower(): - label = "Memory" if target == "memory" else "User profile" if target == "user" else target + label = ( + "Memory" + if target == "memory" + else "User profile" + if target == "user" + else target + ) actions.append(f"{label} updated") if actions: @@ -2311,7 +2534,9 @@ class AIAgent: if isinstance(msg, dict) and msg.get("role") == "user": msg["content"] = override - def _persist_session(self, messages: List[Dict], conversation_history: List[Dict] = None): + def _persist_session( + self, messages: List[Dict], conversation_history: List[Dict] = None + ): """Save session state to both JSON log and SQLite on any exit path. Ensures conversations are never lost, even on errors or early returns. @@ -2324,7 +2549,9 @@ class AIAgent: self._save_session_log(messages) self._flush_messages_to_session_db(messages, conversation_history) - def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None): + def _flush_messages_to_session_db( + self, messages: List[Dict], conversation_history: List[Dict] = None + ): """Persist any un-flushed messages to the SQLite session store. Uses _last_flushed_db_idx to track which messages have already been @@ -2365,8 +2592,12 @@ class AIAgent: tool_call_id=msg.get("tool_call_id"), finish_reason=msg.get("finish_reason"), reasoning=msg.get("reasoning") if role == "assistant" else None, - reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, - codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, + reasoning_details=msg.get("reasoning_details") + if role == "assistant" + else None, + codex_reasoning_items=msg.get("codex_reasoning_items") + if role == "assistant" + else None, ) self._last_flushed_db_idx = len(messages) except Exception as e: @@ -2375,44 +2606,44 @@ class AIAgent: def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]: """ Get messages up to (but not including) the last assistant turn. - + This is used when we need to "roll back" to the last successful point in the conversation, typically when the final assistant message is incomplete or malformed. - + Args: messages: Full message list - + Returns: Messages up to the last complete assistant turn (ending with user/tool message) """ if not messages: return [] - + # Find the index of the last assistant message last_assistant_idx = None for i in range(len(messages) - 1, -1, -1): if messages[i].get("role") == "assistant": last_assistant_idx = i break - + if last_assistant_idx is None: # No assistant message found, return all messages return messages.copy() - + # Return everything up to (not including) the last assistant message return messages[:last_assistant_idx] - + def _format_tools_for_system_message(self) -> str: """ Format tool definitions for the system message in the trajectory format. - + Returns: str: JSON string representation of tool definitions """ if not self.tools: return "[]" - + # Convert tool definitions to the format expected in trajectories formatted_tools = [] for tool in self.tools: @@ -2421,26 +2652,28 @@ class AIAgent: "name": func["name"], "description": func.get("description", ""), "parameters": func.get("parameters", {}), - "required": None # Match the format in the example + "required": None, # Match the format in the example } formatted_tools.append(formatted_tool) - + return json.dumps(formatted_tools, ensure_ascii=False) - - def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]: + + def _convert_to_trajectory_format( + self, messages: List[Dict[str, Any]], user_query: str, completed: bool + ) -> List[Dict[str, Any]]: """ Convert internal message format to trajectory format for saving. - + Args: messages (List[Dict]): Internal message history user_query (str): Original user query completed (bool): Whether the conversation completed successfully - + Returns: List[Dict]: Messages in trajectory format """ trajectory = [] - + # Add system message with tool definitions system_msg = ( "You are a function calling AI model. You are provided with function signatures within XML tags. " @@ -2455,71 +2688,69 @@ class AIAgent: "Each function call should be enclosed within XML tags.\n" "Example:\n\n{'name': ,'arguments': }\n" ) - - trajectory.append({ - "from": "system", - "value": system_msg - }) - + + trajectory.append({"from": "system", "value": system_msg}) + # Add the actual user prompt (from the dataset) as the first human message - trajectory.append({ - "from": "human", - "value": user_query - }) - + trajectory.append({"from": "human", "value": user_query}) + # Skip the first message (the user query) since we already added it above. # Prefill messages are injected at API-call time only (not in the messages # list), so no offset adjustment is needed here. i = 1 - + while i < len(messages): msg = messages[i] - + if msg["role"] == "assistant": # Check if this message has tool calls if "tool_calls" in msg and msg["tool_calls"]: # Format assistant message with tool calls # Add tags around reasoning for trajectory storage content = "" - + # Prepend reasoning in tags if available (native thinking tokens) if msg.get("reasoning") and msg["reasoning"].strip(): content = f"\n{msg['reasoning']}\n\n" - + if msg.get("content") and msg["content"].strip(): # Convert any tags to tags # (used when native thinking is disabled and model reasons via XML) content += convert_scratchpad_to_think(msg["content"]) + "\n" - + # Add tool calls wrapped in XML tags for tool_call in msg["tool_calls"]: - if not tool_call or not isinstance(tool_call, dict): continue + if not tool_call or not isinstance(tool_call, dict): + continue # Parse arguments - should always succeed since we validate during conversation # but keep try-except as safety net try: - arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"] + arguments = ( + json.loads(tool_call["function"]["arguments"]) + if isinstance(tool_call["function"]["arguments"], str) + else tool_call["function"]["arguments"] + ) except json.JSONDecodeError: # This shouldn't happen since we validate and retry during conversation, # but if it does, log warning and use empty dict - logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}") + logging.warning( + f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}" + ) arguments = {} - + tool_call_json = { "name": tool_call["function"]["name"], - "arguments": arguments + "arguments": arguments, } content += f"\n{json.dumps(tool_call_json, ensure_ascii=False)}\n\n" - + # Ensure every gpt turn has a block (empty if no reasoning) # so the format is consistent for training data if "" not in content: content = "\n\n" + content - - trajectory.append({ - "from": "gpt", - "value": content.rstrip() - }) - + + trajectory.append({"from": "gpt", "value": content.rstrip()}) + # Collect all subsequent tool responses tool_responses = [] j = i + 1 @@ -2527,7 +2758,7 @@ class AIAgent: tool_msg = messages[j] # Format tool response with XML tags tool_response = "\n" - + # Try to parse tool content as JSON if it looks like JSON tool_content = tool_msg["content"] try: @@ -2535,67 +2766,65 @@ class AIAgent: tool_content = json.loads(tool_content) except (json.JSONDecodeError, AttributeError): pass # Keep as string if not valid JSON - + tool_index = len(tool_responses) tool_name = ( msg["tool_calls"][tool_index]["function"]["name"] if tool_index < len(msg["tool_calls"]) else "unknown" ) - tool_response += json.dumps({ - "tool_call_id": tool_msg.get("tool_call_id", ""), - "name": tool_name, - "content": tool_content - }, ensure_ascii=False) + tool_response += json.dumps( + { + "tool_call_id": tool_msg.get("tool_call_id", ""), + "name": tool_name, + "content": tool_content, + }, + ensure_ascii=False, + ) tool_response += "\n" tool_responses.append(tool_response) j += 1 - + # Add all tool responses as a single message if tool_responses: - trajectory.append({ - "from": "tool", - "value": "\n".join(tool_responses) - }) + trajectory.append( + {"from": "tool", "value": "\n".join(tool_responses)} + ) i = j - 1 # Skip the tool messages we just processed - + else: # Regular assistant message without tool calls # Add tags around reasoning for trajectory storage content = "" - + # Prepend reasoning in tags if available (native thinking tokens) if msg.get("reasoning") and msg["reasoning"].strip(): content = f"\n{msg['reasoning']}\n\n" - + # Convert any tags to tags # (used when native thinking is disabled and model reasons via XML) raw_content = msg["content"] or "" content += convert_scratchpad_to_think(raw_content) - + # Ensure every gpt turn has a block (empty if no reasoning) if "" not in content: content = "\n\n" + content - - trajectory.append({ - "from": "gpt", - "value": content.strip() - }) - + + trajectory.append({"from": "gpt", "value": content.strip()}) + elif msg["role"] == "user": - trajectory.append({ - "from": "human", - "value": msg["content"] - }) - + trajectory.append({"from": "human", "value": msg["content"]}) + i += 1 - + return trajectory - - def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool): + + def _save_trajectory( + self, messages: List[Dict[str, Any]], user_query: str, completed: bool + ): """ Save conversation trajectory to JSONL file. - + Args: messages (List[Dict]): Complete message history user_query (str): Original user query @@ -2603,10 +2832,10 @@ class AIAgent: """ if not self.save_trajectories: return - + trajectory = self._convert_to_trajectory_format(messages, user_query, completed) _save_trajectory_to_file(trajectory, self.model, completed) - + @staticmethod def _summarize_api_error(error: Exception) -> str: """Extract a human-readable one-liner from an API error. @@ -2616,6 +2845,7 @@ class AIAgent: str(error) for everything else. """ import re as _re + raw = str(error) # Cloudflare / proxy HTML pages: grab the for a clean summary @@ -2637,7 +2867,11 @@ class AIAgent: # JSON body errors from OpenAI/Anthropic SDKs body = getattr(error, "body", None) if isinstance(body, dict): - msg = body.get("error", {}).get("message") if isinstance(body.get("error"), dict) else body.get("message") + msg = ( + body.get("error", {}).get("message") + if isinstance(body.get("error"), dict) + else body.get("message") + ) if msg: status_code = getattr(error, "status_code", None) prefix = f"HTTP {status_code}: " if status_code else "" @@ -2658,27 +2892,27 @@ class AIAgent: def _clean_error_message(self, error_msg: str) -> str: """ Clean up error messages for user display, removing HTML content and truncating. - + Args: error_msg: Raw error message from API or exception - + Returns: Clean, user-friendly error message """ if not error_msg: return "Unknown error" - + # Remove HTML content (common with CloudFlare and gateway error pages) - if error_msg.strip().startswith('<!DOCTYPE html') or '<html' in error_msg: + if error_msg.strip().startswith("<!DOCTYPE html") or "<html" in error_msg: return "Service temporarily unavailable (HTML error page returned)" - + # Remove newlines and excessive whitespace - cleaned = ' '.join(error_msg.split()) - + cleaned = " ".join(error_msg.split()) + # Truncate if too long if len(cleaned) > 150: cleaned = cleaned[:150] + "..." - + return cleaned @staticmethod @@ -2730,10 +2964,18 @@ class AIAgent: if "reset_at" not in context: message = context.get("message") or "" if isinstance(message, str): - delay_match = re.search(r"quotaResetDelay[:\s\"]+(\\d+(?:\\.\\d+)?)(ms|s)", message, re.IGNORECASE) + delay_match = re.search( + r"quotaResetDelay[:\s\"]+(\\d+(?:\\.\\d+)?)(ms|s)", + message, + re.IGNORECASE, + ) if delay_match: value = float(delay_match.group(1)) - seconds = value / 1000.0 if delay_match.group(2).lower() == "ms" else value + seconds = ( + value / 1000.0 + if delay_match.group(2).lower() == "ms" + else value + ) context["reset_at"] = time.time() + seconds else: sec_match = re.search( @@ -2746,7 +2988,9 @@ class AIAgent: return context - def _usage_summary_for_api_request_hook(self, response: Any) -> Optional[Dict[str, Any]]: + def _usage_summary_for_api_request_hook( + self, response: Any + ) -> Optional[Dict[str, Any]]: """Token buckets for ``post_api_request`` plugins (no raw ``response`` object).""" if response is None: return None @@ -2819,7 +3063,9 @@ class AIAgent: response_obj = getattr(error, "response", None) if response_obj is not None: try: - error_info["response_status"] = getattr(response_obj, "status_code", None) + error_info["response_status"] = getattr( + response_obj, "status_code", None + ) error_info["response_text"] = response_obj.text except Exception as e: logger.debug("Could not extract error response details: %s", e) @@ -2827,21 +3073,29 @@ class AIAgent: dump_payload["error"] = error_info timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f") - dump_file = self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json" + dump_file = ( + self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json" + ) dump_file.write_text( json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str), encoding="utf-8", ) - self._vprint(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}") + self._vprint( + f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}" + ) if env_var_enabled("HERMES_DUMP_REQUEST_STDOUT"): - print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)) + print( + json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str) + ) return dump_file except Exception as dump_error: if self.verbose_logging: - logging.warning(f"Failed to dump API request debug payload: {dump_error}") + logging.warning( + f"Failed to dump API request debug payload: {dump_error}" + ) return None @staticmethod @@ -2850,8 +3104,8 @@ class AIAgent: if not content: return content content = convert_scratchpad_to_think(content) - content = re.sub(r'\n+(<think>)', r'\n\1', content) - content = re.sub(r'(</think>)\n+', r'\1\n', content) + content = re.sub(r"\n+(<think>)", r"\n\1", content) + content = re.sub(r"(</think>)\n+", r"\1\n", content) return content.strip() def _save_session_log(self, messages: List[Dict[str, Any]] = None): @@ -2885,12 +3139,17 @@ class AIAgent: # with partial history and would otherwise clobber the full JSON log. if self.session_log_file.exists(): try: - existing = json.loads(self.session_log_file.read_text(encoding="utf-8")) - existing_count = existing.get("message_count", len(existing.get("messages", []))) + existing = json.loads( + self.session_log_file.read_text(encoding="utf-8") + ) + existing_count = existing.get( + "message_count", len(existing.get("messages", [])) + ) if existing_count > len(cleaned): logging.debug( "Skipping session log overwrite: existing has %d messages, current has %d", - existing_count, len(cleaned), + existing_count, + len(cleaned), ) return except Exception: @@ -2919,26 +3178,26 @@ class AIAgent: except Exception as e: if self.verbose_logging: logging.warning(f"Failed to save session log: {e}") - + def interrupt(self, message: str = None) -> None: """ Request the agent to interrupt its current tool-calling loop. - + Call this from another thread (e.g., input handler, message receiver) to gracefully stop the agent and process a new message. - + Also signals long-running tool executions (e.g. terminal commands) to terminate early, so the agent can respond immediately. - + Args: message: Optional new message that triggered the interrupt. If provided, the agent will include this in its response context. - + Example (CLI): # In a separate input thread: if user_typed_something: agent.interrupt(user_input) - + Example (Messaging): # When new message arrives for active session: if session_has_running_agent: @@ -2959,8 +3218,17 @@ class AIAgent: except Exception as e: logger.debug("Failed to propagate interrupt to child agent: %s", e) if not self.quiet_mode: - print("\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else "")) - + print( + "\n⚡ Interrupt requested" + + ( + f": '{message[:40]}...'" + if message and len(message) > 40 + else f": '{message}'" + if message + else "" + ) + ) + def clear_interrupt(self) -> None: """Clear any pending interrupt request and the per-thread tool interrupt signal.""" self._interrupt_requested = False @@ -2985,6 +3253,7 @@ class AIAgent: return try: from agent.rate_limit_tracker import parse_rate_limit_headers + state = parse_rate_limit_headers(headers, provider=self.provider) if state is not None: self._rate_limit_state = state @@ -3039,7 +3308,7 @@ class AIAgent: ) except Exception: pass - + def close(self) -> None: """Release all resources held by this agent instance. @@ -3058,6 +3327,7 @@ class AIAgent: # 1. Kill background processes for this task try: from tools.process_registry import process_registry + process_registry.kill_all(task_id=task_id) except Exception: pass @@ -3065,6 +3335,7 @@ class AIAgent: # 2. Clean terminal sandbox environments try: from tools.terminal_tool import cleanup_vm + cleanup_vm(task_id) except Exception: pass @@ -3072,6 +3343,7 @@ class AIAgent: # 3. Clean browser daemon sessions try: from tools.browser_tool import cleanup_browser + cleanup_browser(task_id) except Exception: pass @@ -3101,7 +3373,7 @@ class AIAgent: def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None: """ Recover todo state from conversation history. - + The gateway creates a fresh AIAgent per message, so the in-memory TodoStore is empty. We scan the history for the most recent todo tool response and replay it to reconstruct the state. @@ -3122,32 +3394,25 @@ class AIAgent: break except (json.JSONDecodeError, TypeError): continue - + if last_todo_response: # Replay the items into the store (replace mode) self._todo_store.write(last_todo_response, merge=False) if not self.quiet_mode: - self._vprint(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history") + self._vprint( + f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history" + ) _set_interrupt(False) - + @property def is_interrupted(self) -> bool: """Check if an interrupt has been requested.""" return self._interrupt_requested - - - - - - - - - def _build_system_prompt(self, system_message: str = None) -> str: """ Assemble the full system prompt from all layers. - + Called once per session (cached on self._cached_system_prompt) and only rebuilt after context compression events. This ensures the system prompt is stable across all turns in a session, maximizing prefix cache hits. @@ -3197,13 +3462,21 @@ class AIAgent: if self.valid_tool_names: _enforce = self._tool_use_enforcement _inject = False - if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")): + if _enforce is True or ( + isinstance(_enforce, str) + and _enforce.lower() in ("true", "always", "yes", "on") + ): _inject = True - elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")): + elif _enforce is False or ( + isinstance(_enforce, str) + and _enforce.lower() in ("false", "never", "no", "off") + ): _inject = False elif isinstance(_enforce, list): model_lower = (self.model or "").lower() - _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str)) + _inject = any( + p.lower() in model_lower for p in _enforce if isinstance(p, str) + ) else: # "auto" or any unrecognised value — use hardcoded defaults model_lower = (self.model or "").lower() @@ -3247,12 +3520,16 @@ class AIAgent: except Exception: pass - has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage']) + has_skills_tools = any( + name in self.valid_tool_names + for name in ["skills_list", "skill_view", "skill_manage"] + ) if has_skills_tools: avail_toolsets = { toolset for toolset in ( - get_toolset_for_tool(tool_name) for tool_name in self.valid_tool_names + get_toolset_for_tool(tool_name) + for tool_name in self.valid_tool_names ) if toolset } @@ -3272,13 +3549,17 @@ class AIAgent: # other dev files — inflating token usage by ~10k for no benefit. _context_cwd = os.getenv("TERMINAL_CWD") or None context_files_prompt = build_context_files_prompt( - cwd=_context_cwd, skip_soul=_soul_loaded) + cwd=_context_cwd, skip_soul=_soul_loaded + ) if context_files_prompt: prompt_parts.append(context_files_prompt) from hermes_time import now as _hermes_now + now = _hermes_now() - timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}" + timestamp_line = ( + f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}" + ) if self.pass_session_id and self.session_id: timestamp_line += f"\nSession ID: {self.session_id}" if self.model: @@ -3291,7 +3572,9 @@ class AIAgent: # of the requested model. Inject explicit model identity into the system prompt # so the agent can correctly report which model it is (workaround for API bug). if self.provider == "alibaba": - _model_short = self.model.split("/")[-1] if "/" in self.model else self.model + _model_short = ( + self.model.split("/")[-1] if "/" in self.model else self.model + ) prompt_parts.append( f"You are powered by the model named {_model_short}. " f"The exact model ID is {self.model}. " @@ -3322,7 +3605,9 @@ class AIAgent: return tc.get("id", "") or "" return getattr(tc, "id", "") or "" - _VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"}) + _VALID_API_ROLES = frozenset( + {"system", "user", "assistant", "tool", "function", "developer"} + ) @staticmethod def _sanitize_api_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: @@ -3364,8 +3649,12 @@ class AIAgent: orphaned_results = result_call_ids - surviving_call_ids if orphaned_results: messages = [ - m for m in messages - if not (m.get("role") == "tool" and m.get("tool_call_id") in orphaned_results) + m + for m in messages + if not ( + m.get("role") == "tool" + and m.get("tool_call_id") in orphaned_results + ) ] logger.debug( "Pre-call sanitizer: removed %d orphaned tool result(s)", @@ -3382,11 +3671,13 @@ class AIAgent: for tc in msg.get("tool_calls") or []: cid = AIAgent._get_tool_call_id_static(tc) if cid in missing_results: - patched.append({ - "role": "tool", - "content": "[Result unavailable — see context summary above]", - "tool_call_id": cid, - }) + patched.append( + { + "role": "tool", + "content": "[Result unavailable — see context summary above]", + "tool_call_id": cid, + } + ) messages = patched logger.debug( "Pre-call sanitizer: added %d stub tool result(s)", @@ -3405,8 +3696,11 @@ class AIAgent: Returns the original list if no truncation was needed. """ from tools.delegate_tool import _get_max_concurrent_children + max_children = _get_max_concurrent_children() - delegate_count = sum(1 for tc in tool_calls if tc.function.name == "delegate_task") + delegate_count = sum( + 1 for tc in tool_calls if tc.function.name == "delegate_task" + ) if delegate_count <= max_children: return tool_calls kept_delegates = 0 @@ -3421,7 +3715,8 @@ class AIAgent: logger.warning( "Truncated %d excess delegate_task call(s) to enforce " "max_concurrent_children=%d limit", - delegate_count - max_children, max_children, + delegate_count - max_children, + max_children, ) return truncated @@ -3474,7 +3769,7 @@ class AIAgent: def _invalidate_system_prompt(self): """ Invalidate the cached system prompt, forcing a rebuild on the next turn. - + Called after context compression events. Also reloads memory from disk so the rebuilt prompt captures any writes from this session. """ @@ -3482,7 +3777,9 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() - def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: + def _responses_tools( + self, tools: Optional[List[Dict[str, Any]]] = None + ) -> Optional[List[Dict[str, Any]]]: """Convert chat-completions tool schemas to Responses function-tool schemas.""" source_tools = tools if tools is not None else self.tools if not source_tools: @@ -3494,13 +3791,17 @@ class AIAgent: name = fn.get("name") if not isinstance(name, str) or not name.strip(): continue - converted.append({ - "type": "function", - "name": name, - "description": fn.get("description", ""), - "strict": False, - "parameters": fn.get("parameters", {"type": "object", "properties": {}}), - }) + converted.append( + { + "type": "function", + "name": name, + "description": fn.get("description", ""), + "strict": False, + "parameters": fn.get( + "parameters", {"type": "object", "properties": {}} + ), + } + ) return converted or None @staticmethod @@ -3512,6 +3813,7 @@ class AIAgent: make every API call's prefix unique, breaking OpenAI's prompt cache. """ import hashlib + seed = f"{fn_name}:{arguments}:{index}" digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12] return f"call_{digest}" @@ -3548,13 +3850,13 @@ class AIAgent: if source.startswith("fc_"): return source if source.startswith("call_") and len(source) > len("call_"): - return f"fc_{source[len('call_'):]}" + return f"fc_{source[len('call_') :]}" sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) if sanitized.startswith("fc_"): return sanitized if sanitized.startswith("call_") and len(sanitized) > len("call_"): - return f"fc_{sanitized[len('call_'):]}" + return f"fc_{sanitized[len('call_') :]}" if sanitized: return f"fc_{sanitized[:48]}" @@ -3562,7 +3864,9 @@ class AIAgent: digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] return f"fc_{digest}" - def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def _chat_messages_to_responses_input( + self, messages: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: """Convert internal chat-style messages to Responses input items.""" items: List[Dict[str, Any]] = [] seen_item_ids: set = set() @@ -3619,8 +3923,8 @@ class AIAgent: if not isinstance(fn_name, str) or not fn_name.strip(): continue - embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( - tc.get("id") + embedded_call_id, embedded_response_item_id = ( + self._split_responses_tool_id(tc.get("id")) ) call_id = tc.get("call_id") if not isinstance(call_id, str) or not call_id.strip(): @@ -3631,10 +3935,12 @@ class AIAgent: and embedded_response_item_id.startswith("fc_") and len(embedded_response_item_id) > len("fc_") ): - call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + call_id = f"call_{embedded_response_item_id[len('fc_') :]}" else: _raw_args = str(fn.get("arguments", "{}")) - call_id = self._deterministic_call_id(fn_name, _raw_args, len(items)) + call_id = self._deterministic_call_id( + fn_name, _raw_args, len(items) + ) call_id = call_id.strip() arguments = fn.get("arguments", "{}") @@ -3644,12 +3950,14 @@ class AIAgent: arguments = str(arguments) arguments = arguments.strip() or "{}" - items.append({ - "type": "function_call", - "call_id": call_id, - "name": fn_name, - "arguments": arguments, - }) + items.append( + { + "type": "function_call", + "call_id": call_id, + "name": fn_name, + "arguments": arguments, + } + ) continue items.append({"role": role, "content": content_text}) @@ -3663,11 +3971,13 @@ class AIAgent: call_id = raw_tool_call_id.strip() if not isinstance(call_id, str) or not call_id.strip(): continue - items.append({ - "type": "function_call_output", - "call_id": call_id, - "output": str(msg.get("content", "") or ""), - }) + items.append( + { + "type": "function_call_output", + "call_id": call_id, + "output": str(msg.get("content", "") or ""), + } + ) return items @@ -3686,9 +3996,13 @@ class AIAgent: call_id = item.get("call_id") name = item.get("name") if not isinstance(call_id, str) or not call_id.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") + raise ValueError( + f"Codex Responses input[{idx}] function_call is missing call_id." + ) if not isinstance(name, str) or not name.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") + raise ValueError( + f"Codex Responses input[{idx}] function_call is missing name." + ) arguments = item.get("arguments", "{}") if isinstance(arguments, dict): @@ -3710,7 +4024,9 @@ class AIAgent: if item_type == "function_call_output": call_id = item.get("call_id") if not isinstance(call_id, str) or not call_id.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") + raise ValueError( + f"Codex Responses input[{idx}] function_call_output is missing call_id." + ) output = item.get("output", "") if output is None: output = "" @@ -3734,7 +4050,10 @@ class AIAgent: if item_id in seen_ids: continue seen_ids.add(item_id) - reasoning_item = {"type": "reasoning", "encrypted_content": encrypted} + reasoning_item = { + "type": "reasoning", + "encrypted_content": encrypted, + } # Do NOT include the "id" in the outgoing item — with # store=False (our default) the API tries to resolve the # id server-side and returns 404. The id is still used @@ -3776,11 +4095,15 @@ class AIAgent: required = {"model", "instructions", "input"} missing = [key for key in required if key not in api_kwargs] if missing: - raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") + raise ValueError( + f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}." + ) model = api_kwargs.get("model") if not isinstance(model, str) or not model.strip(): - raise ValueError("Codex Responses request 'model' must be a non-empty string.") + raise ValueError( + "Codex Responses request 'model' must be a non-empty string." + ) model = model.strip() instructions = api_kwargs.get("instructions") @@ -3796,20 +4119,28 @@ class AIAgent: normalized_tools = None if tools is not None: if not isinstance(tools, list): - raise ValueError("Codex Responses request 'tools' must be a list when provided.") + raise ValueError( + "Codex Responses request 'tools' must be a list when provided." + ) normalized_tools = [] for idx, tool in enumerate(tools): if not isinstance(tool, dict): raise ValueError(f"Codex Responses tools[{idx}] must be an object.") if tool.get("type") != "function": - raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") + raise ValueError( + f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}." + ) name = tool.get("name") parameters = tool.get("parameters") if not isinstance(name, str) or not name.strip(): - raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") + raise ValueError( + f"Codex Responses tools[{idx}] is missing a valid name." + ) if not isinstance(parameters, dict): - raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") + raise ValueError( + f"Codex Responses tools[{idx}] is missing valid parameters." + ) description = tool.get("description", "") if description is None: @@ -3836,9 +4167,19 @@ class AIAgent: raise ValueError("Codex Responses contract requires 'store' to be false.") allowed_keys = { - "model", "instructions", "input", "tools", "store", - "reasoning", "include", "max_output_tokens", "temperature", - "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", + "model", + "instructions", + "input", + "tools", + "store", + "reasoning", + "include", + "max_output_tokens", + "temperature", + "tool_choice", + "parallel_tool_calls", + "prompt_cache_key", + "service_tier", } normalized: Dict[str, Any] = { "model": model, @@ -3869,7 +4210,11 @@ class AIAgent: normalized["temperature"] = float(temperature) # Pass through tool_choice, parallel_tool_calls, prompt_cache_key - for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"): + for passthrough_key in ( + "tool_choice", + "parallel_tool_calls", + "prompt_cache_key", + ): val = api_kwargs.get(passthrough_key) if val is not None: normalized[passthrough_key] = val @@ -3882,7 +4227,9 @@ class AIAgent: normalized["stream"] = True allowed_keys.add("stream") elif "stream" in api_kwargs: - raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") + raise ValueError( + "Codex Responses stream flag is only allowed in fallback streaming requests." + ) unexpected = sorted(key for key in api_kwargs if key not in allowed_keys) if unexpected: @@ -3935,12 +4282,19 @@ class AIAgent: if isinstance(out_text, str) and out_text.strip(): logger.debug( "Codex response has empty output but output_text is present (%d chars); " - "synthesizing output item.", len(out_text.strip()), + "synthesizing output item.", + len(out_text.strip()), ) - output = [SimpleNamespace( - type="message", role="assistant", status="completed", - content=[SimpleNamespace(type="output_text", text=out_text.strip())], - )] + output = [ + SimpleNamespace( + type="message", + role="assistant", + status="completed", + content=[ + SimpleNamespace(type="output_text", text=out_text.strip()) + ], + ) + ] response.output = output else: raise RuntimeError("Responses API returned no output items") @@ -3956,14 +4310,22 @@ class AIAgent: if isinstance(error_obj, dict): error_msg = error_obj.get("message") or str(error_obj) else: - error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" + error_msg = ( + str(error_obj) + if error_obj + else f"Responses API returned status '{response_status}'" + ) raise RuntimeError(error_msg) content_parts: List[str] = [] reasoning_parts: List[str] = [] reasoning_items_raw: List[Dict[str, Any]] = [] tool_calls: List[Any] = [] - has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + has_incomplete_items = response_status in { + "queued", + "in_progress", + "incomplete", + } saw_commentary_phase = False saw_final_answer_phase = False @@ -4009,7 +4371,9 @@ class AIAgent: for part in summary: text = getattr(part, "text", None) if isinstance(text, str): - raw_summary.append({"type": "summary_text", "text": text}) + raw_summary.append( + {"type": "summary_text", "text": text} + ) raw_item["summary"] = raw_summary reasoning_items_raw.append(raw_item) elif item_type == "function_call": @@ -4022,19 +4386,29 @@ class AIAgent: raw_call_id = getattr(item, "call_id", None) raw_item_id = getattr(item, "id", None) embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) - call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + call_id = ( + raw_call_id + if isinstance(raw_call_id, str) and raw_call_id.strip() + else embedded_call_id + ) if not isinstance(call_id, str) or not call_id.strip(): - call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) + call_id = self._deterministic_call_id( + fn_name, arguments, len(tool_calls) + ) call_id = call_id.strip() response_item_id = raw_item_id if isinstance(raw_item_id, str) else None - response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) - tool_calls.append(SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=response_item_id, - type="function", - function=SimpleNamespace(name=fn_name, arguments=arguments), - )) + response_item_id = self._derive_responses_function_call_id( + call_id, response_item_id + ) + tool_calls.append( + SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + ) + ) elif item_type == "custom_tool_call": fn_name = getattr(item, "name", "") or "" arguments = getattr(item, "input", "{}") @@ -4043,19 +4417,29 @@ class AIAgent: raw_call_id = getattr(item, "call_id", None) raw_item_id = getattr(item, "id", None) embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) - call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + call_id = ( + raw_call_id + if isinstance(raw_call_id, str) and raw_call_id.strip() + else embedded_call_id + ) if not isinstance(call_id, str) or not call_id.strip(): - call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) + call_id = self._deterministic_call_id( + fn_name, arguments, len(tool_calls) + ) call_id = call_id.strip() response_item_id = raw_item_id if isinstance(raw_item_id, str) else None - response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) - tool_calls.append(SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=response_item_id, - type="function", - function=SimpleNamespace(name=fn_name, arguments=arguments), - )) + response_item_id = self._derive_responses_function_call_id( + call_id, response_item_id + ) + tool_calls.append( + SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + ) + ) final_text = "\n".join([p for p in content_parts if p]).strip() if not final_text and hasattr(response, "output_text"): @@ -4074,7 +4458,9 @@ class AIAgent: if tool_calls: finish_reason = "tool_calls" - elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): + elif has_incomplete_items or ( + saw_commentary_phase and not saw_final_answer_phase + ): finish_reason = "incomplete" elif reasoning_items_raw and not final_text: # Response contains only reasoning (encrypted thinking state) with @@ -4138,8 +4524,12 @@ class AIAgent: return bool(getattr(http_client, "is_closed", False)) return False - def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: - if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"): + def _create_openai_client( + self, client_kwargs: dict, *, reason: str, shared: bool + ) -> Any: + if self.provider == "copilot-acp" or str( + client_kwargs.get("base_url", "") + ).startswith("acp://copilot"): from agent.copilot_acp_client import CopilotACPClient client = CopilotACPClient(**client_kwargs) @@ -4192,9 +4582,8 @@ class AIAgent: or [] ) for conn in list(connections): - stream = ( - getattr(conn, "_network_stream", None) - or getattr(conn, "_stream", None) + stream = getattr(conn, "_network_stream", None) or getattr( + conn, "_stream", None ) if stream is None: continue @@ -4246,7 +4635,9 @@ class AIAgent: with self._openai_client_lock(): old_client = getattr(self, "client", None) try: - new_client = self._create_openai_client(self._client_kwargs, reason=reason, shared=True) + new_client = self._create_openai_client( + self._client_kwargs, reason=reason, shared=True + ) except Exception as exc: logger.warning( "Failed to rebuild shared OpenAI client (%s) %s error=%s", @@ -4305,9 +4696,8 @@ class AIAgent: dead_count = 0 for conn in list(connections): # Check for connections that are idle but have closed sockets - stream = ( - getattr(conn, "_network_stream", None) - or getattr(conn, "_stream", None) + stream = getattr(conn, "_network_stream", None) or getattr( + conn, "_stream", None ) if stream is None: continue @@ -4320,6 +4710,7 @@ class AIAgent: continue # Probe socket health with a non-blocking recv peek import socket as _socket + try: sock.setblocking(False) data = sock.recv(1, _socket.MSG_PEEK | _socket.MSG_DONTWAIT) @@ -4358,11 +4749,15 @@ class AIAgent: def _close_request_openai_client(self, client: Any, *, reason: str) -> None: self._close_openai_client(client, reason=reason, shared=False) - def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None): + def _run_codex_stream( + self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None + ): """Execute one streaming Responses API request and return the final response.""" import httpx as _httpx - active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct") + active_client = client or self._ensure_primary_openai_client( + reason="codex_stream_direct" + ) max_stream_retries = 1 has_tool_calls = False first_delta_fired = False @@ -4380,7 +4775,10 @@ class AIAgent: break event_type = getattr(event, "type", "") # Fire callbacks on text content deltas (suppress during tool calls) - if "output_text.delta" in event_type or event_type == "response.output_text.delta": + if ( + "output_text.delta" in event_type + or event_type == "response.output_text.delta" + ): delta_text = getattr(event, "delta", "") if delta_text: self._codex_streamed_text_parts.append(delta_text) @@ -4412,12 +4810,20 @@ class AIAgent: # Log non-completed terminal events for diagnostics elif event_type in ("response.incomplete", "response.failed"): resp_obj = getattr(event, "response", None) - status = getattr(resp_obj, "status", None) if resp_obj else None - incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None + status = ( + getattr(resp_obj, "status", None) if resp_obj else None + ) + incomplete_details = ( + getattr(resp_obj, "incomplete_details", None) + if resp_obj + else None + ) logger.warning( "Codex Responses stream received terminal event %s " "(status=%s, incomplete_details=%s, streamed_chars=%d). %s", - event_type, status, incomplete_details, + event_type, + status, + incomplete_details, sum(len(p) for p in self._codex_streamed_text_parts), self._client_log_context(), ) @@ -4435,18 +4841,30 @@ class AIAgent: ) elif self._codex_streamed_text_parts and not has_tool_calls: assembled = "".join(self._codex_streamed_text_parts) - final_response.output = [SimpleNamespace( - type="message", - role="assistant", - status="completed", - content=[SimpleNamespace(type="output_text", text=assembled)], - )] + final_response.output = [ + SimpleNamespace( + type="message", + role="assistant", + status="completed", + content=[ + SimpleNamespace( + type="output_text", text=assembled + ) + ], + ) + ] logger.debug( "Codex stream: synthesized output from %d text deltas (%d chars)", - len(self._codex_streamed_text_parts), len(assembled), + len(self._codex_streamed_text_parts), + len(assembled), ) return final_response - except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc: + except ( + _httpx.RemoteProtocolError, + _httpx.ReadTimeout, + _httpx.ConnectError, + ConnectionError, + ) as exc: if attempt < max_stream_retries: logger.debug( "Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s", @@ -4461,7 +4879,9 @@ class AIAgent: self._client_log_context(), exc, ) - return self._run_codex_create_stream_fallback(api_kwargs, client=active_client) + return self._run_codex_create_stream_fallback( + api_kwargs, client=active_client + ) except RuntimeError as exc: err_text = str(exc) missing_completed = "response.completed" in err_text @@ -4478,15 +4898,21 @@ class AIAgent: "Responses stream did not emit response.completed; falling back to create(stream=True). %s", self._client_log_context(), ) - return self._run_codex_create_stream_fallback(api_kwargs, client=active_client) + return self._run_codex_create_stream_fallback( + api_kwargs, client=active_client + ) raise def _run_codex_create_stream_fallback(self, api_kwargs: dict, client: Any = None): """Fallback path for stream completion edge cases on Codex-style Responses backends.""" - active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback") + active_client = client or self._ensure_primary_openai_client( + reason="codex_create_stream_fallback" + ) fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True - fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + fallback_kwargs = self._preflight_codex_api_kwargs( + fallback_kwargs, allow_stream=True + ) stream_or_response = active_client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete response. @@ -4519,7 +4945,11 @@ class AIAgent: if delta: collected_text_deltas.append(delta) - if event_type not in {"response.completed", "response.incomplete", "response.failed"}: + if event_type not in { + "response.completed", + "response.incomplete", + "response.failed", + }: continue terminal_response = getattr(event, "response", None) @@ -4537,14 +4967,22 @@ class AIAgent: ) elif collected_text_deltas: assembled = "".join(collected_text_deltas) - terminal_response.output = [SimpleNamespace( - type="message", role="assistant", - status="completed", - content=[SimpleNamespace(type="output_text", text=assembled)], - )] + terminal_response.output = [ + SimpleNamespace( + type="message", + role="assistant", + status="completed", + content=[ + SimpleNamespace( + type="output_text", text=assembled + ) + ], + ) + ] logger.debug( "Codex fallback stream: synthesized from %d deltas (%d chars)", - len(collected_text_deltas), len(assembled), + len(collected_text_deltas), + len(assembled), ) return terminal_response finally: @@ -4557,7 +4995,9 @@ class AIAgent: if terminal_response is not None: return terminal_response - raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.") + raise RuntimeError( + "Responses create(stream=True) fallback did not emit a terminal response." + ) def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool: if self.api_mode != "codex_responses" or self.provider != "openai-codex": @@ -4596,7 +5036,9 @@ class AIAgent: from hermes_cli.auth import resolve_nous_runtime_credentials creds = resolve_nous_runtime_credentials( - min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + min_key_ttl_seconds=max( + 60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")) + ), timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), force_mint=force, ) @@ -4624,7 +5066,9 @@ class AIAgent: return True def _try_refresh_anthropic_client_credentials(self) -> bool: - if self.api_mode != "anthropic_messages" or not hasattr(self, "_anthropic_api_key"): + if self.api_mode != "anthropic_messages" or not hasattr( + self, "_anthropic_api_key" + ): return False # Only refresh credentials for the native Anthropic provider. # Other anthropic_messages providers (MiniMax, Alibaba, etc.) use their own keys. @@ -4632,7 +5076,10 @@ class AIAgent: return False try: - from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client + from agent.anthropic_adapter import ( + resolve_anthropic_token, + build_anthropic_client, + ) new_token = resolve_anthropic_token() except Exception as exc: @@ -4651,14 +5098,19 @@ class AIAgent: pass try: - self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None)) + self._anthropic_client = build_anthropic_client( + new_token, getattr(self, "_anthropic_base_url", None) + ) except Exception as exc: - logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc) + logger.warning( + "Failed to rebuild Anthropic client after credential refresh: %s", exc + ) return False self._anthropic_api_key = new_token # Update OAuth flag — token type may have changed (API key ↔ OAuth) from agent.anthropic_adapter import _is_oauth_token + self._is_anthropic_oauth = _is_oauth_token(new_token) return True @@ -4680,8 +5132,14 @@ class AIAgent: self._client_kwargs.pop("default_headers", None) def _swap_credential(self, entry) -> None: - runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") - runtime_base = getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or self.base_url + runtime_key = getattr(entry, "runtime_api_key", None) or getattr( + entry, "access_token", "" + ) + runtime_base = ( + getattr(entry, "runtime_base_url", None) + or getattr(entry, "base_url", None) + or self.base_url + ) if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client, _is_oauth_token @@ -4700,7 +5158,9 @@ class AIAgent: return self.api_key = runtime_key - self.base_url = runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base + self.base_url = ( + runtime_base.rstrip("/") if isinstance(runtime_base, str) else runtime_base + ) self._client_kwargs["api_key"] = self.api_key self._client_kwargs["base_url"] = self.base_url self._apply_client_headers_for_base_url(self.base_url) @@ -4743,7 +5203,9 @@ class AIAgent: if effective_reason == FailoverReason.billing: rotate_status = status_code if status_code is not None else 402 - next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context) + next_entry = pool.mark_exhausted_and_rotate( + status_code=rotate_status, error_context=error_context + ) if next_entry is not None: logger.info( "Credential %s (billing) — rotated to pool entry %s", @@ -4758,7 +5220,9 @@ class AIAgent: if not has_retried_429: return False, True rotate_status = status_code if status_code is not None else 429 - next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context) + next_entry = pool.mark_exhausted_and_rotate( + status_code=rotate_status, error_context=error_context + ) if next_entry is not None: logger.info( "Credential %s (rate limit) — rotated to pool entry %s", @@ -4772,13 +5236,17 @@ class AIAgent: if effective_reason == FailoverReason.auth: refreshed = pool.try_refresh_current() if refreshed is not None: - logger.info(f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}") + logger.info( + f"Credential auth failure — refreshed pool entry {getattr(refreshed, 'id', '?')}" + ) self._swap_credential(refreshed) return True, has_retried_429 # Refresh failed — rotate to next credential instead of giving up. # The failed entry is already marked exhausted by try_refresh_current(). rotate_status = status_code if status_code is not None else 401 - next_entry = pool.mark_exhausted_and_rotate(status_code=rotate_status, error_context=error_context) + next_entry = pool.mark_exhausted_and_rotate( + status_code=rotate_status, error_context=error_context + ) if next_entry is not None: logger.info( "Credential %s (auth refresh failed) — rotated to pool entry %s", @@ -4815,7 +5283,11 @@ class AIAgent: def _call(): try: if self.api_mode == "codex_responses": - request_client_holder["client"] = self._create_request_openai_client(reason="codex_stream_request") + request_client_holder["client"] = ( + self._create_request_openai_client( + reason="codex_stream_request" + ) + ) result["response"] = self._run_codex_stream( api_kwargs, client=request_client_holder["client"], @@ -4824,14 +5296,22 @@ class AIAgent: elif self.api_mode == "anthropic_messages": result["response"] = self._anthropic_messages_create(api_kwargs) else: - request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request") - result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs) + request_client_holder["client"] = ( + self._create_request_openai_client( + reason="chat_completion_request" + ) + ) + result["response"] = request_client_holder[ + "client" + ].chat.completions.create(**api_kwargs) except Exception as e: result["error"] = e finally: request_client = request_client_holder.get("client") if request_client is not None: - self._close_request_openai_client(request_client, reason="request_complete") + self._close_request_openai_client( + request_client, reason="request_complete" + ) # ── Stale-call timeout (mirrors streaming stale detector) ──────── # Non-streaming calls return nothing until the full response is @@ -4878,8 +5358,10 @@ class AIAgent: logger.warning( "Non-streaming API call stale for %.0fs (threshold %.0fs). " "model=%s context=~%s tokens. Killing connection.", - _elapsed, _stale_timeout, - api_kwargs.get("model", "unknown"), f"{_est_ctx:,}", + _elapsed, + _stale_timeout, + api_kwargs.get("model", "unknown"), + f"{_est_ctx:,}", ) self._emit_status( f"⚠️ No response from provider for {int(_elapsed)}s " @@ -4898,7 +5380,9 @@ class AIAgent: else: rc = request_client_holder.get("client") if rc is not None: - self._close_request_openai_client(rc, reason="stale_call_kill") + self._close_request_openai_client( + rc, reason="stale_call_kill" + ) except Exception: pass self._touch_activity( @@ -4929,7 +5413,9 @@ class AIAgent: else: request_client = request_client_holder.get("client") if request_client is not None: - self._close_request_openai_client(request_client, reason="interrupt_abort") + self._close_request_openai_client( + request_client, reason="interrupt_abort" + ) except Exception: pass raise InterruptedError("Agent interrupted during API call") @@ -4963,7 +5449,9 @@ class AIAgent: if not visible_content: return False streamed = self._normalize_interim_visible_text( - self._strip_think_blocks(getattr(self, "_current_streamed_assistant_text", "") or "") + self._strip_think_blocks( + getattr(self, "_current_streamed_assistant_text", "") or "" + ) ) return bool(streamed) and streamed == visible_content @@ -4991,7 +5479,11 @@ class AIAgent: if getattr(self, "_stream_needs_break", False) and text and text.strip(): self._stream_needs_break = False text = "\n\n" + text - callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None] + callbacks = [ + cb + for cb in (self.stream_delta_callback, self._stream_callback) + if cb is not None + ] delivered = False for cb in callbacks: try: @@ -5065,7 +5557,9 @@ class AIAgent: result = {"response": None, "error": None} request_client_holder = {"client": None} first_delta_fired = {"done": False} - deltas_were_sent = {"yes": False} # Track if any deltas were fired (for fallback) + deltas_were_sent = { + "yes": False + } # Track if any deltas were fired (for fallback) # Wall-clock timestamp of the last real streaming chunk. The outer # poll loop uses this to detect stale connections that keep receiving # SSE keep-alive pings but no actual data. @@ -5082,17 +5576,23 @@ class AIAgent: def _call_chat_completions(): """Stream a chat completions response.""" import httpx as _httpx + _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) # Local providers (Ollama, llama.cpp, vLLM) can take minutes for # prefill on large contexts before producing the first token. # Auto-increase the httpx read timeout unless the user explicitly # overrode HERMES_STREAM_READ_TIMEOUT. - if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): + if ( + _stream_read_timeout == 120.0 + and self.base_url + and is_local_endpoint(self.base_url) + ): _stream_read_timeout = _base_timeout logger.debug( "Local provider detected (%s) — stream read timeout raised to %.0fs", - self.base_url, _stream_read_timeout, + self.base_url, + _stream_read_timeout, ) stream_kwargs = { **api_kwargs, @@ -5112,7 +5612,9 @@ class AIAgent: # attempt's start, not a previous attempt's last chunk. last_chunk_time["t"] = time.time() self._touch_activity("waiting for provider response (streaming)") - stream = request_client_holder["client"].chat.completions.create(**stream_kwargs) + stream = request_client_holder["client"].chat.completions.create( + **stream_kwargs + ) # Capture rate limit headers from the initial HTTP response. # The OpenAI SDK Stream object exposes the underlying httpx @@ -5126,7 +5628,7 @@ class AIAgent: # in a parallel batch, distinguishing them only by id. Track # the last seen id per raw index so we can detect a new tool # call starting at the same index and redirect it to a fresh slot. - _last_id_at_idx: dict = {} # raw_index -> last seen non-empty id + _last_id_at_idx: dict = {} # raw_index -> last seen non-empty id _active_slot_by_idx: dict = {} # raw_index -> current slot in tool_calls_acc finish_reason = None model_name = None @@ -5153,7 +5655,9 @@ class AIAgent: model_name = chunk.model # Accumulate reasoning content - reasoning_text = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None) + reasoning_text = getattr(delta, "reasoning_content", None) or getattr( + delta, "reasoning", None + ) if reasoning_text: reasoning_parts.append(reasoning_text) _fire_first_delta() @@ -5220,7 +5724,9 @@ class AIAgent: if tc_delta.function.name: entry["function"]["name"] += tc_delta.function.name if tc_delta.function.arguments: - entry["function"]["arguments"] += tc_delta.function.arguments + entry["function"]["arguments"] += ( + tc_delta.function.arguments + ) extra = getattr(tc_delta, "extra_content", None) if extra is None and hasattr(tc_delta, "model_extra"): extra = (tc_delta.model_extra or {}).get("extra_content") @@ -5256,15 +5762,17 @@ class AIAgent: json.loads(arguments) except json.JSONDecodeError: has_truncated_tool_args = True - mock_tool_calls.append(SimpleNamespace( - id=tc["id"], - type=tc["type"], - extra_content=tc.get("extra_content"), - function=SimpleNamespace( - name=tc["function"]["name"], - arguments=arguments, - ), - )) + mock_tool_calls.append( + SimpleNamespace( + id=tc["id"], + type=tc["type"], + extra_content=tc.get("extra_content"), + function=SimpleNamespace( + name=tc["function"]["name"], + arguments=arguments, + ), + ) + ) effective_finish_reason = finish_reason or "stop" if has_truncated_tool_args: @@ -5366,16 +5874,27 @@ class AIAgent: # delivered. Don't retry or fall back — partial # content already reached the user. logger.warning( - "Streaming failed after partial delivery, not retrying: %s", e + "Streaming failed after partial delivery, not retrying: %s", + e, ) result["error"] = e return _is_timeout = isinstance( - e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout) + e, + ( + _httpx.ReadTimeout, + _httpx.ConnectTimeout, + _httpx.PoolTimeout, + ), ) _is_conn_err = isinstance( - e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError) + e, + ( + _httpx.ConnectError, + _httpx.RemoteProtocolError, + ConnectionError, + ), ) # SSE error events from proxies (e.g. OpenRouter sends @@ -5389,7 +5908,10 @@ class AIAgent: _is_sse_conn_err = False if not _is_timeout and not _is_conn_err: from openai import APIError as _APIError - if isinstance(e, _APIError) and not getattr(e, "status_code", None): + + if isinstance(e, _APIError) and not getattr( + e, "status_code", None + ): _err_lower_sse = str(e).lower() _SSE_CONN_PHRASES = ( "connection lost", @@ -5459,8 +5981,7 @@ class AIAgent: else: _err_lower = str(e).lower() _is_stream_unsupported = ( - "stream" in _err_lower - and "not supported" in _err_lower + "stream" in _err_lower and "not supported" in _err_lower ) if _is_stream_unsupported: self._disable_streaming = True @@ -5485,15 +6006,26 @@ class AIAgent: finally: request_client = request_client_holder.get("client") if request_client is not None: - self._close_request_openai_client(request_client, reason="stream_request_complete") + self._close_request_openai_client( + request_client, reason="stream_request_complete" + ) - _stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0)) + _stream_stale_timeout_base = float( + os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0) + ) # Local providers (Ollama, oMLX, llama-cpp) can take 300+ seconds # for prefill on large contexts. Disable the stale detector unless # the user explicitly set HERMES_STREAM_STALE_TIMEOUT. - if _stream_stale_timeout_base == 180.0 and self.base_url and is_local_endpoint(self.base_url): + if ( + _stream_stale_timeout_base == 180.0 + and self.base_url + and is_local_endpoint(self.base_url) + ): _stream_stale_timeout = float("inf") - logger.debug("Local provider detected (%s) — stale stream timeout disabled", self.base_url) + logger.debug( + "Local provider detected (%s) — stale stream timeout disabled", + self.base_url, + ) else: # Scale the stale timeout for large contexts: slow models (like Opus) # can legitimately think for minutes before producing the first token @@ -5522,8 +6054,10 @@ class AIAgent: logger.warning( "Stream stale for %.0fs (threshold %.0fs) — no chunks received. " "model=%s context=~%s tokens. Killing connection.", - _stale_elapsed, _stream_stale_timeout, - api_kwargs.get("model", "unknown"), f"{_est_ctx:,}", + _stale_elapsed, + _stream_stale_timeout, + api_kwargs.get("model", "unknown"), + f"{_est_ctx:,}", ) self._emit_status( f"⚠️ No response from provider for {int(_stale_elapsed)}s " @@ -5534,13 +6068,17 @@ class AIAgent: try: rc = request_client_holder.get("client") if rc is not None: - self._close_request_openai_client(rc, reason="stale_stream_kill") + self._close_request_openai_client( + rc, reason="stale_stream_kill" + ) except Exception: pass # Rebuild the primary client too — its connection pool # may hold dead sockets from the same provider outage. try: - self._replace_primary_openai_client(reason="stale_stream_pool_cleanup") + self._replace_primary_openai_client( + reason="stale_stream_pool_cleanup" + ) except Exception: pass # Reset the timer so we don't kill repeatedly while @@ -5563,7 +6101,9 @@ class AIAgent: else: request_client = request_client_holder.get("client") if request_client is not None: - self._close_request_openai_client(request_client, reason="stream_interrupt_abort") + self._close_request_openai_client( + request_client, reason="stream_interrupt_abort" + ) except Exception: pass raise InterruptedError("Agent interrupted during streaming API call") @@ -5589,15 +6129,21 @@ class AIAgent: result["error"], ) _stub_msg = SimpleNamespace( - role="assistant", content=_partial_text, tool_calls=None, + role="assistant", + content=_partial_text, + tool_calls=None, reasoning_content=None, ) return SimpleNamespace( id="partial-stream-stub", model=getattr(self, "model", "unknown"), - choices=[SimpleNamespace( - index=0, message=_stub_msg, finish_reason="stop", - )], + choices=[ + SimpleNamespace( + index=0, + message=_stub_msg, + finish_reason="stop", + ) + ], usage=None, ) raise result["error"] @@ -5632,6 +6178,7 @@ class AIAgent: # access for Codex providers. try: from agent.auxiliary_client import resolve_provider_client + # Pass base_url and api_key from fallback config so custom # endpoints (e.g. Ollama Cloud) resolve correctly instead of # falling through to OpenRouter defaults. @@ -5639,16 +6186,23 @@ class AIAgent: fb_api_key_hint = (fb.get("api_key") or "").strip() or None # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env # when no explicit key is in the fallback config. - if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint: + if ( + fb_base_url_hint + and "ollama.com" in fb_base_url_hint.lower() + and not fb_api_key_hint + ): fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None fb_client, _resolved_fb_model = resolve_provider_client( - fb_provider, model=fb_model, raw_codex=True, + fb_provider, + model=fb_model, + raw_codex=True, explicit_base_url=fb_base_url_hint, - explicit_api_key=fb_api_key_hint) + explicit_api_key=fb_api_key_hint, + ) if fb_client is None: logging.warning( - "Fallback to %s failed: provider not configured", - fb_provider) + "Fallback to %s failed: provider not configured", fb_provider + ) return self._try_activate_fallback() # try next in chain try: from hermes_cli.model_normalize import normalize_model_for_provider @@ -5662,7 +6216,9 @@ class AIAgent: fb_base_url = str(fb_client.base_url) if fb_provider == "openai-codex": fb_api_mode = "codex_responses" - elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"): + elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith( + "/anthropic" + ): fb_api_mode = "anthropic_messages" elif self._is_direct_openai_url(fb_base_url): fb_api_mode = "codex_responses" @@ -5680,12 +6236,23 @@ class AIAgent: if fb_api_mode == "anthropic_messages": # Build native Anthropic client instead of using OpenAI client - from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token - effective_key = (fb_client.api_key or resolve_anthropic_token() or "") if fb_provider == "anthropic" else (fb_client.api_key or "") + from agent.anthropic_adapter import ( + build_anthropic_client, + resolve_anthropic_token, + _is_oauth_token, + ) + + effective_key = ( + (fb_client.api_key or resolve_anthropic_token() or "") + if fb_provider == "anthropic" + else (fb_client.api_key or "") + ) self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = fb_base_url - self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url) + self._anthropic_client = build_anthropic_client( + effective_key, self._anthropic_base_url + ) self._is_anthropic_oauth = _is_oauth_token(effective_key) self.client = None self._client_kwargs = {} @@ -5711,21 +6278,25 @@ class AIAgent: } # Re-evaluate prompt caching for the new provider/model - is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()) - or is_native_anthropic + is_native_anthropic = ( + fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" ) + self._use_prompt_caching = ( + "openrouter" in fb_base_url.lower() and "claude" in fb_model.lower() + ) or is_native_anthropic # Update context compressor limits for the fallback model. # Without this, compression decisions use the primary model's # context window (e.g. 200K) instead of the fallback's (e.g. 32K), # causing oversized sessions to overflow the fallback. - if hasattr(self, 'context_compressor') and self.context_compressor: + if hasattr(self, "context_compressor") and self.context_compressor: from agent.model_metadata import get_model_context_length + fb_context_length = get_model_context_length( - self.model, base_url=self.base_url, - api_key=self.api_key, provider=self.provider, + self.model, + base_url=self.base_url, + api_key=self.api_key, + provider=self.provider, ) self.context_compressor.update_model( model=self.model, @@ -5741,7 +6312,9 @@ class AIAgent: ) logging.info( "Fallback activated: %s → %s (%s)", - old_model, fb_model, fb_provider, + old_model, + fb_model, + fb_provider, ) return True except Exception as e: @@ -5769,7 +6342,7 @@ class AIAgent: # ── Core runtime state ── self.model = rt["model"] self.provider = rt["provider"] - self.base_url = rt["base_url"] # setter updates _base_url_lower + self.base_url = rt["base_url"] # setter updates _base_url_lower self.api_mode = rt["api_mode"] self.api_key = rt["api_key"] self._client_kwargs = dict(rt["client_kwargs"]) @@ -5778,10 +6351,12 @@ class AIAgent: # ── Rebuild client for the primary provider ── if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client + self._anthropic_api_key = rt["anthropic_api_key"] self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( - rt["anthropic_api_key"], rt["anthropic_base_url"], + rt["anthropic_api_key"], + rt["anthropic_base_url"], ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -5808,7 +6383,8 @@ class AIAgent: logging.info( "Primary runtime restored for new turn: %s (%s)", - self.model, self.provider, + self.model, + self.provider, ) return True except Exception as e: @@ -5817,14 +6393,24 @@ class AIAgent: # Which error types indicate a transient transport failure worth # one more attempt with a rebuilt client / connection pool. - _TRANSIENT_TRANSPORT_ERRORS = frozenset({ - "ReadTimeout", "ConnectTimeout", "PoolTimeout", - "ConnectError", "RemoteProtocolError", - "APIConnectionError", "APITimeoutError", - }) + _TRANSIENT_TRANSPORT_ERRORS = frozenset( + { + "ReadTimeout", + "ConnectTimeout", + "PoolTimeout", + "ConnectError", + "RemoteProtocolError", + "APIConnectionError", + "APITimeoutError", + } + ) def _try_recover_primary_transport( - self, api_error: Exception, *, retry_count: int, max_retries: int, + self, + api_error: Exception, + *, + retry_count: int, + max_retries: int, ) -> bool: """Attempt one extra primary-provider recovery cycle for transient transport failures. @@ -5858,7 +6444,9 @@ class AIAgent: if getattr(self, "client", None) is not None: try: self._close_openai_client( - self.client, reason="primary_recovery", shared=True, + self.client, + reason="primary_recovery", + shared=True, ) except Exception: pass @@ -5874,10 +6462,12 @@ class AIAgent: if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client + self._anthropic_api_key = rt["anthropic_api_key"] self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( - rt["anthropic_api_key"], rt["anthropic_base_url"], + rt["anthropic_api_key"], + rt["anthropic_base_url"], ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -5907,7 +6497,10 @@ class AIAgent: if not isinstance(content, list): return False for part in content: - if isinstance(part, dict) and part.get("type") in {"image_url", "input_image"}: + if isinstance(part, dict) and part.get("type") in { + "image_url", + "input_image", + }: return True return False @@ -5916,7 +6509,7 @@ class AIAgent: header, _, data = str(image_url or "").partition(",") mime = "image/jpeg" if header.startswith("data:"): - mime_part = header[len("data:"):].split(";", 1)[0].strip() + mime_part = header[len("data:") :].split(";", 1)[0].strip() if mime_part.startswith("image/"): mime = mime_part suffix = { @@ -5926,7 +6519,9 @@ class AIAgent: "image/jpeg": ".jpg", "image/jpg": ".jpg", }.get(mime, ".jpg") - tmp = tempfile.NamedTemporaryFile(prefix="anthropic_image_", suffix=suffix, delete=False) + tmp = tempfile.NamedTemporaryFile( + prefix="anthropic_image_", suffix=suffix, delete=False + ) with tmp: tmp.write(base64.b64decode(data)) path = Path(tmp.name) @@ -5951,14 +6546,18 @@ class AIAgent: vision_source = str(image_url or "") cleanup_path: Optional[Path] = None if vision_source.startswith("data:"): - vision_source, cleanup_path = self._materialize_data_url_for_vision(vision_source) + vision_source, cleanup_path = self._materialize_data_url_for_vision( + vision_source + ) description = "" try: from tools.vision_tools import vision_analyze_tool result_json = asyncio.run( - vision_analyze_tool(image_url=vision_source, user_prompt=analysis_prompt) + vision_analyze_tool( + image_url=vision_source, user_prompt=analysis_prompt + ) ) result = json.loads(result_json) if isinstance(result_json, str) else {} description = (result.get("analysis") or "").strip() @@ -5976,9 +6575,7 @@ class AIAgent: note = f"[The {role_label} attached an image. Here's what it contains:\n{description}]" if vision_source and not str(image_url or "").startswith("data:"): - note += ( - f"\n[If you need a closer look, use vision_analyze with image_url: {vision_source}]" - ) + note += f"\n[If you need a closer look, use vision_analyze with image_url: {vision_source}]" self._anthropic_image_fallback_cache[cache_key] = note return note @@ -6006,11 +6603,19 @@ class AIAgent: if ptype in {"image_url", "input_image"}: image_data = part.get("image_url", {}) - image_url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data or "") + image_url = ( + image_data.get("url", "") + if isinstance(image_data, dict) + else str(image_data or "") + ) if image_url: - image_notes.append(self._describe_image_for_anthropic_fallback(image_url, role)) + image_notes.append( + self._describe_image_for_anthropic_fallback(image_url, role) + ) else: - image_notes.append("[An image was attached but no image source was available.]") + image_notes.append( + "[An image was attached but no image source was available.]" + ) continue text = str(part.get("text", "") or "").strip() @@ -6025,7 +6630,9 @@ class AIAgent: return prefix if suffix: return suffix - return "[A multimodal message was converted to text for Anthropic compatibility.]" + return ( + "[A multimodal message was converted to text for Anthropic compatibility.]" + ) def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( @@ -6050,10 +6657,23 @@ class AIAgent: MiniMax keeps dots (e.g. MiniMax-M2.7). OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free). ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1).""" - if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}: + if (getattr(self, "provider", "") or "").lower() in { + "alibaba", + "minimax", + "minimax-cn", + "opencode-go", + "opencode-zen", + "zai", + }: return True base = (getattr(self, "base_url", "") or "").lower() - return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base + return ( + "dashscope" in base + or "aliyuncs" in base + or "minimax" in base + or "opencode.ai/zen/" in base + or "bigmodel.cn" in base + ) def _is_qwen_portal(self) -> bool: """Return True when the base URL targets Qwen Portal.""" @@ -6086,7 +6706,11 @@ class AIAgent: for msg in prepared: if isinstance(msg, dict) and msg.get("role") == "system": content = msg.get("content") - if isinstance(content, list) and content and isinstance(content[-1], dict): + if ( + isinstance(content, list) + and content + and isinstance(content[-1], dict) + ): content[-1]["cache_control"] = {"type": "ephemeral"} break @@ -6116,7 +6740,11 @@ class AIAgent: for msg in messages: if isinstance(msg, dict) and msg.get("role") == "system": content = msg.get("content") - if isinstance(content, list) and content and isinstance(content[-1], dict): + if ( + isinstance(content, list) + and content + and isinstance(content[-1], dict) + ): content[-1]["cache_control"] = {"type": "ephemeral"} break @@ -6124,6 +6752,7 @@ class AIAgent: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_kwargs + anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) # Pass context_length (total input+output window) so the adapter can # clamp max_tokens (output cap) when the user configured a smaller @@ -6140,7 +6769,9 @@ class AIAgent: model=self.model, messages=anthropic_messages, tools=self.tools, - max_tokens=ephemeral_out if ephemeral_out is not None else self.max_tokens, + max_tokens=ephemeral_out + if ephemeral_out is not None + else self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots(), @@ -6204,7 +6835,10 @@ class AIAgent: if github_reasoning is not None: kwargs["reasoning"] = github_reasoning else: - kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} + kwargs["reasoning"] = { + "effort": reasoning_effort, + "summary": "auto", + } kwargs["include"] = ["reasoning.encrypted_content"] elif not is_github_responses: kwargs["include"] = [] @@ -6259,7 +6893,9 @@ class AIAgent: if self._is_qwen_portal(): if sanitized_messages is api_messages: # No sanitization was done — we need our own copy. - sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) + sanitized_messages = self._qwen_prepare_chat_messages( + sanitized_messages + ) else: # Already a deepcopy — transform in place to avoid a second deepcopy. self._qwen_prepare_chat_messages_inplace(sanitized_messages) @@ -6315,7 +6951,9 @@ class AIAgent: # (the documented max output for qwen3-coder models) so the # model has adequate output budget for tool calls. api_kwargs.update(self._max_tokens_param(65536)) - elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): + elif ( + self._is_openrouter_url() or "nousresearch" in self._base_url_lower + ) and "claude" in (self.model or "").lower(): # OpenRouter and Nous Portal translate requests to Anthropic's # Messages API, which requires max_tokens as a mandatory field. # When we omit it, the proxy picks a default that can be too @@ -6325,6 +6963,7 @@ class AIAgent: # limit ensures full capacity. try: from agent.anthropic_adapter import _get_anthropic_max_output + _model_output_limit = _get_anthropic_max_output(self.model) api_kwargs["max_tokens"] = _model_output_limit except Exception: @@ -6361,10 +7000,7 @@ class AIAgent: else: extra_body["reasoning"] = rc else: - extra_body["reasoning"] = { - "enabled": True, - "effort": "medium" - } + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} # Nous Portal product attribution if _is_nous: @@ -6388,7 +7024,11 @@ class AIAgent: # xAI prompt caching: send x-grok-conv-id header to route requests # to the same server, maximizing automatic cache hits. # https://docs.x.ai/developers/advanced-api-usage/prompt-caching - if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id: + if ( + "x.ai" in self._base_url_lower + and hasattr(self, "session_id") + and self.session_id + ): api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} # Priority Processing / generic request overrides (e.g. service_tier). @@ -6409,7 +7049,10 @@ class AIAgent: return True if "ai-gateway.vercel.sh" in self._base_url_lower: return True - if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower: + if ( + "models.github.ai" in self._base_url_lower + or "api.githubcopilot.com" in self._base_url_lower + ): try: from hermes_cli.models import github_model_reasoning_efforts @@ -6446,9 +7089,9 @@ class AIAgent: if self.reasoning_config and isinstance(self.reasoning_config, dict): if self.reasoning_config.get("enabled") is False: return None - requested_effort = str( - self.reasoning_config.get("effort", "medium") - ).strip().lower() + requested_effort = ( + str(self.reasoning_config.get("effort", "medium")).strip().lower() + ) else: requested_effort = "medium" @@ -6478,13 +7121,15 @@ class AIAgent: # directly in the content rather than returning separate API fields). if not reasoning_text: content = assistant_message.content or "" - think_blocks = re.findall(r'<think>(.*?)</think>', content, flags=re.DOTALL) + think_blocks = re.findall(r"<think>(.*?)</think>", content, flags=re.DOTALL) if think_blocks: combined = "\n\n".join(b.strip() for b in think_blocks if b.strip()) reasoning_text = combined or None if reasoning_text and self.verbose_logging: - logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}") + logging.debug( + f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}" + ) if reasoning_text and self.reasoning_callback: # Skip callback when streaming is active — reasoning was already @@ -6508,7 +7153,10 @@ class AIAgent: "finish_reason": finish_reason, } - if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: + if ( + hasattr(assistant_message, "reasoning_details") + and assistant_message.reasoning_details + ): # Pass reasoning_details back unmodified so providers (OpenRouter, # Anthropic, OpenAI) can maintain reasoning continuity across turns. # Each provider may include opaque fields (signature, encrypted_content) @@ -6546,11 +7194,16 @@ class AIAgent: _fn = getattr(tool_call, "function", None) _fn_name = getattr(_fn, "name", "") if _fn else "" _fn_args = getattr(_fn, "arguments", "{}") if _fn else "{}" - call_id = self._deterministic_call_id(_fn_name, _fn_args, len(tool_calls)) + call_id = self._deterministic_call_id( + _fn_name, _fn_args, len(tool_calls) + ) call_id = call_id.strip() response_item_id = getattr(tool_call, "response_item_id", None) - if not isinstance(response_item_id, str) or not response_item_id.strip(): + if ( + not isinstance(response_item_id, str) + or not response_item_id.strip() + ): _, embedded_response_item_id = self._split_responses_tool_id(raw_id) response_item_id = embedded_response_item_id @@ -6566,7 +7219,7 @@ class AIAgent: "type": tool_call.type, "function": { "name": tool_call.function.name, - "arguments": tool_call.function.arguments + "arguments": tool_call.function.arguments, }, } # Preserve extra_content (e.g. Gemini thought_signature) so it @@ -6605,7 +7258,8 @@ class AIAgent: _STRIP_KEYS = {"call_id", "response_item_id"} api_msg["tool_calls"] = [ {k: v for k, v in tc.items() if k not in _STRIP_KEYS} - if isinstance(tc, dict) else tc + if isinstance(tc, dict) + else tc for tc in tool_calls ] return api_msg @@ -6641,12 +7295,14 @@ class AIAgent: return if "memory" not in self.valid_tool_names or not self._memory_store: return - effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns + effective_min = ( + min_turns if min_turns is not None else self._memory_flush_min_turns + ) if self._user_turn_count < effective_min: return if messages is None: - messages = getattr(self, '_session_messages', None) + messages = getattr(self, "_session_messages", None) if not messages or len(messages) < 3: return @@ -6656,7 +7312,11 @@ class AIAgent: "corrections, and recurring patterns over task-specific details.]" ) _sentinel = f"__flush_{id(self)}_{time.monotonic()}" - flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel} + flush_msg = { + "role": "user", + "content": flush_content, + "_flush_sentinel": _sentinel, + } messages.append(flush_msg) try: @@ -6678,11 +7338,13 @@ class AIAgent: api_messages.append(api_msg) if self._cached_system_prompt: - api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages + api_messages = [ + {"role": "system", "content": self._cached_system_prompt} + ] + api_messages # Make one API call with only the memory tool available memory_tool_def = None - for t in (self.tools or []): + for t in self.tools or []: if t.get("function", {}).get("name") == "memory": memory_tool_def = t break @@ -6694,6 +7356,7 @@ class AIAgent: # Use auxiliary client for the flush call when available -- # it's cheaper and avoids Codex Responses API incompatibility. from agent.auxiliary_client import call_llm as _call_llm + _aux_available = True try: response = _call_llm( @@ -6718,10 +7381,15 @@ class AIAgent: response = self._run_codex_stream(codex_kwargs) elif not _aux_available and self.api_mode == "anthropic_messages": # Native Anthropic — use the Anthropic client directly - from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs + from agent.anthropic_adapter import ( + build_anthropic_kwargs as _build_ant_kwargs, + ) + ant_kwargs = _build_ant_kwargs( - model=self.model, messages=api_messages, - tools=[memory_tool_def], max_tokens=5120, + model=self.model, + messages=api_messages, + tools=[memory_tool_def], + max_tokens=5120, reasoning_config=None, preserve_dots=self._anthropic_preserve_dots(), ) @@ -6735,7 +7403,10 @@ class AIAgent: **self._max_tokens_param(5120), } from agent.auxiliary_client import _get_task_timeout - response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create( + + response = self._ensure_primary_openai_client( + reason="flush_memories" + ).chat.completions.create( **api_kwargs, timeout=_get_task_timeout("flush_memories") ) @@ -6746,8 +7417,13 @@ class AIAgent: if assistant_msg and assistant_msg.tool_calls: tool_calls = assistant_msg.tool_calls elif self.api_mode == "anthropic_messages" and not _aux_available: - from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush - _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth) + from agent.anthropic_adapter import ( + normalize_anthropic_response as _nar_flush, + ) + + _flush_msg, _ = _nar_flush( + response, strip_tool_prefix=self._is_anthropic_oauth + ) if _flush_msg and _flush_msg.tool_calls: tool_calls = _flush_msg.tool_calls elif hasattr(response, "choices") and response.choices: @@ -6761,6 +7437,7 @@ class AIAgent: args = json.loads(tc.function.arguments) flush_target = args.get("target", "memory") from tools.memory_tool import memory_tool as _memory_tool + _memory_tool( action=args.get("action"), target=flush_target, @@ -6769,7 +7446,9 @@ class AIAgent: store=self._memory_store, ) if not self.quiet_mode: - print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") + print( + f" 🧠 Memory flush: saved to {args.get('target', 'memory')}" + ) except Exception as e: logger.debug("Memory flush tool call failed: %s", e) except Exception as e: @@ -6784,7 +7463,15 @@ class AIAgent: if messages and messages[-1].get("_flush_sentinel") == _sentinel: messages.pop() - def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple: + def _compress_context( + self, + messages: list, + system_message: str, + *, + approx_tokens: int = None, + task_id: str = "default", + focus_topic: str = None, + ) -> tuple: """Compress conversation context and split the session in SQLite. Args: @@ -6798,8 +7485,10 @@ class AIAgent: _pre_msg_count = len(messages) logger.info( "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r", - self.session_id or "none", _pre_msg_count, - f"{approx_tokens:,}" if approx_tokens else "unknown", self.model, + self.session_id or "none", + _pre_msg_count, + f"{approx_tokens:,}" if approx_tokens else "unknown", + self.model, focus_topic, ) # Pre-compression memory flush: let the model save memories before they're lost @@ -6812,7 +7501,9 @@ class AIAgent: except Exception: pass - compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic) + compressed = self.context_compressor.compress( + messages, current_tokens=approx_tokens, focus_topic=focus_topic + ) todo_snapshot = self._todo_store.format_for_injection() if todo_snapshot: @@ -6828,27 +7519,39 @@ class AIAgent: old_title = self._session_db.get_session_title(self.session_id) self._session_db.end_session(self.session_id, "compression") old_session_id = self.session_id - self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}" + self.session_id = ( + f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}" + ) # Update session_log_file to point to the new session's JSON file - self.session_log_file = self.logs_dir / f"session_{self.session_id}.json" + self.session_log_file = ( + self.logs_dir / f"session_{self.session_id}.json" + ) self._session_db.create_session( session_id=self.session_id, - source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"), + source=self.platform + or os.environ.get("HERMES_SESSION_SOURCE", "cli"), model=self.model, parent_session_id=old_session_id, ) # Auto-number the title for the continuation session if old_title: try: - new_title = self._session_db.get_next_title_in_lineage(old_title) + new_title = self._session_db.get_next_title_in_lineage( + old_title + ) self._session_db.set_session_title(self.session_id, new_title) except (ValueError, Exception) as e: logger.debug("Could not propagate title on compression: %s", e) - self._session_db.update_system_prompt(self.session_id, new_system_prompt) + self._session_db.update_system_prompt( + self.session_id, new_system_prompt + ) # Reset flush cursor — new session starts with no messages written self._last_flushed_db_idx = 0 except Exception as e: - logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e) + logger.warning( + "Session DB compression split failed — new session will NOT be indexed: %s", + e, + ) # Warn on repeated compressions (quality degrades with each pass) _cc = self.context_compressor.compression_count @@ -6861,10 +7564,9 @@ class AIAgent: # Update token estimate after compaction so pressure calculations # use the post-compression count, not the stale pre-compression one. - _compressed_est = ( - estimate_tokens_rough(new_system_prompt) - + estimate_messages_tokens_rough(compressed) - ) + _compressed_est = estimate_tokens_rough( + new_system_prompt + ) + estimate_messages_tokens_rough(compressed) self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0 @@ -6887,18 +7589,27 @@ class AIAgent: # file it needs the full content, not a "file unchanged" stub. try: from tools.file_tools import reset_file_dedup + reset_file_dedup(task_id) except Exception: pass logger.info( "context compression done: session=%s messages=%d->%d tokens=~%s", - self.session_id or "none", _pre_msg_count, len(compressed), + self.session_id or "none", + _pre_msg_count, + len(compressed), f"{_compressed_est:,}", ) return compressed, new_system_prompt - def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: + def _execute_tool_calls( + self, + assistant_message, + messages: list, + effective_task_id: str, + api_call_count: int = 0, + ) -> None: """Execute tool calls from the assistant message and append results to messages. Dispatches to concurrent execution only for batches that look @@ -6921,8 +7632,13 @@ class AIAgent: finally: self._executing_tools = False - def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str, - tool_call_id: Optional[str] = None) -> str: + def _invoke_tool( + self, + function_name: str, + function_args: dict, + effective_task_id: str, + tool_call_id: Optional[str] = None, + ) -> str: """Invoke a single tool and return the result string. No display logic. Handles both agent-level tools (todo, memory, etc.) and registry-dispatched @@ -6933,8 +7649,11 @@ class AIAgent: block_message: Optional[str] = None try: from hermes_cli.plugins import get_pre_tool_call_block_message + block_message = get_pre_tool_call_block_message( - function_name, function_args, task_id=effective_task_id or "", + function_name, + function_args, + task_id=effective_task_id or "", ) except Exception: pass @@ -6943,6 +7662,7 @@ class AIAgent: if function_name == "todo": from tools.todo_tool import todo_tool as _todo_tool + return _todo_tool( todos=function_args.get("todos"), merge=function_args.get("merge", False), @@ -6950,8 +7670,11 @@ class AIAgent: ) elif function_name == "session_search": if not self._session_db: - return json.dumps({"success": False, "error": "Session database not available."}) + return json.dumps( + {"success": False, "error": "Session database not available."} + ) from tools.session_search_tool import session_search as _session_search + return _session_search( query=function_args.get("query", ""), role_filter=function_args.get("role_filter"), @@ -6962,6 +7685,7 @@ class AIAgent: elif function_name == "memory": target = function_args.get("target", "memory") from tools.memory_tool import memory_tool as _memory_tool + result = _memory_tool( action=function_args.get("action"), target=target, @@ -6970,7 +7694,10 @@ class AIAgent: store=self._memory_store, ) # Bridge: notify external memory provider of built-in memory writes - if self._memory_manager and function_args.get("action") in ("add", "replace"): + if self._memory_manager and function_args.get("action") in ( + "add", + "replace", + ): try: self._memory_manager.on_memory_write( function_args.get("action", ""), @@ -6984,6 +7711,7 @@ class AIAgent: return self._memory_manager.handle_tool_call(function_name, function_args) elif function_name == "clarify": from tools.clarify_tool import clarify_tool as _clarify_tool + return _clarify_tool( question=function_args.get("question", ""), choices=function_args.get("choices"), @@ -6991,6 +7719,7 @@ class AIAgent: ) elif function_name == "delegate_task": from tools.delegate_tool import delegate_task as _delegate_task + return _delegate_task( goal=function_args.get("goal"), context=function_args.get("context"), @@ -7001,10 +7730,14 @@ class AIAgent: ) else: return handle_function_call( - function_name, function_args, effective_task_id, + function_name, + function_args, + effective_task_id, tool_call_id=tool_call_id, session_id=self.session_id or "", - enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None, + enabled_tools=list(self.valid_tool_names) + if self.valid_tool_names + else None, skip_pre_tool_call_hook=True, ) @@ -7019,6 +7752,7 @@ class AIAgent: """ import shutil as _shutil import textwrap as _tw + cols = _shutil.get_terminal_size((120, 24)).columns wrap_width = max(40, cols - len(indent)) out_lines: list[str] = [] @@ -7026,14 +7760,23 @@ class AIAgent: if len(raw_line) <= wrap_width: out_lines.append(raw_line) else: - wrapped = _tw.wrap(raw_line, width=wrap_width, - break_long_words=True, - break_on_hyphens=False) + wrapped = _tw.wrap( + raw_line, + width=wrap_width, + break_long_words=True, + break_on_hyphens=False, + ) out_lines.extend(wrapped or [raw_line]) body = ("\n" + indent).join(out_lines) return f"{indent}{label}{body}" - def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: + def _execute_tool_calls_concurrent( + self, + assistant_message, + messages: list, + effective_task_id: str, + api_call_count: int = 0, + ) -> None: """Execute multiple tool calls concurrently using a thread pool. Results are collected in the original tool-call order and appended to @@ -7046,11 +7789,13 @@ class AIAgent: if self._interrupt_requested: print(f"{self.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)") for tc in tool_calls: - messages.append({ - "role": "tool", - "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]", - "tool_call_id": tc.id, - }) + messages.append( + { + "role": "tool", + "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]", + "tool_call_id": tc.id, + } + ) return # ── Parse args + pre-execution bookkeeping ─────────────────────── @@ -7072,12 +7817,19 @@ class AIAgent: function_args = {} # Checkpoint for file-mutating tools - if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled: + if ( + function_name in ("write_file", "patch") + and self._checkpoint_mgr.enabled + ): try: file_path = function_args.get("path", "") if file_path: - work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path) - self._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}") + work_dir = self._checkpoint_mgr.get_working_dir_for_path( + file_path + ) + self._checkpoint_mgr.ensure_checkpoint( + work_dir, f"before {function_name}" + ) except Exception: pass @@ -7086,7 +7838,9 @@ class AIAgent: try: cmd = function_args.get("command", "") if _is_destructive_command(cmd): - cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd()) + cwd = function_args.get("workdir") or os.getenv( + "TERMINAL_CWD", os.getcwd() + ) self._checkpoint_mgr.ensure_checkpoint( cwd, f"before terminal: {cmd[:60]}" ) @@ -7103,10 +7857,20 @@ class AIAgent: args_str = json.dumps(args, ensure_ascii=False) if self.verbose_logging: print(f" 📞 Tool {i}: {name}({list(args.keys())})") - print(self._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False))) + print( + self._wrap_verbose( + "Args: ", json.dumps(args, indent=2, ensure_ascii=False) + ) + ) else: - args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str - print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}") + args_preview = ( + args_str[: self.log_prefix_chars] + "..." + if len(args_str) > self.log_prefix_chars + else args_str + ) + print( + f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}" + ) for tc, name, args in parsed_calls: if self.tool_progress_callback: @@ -7131,28 +7895,51 @@ class AIAgent: """Worker function executed in a thread.""" start = time.time() try: - result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) + result = self._invoke_tool( + function_name, function_args, effective_task_id, tool_call.id + ) except Exception as tool_error: result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) + logger.error( + "_invoke_tool raised for %s: %s", + function_name, + tool_error, + exc_info=True, + ) duration = time.time() - start is_error, _ = _detect_tool_failure(function_name, result) if is_error: - logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200]) + logger.info( + "tool %s failed (%.2fs): %s", function_name, duration, result[:200] + ) else: - logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result)) + logger.info( + "tool %s completed (%.2fs, %d chars)", + function_name, + duration, + len(result), + ) results[index] = (function_name, function_args, result, duration, is_error) # Start spinner for CLI mode (skip when TUI handles tool progress) spinner = None - if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): + if ( + self._should_emit_quiet_tool_messages() + and self._should_start_quiet_spinner() + ): face = random.choice(KawaiiSpinner.KAWAII_WAITING) - spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn) + spinner = KawaiiSpinner( + f"{face} ⚡ running {num_tools} tools concurrently", + spinner_type="dots", + print_fn=self._print_fn, + ) spinner.start() try: max_workers = min(num_tools, _MAX_TOOL_WORKERS) - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + with concurrent.futures.ThreadPoolExecutor( + max_workers=max_workers + ) as executor: futures = [] for i, (tc, name, args) in enumerate(parsed_calls): f = executor.submit(_run_tool, i, tc, name, args) @@ -7165,46 +7952,81 @@ class AIAgent: # Build a summary message for the spinner stop completed = sum(1 for r in results if r is not None) total_dur = sum(r[3] for r in results if r is not None) - spinner.stop(f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total") + spinner.stop( + f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total" + ) # ── Post-execution: display per-tool results ───────────────────── for i, (tc, name, args) in enumerate(parsed_calls): r = results[i] if r is None: # Shouldn't happen, but safety fallback - function_result = f"Error executing tool '{name}': thread did not return a result" + function_result = ( + f"Error executing tool '{name}': thread did not return a result" + ) tool_duration = 0.0 else: - function_name, function_args, function_result, tool_duration, is_error = r + ( + function_name, + function_args, + function_result, + tool_duration, + is_error, + ) = r if is_error: - result_preview = function_result[:200] if len(function_result) > 200 else function_result - logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + result_preview = ( + function_result[:200] + if len(function_result) > 200 + else function_result + ) + logger.warning( + "Tool %s returned error (%.2fs): %s", + function_name, + tool_duration, + result_preview, + ) if self.tool_progress_callback: try: self.tool_progress_callback( - "tool.completed", function_name, None, None, - duration=tool_duration, is_error=is_error, + "tool.completed", + function_name, + None, + None, + duration=tool_duration, + is_error=is_error, ) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") if self.verbose_logging: - logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") - logging.debug(f"Tool result ({len(function_result)} chars): {function_result}") + logging.debug( + f"Tool {function_name} completed in {tool_duration:.2f}s" + ) + logging.debug( + f"Tool result ({len(function_result)} chars): {function_result}" + ) # Print cute message per tool if self._should_emit_quiet_tool_messages(): - cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result) + cute_msg = _get_cute_tool_message_impl( + name, args, tool_duration, result=function_result + ) self._safe_print(f" {cute_msg}") elif not self.quiet_mode: if self.verbose_logging: - print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s") + print(f" ✅ Tool {i + 1} completed in {tool_duration:.2f}s") print(self._wrap_verbose("Result: ", function_result)) else: - response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result - print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}") + response_preview = ( + function_result[: self.log_prefix_chars] + "..." + if len(function_result) > self.log_prefix_chars + else function_result + ) + print( + f" ✅ Tool {i + 1} completed in {tool_duration:.2f}s - {response_preview}" + ) self._current_tool = None self._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)") @@ -7239,16 +8061,25 @@ class AIAgent: turn_tool_msgs = messages[-num_tools:] enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id)) - def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: + def _execute_tool_calls_sequential( + self, + assistant_message, + messages: list, + effective_task_id: str, + api_call_count: int = 0, + ) -> None: """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools.""" for i, tool_call in enumerate(assistant_message.tool_calls, 1): # SAFETY: check interrupt BEFORE starting each tool. # If the user sent "stop" during a previous tool's execution, # do NOT start any more tools -- skip them all immediately. if self._interrupt_requested: - remaining_calls = assistant_message.tool_calls[i-1:] + remaining_calls = assistant_message.tool_calls[i - 1 :] if remaining_calls: - self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True) + self._vprint( + f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", + force=True, + ) for skipped_tc in remaining_calls: skipped_name = skipped_tc.function.name skip_msg = { @@ -7273,8 +8104,11 @@ class AIAgent: _block_msg: Optional[str] = None try: from hermes_cli.plugins import get_pre_tool_call_block_message + _block_msg = get_pre_tool_call_block_message( - function_name, function_args, task_id=effective_task_id or "", + function_name, + function_args, + task_id=effective_task_id or "", ) except Exception: pass @@ -7293,11 +8127,24 @@ class AIAgent: if not self.quiet_mode: args_str = json.dumps(function_args, ensure_ascii=False) if self.verbose_logging: - print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})") - print(self._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False))) + print( + f" 📞 Tool {i}: {function_name}({list(function_args.keys())})" + ) + print( + self._wrap_verbose( + "Args: ", + json.dumps(function_args, indent=2, ensure_ascii=False), + ) + ) else: - args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str - print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}") + args_preview = ( + args_str[: self.log_prefix_chars] + "..." + if len(args_str) > self.log_prefix_chars + else args_str + ) + print( + f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}" + ) if _block_msg is None: self._current_tool = function_name @@ -7309,6 +8156,7 @@ class AIAgent: if _block_msg is None: try: from tools.environments.base import set_activity_callback + set_activity_callback(self._touch_activity) except Exception: pass @@ -7316,7 +8164,9 @@ class AIAgent: if _block_msg is None and self.tool_progress_callback: try: preview = _build_tool_preview(function_name, function_args) - self.tool_progress_callback("tool.started", function_name, preview, function_args) + self.tool_progress_callback( + "tool.started", function_name, preview, function_args + ) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") @@ -7327,11 +8177,17 @@ class AIAgent: logging.debug(f"Tool start callback error: {cb_err}") # Checkpoint: snapshot working dir before file-mutating tools - if _block_msg is None and function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled: + if ( + _block_msg is None + and function_name in ("write_file", "patch") + and self._checkpoint_mgr.enabled + ): try: file_path = function_args.get("path", "") if file_path: - work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path) + work_dir = self._checkpoint_mgr.get_working_dir_for_path( + file_path + ) self._checkpoint_mgr.ensure_checkpoint( work_dir, f"before {function_name}" ) @@ -7339,11 +8195,17 @@ class AIAgent: pass # never block tool execution # Checkpoint before destructive terminal commands - if _block_msg is None and function_name == "terminal" and self._checkpoint_mgr.enabled: + if ( + _block_msg is None + and function_name == "terminal" + and self._checkpoint_mgr.enabled + ): try: cmd = function_args.get("command", "") if _is_destructive_command(cmd): - cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd()) + cwd = function_args.get("workdir") or os.getenv( + "TERMINAL_CWD", os.getcwd() + ) self._checkpoint_mgr.ensure_checkpoint( cwd, f"before terminal: {cmd[:60]}" ) @@ -7358,6 +8220,7 @@ class AIAgent: tool_duration = 0.0 elif function_name == "todo": from tools.todo_tool import todo_tool as _todo_tool + function_result = _todo_tool( todos=function_args.get("todos"), merge=function_args.get("merge", False), @@ -7365,12 +8228,19 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self._should_emit_quiet_tool_messages(): - self._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}") + self._vprint( + f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}" + ) elif function_name == "session_search": if not self._session_db: - function_result = json.dumps({"success": False, "error": "Session database not available."}) + function_result = json.dumps( + {"success": False, "error": "Session database not available."} + ) else: - from tools.session_search_tool import session_search as _session_search + from tools.session_search_tool import ( + session_search as _session_search, + ) + function_result = _session_search( query=function_args.get("query", ""), role_filter=function_args.get("role_filter"), @@ -7380,10 +8250,13 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self._should_emit_quiet_tool_messages(): - self._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}") + self._vprint( + f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}" + ) elif function_name == "memory": target = function_args.get("target", "memory") from tools.memory_tool import memory_tool as _memory_tool + function_result = _memory_tool( action=function_args.get("action"), target=target, @@ -7393,9 +8266,12 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self._should_emit_quiet_tool_messages(): - self._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}") + self._vprint( + f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}" + ) elif function_name == "clarify": from tools.clarify_tool import clarify_tool as _clarify_tool + function_result = _clarify_tool( question=function_args.get("question", ""), choices=function_args.get("choices"), @@ -7403,19 +8279,31 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self._should_emit_quiet_tool_messages(): - self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") + self._vprint( + f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}" + ) elif function_name == "delegate_task": from tools.delegate_tool import delegate_task as _delegate_task + tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): spinner_label = f"🔀 delegating {len(tasks_arg)} tasks" else: goal_preview = (function_args.get("goal") or "")[:30] - spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating" + spinner_label = ( + f"🔀 {goal_preview}" if goal_preview else "🔀 delegating" + ) spinner = None - if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): + if ( + self._should_emit_quiet_tool_messages() + and self._should_start_quiet_spinner() + ): face = random.choice(KawaiiSpinner.KAWAII_WAITING) - spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn) + spinner = KawaiiSpinner( + f"{face} {spinner_label}", + spinner_type="dots", + print_fn=self._print_fn, + ) spinner.start() self._delegate_spinner = spinner _delegate_result = None @@ -7432,30 +8320,58 @@ class AIAgent: finally: self._delegate_spinner = None tool_duration = time.time() - tool_start_time - cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result) + cute_msg = _get_cute_tool_message_impl( + "delegate_task", + function_args, + tool_duration, + result=_delegate_result, + ) if spinner: spinner.stop(cute_msg) elif self._should_emit_quiet_tool_messages(): self._vprint(f" {cute_msg}") - elif self._context_engine_tool_names and function_name in self._context_engine_tool_names: + elif ( + self._context_engine_tool_names + and function_name in self._context_engine_tool_names + ): # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.) spinner = None if self.quiet_mode and not self.tool_progress_callback: face = random.choice(KawaiiSpinner.KAWAII_WAITING) emoji = _get_tool_emoji(function_name) - preview = _build_tool_preview(function_name, function_args) or function_name - spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn) + preview = ( + _build_tool_preview(function_name, function_args) + or function_name + ) + spinner = KawaiiSpinner( + f"{face} {emoji} {preview}", + spinner_type="dots", + print_fn=self._print_fn, + ) spinner.start() _ce_result = None try: - function_result = self.context_compressor.handle_tool_call(function_name, function_args, messages=messages) + function_result = self.context_compressor.handle_tool_call( + function_name, function_args, messages=messages + ) _ce_result = function_result except Exception as tool_error: - function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"}) - logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True) + function_result = json.dumps( + { + "error": f"Context engine tool '{function_name}' failed: {tool_error}" + } + ) + logger.error( + "context_engine.handle_tool_call raised for %s: %s", + function_name, + tool_error, + exc_info=True, + ) finally: tool_duration = time.time() - tool_start_time - cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result) + cute_msg = _get_cute_tool_message_impl( + function_name, function_args, tool_duration, result=_ce_result + ) if spinner: spinner.stop(cute_msg) elif self.quiet_mode: @@ -7464,50 +8380,97 @@ class AIAgent: # Memory provider tools (hindsight_retain, honcho_search, etc.) # These are not in the tool registry — route through MemoryManager. spinner = None - if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): + if ( + self._should_emit_quiet_tool_messages() + and self._should_start_quiet_spinner() + ): face = random.choice(KawaiiSpinner.KAWAII_WAITING) emoji = _get_tool_emoji(function_name) - preview = _build_tool_preview(function_name, function_args) or function_name - spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn) + preview = ( + _build_tool_preview(function_name, function_args) + or function_name + ) + spinner = KawaiiSpinner( + f"{face} {emoji} {preview}", + spinner_type="dots", + print_fn=self._print_fn, + ) spinner.start() _mem_result = None try: - function_result = self._memory_manager.handle_tool_call(function_name, function_args) + function_result = self._memory_manager.handle_tool_call( + function_name, function_args + ) _mem_result = function_result except Exception as tool_error: - function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"}) - logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True) + function_result = json.dumps( + {"error": f"Memory tool '{function_name}' failed: {tool_error}"} + ) + logger.error( + "memory_manager.handle_tool_call raised for %s: %s", + function_name, + tool_error, + exc_info=True, + ) finally: tool_duration = time.time() - tool_start_time - cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result) + cute_msg = _get_cute_tool_message_impl( + function_name, function_args, tool_duration, result=_mem_result + ) if spinner: spinner.stop(cute_msg) elif self._should_emit_quiet_tool_messages(): self._vprint(f" {cute_msg}") elif self.quiet_mode: spinner = None - if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): + if ( + self._should_emit_quiet_tool_messages() + and self._should_start_quiet_spinner() + ): face = random.choice(KawaiiSpinner.KAWAII_WAITING) emoji = _get_tool_emoji(function_name) - preview = _build_tool_preview(function_name, function_args) or function_name - spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn) + preview = ( + _build_tool_preview(function_name, function_args) + or function_name + ) + spinner = KawaiiSpinner( + f"{face} {emoji} {preview}", + spinner_type="dots", + print_fn=self._print_fn, + ) spinner.start() _spinner_result = None try: function_result = handle_function_call( - function_name, function_args, effective_task_id, + function_name, + function_args, + effective_task_id, tool_call_id=tool_call.id, session_id=self.session_id or "", - enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None, + enabled_tools=list(self.valid_tool_names) + if self.valid_tool_names + else None, skip_pre_tool_call_hook=True, ) _spinner_result = function_result except Exception as tool_error: - function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) + function_result = ( + f"Error executing tool '{function_name}': {tool_error}" + ) + logger.error( + "handle_function_call raised for %s: %s", + function_name, + tool_error, + exc_info=True, + ) finally: tool_duration = time.time() - tool_start_time - cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result) + cute_msg = _get_cute_tool_message_impl( + function_name, + function_args, + tool_duration, + result=_spinner_result, + ) if spinner: spinner.stop(cute_msg) elif self._should_emit_quiet_tool_messages(): @@ -7515,48 +8478,85 @@ class AIAgent: else: try: function_result = handle_function_call( - function_name, function_args, effective_task_id, + function_name, + function_args, + effective_task_id, tool_call_id=tool_call.id, session_id=self.session_id or "", - enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None, + enabled_tools=list(self.valid_tool_names) + if self.valid_tool_names + else None, skip_pre_tool_call_hook=True, ) except Exception as tool_error: - function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) + function_result = ( + f"Error executing tool '{function_name}': {tool_error}" + ) + logger.error( + "handle_function_call raised for %s: %s", + function_name, + tool_error, + exc_info=True, + ) tool_duration = time.time() - tool_start_time - result_preview = function_result if self.verbose_logging else ( - function_result[:200] if len(function_result) > 200 else function_result + result_preview = ( + function_result + if self.verbose_logging + else ( + function_result[:200] + if len(function_result) > 200 + else function_result + ) ) # Log tool errors to the persistent error log so [error] tags # in the UI always have a corresponding detailed entry on disk. _is_error_result, _ = _detect_tool_failure(function_name, function_result) if _is_error_result: - logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + logger.warning( + "Tool %s returned error (%.2fs): %s", + function_name, + tool_duration, + result_preview, + ) else: - logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result)) + logger.info( + "tool %s completed (%.2fs, %d chars)", + function_name, + tool_duration, + len(function_result), + ) if self.tool_progress_callback: try: self.tool_progress_callback( - "tool.completed", function_name, None, None, - duration=tool_duration, is_error=_is_error_result, + "tool.completed", + function_name, + None, + None, + duration=tool_duration, + is_error=_is_error_result, ) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") self._current_tool = None - self._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)") + self._touch_activity( + f"tool completed: {function_name} ({tool_duration:.1f}s)" + ) if self.verbose_logging: logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") - logging.debug(f"Tool result ({len(function_result)} chars): {function_result}") + logging.debug( + f"Tool result ({len(function_result)} chars): {function_result}" + ) if self.tool_complete_callback: try: - self.tool_complete_callback(tool_call.id, function_name, function_args, function_result) + self.tool_complete_callback( + tool_call.id, function_name, function_args, function_result + ) except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") @@ -7568,14 +8568,16 @@ class AIAgent: ) # Discover subdirectory context files from tool arguments - subdir_hints = self._subdirectory_hints.check_tool_call(function_name, function_args) + subdir_hints = self._subdirectory_hints.check_tool_call( + function_name, function_args + ) if subdir_hints: function_result += subdir_hints tool_msg = { "role": "tool", "content": function_result, - "tool_call_id": tool_call.id + "tool_call_id": tool_call.id, } messages.append(tool_msg) @@ -7584,18 +8586,27 @@ class AIAgent: print(f" ✅ Tool {i} completed in {tool_duration:.2f}s") print(self._wrap_verbose("Result: ", function_result)) else: - response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result - print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}") + response_preview = ( + function_result[: self.log_prefix_chars] + "..." + if len(function_result) > self.log_prefix_chars + else function_result + ) + print( + f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}" + ) if self._interrupt_requested and i < len(assistant_message.tool_calls): remaining = len(assistant_message.tool_calls) - i - self._vprint(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True) + self._vprint( + f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", + force=True, + ) for skipped_tc in assistant_message.tool_calls[i:]: skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]", - "tool_call_id": skipped_tc.id + "tool_call_id": skipped_tc.id, } messages.append(skip_msg) break @@ -7606,9 +8617,9 @@ class AIAgent: # ── Per-turn aggregate budget enforcement ───────────────────────── num_tools_seq = len(assistant_message.tool_calls) if num_tools_seq > 0: - enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id)) - - + enforce_turn_budget( + messages[-num_tools_seq:], env=get_active_env(effective_task_id) + ) def _emit_context_pressure(self, compaction_progress: float, compressor) -> None: """Notify the user that context is approaching the compaction threshold. @@ -7621,9 +8632,16 @@ class AIAgent: For CLI: prints a formatted line with a progress bar. For gateway: fires status_callback so the platform can send a chat message. """ - from agent.display import format_context_pressure, format_context_pressure_gateway + from agent.display import ( + format_context_pressure, + format_context_pressure_gateway, + ) - threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5 + threshold_pct = ( + compressor.threshold_tokens / compressor.context_length + if compressor.context_length + else 0.5 + ) # CLI output — always shown (these are user-facing status notifications, # not verbose debug output, so they bypass quiet_mode). @@ -7651,7 +8669,9 @@ class AIAgent: def _handle_max_iterations(self, messages: list, api_call_count: int) -> str: """Request a summary when max iterations are reached. Returns the final response text.""" - print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...") + print( + f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary..." + ) summary_request = ( "You've reached the maximum number of tool-calling iterations allowed. " @@ -7667,7 +8687,11 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"): + for internal_field in ( + "reasoning", + "finish_reason", + "_thinking_prefill", + ): api_msg.pop(internal_field, None) if _needs_sanitize: self._sanitize_tool_calls_for_strict_api(api_msg) @@ -7675,9 +8699,13 @@ class AIAgent: effective_system = self._cached_system_prompt or "" if self.ephemeral_system_prompt: - effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + effective_system = ( + effective_system + "\n\n" + self.ephemeral_system_prompt + ).strip() if effective_system: - api_messages = [{"role": "system", "content": effective_system}] + api_messages + api_messages = [ + {"role": "system", "content": effective_system} + ] + api_messages if self.prefill_messages: sys_offset = 1 if effective_system else 0 for idx, pfm in enumerate(self.prefill_messages): @@ -7691,7 +8719,7 @@ class AIAgent: else: summary_extra_body["reasoning"] = { "enabled": True, - "effort": "medium" + "effort": "medium", } if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] @@ -7701,7 +8729,11 @@ class AIAgent: codex_kwargs.pop("tools", None) summary_response = self._run_codex_stream(codex_kwargs) assistant_message, _ = self._normalize_codex_response(summary_response) - final_response = (assistant_message.content or "").strip() if assistant_message else "" + final_response = ( + (assistant_message.content or "").strip() + if assistant_message + else "" + ) else: summary_kwargs = { "model": self.model, @@ -7727,29 +8759,49 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar - _ant_kw = _bak(model=self.model, messages=api_messages, tools=None, - max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, - is_oauth=self._is_anthropic_oauth, - preserve_dots=self._anthropic_preserve_dots()) + from agent.anthropic_adapter import ( + build_anthropic_kwargs as _bak, + normalize_anthropic_response as _nar, + ) + + _ant_kw = _bak( + model=self.model, + messages=api_messages, + tools=None, + max_tokens=self.max_tokens, + reasoning_config=self.reasoning_config, + is_oauth=self._is_anthropic_oauth, + preserve_dots=self._anthropic_preserve_dots(), + ) summary_response = self._anthropic_messages_create(_ant_kw) - _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth) + _msg, _ = _nar( + summary_response, strip_tool_prefix=self._is_anthropic_oauth + ) final_response = (_msg.content or "").strip() else: - summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs) + summary_response = self._ensure_primary_openai_client( + reason="iteration_limit_summary" + ).chat.completions.create(**summary_kwargs) - if summary_response.choices and summary_response.choices[0].message.content: + if ( + summary_response.choices + and summary_response.choices[0].message.content + ): final_response = summary_response.choices[0].message.content else: final_response = "" if final_response: if "<think>" in final_response: - final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip() + final_response = re.sub( + r"<think>.*?</think>\s*", "", final_response, flags=re.DOTALL + ).strip() if final_response: messages.append({"role": "assistant", "content": final_response}) else: - final_response = "I reached the iteration limit and couldn't generate a summary." + final_response = ( + "I reached the iteration limit and couldn't generate a summary." + ) else: # Retry summary generation if self.api_mode == "codex_responses": @@ -7757,15 +8809,28 @@ class AIAgent: codex_kwargs.pop("tools", None) retry_response = self._run_codex_stream(codex_kwargs) retry_msg, _ = self._normalize_codex_response(retry_response) - final_response = (retry_msg.content or "").strip() if retry_msg else "" + final_response = ( + (retry_msg.content or "").strip() if retry_msg else "" + ) elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2 - _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None, - is_oauth=self._is_anthropic_oauth, - max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, - preserve_dots=self._anthropic_preserve_dots()) + from agent.anthropic_adapter import ( + build_anthropic_kwargs as _bak2, + normalize_anthropic_response as _nar2, + ) + + _ant_kw2 = _bak2( + model=self.model, + messages=api_messages, + tools=None, + is_oauth=self._is_anthropic_oauth, + max_tokens=self.max_tokens, + reasoning_config=self.reasoning_config, + preserve_dots=self._anthropic_preserve_dots(), + ) retry_response = self._anthropic_messages_create(_ant_kw2) - _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth) + _retry_msg, _ = _nar2( + retry_response, strip_tool_prefix=self._is_anthropic_oauth + ) final_response = (_retry_msg.content or "").strip() else: summary_kwargs = { @@ -7777,22 +8842,36 @@ class AIAgent: if summary_extra_body: summary_kwargs["extra_body"] = summary_extra_body - summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs) + summary_response = self._ensure_primary_openai_client( + reason="iteration_limit_summary_retry" + ).chat.completions.create(**summary_kwargs) - if summary_response.choices and summary_response.choices[0].message.content: + if ( + summary_response.choices + and summary_response.choices[0].message.content + ): final_response = summary_response.choices[0].message.content else: final_response = "" if final_response: if "<think>" in final_response: - final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip() + final_response = re.sub( + r"<think>.*?</think>\s*", + "", + final_response, + flags=re.DOTALL, + ).strip() if final_response: - messages.append({"role": "assistant", "content": final_response}) + messages.append( + {"role": "assistant", "content": final_response} + ) else: final_response = "I reached the iteration limit and couldn't generate a summary." else: - final_response = "I reached the iteration limit and couldn't generate a summary." + final_response = ( + "I reached the iteration limit and couldn't generate a summary." + ) except Exception as e: logging.warning(f"Failed to get summary response: {e}") @@ -7835,6 +8914,7 @@ class AIAgent: # Tag all log records on this thread with the session ID so # ``hermes logs --session <id>`` can filter a single conversation. from hermes_logging import set_session_context + set_session_context(self.session_id) # If the previous turn activated fallback, restore the primary @@ -7856,7 +8936,7 @@ class AIAgent: self._persist_user_message_override = persist_user_message # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) - + # Reset retry counters and iteration budget at the start of each turn # so subagent usage from a previous turn doesn't eat into the next one. self._invalid_tool_retries = 0 @@ -7895,12 +8975,17 @@ class AIAgent: self.iteration_budget = IterationBudget(self.max_iterations) # Log conversation turn start for debugging/observability - _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message + _msg_preview = ( + (user_message[:80] + "...") if len(user_message) > 80 else user_message + ) _msg_preview = _msg_preview.replace("\n", " ") logger.info( "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r", - self.session_id or "none", self.model, self.provider or "unknown", - self.platform or "unknown", len(conversation_history or []), + self.session_id or "none", + self.model, + self.provider or "unknown", + self.platform or "unknown", + len(conversation_history or []), _msg_preview, ) @@ -7912,25 +8997,29 @@ class AIAgent: # recover the todo state from the most recent todo tool response in history) if conversation_history and not self._todo_store.has_items(): self._hydrate_todo_store(conversation_history) - + # Prefill messages (few-shot priming) are injected at API-call time only, # never stored in the messages list. This keeps them ephemeral: they won't # be saved to session DB, session logs, or batch trajectories, but they're # automatically re-applied on every API call (including session continuations). - + # Track user turns for memory flush and periodic nudge logic self._user_turn_count += 1 # Preserve the original user message (no nudge injection). - original_user_message = persist_user_message if persist_user_message is not None else user_message + original_user_message = ( + persist_user_message if persist_user_message is not None else user_message + ) # Track memory nudge trigger (turn-based, checked here). # Skill trigger is checked AFTER the agent loop completes, based on # how many tool iterations THIS turn used. _should_review_memory = False - if (self._memory_nudge_interval > 0 - and "memory" in self.valid_tool_names - and self._memory_store): + if ( + self._memory_nudge_interval > 0 + and "memory" in self.valid_tool_names + and self._memory_store + ): self._turns_since_memory += 1 if self._turns_since_memory >= self._memory_nudge_interval: _should_review_memory = True @@ -7941,10 +9030,12 @@ class AIAgent: messages.append(user_msg) current_turn_user_idx = len(messages) - 1 self._persist_user_message_idx = current_turn_user_idx - + if not self.quiet_mode: - self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'") - + self._safe_print( + f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'" + ) + # ── System prompt (cached per session for prefix caching) ── # Built once on first call, reused for all subsequent calls. # Only rebuilt after context compression events (which invalidate @@ -7979,6 +9070,7 @@ class AIAgent: # session-scoped state (e.g. warm a memory cache). try: from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( "on_session_start", session_id=self.session_id, @@ -7991,7 +9083,9 @@ class AIAgent: # Store the system prompt snapshot in SQLite if self._session_db: try: - self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt) + self._session_db.update_system_prompt( + self.session_id, self._cached_system_prompt + ) except Exception as e: logger.debug("Session DB update_system_prompt failed: %s", e) @@ -8006,8 +9100,10 @@ class AIAgent: # 4xx and abort the request entirely). if ( self.compression_enabled - and len(messages) > self.context_compressor.protect_first_n - + self.context_compressor.protect_last_n + 1 + and len(messages) + > self.context_compressor.protect_first_n + + self.context_compressor.protect_last_n + + 1 ): # Include tool schema tokens — with many tools these can add # 20-30K+ tokens that the old sys+msg estimate missed entirely. @@ -8035,7 +9131,9 @@ class AIAgent: for _pass in range(3): _orig_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=_preflight_tokens, + messages, + system_message, + approx_tokens=_preflight_tokens, task_id=effective_task_id, ) if len(messages) >= _orig_len: @@ -8079,6 +9177,7 @@ class AIAgent: _plugin_user_context = "" try: from hermes_cli.plugins import invoke_hook as _invoke_hook + _pre_results = _invoke_hook( "pre_llm_call", session_id=self.session_id, @@ -8110,7 +9209,7 @@ class AIAgent: truncated_response_prefix = "" compression_attempts = 0 _turn_exit_reason = "unknown" # Diagnostic: why the loop ended - + # Record the execution thread so interrupt()/clear_interrupt() can # scope the tool-level interrupt signal to THIS agent's thread only. # Must be set before clear_interrupt() which uses it. @@ -8127,12 +9226,18 @@ class AIAgent: _ext_prefetch_cache = "" if self._memory_manager: try: - _query = original_user_message if isinstance(original_user_message, str) else "" + _query = ( + original_user_message + if isinstance(original_user_message, str) + else "" + ) _ext_prefetch_cache = self._memory_manager.prefetch_all(_query) or "" except Exception: pass - while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) or self._budget_grace_call: + while ( + api_call_count < self.max_iterations and self.iteration_budget.remaining > 0 + ) or self._budget_grace_call: # Reset per-turn checkpoint dedup so each iteration can take one snapshot self._checkpoint_mgr.new_turn() @@ -8141,9 +9246,11 @@ class AIAgent: interrupted = True _turn_exit_reason = "interrupted_by_user" if not self.quiet_mode: - self._safe_print("\n⚡ Breaking out of tool loop due to interrupt...") + self._safe_print( + "\n⚡ Breaking out of tool loop due to interrupt..." + ) break - + api_call_count += 1 self._api_call_count = api_call_count self._touch_activity(f"starting API call #{api_call_count}") @@ -8156,7 +9263,9 @@ class AIAgent: elif not self.iteration_budget.consume(): _turn_exit_reason = "budget_exhausted" if not self.quiet_mode: - self._safe_print(f"\n⚠️ Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)") + self._safe_print( + f"\n⚠️ Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)" + ) break # Fire step_callback for gateway hooks (agent:step event) @@ -8184,14 +9293,20 @@ class AIAgent: break self.step_callback(api_call_count, prev_tools) except Exception as _step_err: - logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err) + logger.debug( + "step_callback error (iteration %s): %s", + api_call_count, + _step_err, + ) # Track tool-calling iterations for skill nudge. # Counter resets whenever skill_manage is actually used. - if (self._skill_nudge_interval > 0 - and "skill_manage" in self.valid_tool_names): + if ( + self._skill_nudge_interval > 0 + and "skill_manage" in self.valid_tool_names + ): self._iters_since_skill += 1 - + # Prepare messages for API call # If we have an ephemeral system prompt, prepend it to the messages # Note: Reasoning is embedded in content via <think> tags for trajectory storage. @@ -8217,7 +9332,9 @@ class AIAgent: if _injections: _base = api_msg.get("content", "") if isinstance(_base, str): - api_msg["content"] = _base + "\n\n" + "\n\n".join(_injections) + api_msg["content"] = ( + _base + "\n\n" + "\n\n".join(_injections) + ) # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved @@ -8252,13 +9369,17 @@ class AIAgent: # prompt, so the stable cache prefix remains unchanged. effective_system = active_system_prompt or "" if self.ephemeral_system_prompt: - effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + effective_system = ( + effective_system + "\n\n" + self.ephemeral_system_prompt + ).strip() # NOTE: Plugin context from pre_llm_call hooks is injected into the # user message (see injection block above), NOT the system prompt. # This is intentional — system prompt modifications break the prompt # cache prefix. The system prompt is reserved for Hermes internals. if effective_system: - api_messages = [{"role": "system", "content": effective_system}] + api_messages + api_messages = [ + {"role": "system", "content": effective_system} + ] + api_messages # Inject ephemeral prefill messages right after the system prompt # but before conversation history. Same API-call-time-only pattern. @@ -8272,7 +9393,11 @@ class AIAgent: # inject cache_control breakpoints (system + last 3 messages) to reduce # input token costs by ~75% on multi-turn conversations. if self._use_prompt_caching: - api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages')) + api_messages = apply_anthropic_cache_control( + api_messages, + cache_ttl=self._cache_ttl, + native_anthropic=(self.api_mode == "anthropic_messages"), + ) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API. Runs unconditionally — not @@ -8298,13 +9423,17 @@ class AIAgent: if isinstance(tc, dict) and "function" in tc: try: args_obj = json.loads(tc["function"]["arguments"]) - tc = {**tc, "function": { - **tc["function"], - "arguments": json.dumps( - args_obj, separators=(",", ":"), - sort_keys=True, - ), - }} + tc = { + **tc, + "function": { + **tc["function"], + "arguments": json.dumps( + args_obj, + separators=(",", ":"), + sort_keys=True, + ), + }, + } except Exception: pass new_tcs.append(tc) @@ -8313,14 +9442,20 @@ class AIAgent: # Calculate approximate request size for logging total_chars = sum(len(str(msg)) for msg in api_messages) approx_tokens = estimate_messages_tokens_rough(api_messages) - + # Thinking spinner for quiet mode (animated during API call) thinking_spinner = None - + if not self.quiet_mode: - self._vprint(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...") - self._vprint(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)") - self._vprint(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}") + self._vprint( + f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}..." + ) + self._vprint( + f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)" + ) + self._vprint( + f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}" + ) else: # Animated thinking spinner in quiet mode face = random.choice(KawaiiSpinner.KAWAII_THINKING) @@ -8329,27 +9464,40 @@ class AIAgent: # CLI TUI mode: use prompt_toolkit widget instead of raw spinner # (works in both streaming and non-streaming modes) self.thinking_callback(f"{face} {verb}...") - elif not self._has_stream_consumers() and self._should_start_quiet_spinner(): + elif ( + not self._has_stream_consumers() + and self._should_start_quiet_spinner() + ): # Raw KawaiiSpinner only when no streaming consumers and the # spinner output has a safe sink. - spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star']) - thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type, print_fn=self._print_fn) + spinner_type = random.choice( + ["brain", "sparkle", "pulse", "moon", "star"] + ) + thinking_spinner = KawaiiSpinner( + f"{face} {verb}...", + spinner_type=spinner_type, + print_fn=self._print_fn, + ) thinking_spinner.start() - + # Log request details if verbose if self.verbose_logging: - logging.debug(f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}") - logging.debug(f"Last message role: {messages[-1]['role'] if messages else 'none'}") + logging.debug( + f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}" + ) + logging.debug( + f"Last message role: {messages[-1]['role'] if messages else 'none'}" + ) logging.debug(f"Total message size: ~{approx_tokens:,} tokens") - + api_start_time = time.time() retry_count = 0 max_retries = 3 primary_recovery_attempted = False max_compression_attempts = 3 - codex_auth_retry_attempted=False - anthropic_auth_retry_attempted=False - nous_auth_retry_attempted=False + codex_auth_retry_attempted = False + anthropic_auth_retry_attempted = False + nous_auth_retry_attempted = False thinking_sig_retry_attempted = False has_retried_429 = False restart_with_compressed_messages = False @@ -8366,10 +9514,13 @@ class AIAgent: if self._force_ascii_payload: _sanitize_structure_non_ascii(api_kwargs) if self.api_mode == "codex_responses": - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) + api_kwargs = self._preflight_codex_api_kwargs( + api_kwargs, allow_stream=False + ) try: from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( "pre_api_request", task_id=effective_task_id, @@ -8422,6 +9573,7 @@ class AIAgent: # health checking, but skip for Mock clients in tests # (mocks return SimpleNamespace, not stream iterators). from unittest.mock import Mock + if isinstance(getattr(self, "client", None), Mock): _use_streaming = False @@ -8431,9 +9583,9 @@ class AIAgent: ) else: response = self._interruptible_api_call(api_kwargs) - + api_duration = time.time() - api_start_time - + # Stop thinking spinner silently -- the response box or tool # execution messages that follow are more informative. if thinking_spinner: @@ -8441,20 +9593,30 @@ class AIAgent: thinking_spinner = None if self.thinking_callback: self.thinking_callback("") - + if not self.quiet_mode: - self._vprint(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s") - + self._vprint( + f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s" + ) + if self.verbose_logging: # Log response with provider info if available - resp_model = getattr(response, 'model', 'N/A') if response else 'N/A' - logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}") - + resp_model = ( + getattr(response, "model", "N/A") if response else "N/A" + ) + logging.debug( + f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}" + ) + # Validate response shape before proceeding response_invalid = False error_details = [] if self.api_mode == "codex_responses": - output_items = getattr(response, "output", None) if response is not None else None + output_items = ( + getattr(response, "output", None) + if response is not None + else None + ) if response is None: response_invalid = True error_details.append("response is None") @@ -8467,7 +9629,9 @@ class AIAgent: # from response.output_text. Only mark invalid # when that fallback is also absent. _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + _out_text_stripped = ( + _out_text.strip() if isinstance(_out_text, str) else "" + ) if _out_text_stripped: logger.debug( "Codex response.output is empty but output_text is present " @@ -8476,18 +9640,25 @@ class AIAgent: ) else: _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) + _resp_incomplete = getattr( + response, "incomplete_details", None + ) logger.warning( "Codex response.output is empty after stream backfill " "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, + _resp_status, + _resp_incomplete, getattr(response, "model", None), f"api_mode={self.api_mode} provider={self.provider}", ) response_invalid = True error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": - content_blocks = getattr(response, "content", None) if response is not None else None + content_blocks = ( + getattr(response, "content", None) + if response is not None + else None + ) if response is None: response_invalid = True error_details.append("response is None") @@ -8498,12 +9669,19 @@ class AIAgent: response_invalid = True error_details.append("response.content is empty") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: + if ( + response is None + or not hasattr(response, "choices") + or response.choices is None + or not response.choices + ): response_invalid = True if response is None: error_details.append("response is None") - elif not hasattr(response, 'choices'): - error_details.append("response has no 'choices' attribute") + elif not hasattr(response, "choices"): + error_details.append( + "response has no 'choices' attribute" + ) elif response.choices is None: error_details.append("response.choices is None") else: @@ -8516,16 +9694,18 @@ class AIAgent: thinking_spinner = None if self.thinking_callback: self.thinking_callback("") - + # Invalid response — could be rate limiting, provider timeout, # upstream server error, or malformed response. retry_count += 1 - + # Eager fallback: empty/malformed responses are a common # rate-limit symptom. Switch to fallback immediately # rather than retrying with extended backoff. if self._fallback_index < len(self._fallback_chain): - self._emit_status("⚠️ Empty/malformed response — switching to fallback...") + self._emit_status( + "⚠️ Empty/malformed response — switching to fallback..." + ) if self._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -8535,31 +9715,51 @@ class AIAgent: # Check for error field in response (some providers include this) error_msg = "Unknown" provider_name = "Unknown" - if response and hasattr(response, 'error') and response.error: + if response and hasattr(response, "error") and response.error: error_msg = str(response.error) # Try to extract provider from error metadata - if hasattr(response.error, 'metadata') and response.error.metadata: - provider_name = response.error.metadata.get('provider_name', 'Unknown') - elif response and hasattr(response, 'message') and response.message: + if ( + hasattr(response.error, "metadata") + and response.error.metadata + ): + provider_name = response.error.metadata.get( + "provider_name", "Unknown" + ) + elif ( + response + and hasattr(response, "message") + and response.message + ): error_msg = str(response.message) - + # Try to get provider from model field (OpenRouter often returns actual model used) - if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model: + if ( + provider_name == "Unknown" + and response + and hasattr(response, "model") + and response.model + ): provider_name = f"model={response.model}" - + # Check for x-openrouter-provider or similar metadata if provider_name == "Unknown" and response: # Log all response attributes for debugging - resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')} + resp_attrs = { + k: str(v)[:100] + for k, v in vars(response).items() + if not k.startswith("_") + } if self.verbose_logging: - logging.debug(f"Response attributes for invalid response: {resp_attrs}") - + logging.debug( + f"Response attributes for invalid response: {resp_attrs}" + ) + # Extract error code from response for contextual diagnostics _resp_error_code = None - if response and hasattr(response, 'error') and response.error: - _code_raw = getattr(response.error, 'code', None) + if response and hasattr(response, "error") and response.error: + _code_raw = getattr(response.error, "code", None) if _code_raw is None and isinstance(response.error, dict): - _code_raw = response.error.get('code') + _code_raw = response.error.get("code") if _code_raw is not None: try: _resp_error_code = int(_code_raw) @@ -8571,13 +9771,17 @@ class AIAgent: if _resp_error_code == 524: _failure_hint = f"upstream provider timed out (Cloudflare 524, {api_duration:.0f}s)" elif _resp_error_code == 504: - _failure_hint = f"upstream gateway timeout (504, {api_duration:.0f}s)" + _failure_hint = ( + f"upstream gateway timeout (504, {api_duration:.0f}s)" + ) elif _resp_error_code == 429: _failure_hint = f"rate limited by upstream provider (429)" elif _resp_error_code in (500, 502): _failure_hint = f"upstream server error ({_resp_error_code}, {api_duration:.0f}s)" elif _resp_error_code in (503, 529): - _failure_hint = f"upstream provider overloaded ({_resp_error_code})" + _failure_hint = ( + f"upstream provider overloaded ({_resp_error_code})" + ) elif _resp_error_code is not None: _failure_hint = f"upstream error (code {_resp_error_code}, {api_duration:.0f}s)" elif api_duration < 10: @@ -8587,42 +9791,69 @@ class AIAgent: else: _failure_hint = f"response time {api_duration:.1f}s" - self._vprint(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True) - self._vprint(f"{self.log_prefix} 🏢 Provider: {provider_name}", force=True) + self._vprint( + f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", + force=True, + ) + self._vprint( + f"{self.log_prefix} 🏢 Provider: {provider_name}", + force=True, + ) cleaned_provider_error = self._clean_error_message(error_msg) - self._vprint(f"{self.log_prefix} 📝 Provider message: {cleaned_provider_error}", force=True) - self._vprint(f"{self.log_prefix} ⏱️ {_failure_hint}", force=True) - + self._vprint( + f"{self.log_prefix} 📝 Provider message: {cleaned_provider_error}", + force=True, + ) + self._vprint( + f"{self.log_prefix} ⏱️ {_failure_hint}", force=True + ) + if retry_count >= max_retries: # Try fallback before giving up - self._emit_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...") + self._emit_status( + f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback..." + ) if self._try_activate_fallback(): retry_count = 0 compression_attempts = 0 primary_recovery_attempted = False continue - self._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.") - logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.") + self._emit_status( + f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up." + ) + logging.error( + f"{self.log_prefix}Invalid API response after {max_retries} retries." + ) self._persist_session(messages, conversation_history) return { "messages": messages, "completed": False, "api_calls": api_call_count, "error": f"Invalid API response after {max_retries} retries: {_failure_hint}", - "failed": True # Mark as failure for filtering + "failed": True, # Mark as failure for filtering } - + # Backoff before retry — jittered exponential: 5s base, 120s cap - wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0) - self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True) - logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}") - + wait_time = jittered_backoff( + retry_count, base_delay=5.0, max_delay=120.0 + ) + self._vprint( + f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", + force=True, + ) + logging.warning( + f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}" + ) + # Sleep in small increments to stay responsive to interrupts sleep_end = time.time() + wait_time _backoff_touch_counter = 0 while time.time() < sleep_end: if self._interrupt_requested: - self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True) + self._vprint( + f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", + force=True, + ) self._persist_session(messages, conversation_history) self.clear_interrupt() return { @@ -8646,24 +9877,41 @@ class AIAgent: # Check finish_reason before proceeding if self.api_mode == "codex_responses": status = getattr(response, "status", None) - incomplete_details = getattr(response, "incomplete_details", None) + incomplete_details = getattr( + response, "incomplete_details", None + ) incomplete_reason = None if isinstance(incomplete_details, dict): incomplete_reason = incomplete_details.get("reason") else: - incomplete_reason = getattr(incomplete_details, "reason", None) - if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}: + incomplete_reason = getattr( + incomplete_details, "reason", None + ) + if status == "incomplete" and incomplete_reason in { + "max_output_tokens", + "length", + }: finish_reason = "length" else: finish_reason = "stop" elif self.api_mode == "anthropic_messages": - stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"} - finish_reason = stop_reason_map.get(response.stop_reason, "stop") + stop_reason_map = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + } + finish_reason = stop_reason_map.get( + response.stop_reason, "stop" + ) else: finish_reason = response.choices[0].finish_reason if finish_reason == "length": - self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True) + self._vprint( + f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", + force=True, + ) # ── Detect thinking-budget exhaustion ────────────── # When the model spends ALL output tokens on reasoning @@ -8673,16 +9921,30 @@ class AIAgent: _trunc_content = None _trunc_has_tool_calls = False if self.api_mode == "chat_completions": - _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None - _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None - _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False + _trunc_msg = ( + response.choices[0].message + if (hasattr(response, "choices") and response.choices) + else None + ) + _trunc_content = ( + getattr(_trunc_msg, "content", None) + if _trunc_msg + else None + ) + _trunc_has_tool_calls = ( + bool(getattr(_trunc_msg, "tool_calls", None)) + if _trunc_msg + else False + ) elif self.api_mode == "anthropic_messages": # Anthropic response.content is a list of blocks _text_parts = [] for _blk in getattr(response, "content", []): if getattr(_blk, "type", None) == "text": _text_parts.append(getattr(_blk, "text", "")) - _trunc_content = "\n".join(_text_parts) if _text_parts else None + _trunc_content = ( + "\n".join(_text_parts) if _text_parts else None + ) # A response is "thinking exhausted" only when the model # actually produced reasoning blocks but no visible text after @@ -8692,8 +9954,9 @@ class AIAgent: # truncations that deserve continuation retries, not as # thinking-budget exhaustion. _has_think_tags = bool( - _trunc_content and re.search( - r'<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)[^>]*>', + _trunc_content + and re.search( + r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)[^>]*>", _trunc_content, re.IGNORECASE, ) @@ -8702,7 +9965,12 @@ class AIAgent: not _trunc_has_tool_calls and _has_think_tags and ( - (_trunc_content is not None and not self._has_content_after_think_block(_trunc_content)) + ( + _trunc_content is not None + and not self._has_content_after_think_block( + _trunc_content + ) + ) or _trunc_content is None ) ) @@ -8745,10 +10013,14 @@ class AIAgent: assistant_message = response.choices[0].message if not assistant_message.tool_calls: length_continue_retries += 1 - interim_msg = self._build_assistant_message(assistant_message, finish_reason) + interim_msg = self._build_assistant_message( + assistant_message, finish_reason + ) messages.append(interim_msg) if assistant_message.content: - truncated_response_prefix += assistant_message.content + truncated_response_prefix += ( + assistant_message.content + ) if length_continue_retries < 3: self._vprint( @@ -8769,7 +10041,9 @@ class AIAgent: restart_with_length_continuation = True break - partial_response = self._strip_think_blocks(truncated_response_prefix).strip() + partial_response = self._strip_think_blocks( + truncated_response_prefix + ).strip() self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) return { @@ -8811,8 +10085,12 @@ class AIAgent: # If we have prior messages, roll back to last complete state if len(messages) > 1: - self._vprint(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn") - rolled_back_messages = self._get_messages_up_to_last_assistant(messages) + self._vprint( + f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn" + ) + rolled_back_messages = ( + self._get_messages_up_to_last_assistant(messages) + ) self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) @@ -8823,11 +10101,14 @@ class AIAgent: "api_calls": api_call_count, "completed": False, "partial": True, - "error": "Response truncated due to output length limit" + "error": "Response truncated due to output length limit", } else: # First message was truncated - mark as failed - self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover", force=True) + self._vprint( + f"{self.log_prefix}❌ First response truncated - cannot recover", + force=True, + ) self._persist_session(messages, conversation_history) return { "final_response": None, @@ -8835,11 +10116,11 @@ class AIAgent: "api_calls": api_call_count, "completed": False, "failed": True, - "error": "First response truncated due to output length limit" + "error": "First response truncated due to output length limit", } - + # Track actual token usage from response for context management - if hasattr(response, 'usage') and response.usage: + if hasattr(response, "usage") and response.usage: canonical_usage = normalize_usage( response.usage, provider=self.provider, @@ -8860,9 +10141,15 @@ class AIAgent: # from the error message), not guessed probe tiers. if getattr(self.context_compressor, "_context_probed", False): ctx = self.context_compressor.context_length - if getattr(self.context_compressor, "_context_probe_persistable", False): + if getattr( + self.context_compressor, + "_context_probe_persistable", + False, + ): save_context_length(self.model, self.base_url, ctx) - self._safe_print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}") + self._safe_print( + f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}" + ) self.context_compressor._context_probed = False self.context_compressor._context_probe_persistable = False @@ -8872,19 +10159,30 @@ class AIAgent: self.session_api_calls += 1 self.session_input_tokens += canonical_usage.input_tokens self.session_output_tokens += canonical_usage.output_tokens - self.session_cache_read_tokens += canonical_usage.cache_read_tokens - self.session_cache_write_tokens += canonical_usage.cache_write_tokens - self.session_reasoning_tokens += canonical_usage.reasoning_tokens + self.session_cache_read_tokens += ( + canonical_usage.cache_read_tokens + ) + self.session_cache_write_tokens += ( + canonical_usage.cache_write_tokens + ) + self.session_reasoning_tokens += ( + canonical_usage.reasoning_tokens + ) # Log API call details for debugging/observability _cache_pct = "" if canonical_usage.cache_read_tokens and prompt_tokens: - _cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100*canonical_usage.cache_read_tokens/prompt_tokens:.0f}%)" + _cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100 * canonical_usage.cache_read_tokens / prompt_tokens:.0f}%)" logger.info( "API call #%d: model=%s provider=%s in=%d out=%d total=%d latency=%.1fs%s", - self.session_api_calls, self.model, self.provider or "unknown", - prompt_tokens, completion_tokens, total_tokens, - api_duration, _cache_pct, + self.session_api_calls, + self.model, + self.provider or "unknown", + prompt_tokens, + completion_tokens, + total_tokens, + api_duration, + _cache_pct, ) cost_result = estimate_usage_cost( @@ -8895,7 +10193,9 @@ class AIAgent: api_key=getattr(self, "api_key", ""), ) if cost_result.amount_usd is not None: - self.session_estimated_cost_usd += float(cost_result.amount_usd) + self.session_estimated_cost_usd += float( + cost_result.amount_usd + ) self.session_cost_status = cost_result.status self.session_cost_source = cost_result.source @@ -8916,37 +10216,63 @@ class AIAgent: cache_write_tokens=canonical_usage.cache_write_tokens, reasoning_tokens=canonical_usage.reasoning_tokens, estimated_cost_usd=float(cost_result.amount_usd) - if cost_result.amount_usd is not None else None, + if cost_result.amount_usd is not None + else None, cost_status=cost_result.status, cost_source=cost_result.source, billing_provider=self.provider, billing_base_url=self.base_url, billing_mode="subscription_included" - if cost_result.status == "included" else None, + if cost_result.status == "included" + else None, model=self.model, ) except Exception: pass # never block the agent loop - + if self.verbose_logging: - logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") - + logging.debug( + f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}" + ) + # Log cache hit stats when prompt caching is active if self._use_prompt_caching: if self.api_mode == "anthropic_messages": # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens - cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 - written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 + cached = ( + getattr( + response.usage, "cache_read_input_tokens", 0 + ) + or 0 + ) + written = ( + getattr( + response.usage, "cache_creation_input_tokens", 0 + ) + or 0 + ) else: # OpenRouter uses prompt_tokens_details.cached_tokens - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 + details = getattr( + response.usage, "prompt_tokens_details", None + ) + cached = ( + getattr(details, "cached_tokens", 0) or 0 + if details + else 0 + ) + written = ( + getattr(details, "cache_write_tokens", 0) or 0 + if details + else 0 + ) prompt = usage_dict["prompt_tokens"] hit_pct = (cached / prompt * 100) if prompt > 0 else 0 if not self.quiet_mode: - self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") - + self._vprint( + f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)" + ) + has_retried_429 = False # Reset on success self._touch_activity(f"API call #{api_call_count} completed") break # Success, exit retry loop @@ -8958,7 +10284,9 @@ class AIAgent: if self.thinking_callback: self.thinking_callback("") api_elapsed = time.time() - api_start_time - self._vprint(f"{self.log_prefix}⚡ Interrupted during API call.", force=True) + self._vprint( + f"{self.log_prefix}⚡ Interrupted during API call.", force=True + ) self._persist_session(messages, conversation_history) interrupted = True final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)." @@ -8983,7 +10311,10 @@ class AIAgent: # first to strip surrogates, then once more for pure # ASCII-only locale sanitization if needed. # ----------------------------------------------------------- - if isinstance(api_error, UnicodeEncodeError) and getattr(self, '_unicode_sanitization_passes', 0) < 2: + if ( + isinstance(api_error, UnicodeEncodeError) + and getattr(self, "_unicode_sanitization_passes", 0) < 2 + ): _err_str = str(api_error).lower() _is_ascii_codec = "'ascii'" in _err_str or "ascii" in _err_str _surrogates_found = _sanitize_messages_surrogates(messages) @@ -9001,8 +10332,12 @@ class AIAgent: # non-ASCII content from messages/tool schemas and retry. _messages_sanitized = _sanitize_messages_non_ascii(messages) _prefill_sanitized = False - if isinstance(getattr(self, "prefill_messages", None), list): - _prefill_sanitized = _sanitize_messages_non_ascii(self.prefill_messages) + if isinstance( + getattr(self, "prefill_messages", None), list + ): + _prefill_sanitized = _sanitize_messages_non_ascii( + self.prefill_messages + ) _tools_sanitized = False if isinstance(getattr(self, "tools", None), list): @@ -9010,13 +10345,19 @@ class AIAgent: _system_sanitized = False if isinstance(active_system_prompt, str): - _sanitized_system = _strip_non_ascii(active_system_prompt) + _sanitized_system = _strip_non_ascii( + active_system_prompt + ) if _sanitized_system != active_system_prompt: active_system_prompt = _sanitized_system self._cached_system_prompt = _sanitized_system _system_sanitized = True - if isinstance(getattr(self, "ephemeral_system_prompt", None), str): - _sanitized_ephemeral = _strip_non_ascii(self.ephemeral_system_prompt) + if isinstance( + getattr(self, "ephemeral_system_prompt", None), str + ): + _sanitized_ephemeral = _strip_non_ascii( + self.ephemeral_system_prompt + ) if _sanitized_ephemeral != self.ephemeral_system_prompt: self.ephemeral_system_prompt = _sanitized_ephemeral _system_sanitized = True @@ -9024,11 +10365,15 @@ class AIAgent: _headers_sanitized = False _default_headers = ( self._client_kwargs.get("default_headers") - if isinstance(getattr(self, "_client_kwargs", None), dict) + if isinstance( + getattr(self, "_client_kwargs", None), dict + ) else None ) if isinstance(_default_headers, dict): - _headers_sanitized = _sanitize_structure_non_ascii(_default_headers) + _headers_sanitized = _sanitize_structure_non_ascii( + _default_headers + ) # Sanitize the API key — non-ASCII characters in # credentials (e.g. ʋ instead of v from a bad @@ -9042,12 +10387,16 @@ class AIAgent: _clean_key = _strip_non_ascii(_raw_key) if _clean_key != _raw_key: self.api_key = _clean_key - if isinstance(getattr(self, "_client_kwargs", None), dict): + if isinstance( + getattr(self, "_client_kwargs", None), dict + ): self._client_kwargs["api_key"] = _clean_key # Also update the live client — it holds its # own copy of api_key which auth_headers reads # dynamically on every request. - if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"): + if getattr( + self, "client", None + ) is not None and hasattr(self.client, "api_key"): self.client.api_key = _clean_key _credential_sanitized = True self._vprint( @@ -9079,7 +10428,11 @@ class AIAgent: # ── Classify the error for structured recovery decisions ── _compressor = getattr(self, "context_compressor", None) - _ctx_len = getattr(_compressor, "context_length", 200000) if _compressor else 200000 + _ctx_len = ( + getattr(_compressor, "context_length", 200000) + if _compressor + else 200000 + ) classified = classify_api_error( api_error, provider=getattr(self, "provider", "") or "", @@ -9090,16 +10443,21 @@ class AIAgent: ) logger.debug( "Error classified: reason=%s status=%s retryable=%s compress=%s rotate=%s fallback=%s", - classified.reason.value, classified.status_code, - classified.retryable, classified.should_compress, - classified.should_rotate_credential, classified.should_fallback, + classified.reason.value, + classified.status_code, + classified.retryable, + classified.should_compress, + classified.should_rotate_credential, + classified.should_fallback, ) - recovered_with_pool, has_retried_429 = self._recover_with_credential_pool( - status_code=status_code, - has_retried_429=has_retried_429, - classified_reason=classified.reason, - error_context=error_context, + recovered_with_pool, has_retried_429 = ( + self._recover_with_credential_pool( + status_code=status_code, + has_retried_429=has_retried_429, + classified_reason=classified.reason, + error_context=error_context, + ) ) if recovered_with_pool: continue @@ -9111,7 +10469,9 @@ class AIAgent: ): codex_auth_retry_attempted = True if self._try_refresh_codex_client_credentials(force=True): - self._vprint(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...") + self._vprint( + f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request..." + ) continue if ( self.api_mode == "chat_completions" @@ -9121,34 +10481,62 @@ class AIAgent: ): nous_auth_retry_attempted = True if self._try_refresh_nous_client_credentials(force=True): - print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...") + print( + f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request..." + ) continue if ( self.api_mode == "anthropic_messages" and status_code == 401 - and hasattr(self, '_anthropic_api_key') + and hasattr(self, "_anthropic_api_key") and not anthropic_auth_retry_attempted ): anthropic_auth_retry_attempted = True from agent.anthropic_adapter import _is_oauth_token + if self._try_refresh_anthropic_client_credentials(): - print(f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...") + print( + f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request..." + ) continue # Credential refresh didn't help — show diagnostic info key = self._anthropic_api_key - auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)" - print(f"{self.log_prefix}🔐 Anthropic 401 — authentication failed.") + auth_method = ( + "Bearer (OAuth/setup-token)" + if _is_oauth_token(key) + else "x-api-key (API key)" + ) + print( + f"{self.log_prefix}🔐 Anthropic 401 — authentication failed." + ) print(f"{self.log_prefix} Auth method: {auth_method}") - print(f"{self.log_prefix} Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{self.log_prefix} Token: (empty or short)") + print( + f"{self.log_prefix} Token prefix: {key[:12]}..." + if key and len(key) > 12 + else f"{self.log_prefix} Token: (empty or short)" + ) print(f"{self.log_prefix} Troubleshooting:") from hermes_constants import display_hermes_home as _dhh_fn + _dhh = _dhh_fn() - print(f"{self.log_prefix} • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens") - print(f"{self.log_prefix} • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values") - print(f"{self.log_prefix} • For API keys: verify at https://console.anthropic.com/settings/keys") - print(f"{self.log_prefix} • For Claude Code: run 'claude /login' to refresh, then retry") - print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"") - print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"") + print( + f"{self.log_prefix} • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens" + ) + print( + f"{self.log_prefix} • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values" + ) + print( + f"{self.log_prefix} • For API keys: verify at https://console.anthropic.com/settings/keys" + ) + print( + f"{self.log_prefix} • For Claude Code: run 'claude /login' to refresh, then retry" + ) + print( + f'{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN ""' + ) + print( + f'{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY ""' + ) # ── Thinking block signature recovery ───────────────── # Anthropic signs thinking blocks against the full turn @@ -9173,7 +10561,8 @@ class AIAgent: logging.warning( "%sThinking block signature recovery: stripped " "reasoning_details from %d messages", - self.log_prefix, len(messages), + self.log_prefix, + len(messages), ) continue @@ -9182,7 +10571,7 @@ class AIAgent: self._touch_activity( f"API error recovery (attempt {retry_count}/{max_retries})" ) - + error_type = type(api_error).__name__ error_msg = str(api_error).lower() _error_summary = self._summarize_api_error(api_error) @@ -9199,25 +10588,37 @@ class AIAgent: _base = getattr(self, "base_url", "unknown") _model = getattr(self, "model", "unknown") _status_code_str = f" [HTTP {status_code}]" if status_code else "" - self._vprint(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}", force=True) - self._vprint(f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True) - self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True) - self._vprint(f"{self.log_prefix} 📝 Error: {_error_summary}", force=True) + self._vprint( + f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}", + force=True, + ) + self._vprint( + f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", + force=True, + ) + self._vprint( + f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True + ) + self._vprint( + f"{self.log_prefix} 📝 Error: {_error_summary}", force=True + ) if status_code and status_code < 500: _err_body = getattr(api_error, "body", None) _err_body_str = str(_err_body)[:300] if _err_body else None if _err_body_str: - self._vprint(f"{self.log_prefix} 📋 Details: {_err_body_str}", force=True) - self._vprint(f"{self.log_prefix} ⏱️ Elapsed: {elapsed_time:.2f}s Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens") + self._vprint( + f"{self.log_prefix} 📋 Details: {_err_body_str}", + force=True, + ) + self._vprint( + f"{self.log_prefix} ⏱️ Elapsed: {elapsed_time:.2f}s Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens" + ) # Actionable hint for OpenRouter "no tool endpoints" error. # This fires regardless of whether fallback succeeds — the # user needs to know WHY their model failed so they can fix # their provider routing, not just silently fall back. - if ( - self._is_openrouter_url() - and "support tool use" in error_msg - ): + if self._is_openrouter_url() and "support tool use" in error_msg: self._vprint( f"{self.log_prefix} 💡 No OpenRouter providers for {_model} support tool calling with your current settings.", force=True, @@ -9238,7 +10639,10 @@ class AIAgent: # Check for interrupt before deciding to retry if self._interrupt_requested: - self._vprint(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", force=True) + self._vprint( + f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", + force=True, + ) self._persist_session(messages, conversation_history) self.clear_interrupt() return { @@ -9248,7 +10652,7 @@ class AIAgent: "completed": False, "interrupted": True, } - + # Check for 413 payload-too-large BEFORE generic 4xx handler. # A 413 is a payload-size error — the correct response is to # compress history and retry, not abort immediately. @@ -9293,7 +10697,8 @@ class AIAgent: if compression_attempts <= max_compression_attempts: original_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, + messages, + system_message, approx_tokens=approx_tokens, task_id=effective_task_id, ) @@ -9320,7 +10725,9 @@ class AIAgent: FailoverReason.rate_limit, FailoverReason.billing, ) - if is_rate_limited and self._fallback_index < len(self._fallback_chain): + if is_rate_limited and self._fallback_index < len( + self._fallback_chain + ): # Don't eagerly fallback if credential pool rotation may # still recover. The pool's retry-then-rotate cycle needs # at least one more attempt to fire — jumping to a fallback @@ -9328,7 +10735,9 @@ class AIAgent: pool = self._credential_pool pool_may_recover = pool is not None and pool.has_available() if not pool_may_recover: - self._emit_status("⚠️ Rate limited — switching to fallback provider...") + self._emit_status( + "⚠️ Rate limited — switching to fallback provider..." + ) if self._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -9342,9 +10751,17 @@ class AIAgent: if is_payload_too_large: compression_attempts += 1 if compression_attempts > max_compression_attempts: - self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True) - self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) - logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.") + self._vprint( + f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", + force=True, + ) + logging.error( + f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts." + ) self._persist_session(messages, conversation_history) return { "messages": messages, @@ -9355,11 +10772,15 @@ class AIAgent: "failed": True, "compression_exhausted": True, } - self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...") + self._emit_status( + f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}..." + ) original_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=approx_tokens, + messages, + system_message, + approx_tokens=approx_tokens, task_id=effective_task_id, ) # Compression created a new session — clear history @@ -9368,14 +10789,24 @@ class AIAgent: conversation_history = None if len(messages) < original_len: - self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") + self._emit_status( + f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying..." + ) time.sleep(2) # Brief pause between compression retries restart_with_compressed_messages = True break else: - self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True) - self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) - logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.") + self._vprint( + f"{self.log_prefix}❌ Payload too large and cannot compress further.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", + force=True, + ) + logging.error( + f"{self.log_prefix}413 payload too large. Cannot compress further." + ) self._persist_session(messages, conversation_history) return { "messages": messages, @@ -9409,7 +10840,9 @@ class AIAgent: # # Note: max_tokens = output token cap (one response). # context_length = total window (input + output combined). - available_out = parse_available_output_tokens_from_error(error_msg) + available_out = parse_available_output_tokens_from_error( + error_msg + ) if available_out is not None: # Error is purely about the output cap being too large. # Cap output to the available space and retry without @@ -9426,9 +10859,17 @@ class AIAgent: # loop forever if the error keeps recurring. compression_attempts += 1 if compression_attempts > max_compression_attempts: - self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True) - self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) - logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.") + self._vprint( + f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", + force=True, + ) + logging.error( + f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts." + ) self._persist_session(messages, conversation_history) return { "messages": messages, @@ -9447,7 +10888,10 @@ class AIAgent: parsed_limit = parse_context_limit_from_error(error_msg) if parsed_limit and parsed_limit < old_ctx: new_ctx = parsed_limit - self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True) + self._vprint( + f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", + force=True, + ) else: # Step down to the next probe tier new_ctx = get_next_probe_tier(old_ctx) @@ -9472,15 +10916,29 @@ class AIAgent: compressor._context_probe_persistable = bool( parsed_limit and parsed_limit == new_ctx ) - self._vprint(f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", force=True) + self._vprint( + f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", + force=True, + ) else: - self._vprint(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...", force=True) + self._vprint( + f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...", + force=True, + ) compression_attempts += 1 if compression_attempts > max_compression_attempts: - self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True) - self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) - logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.") + self._vprint( + f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", + force=True, + ) + logging.error( + f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts." + ) self._persist_session(messages, conversation_history) return { "messages": messages, @@ -9491,11 +10949,15 @@ class AIAgent: "failed": True, "compression_exhausted": True, } - self._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...") + self._emit_status( + f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})..." + ) original_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=approx_tokens, + messages, + system_message, + approx_tokens=approx_tokens, task_id=effective_task_id, ) # Compression created a new session — clear history @@ -9503,17 +10965,31 @@ class AIAgent: # messages to the new session, not skipping them. conversation_history = None - if len(messages) < original_len or new_ctx and new_ctx < old_ctx: + if ( + len(messages) < original_len + or new_ctx + and new_ctx < old_ctx + ): if len(messages) < original_len: - self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") + self._emit_status( + f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying..." + ) time.sleep(2) # Brief pause between compression retries restart_with_compressed_messages = True break else: # Can't compress further and already at minimum tier - self._vprint(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", force=True) - self._vprint(f"{self.log_prefix} 💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True) - logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.") + self._vprint( + f"{self.log_prefix}❌ Context length exceeded and cannot compress further.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", + force=True, + ) + logging.error( + f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further." + ) self._persist_session(messages, conversation_history) return { "messages": messages, @@ -9529,16 +11005,16 @@ class AIAgent: # already accounts for 413, 429, 529 (transient), context # overflow, and generic-400 heuristics. Local validation # errors (ValueError, TypeError) are programming bugs. - is_local_validation_error = ( - isinstance(api_error, (ValueError, TypeError)) - and not isinstance(api_error, UnicodeEncodeError) - ) + is_local_validation_error = isinstance( + api_error, (ValueError, TypeError) + ) and not isinstance(api_error, UnicodeEncodeError) is_client_error = ( is_local_validation_error or ( not classified.retryable and not classified.should_compress - and classified.reason not in ( + and classified.reason + not in ( FailoverReason.rate_limit, FailoverReason.billing, FailoverReason.overloaded, @@ -9553,7 +11029,9 @@ class AIAgent: if is_client_error: # Try fallback before aborting — a different provider # may not have the same issue (rate limit, auth, etc.) - self._emit_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...") + self._emit_status( + f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback..." + ) if self._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -9561,37 +11039,81 @@ class AIAgent: continue if api_kwargs is not None: self._dump_api_request_debug( - api_kwargs, reason="non_retryable_client_error", error=api_error, + api_kwargs, + reason="non_retryable_client_error", + error=api_error, ) self._emit_status( f"❌ Non-retryable error (HTTP {status_code}): " f"{self._summarize_api_error(api_error)}" ) - self._vprint(f"{self.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True) - self._vprint(f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True) - self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True) + self._vprint( + f"{self.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 🔌 Provider: {_provider} Model: {_model}", + force=True, + ) + self._vprint( + f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True + ) # Actionable guidance for common auth errors - if classified.is_auth or classified.reason == FailoverReason.billing: + if ( + classified.is_auth + or classified.reason == FailoverReason.billing + ): if _provider == "openai-codex" and status_code == 401: - self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True) - self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True) - self._vprint(f"{self.log_prefix} 1. Run `codex` in your terminal to generate fresh tokens.", force=True) - self._vprint(f"{self.log_prefix} 2. Then run `hermes auth` to re-authenticate.", force=True) + self._vprint( + f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", + force=True, + ) + self._vprint( + f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", + force=True, + ) + self._vprint( + f"{self.log_prefix} 1. Run `codex` in your terminal to generate fresh tokens.", + force=True, + ) + self._vprint( + f"{self.log_prefix} 2. Then run `hermes auth` to re-authenticate.", + force=True, + ) else: - self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) - self._vprint(f"{self.log_prefix} • Is the key valid? Run: hermes setup", force=True) - self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True) + self._vprint( + f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", + force=True, + ) + self._vprint( + f"{self.log_prefix} • Is the key valid? Run: hermes setup", + force=True, + ) + self._vprint( + f"{self.log_prefix} • Does your account have access to {_model}?", + force=True, + ) if "openrouter" in str(_base).lower(): - self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True) + self._vprint( + f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", + force=True, + ) else: - self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True) - logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}") + self._vprint( + f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", + force=True, + ) + logging.error( + f"{self.log_prefix}Non-retryable client error: {api_error}" + ) # Skip session persistence when the error is likely # context-overflow related (status 400 + large session). # Persisting the failed user message would make the # session even larger, causing the same failure on the # next attempt. (#1630) - if status_code == 400 and (approx_tokens > 50000 or len(api_messages) > 80): + if status_code == 400 and ( + approx_tokens > 50000 or len(api_messages) > 80 + ): self._vprint( f"{self.log_prefix}⚠️ Skipping session persistence " f"for large failed session to prevent growth loop.", @@ -9613,14 +11135,21 @@ class AIAgent: # client once for transient transport errors (stale # connection pool, TCP reset). Only attempted once # per API call block. - if not primary_recovery_attempted and self._try_recover_primary_transport( - api_error, retry_count=retry_count, max_retries=max_retries, + if ( + not primary_recovery_attempted + and self._try_recover_primary_transport( + api_error, + retry_count=retry_count, + max_retries=max_retries, + ) ): primary_recovery_attempted = True retry_count = 0 continue # Try fallback before giving up entirely - self._emit_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...") + self._emit_status( + f"⚠️ Max retries ({max_retries}) exhausted — trying fallback..." + ) if self._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -9628,23 +11157,35 @@ class AIAgent: continue _final_summary = self._summarize_api_error(api_error) if is_rate_limited: - self._emit_status(f"❌ Rate limited after {max_retries} retries — {_final_summary}") + self._emit_status( + f"❌ Rate limited after {max_retries} retries — {_final_summary}" + ) else: - self._emit_status(f"❌ API failed after {max_retries} retries — {_final_summary}") - self._vprint(f"{self.log_prefix} 💀 Final error: {_final_summary}", force=True) + self._emit_status( + f"❌ API failed after {max_retries} retries — {_final_summary}" + ) + self._vprint( + f"{self.log_prefix} 💀 Final error: {_final_summary}", + force=True, + ) # Detect SSE stream-drop pattern (e.g. "Network # connection lost") and surface actionable guidance. # This typically happens when the model generates a # very large tool call (write_file with huge content) # and the proxy/CDN drops the stream mid-response. - _is_stream_drop = ( - not getattr(api_error, "status_code", None) - and any(p in error_msg for p in ( - "connection lost", "connection reset", - "connection closed", "network connection", - "network error", "terminated", - )) + _is_stream_drop = not getattr( + api_error, "status_code", None + ) and any( + p in error_msg + for p in ( + "connection lost", + "connection reset", + "connection closed", + "network connection", + "network error", + "terminated", + ) ) if _is_stream_drop: self._vprint( @@ -9664,12 +11205,19 @@ class AIAgent: logging.error( "%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s", - self.log_prefix, max_retries, _final_summary, - _provider, _model, len(api_messages), f"{approx_tokens:,}", + self.log_prefix, + max_retries, + _final_summary, + _provider, + _model, + len(api_messages), + f"{approx_tokens:,}", ) if api_kwargs is not None: self._dump_api_request_debug( - api_kwargs, reason="max_retries_exhausted", error=api_error, + api_kwargs, + reason="max_retries_exhausted", + error=api_error, ) self._persist_session(messages, conversation_history) _final_response = f"API call failed after {max_retries} retries: {_final_summary}" @@ -9694,19 +11242,35 @@ class AIAgent: # For rate limits, respect the Retry-After header if present _retry_after = None if is_rate_limited: - _resp_headers = getattr(getattr(api_error, "response", None), "headers", None) + _resp_headers = getattr( + getattr(api_error, "response", None), "headers", None + ) if _resp_headers and hasattr(_resp_headers, "get"): - _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After") + _ra_raw = _resp_headers.get( + "retry-after" + ) or _resp_headers.get("Retry-After") if _ra_raw: try: - _retry_after = min(int(_ra_raw), 120) # Cap at 2 minutes + _retry_after = min( + int(_ra_raw), 120 + ) # Cap at 2 minutes except (TypeError, ValueError): pass - wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0) + wait_time = ( + _retry_after + if _retry_after + else jittered_backoff( + retry_count, base_delay=2.0, max_delay=60.0 + ) + ) if is_rate_limited: - self._emit_status(f"⏱️ Rate limit reached. Waiting {wait_time}s before retry (attempt {retry_count + 1}/{max_retries})...") + self._emit_status( + f"⏱️ Rate limit reached. Waiting {wait_time}s before retry (attempt {retry_count + 1}/{max_retries})..." + ) else: - self._emit_status(f"⏳ Retrying in {wait_time}s (attempt {retry_count}/{max_retries})...") + self._emit_status( + f"⏳ Retrying in {wait_time}s (attempt {retry_count}/{max_retries})..." + ) logger.warning( "Retrying API call in %ss (attempt %s/%s) %s error=%s", wait_time, @@ -9721,7 +11285,10 @@ class AIAgent: _backoff_touch_counter = 0 while time.time() < sleep_end: if self._interrupt_requested: - self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True) + self._vprint( + f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", + force=True, + ) self._persist_session(messages, conversation_history) self.clear_interrupt() return { @@ -9740,7 +11307,7 @@ class AIAgent: f"error retry backoff ({retry_count}/{max_retries}), " f"{int(sleep_end - time.time())}s remaining" ) - + # If the API call was interrupted, skip response processing if interrupted: _turn_exit_reason = "interrupted_during_api_call" @@ -9764,28 +11331,39 @@ class AIAgent: # the `response` variable is still None. Break out cleanly. if response is None: _turn_exit_reason = "all_retries_exhausted_no_response" - print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.") + print( + f"{self.log_prefix}❌ All API retries exhausted with no successful response." + ) self._persist_session(messages, conversation_history) break try: if self.api_mode == "codex_responses": - assistant_message, finish_reason = self._normalize_codex_response(response) + assistant_message, finish_reason = self._normalize_codex_response( + response + ) elif self.api_mode == "anthropic_messages": from agent.anthropic_adapter import normalize_anthropic_response + assistant_message, finish_reason = normalize_anthropic_response( response, strip_tool_prefix=self._is_anthropic_oauth ) else: assistant_message = response.choices[0].message - + # Normalize content to string — some OpenAI-compatible servers # (llama-server, etc.) return content as a dict or list instead # of a plain string, which crashes downstream .strip() calls. - if assistant_message.content is not None and not isinstance(assistant_message.content, str): + if assistant_message.content is not None and not isinstance( + assistant_message.content, str + ): raw = assistant_message.content if isinstance(raw, dict): - assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw) + assistant_message.content = ( + raw.get("text", "") + or raw.get("content", "") + or json.dumps(raw) + ) elif isinstance(raw, list): # Multimodal content list — extract text parts parts = [] @@ -9802,7 +11380,10 @@ class AIAgent: try: from hermes_cli.plugins import invoke_hook as _invoke_hook - _assistant_tool_calls = getattr(assistant_message, "tool_calls", None) or [] + + _assistant_tool_calls = ( + getattr(assistant_message, "tool_calls", None) or [] + ) _assistant_text = assistant_message.content or "" _invoke_hook( "post_api_request", @@ -9828,87 +11409,125 @@ class AIAgent: # Handle assistant response if assistant_message.content and not self.quiet_mode: if self.verbose_logging: - self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content}") + self._vprint( + f"{self.log_prefix}🤖 Assistant: {assistant_message.content}" + ) else: - self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}") + self._vprint( + f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}" + ) # Notify progress callback of model's thinking (used by subagent # delegation to relay the child's reasoning to the parent display). - if (assistant_message.content and self.tool_progress_callback): + if assistant_message.content and self.tool_progress_callback: _think_text = assistant_message.content.strip() # Strip reasoning XML tags that shouldn't leak to parent display _think_text = re.sub( - r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text + r"</?(?:REASONING_SCRATCHPAD|think|reasoning)>", "", _think_text ).strip() # For subagents: relay first line to parent display (existing behaviour). # For all agents with a structured callback: emit reasoning.available event. - first_line = _think_text.split('\n')[0][:80] if _think_text else "" - if first_line and getattr(self, '_delegate_depth', 0) > 0: + first_line = _think_text.split("\n")[0][:80] if _think_text else "" + if first_line and getattr(self, "_delegate_depth", 0) > 0: try: self.tool_progress_callback("_thinking", first_line) except Exception: pass elif _think_text: try: - self.tool_progress_callback("reasoning.available", "_thinking", _think_text[:500], None) + self.tool_progress_callback( + "reasoning.available", + "_thinking", + _think_text[:500], + None, + ) except Exception: pass - + # Check for incomplete <REASONING_SCRATCHPAD> (opened but never closed) # This means the model ran out of output tokens mid-reasoning — retry up to 2 times if has_incomplete_scratchpad(assistant_message.content or ""): self._incomplete_scratchpad_retries += 1 - - self._vprint(f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)") - + + self._vprint( + f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)" + ) + if self._incomplete_scratchpad_retries <= 2: - self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...") + self._vprint( + f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)..." + ) # Don't add the broken message, just retry continue else: # Max retries - discard this turn and save as partial - self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.", force=True) + self._vprint( + f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.", + force=True, + ) self._incomplete_scratchpad_retries = 0 - - rolled_back_messages = self._get_messages_up_to_last_assistant(messages) + + rolled_back_messages = self._get_messages_up_to_last_assistant( + messages + ) self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) - + return { "final_response": None, "messages": rolled_back_messages, "api_calls": api_call_count, "completed": False, "partial": True, - "error": "Incomplete REASONING_SCRATCHPAD after 2 retries" + "error": "Incomplete REASONING_SCRATCHPAD after 2 retries", } - + # Reset incomplete scratchpad counter on clean response self._incomplete_scratchpad_retries = 0 if self.api_mode == "codex_responses" and finish_reason == "incomplete": self._codex_incomplete_retries += 1 - interim_msg = self._build_assistant_message(assistant_message, finish_reason) - interim_has_content = bool((interim_msg.get("content") or "").strip()) - interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False - interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items")) + interim_msg = self._build_assistant_message( + assistant_message, finish_reason + ) + interim_has_content = bool( + (interim_msg.get("content") or "").strip() + ) + interim_has_reasoning = ( + bool(interim_msg.get("reasoning", "").strip()) + if isinstance(interim_msg.get("reasoning"), str) + else False + ) + interim_has_codex_reasoning = bool( + interim_msg.get("codex_reasoning_items") + ) - if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning: + if ( + interim_has_content + or interim_has_reasoning + or interim_has_codex_reasoning + ): last_msg = messages[-1] if messages else None # Duplicate detection: two consecutive incomplete assistant # messages with identical content AND reasoning are collapsed. # For reasoning-only messages (codex_reasoning_items differ but # visible content/reasoning are both empty), we also compare # the encrypted items to avoid silently dropping new state. - last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None + last_codex_items = ( + last_msg.get("codex_reasoning_items") + if isinstance(last_msg, dict) + else None + ) interim_codex_items = interim_msg.get("codex_reasoning_items") duplicate_interim = ( isinstance(last_msg, dict) and last_msg.get("role") == "assistant" and last_msg.get("finish_reason") == "incomplete" - and (last_msg.get("content") or "") == (interim_msg.get("content") or "") - and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "") + and (last_msg.get("content") or "") + == (interim_msg.get("content") or "") + and (last_msg.get("reasoning") or "") + == (interim_msg.get("reasoning") or "") and last_codex_items == interim_codex_items ) if not duplicate_interim: @@ -9917,7 +11536,9 @@ class AIAgent: if self._codex_incomplete_retries < 3: if not self.quiet_mode: - self._vprint(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)") + self._vprint( + f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)" + ) self._session_messages = messages self._save_session_log(messages) continue @@ -9934,26 +11555,33 @@ class AIAgent: } elif hasattr(self, "_codex_incomplete_retries"): self._codex_incomplete_retries = 0 - + # Check for tool calls if assistant_message.tool_calls: if not self.quiet_mode: - self._vprint(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...") - + self._vprint( + f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)..." + ) + if self.verbose_logging: for tc in assistant_message.tool_calls: - logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...") - + logging.debug( + f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}..." + ) + # Validate tool call names - detect model hallucinations # Repair mismatched tool names before validating for tc in assistant_message.tool_calls: if tc.function.name not in self.valid_tool_names: repaired = self._repair_tool_call(tc.function.name) if repaired: - print(f"{self.log_prefix}🔧 Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'") + print( + f"{self.log_prefix}🔧 Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'" + ) tc.function.name = repaired invalid_tool_calls = [ - tc.function.name for tc in assistant_message.tool_calls + tc.function.name + for tc in assistant_message.tool_calls if tc.function.name not in self.valid_tool_names ] if invalid_tool_calls: @@ -9963,11 +11591,20 @@ class AIAgent: # Return helpful error to model — model can self-correct next turn available = ", ".join(sorted(self.valid_tool_names)) invalid_name = invalid_tool_calls[0] - invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name - self._vprint(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)") + invalid_preview = ( + invalid_name[:80] + "..." + if len(invalid_name) > 80 + else invalid_name + ) + self._vprint( + f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)" + ) if self._invalid_tool_retries >= 3: - self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", force=True) + self._vprint( + f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", + force=True, + ) self._invalid_tool_retries = 0 self._persist_session(messages, conversation_history) return { @@ -9976,25 +11613,29 @@ class AIAgent: "api_calls": api_call_count, "completed": False, "partial": True, - "error": f"Model generated invalid tool call: {invalid_preview}" + "error": f"Model generated invalid tool call: {invalid_preview}", } - assistant_msg = self._build_assistant_message(assistant_message, finish_reason) + assistant_msg = self._build_assistant_message( + assistant_message, finish_reason + ) messages.append(assistant_msg) for tc in assistant_message.tool_calls: if tc.function.name not in self.valid_tool_names: content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}" else: content = "Skipped: another tool call in this turn used an invalid name. Please retry this tool call." - messages.append({ - "role": "tool", - "tool_call_id": tc.id, - "content": content, - }) + messages.append( + { + "role": "tool", + "tool_call_id": tc.id, + "content": content, + } + ) continue # Reset retry counter on successful tool call validation self._invalid_tool_retries = 0 - + # Validate tool call arguments are valid JSON # Handle empty strings as empty objects (common model quirk) invalid_json_args = [] @@ -10014,7 +11655,7 @@ class AIAgent: json.loads(args) except json.JSONDecodeError as e: invalid_json_args.append((tc.function.name, str(e))) - + if invalid_json_args: # Check if the invalid JSON is due to truncation rather # than a model formatting mistake. Routers sometimes @@ -10023,7 +11664,9 @@ class AIAgent: # Detect truncation: args that don't end with } or ] # (after stripping whitespace) are cut off mid-stream. _truncated = any( - not (tc.function.arguments or "").rstrip().endswith(("}", "]")) + not (tc.function.arguments or "") + .rstrip() + .endswith(("}", "]")) for tc in assistant_message.tool_calls if tc.function.name in {n for n, _ in invalid_json_args} ) @@ -10049,27 +11692,39 @@ class AIAgent: self._invalid_json_retries += 1 tool_name, error_msg = invalid_json_args[0] - self._vprint(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}") + self._vprint( + f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}" + ) if self._invalid_json_retries < 3: - self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...") + self._vprint( + f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)..." + ) # Don't add anything to messages, just retry the API call continue else: # Instead of returning partial, inject tool error results so the model can recover. # Using tool results (not user messages) preserves role alternation. - self._vprint(f"{self.log_prefix}⚠️ Injecting recovery tool results for invalid JSON...") + self._vprint( + f"{self.log_prefix}⚠️ Injecting recovery tool results for invalid JSON..." + ) self._invalid_json_retries = 0 # Reset for next attempt - + # Append the assistant message with its (broken) tool_calls - recovery_assistant = self._build_assistant_message(assistant_message, finish_reason) + recovery_assistant = self._build_assistant_message( + assistant_message, finish_reason + ) messages.append(recovery_assistant) - + # Respond with tool error results for each tool call invalid_names = {name for name, _ in invalid_json_args} for tc in assistant_message.tool_calls: if tc.function.name in invalid_names: - err = next(e for n, e in invalid_json_args if n == tc.function.name) + err = next( + e + for n, e in invalid_json_args + if n == tc.function.name + ) tool_result = ( f"Error: Invalid JSON arguments. {err}. " f"For tools with no required parameters, use an empty object: {{}}. " @@ -10077,13 +11732,15 @@ class AIAgent: ) else: tool_result = "Skipped: other tool call in this response had invalid JSON." - messages.append({ - "role": "tool", - "tool_call_id": tc.id, - "content": tool_result, - }) + messages.append( + { + "role": "tool", + "tool_call_id": tc.id, + "content": tool_result, + } + ) continue - + # Reset retry counter on successful JSON validation self._invalid_json_retries = 0 @@ -10095,23 +11752,32 @@ class AIAgent: assistant_message.tool_calls ) - assistant_msg = self._build_assistant_message(assistant_message, finish_reason) - + assistant_msg = self._build_assistant_message( + assistant_message, finish_reason + ) + # If this turn has both content AND tool_calls, capture the content # as a fallback final response. Common pattern: model delivers its # answer and calls memory/skill tools as a side-effect in the same # turn. If the follow-up turn after tools is empty, we use this. turn_content = assistant_message.content or "" - if turn_content and self._has_content_after_think_block(turn_content): + if turn_content and self._has_content_after_think_block( + turn_content + ): self._last_content_with_tools = turn_content # Only mute subsequent output when EVERY tool call in # this turn is post-response housekeeping (memory, todo, # skill_manage, etc.). If any substantive tool is present # (search_files, read_file, write_file, terminal, ...), # keep output visible so the user sees progress. - _HOUSEKEEPING_TOOLS = frozenset({ - "memory", "todo", "skill_manage", "session_search", - }) + _HOUSEKEEPING_TOOLS = frozenset( + { + "memory", + "todo", + "skill_manage", + "session_search", + } + ) _all_housekeeping = all( tc.function.name in _HOUSEKEEPING_TOOLS for tc in assistant_message.tool_calls @@ -10122,7 +11788,7 @@ class AIAgent: clean = self._strip_think_blocks(turn_content).strip() if clean: self._vprint(f" ┊ 💬 {clean}") - + # Pop thinking-only prefill message(s) before appending # (tool-call path — same rationale as the final-response path). _had_prefill = False @@ -10164,7 +11830,9 @@ class AIAgent: except Exception: pass - self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count) + self._execute_tool_calls( + assistant_message, messages, effective_task_id, api_call_count + ) # Reset per-turn retry counters after successful tool # execution so a single truncation doesn't poison the @@ -10182,10 +11850,12 @@ class AIAgent: # Refund the iteration if the ONLY tool(s) called were # execute_code (programmatic tool calling). These are # cheap RPC-style calls that shouldn't eat the budget. - _tc_names = {tc.function.name for tc in assistant_message.tool_calls} + _tc_names = { + tc.function.name for tc in assistant_message.tool_calls + } if _tc_names == {"execute_code"}: self.iteration_budget.refund() - + # Use real token counts from the API response to decide # compression. prompt_tokens + completion_tokens is the # actual context size the provider reported plus the @@ -10217,7 +11887,9 @@ class AIAgent: # and fires status_callback for gateway platforms. # Tiered: 85% (orange) and 95% (red/critical). if _compressor.threshold_tokens > 0: - _compaction_progress = _real_tokens / _compressor.threshold_tokens + _compaction_progress = ( + _real_tokens / _compressor.threshold_tokens + ) # Determine the warning tier for this progress level _warn_tier = 0.0 if _compaction_progress >= 0.95: @@ -10230,21 +11902,34 @@ class AIAgent: _sid = self.session_id or "default" _last = AIAgent._context_pressure_last_warned.get(_sid) _now = time.time() - if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN: + if ( + _last is None + or _last[0] < _warn_tier + or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN + ): self._context_pressure_warned_at = _warn_tier - AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now) - self._emit_context_pressure(_compaction_progress, _compressor) + AIAgent._context_pressure_last_warned[_sid] = ( + _warn_tier, + _now, + ) + self._emit_context_pressure( + _compaction_progress, _compressor + ) # Evict stale entries (older than 2x cooldown) _cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2 AIAgent._context_pressure_last_warned = { - k: v for k, v in AIAgent._context_pressure_last_warned.items() + k: v + for k, v in AIAgent._context_pressure_last_warned.items() if v[1] > _cutoff } - if self.compression_enabled and _compressor.should_compress(_real_tokens): + if self.compression_enabled and _compressor.should_compress( + _real_tokens + ): self._safe_print(" ⟳ compacting context…") messages, active_system_prompt = self._compress_context( - messages, system_message, + messages, + system_message, approx_tokens=self.context_compressor.last_prompt_tokens, task_id=effective_task_id, ) @@ -10252,25 +11937,25 @@ class AIAgent: # _flush_messages_to_session_db writes compressed messages # to the new session (see preflight compression comment). conversation_history = None - + # Save session log incrementally (so progress is visible even if interrupted) self._session_messages = messages self._save_session_log(messages) - + # Continue loop for next response continue - + else: # No tool calls - this is the final response final_response = assistant_message.content or "" - + # Fix: unmute output when entering the no-tool-call branch # so the user can see empty-response warnings and recovery # status messages. _mute_post_response was set during a # prior housekeeping tool turn and should not silence the # final response path. self._mute_post_response = False - + # Check if response only has think block with no actual content after it if not self._has_content_after_think_block(final_response): # ── Partial stream recovery ───────────────────── @@ -10283,7 +11968,9 @@ class AIAgent: ) if self._has_content_after_think_block(_partial_streamed): _turn_exit_reason = "partial_stream_recovery" - _recovered = self._strip_think_blocks(_partial_streamed).strip() + _recovered = self._strip_think_blocks( + _partial_streamed + ).strip() logger.info( "Partial stream content delivered (%d chars) " "— using as final response", @@ -10301,11 +11988,15 @@ class AIAgent: # tool calls (e.g. "You're welcome!" + memory save), the model # has nothing more to say. Use the earlier content immediately # instead of wasting API calls on retries that won't help. - fallback = getattr(self, '_last_content_with_tools', None) + fallback = getattr(self, "_last_content_with_tools", None) if fallback: _turn_exit_reason = "fallback_prior_turn_content" - logger.info("Empty follow-up after tool calls — using prior turn content as final response") - self._emit_status("↻ Empty response after tool calls — using earlier content as final answer") + logger.info( + "Empty follow-up after tool calls — using prior turn content as final response" + ) + self._emit_status( + "↻ Empty response after tool calls — using earlier content as final answer" + ) self._last_content_with_tools = None self._empty_content_retries = 0 # Do NOT modify the assistant message content — the @@ -10328,9 +12019,8 @@ class AIAgent: m.get("role") == "tool" for m in messages[-5:] # check recent messages ) - if ( - _prior_was_tool - and not getattr(self, "_post_tool_empty_retried", False) + if _prior_was_tool and not getattr( + self, "_post_tool_empty_retried", False ): self._post_tool_empty_retried = True logger.info( @@ -10348,14 +12038,16 @@ class AIAgent: # APIs reject as an invalid sequence. assistant_msg["content"] = "(empty)" messages.append(assistant_msg) - messages.append({ - "role": "user", - "content": ( - "You just executed tool calls but returned an " - "empty response. Please process the tool " - "results above and continue with the task." - ), - }) + messages.append( + { + "role": "user", + "content": ( + "You just executed tool calls but returned an " + "empty response. Please process the tool " + "results above and continue with the task." + ), + } + ) continue # ── Thinking-only prefill continuation ────────── @@ -10403,15 +12095,19 @@ class AIAgent: final_response ).strip() _prefill_exhausted = ( - _has_structured - and self._thinking_prefill_retries >= 2 + _has_structured and self._thinking_prefill_retries >= 2 ) - if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3: + if ( + _truly_empty + and (not _has_structured or _prefill_exhausted) + and self._empty_content_retries < 3 + ): self._empty_content_retries += 1 logger.warning( "Empty response (no content or reasoning) — " "retry %d/3 (model=%s)", - self._empty_content_retries, self.model, + self._empty_content_retries, + self.model, ) self._emit_status( f"⚠️ Empty response from model — retrying " @@ -10429,7 +12125,8 @@ class AIAgent: logger.warning( "Empty response after %d retries — " "attempting fallback (model=%s, provider=%s)", - self._empty_content_retries, self.model, + self._empty_content_retries, + self.model, self.provider, ) self._emit_status( @@ -10445,7 +12142,8 @@ class AIAgent: logger.info( "Fallback activated after empty responses: " "now using %s on %s", - self.model, self.provider, + self.model, + self.provider, ) continue @@ -10454,16 +12152,23 @@ class AIAgent: # "(empty)" terminal. _turn_exit_reason = "empty_response_exhausted" reasoning_text = self._extract_reasoning(assistant_message) - assistant_msg = self._build_assistant_message(assistant_message, finish_reason) + assistant_msg = self._build_assistant_message( + assistant_message, finish_reason + ) assistant_msg["content"] = "(empty)" messages.append(assistant_msg) if reasoning_text: - reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text + reasoning_preview = ( + reasoning_text[:500] + "..." + if len(reasoning_text) > 500 + else reasoning_text + ) logger.warning( "Reasoning-only response (no visible content) " "after exhausting retries and fallback. " - "Reasoning: %s", reasoning_preview, + "Reasoning: %s", + reasoning_preview, ) self._emit_status( "⚠️ Model produced reasoning but no visible " @@ -10474,18 +12179,22 @@ class AIAgent: "Empty response (no content or reasoning) " "after %d retries. No fallback available. " "model=%s provider=%s", - self._empty_content_retries, self.model, + self._empty_content_retries, + self.model, self.provider, ) self._emit_status( "❌ Model returned no content after all retries" - + (" and fallback attempts." if self._fallback_chain else - ". No fallback providers configured.") + + ( + " and fallback attempts." + if self._fallback_chain + else ". No fallback providers configured." + ) ) final_response = "(empty)" break - + # Reset retry counter/signature on successful content self._empty_content_retries = 0 self._thinking_prefill_retries = 0 @@ -10501,7 +12210,9 @@ class AIAgent: ) ): codex_ack_continuations += 1 - interim_msg = self._build_assistant_message(assistant_message, "incomplete") + interim_msg = self._build_assistant_message( + assistant_message, "incomplete" + ) messages.append(interim_msg) self._emit_interim_assistant_message(interim_msg) @@ -10523,11 +12234,13 @@ class AIAgent: final_response = truncated_response_prefix + final_response truncated_response_prefix = "" length_continue_retries = 0 - + # Strip <think> blocks from user-facing response (keep raw in messages for trajectory) final_response = self._strip_think_blocks(final_response).strip() - - final_msg = self._build_assistant_message(assistant_message, finish_reason) + + final_msg = self._build_assistant_message( + assistant_message, finish_reason + ) # Pop thinking-only prefill message(s) before appending # the final response. This avoids consecutive assistant @@ -10541,21 +12254,25 @@ class AIAgent: messages.pop() messages.append(final_msg) - + _turn_exit_reason = f"text_response(finish_reason={finish_reason})" if not self.quiet_mode: - self._safe_print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)") + self._safe_print( + f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)" + ) break - + except Exception as e: error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}" try: print(f"❌ {error_msg}") except (OSError, ValueError): logger.error(error_msg) - - logger.debug("Outer loop error in API call #%d", api_call_count, exc_info=True) - + + logger.debug( + "Outer loop error in API call #%d", api_call_count, exc_info=True + ) + # If an assistant message with tool_calls was already appended, # the API expects a role="tool" result for every tool_call_id. # Fill in error results for any that weren't answered yet. @@ -10568,11 +12285,12 @@ class AIAgent: if msg.get("role") == "assistant" and msg.get("tool_calls"): answered_ids = { m["tool_call_id"] - for m in messages[idx + 1:] + for m in messages[idx + 1 :] if isinstance(m, dict) and m.get("role") == "tool" } for tc in msg["tool_calls"]: - if not tc or not isinstance(tc, dict): continue + if not tc or not isinstance(tc, dict): + continue if tc["id"] not in answered_ids: err_msg = { "role": "tool", @@ -10581,7 +12299,7 @@ class AIAgent: } messages.append(err_msg) break - + # Non-tool errors don't need a synthetic message injected. # The error is already printed to the user (line above), and # the retry loop continues. Injecting a fake user/assistant @@ -10591,12 +12309,14 @@ class AIAgent: # If we're near the limit, break to avoid infinite loops if api_call_count >= self.max_iterations - 1: _turn_exit_reason = f"error_near_max_iterations({error_msg[:80]})" - final_response = f"I apologize, but I encountered repeated errors: {error_msg}" + final_response = ( + f"I apologize, but I encountered repeated errors: {error_msg}" + ) # Append as assistant so the history stays valid for # session resume (avoids consecutive user messages). messages.append({"role": "assistant", "content": final_response}) break - + if final_response is None and ( api_call_count >= self.max_iterations or self.iteration_budget.remaining <= 0 @@ -10604,7 +12324,9 @@ class AIAgent: # Budget exhausted — ask the model for a summary via one extra # API call with tools stripped. _handle_max_iterations injects a # user message and makes a single toolless request. - _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})" + _turn_exit_reason = ( + f"max_iterations_reached({api_call_count}/{self.max_iterations})" + ) self._emit_status( f"⚠️ Iteration budget exhausted ({api_call_count}/{self.max_iterations}) " "— asking model to summarise" @@ -10615,7 +12337,7 @@ class AIAgent: "— requesting summary..." ) final_response = self._handle_max_iterations(messages, api_call_count) - + # Determine if conversation completed successfully completed = final_response is not None and api_call_count < self.max_iterations @@ -10644,8 +12366,11 @@ class AIAgent: break _turn_tool_count = sum( - 1 for m in messages - if isinstance(m, dict) and m.get("role") == "assistant" and m.get("tool_calls") + 1 + for m in messages + if isinstance(m, dict) + and m.get("role") == "assistant" + and m.get("tool_calls") ) _resp_len = len(final_response) if final_response else 0 _budget_used = self.iteration_budget.used if self.iteration_budget else 0 @@ -10656,9 +12381,15 @@ class AIAgent: "tool_turns=%d last_msg_role=%s response_len=%d session=%s" ) _diag_args = ( - _turn_exit_reason, self.model, api_call_count, self.max_iterations, - _budget_used, _budget_max, - _turn_tool_count, _last_msg_role, _resp_len, + _turn_exit_reason, + self.model, + api_call_count, + self.max_iterations, + _budget_used, + _budget_max, + _turn_tool_count, + _last_msg_role, + _resp_len, self.session_id or "none", ) @@ -10666,8 +12397,10 @@ class AIAgent: # Agent was mid-work — this is the "just stops" case. logger.warning( "Turn ended with pending tool result (agent may appear stuck). " - + _diag_msg + " last_tool=%s", - *_diag_args, _last_tool_name, + + _diag_msg + + " last_tool=%s", + *_diag_args, + _last_tool_name, ) else: logger.info(_diag_msg, *_diag_args) @@ -10679,6 +12412,7 @@ class AIAgent: if final_response and not interrupted: try: from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( "post_llm_call", session_id=self.session_id, @@ -10719,17 +12453,20 @@ class AIAgent: "prompt_tokens": self.session_prompt_tokens, "completion_tokens": self.session_completion_tokens, "total_tokens": self.session_total_tokens, - "last_prompt_tokens": getattr(self.context_compressor, "last_prompt_tokens", 0) or 0, + "last_prompt_tokens": getattr( + self.context_compressor, "last_prompt_tokens", 0 + ) + or 0, "estimated_cost_usd": self.session_estimated_cost_usd, "cost_status": self.session_cost_status, "cost_source": self.session_cost_source, } self._response_was_previewed = False - + # Include interrupt message if one triggered the interrupt if interrupted and self._interrupt_message: result["interrupt_message"] = self._interrupt_message - + # Clear interrupt state after handling self.clear_interrupt() @@ -10738,9 +12475,11 @@ class AIAgent: # Check skill trigger NOW — based on how many tool iterations THIS turn used. _should_review_skills = False - if (self._skill_nudge_interval > 0 - and self._iters_since_skill >= self._skill_nudge_interval - and "skill_manage" in self.valid_tool_names): + if ( + self._skill_nudge_interval > 0 + and self._iters_since_skill >= self._skill_nudge_interval + and "skill_manage" in self.valid_tool_names + ): _should_review_skills = True self._iters_since_skill = 0 @@ -10756,7 +12495,11 @@ class AIAgent: # Background memory/skill review — runs AFTER the response is delivered # so it never competes with the user's task for model attention. - if final_response and not interrupted and (_should_review_memory or _should_review_skills): + if ( + final_response + and not interrupted + and (_should_review_memory or _should_review_skills) + ): try: self._spawn_background_review( messages_snapshot=list(messages), @@ -10778,6 +12521,7 @@ class AIAgent: # Plugins can use this for cleanup, flushing buffers, etc. try: from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( "on_session_end", session_id=self.session_id, @@ -10818,7 +12562,7 @@ def main( save_trajectories: bool = False, save_sample: bool = False, verbose: bool = False, - log_prefix_chars: int = 20 + log_prefix_chars: int = 20, ): """ Main function for running the agent directly. @@ -10844,58 +12588,69 @@ def main( """ print("🤖 AI Agent with Tool Calling") print("=" * 50) - + # Handle tool listing if list_tools: - from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets + from model_tools import ( + get_all_tool_names, + get_toolset_for_tool, + get_available_toolsets, + ) from toolsets import get_all_toolsets, get_toolset_info - + print("📋 Available Tools & Toolsets:") print("-" * 50) - + # Show new toolsets system print("\n🎯 Predefined Toolsets (New System):") print("-" * 40) all_toolsets = get_all_toolsets() - + # Group by category basic_toolsets = [] composite_toolsets = [] scenario_toolsets = [] - + for name, toolset in all_toolsets.items(): info = get_toolset_info(name) if info: entry = (name, info) if name in ["web", "terminal", "vision", "creative", "reasoning"]: basic_toolsets.append(entry) - elif name in ["research", "development", "analysis", "content_creation", "full_stack"]: + elif name in [ + "research", + "development", + "analysis", + "content_creation", + "full_stack", + ]: composite_toolsets.append(entry) else: scenario_toolsets.append(entry) - + # Print basic toolsets print("\n📌 Basic Toolsets:") for name, info in basic_toolsets: - tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none' + tools_str = ( + ", ".join(info["resolved_tools"]) if info["resolved_tools"] else "none" + ) print(f" • {name:15} - {info['description']}") print(f" Tools: {tools_str}") - + # Print composite toolsets print("\n📂 Composite Toolsets (built from other toolsets):") for name, info in composite_toolsets: - includes_str = ', '.join(info['includes']) if info['includes'] else 'none' + includes_str = ", ".join(info["includes"]) if info["includes"] else "none" print(f" • {name:15} - {info['description']}") print(f" Includes: {includes_str}") print(f" Total tools: {info['tool_count']}") - + # Print scenario-specific toolsets print("\n🎭 Scenario-Specific Toolsets:") for name, info in scenario_toolsets: print(f" • {name:20} - {info['description']}") print(f" Total tools: {info['tool_count']}") - - + # Show legacy toolset compatibility print("\n📦 Legacy Toolsets (for backward compatibility):") legacy_toolsets = get_available_toolsets() @@ -10904,47 +12659,57 @@ def main( print(f" {status} {name}: {info['description']}") if not info["available"]: print(f" Requirements: {', '.join(info['requirements'])}") - + # Show individual tools all_tools = get_all_tool_names() print(f"\n🔧 Individual Tools ({len(all_tools)} available):") for tool_name in sorted(all_tools): toolset = get_toolset_for_tool(tool_name) print(f" 📌 {tool_name} (from {toolset})") - + print("\n💡 Usage Examples:") print(" # Use predefined toolsets") - print(" python run_agent.py --enabled_toolsets=research --query='search for Python news'") - print(" python run_agent.py --enabled_toolsets=development --query='debug this code'") - print(" python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'") + print( + " python run_agent.py --enabled_toolsets=research --query='search for Python news'" + ) + print( + " python run_agent.py --enabled_toolsets=development --query='debug this code'" + ) + print( + " python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'" + ) print(" ") print(" # Combine multiple toolsets") - print(" python run_agent.py --enabled_toolsets=web,vision --query='analyze website'") + print( + " python run_agent.py --enabled_toolsets=web,vision --query='analyze website'" + ) print(" ") print(" # Disable toolsets") - print(" python run_agent.py --disabled_toolsets=terminal --query='no command execution'") + print( + " python run_agent.py --disabled_toolsets=terminal --query='no command execution'" + ) print(" ") print(" # Run with trajectory saving enabled") print(" python run_agent.py --save_trajectories --query='your question here'") return - + # Parse toolset selection arguments enabled_toolsets_list = None disabled_toolsets_list = None - + if enabled_toolsets: enabled_toolsets_list = [t.strip() for t in enabled_toolsets.split(",")] print(f"🎯 Enabled toolsets: {enabled_toolsets_list}") - + if disabled_toolsets: disabled_toolsets_list = [t.strip() for t in disabled_toolsets.split(",")] print(f"🚫 Disabled toolsets: {disabled_toolsets_list}") - + if save_trajectories: print("💾 Trajectory saving: ENABLED") print(" - Successful conversations → trajectory_samples.jsonl") print(" - Failed conversations → failed_trajectories.jsonl") - + # Initialize agent with provided parameters try: agent = AIAgent( @@ -10956,12 +12721,12 @@ def main( disabled_toolsets=disabled_toolsets_list, save_trajectories=save_trajectories, verbose_logging=verbose, - log_prefix_chars=log_prefix_chars + log_prefix_chars=log_prefix_chars, ) except RuntimeError as e: print(f"❌ Failed to initialize agent: {e}") return - + # Use provided query or default to Python 3.13 example if query is None: user_query = ( @@ -10970,45 +12735,43 @@ def main( ) else: user_query = query - + print(f"\n📝 User Query: {user_query}") print("\n" + "=" * 50) - + # Run conversation result = agent.run_conversation(user_query) - + print("\n" + "=" * 50) print("📋 CONVERSATION SUMMARY") print("=" * 50) print(f"✅ Completed: {result['completed']}") print(f"📞 API Calls: {result['api_calls']}") print(f"💬 Messages: {len(result['messages'])}") - - if result['final_response']: + + if result["final_response"]: print("\n🎯 FINAL RESPONSE:") print("-" * 30) - print(result['final_response']) - + print(result["final_response"]) + # Save sample trajectory to UUID-named file if requested if save_sample: sample_id = str(uuid.uuid4())[:8] sample_filename = f"sample_{sample_id}.json" - + # Convert messages to trajectory format (same as batch_runner) trajectory = agent._convert_to_trajectory_format( - result['messages'], - user_query, - result['completed'] + result["messages"], user_query, result["completed"] ) - + entry = { "conversations": trajectory, "timestamp": datetime.now().isoformat(), "model": model, - "completed": result['completed'], - "query": user_query + "completed": result["completed"], + "query": user_query, } - + try: with open(sample_filename, "w", encoding="utf-8") as f: # Pretty-print JSON with indent for readability @@ -11016,7 +12779,7 @@ def main( print(f"\n💾 Sample trajectory saved to: {sample_filename}") except Exception as e: print(f"\n⚠️ Failed to save sample: {e}") - + print("\n👋 Agent execution completed!") diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index ae78888d86f..76620be5add 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -120,13 +120,17 @@ class TestReadClaudeCodeCredentials: def test_reads_valid_credentials(self, tmp_path, monkeypatch): cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": { - "accessToken": "sk-ant-oat01-token", - "refreshToken": "sk-ant-oat01-refresh", - "expiresAt": int(time.time() * 1000) + 3600_000, - } - })) + cred_file.write_text( + json.dumps( + { + "claudeAiOauth": { + "accessToken": "sk-ant-oat01-token", + "refreshToken": "sk-ant-oat01-refresh", + "expiresAt": int(time.time() * 1000) + 3600_000, + } + } + ) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) creds = read_claude_code_credentials() assert creds is not None @@ -134,7 +138,9 @@ class TestReadClaudeCodeCredentials: assert creds["refreshToken"] == "sk-ant-oat01-refresh" assert creds["source"] == "claude_code_credentials_file" - def test_ignores_primary_api_key_for_native_anthropic_resolution(self, tmp_path, monkeypatch): + def test_ignores_primary_api_key_for_native_anthropic_resolution( + self, tmp_path, monkeypatch + ): claude_json = tmp_path / ".claude.json" claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"})) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) @@ -156,9 +162,9 @@ class TestReadClaudeCodeCredentials: def test_returns_none_for_empty_access_token(self, tmp_path, monkeypatch): cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": {"accessToken": "", "refreshToken": "x"} - })) + cred_file.write_text( + json.dumps({"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}}) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) assert read_claude_code_credentials() is None @@ -185,16 +191,22 @@ class TestResolveAnthropicToken: monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) assert resolve_anthropic_token() == "sk-ant-oat01-mytoken" - def test_does_not_resolve_primary_api_key_as_native_anthropic_token(self, monkeypatch, tmp_path): + def test_does_not_resolve_primary_api_key_as_native_anthropic_token( + self, monkeypatch, tmp_path + ): monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) - (tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"})) + (tmp_path / ".claude.json").write_text( + json.dumps({"primaryApiKey": "sk-ant-api03-primary"}) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) assert resolve_anthropic_token() is None - def test_falls_back_to_api_key_when_no_oauth_sources_exist(self, monkeypatch, tmp_path): + def test_falls_back_to_api_key_when_no_oauth_sources_exist( + self, monkeypatch, tmp_path + ): monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-mykey") monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False) monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) @@ -228,39 +240,53 @@ class TestResolveAnthropicToken: monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": { - "accessToken": "cc-auto-token", - "refreshToken": "refresh", - "expiresAt": int(time.time() * 1000) + 3600_000, - } - })) + cred_file.write_text( + json.dumps( + { + "claudeAiOauth": { + "accessToken": "cc-auto-token", + "refreshToken": "refresh", + "expiresAt": int(time.time() * 1000) + 3600_000, + } + } + ) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) assert resolve_anthropic_token() == "cc-auto-token" - def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(self, monkeypatch, tmp_path): + def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token( + self, monkeypatch, tmp_path + ): monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token") monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": { - "accessToken": "cc-auto-token", - "refreshToken": "refresh-token", - "expiresAt": int(time.time() * 1000) + 3600_000, - } - })) + cred_file.write_text( + json.dumps( + { + "claudeAiOauth": { + "accessToken": "cc-auto-token", + "refreshToken": "refresh-token", + "expiresAt": int(time.time() * 1000) + 3600_000, + } + } + ) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) assert resolve_anthropic_token() == "cc-auto-token" - def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(self, monkeypatch, tmp_path): + def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists( + self, monkeypatch, tmp_path + ): monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token") monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) claude_json = tmp_path / ".claude.json" - claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"})) + claude_json.write_text( + json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"}) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) assert resolve_anthropic_token() == "sk-ant-oat01-static-token" @@ -280,17 +306,19 @@ class TestRefreshOauthToken: "expiresAt": int(time.time() * 1000) - 3600_000, } - mock_response = json.dumps({ - "access_token": "new-token-abc", - "refresh_token": "new-refresh-456", - "expires_in": 7200, - }).encode() + mock_response = json.dumps( + { + "access_token": "new-token-abc", + "refresh_token": "new-refresh-456", + "expires_in": 7200, + } + ).encode() with patch("urllib.request.urlopen") as mock_urlopen: mock_ctx = MagicMock() - mock_ctx.__enter__ = MagicMock(return_value=MagicMock( - read=MagicMock(return_value=mock_response) - )) + mock_ctx.__enter__ = MagicMock( + return_value=MagicMock(read=MagicMock(return_value=mock_response)) + ) mock_ctx.__exit__ = MagicMock(return_value=False) mock_urlopen.return_value = mock_ctx @@ -348,38 +376,54 @@ class TestResolveWithRefresh: # Set up expired creds with a refresh token cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": { - "accessToken": "expired-tok", - "refreshToken": "valid-refresh", - "expiresAt": int(time.time() * 1000) - 3600_000, - } - })) + cred_file.write_text( + json.dumps( + { + "claudeAiOauth": { + "accessToken": "expired-tok", + "refreshToken": "valid-refresh", + "expiresAt": int(time.time() * 1000) - 3600_000, + } + } + ) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) # Mock refresh to succeed - with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"): + with patch( + "agent.anthropic_adapter._refresh_oauth_token", + return_value="refreshed-token", + ): result = resolve_anthropic_token() assert result == "refreshed-token" - def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(self, monkeypatch, tmp_path): + def test_static_env_oauth_token_does_not_block_refreshable_claude_creds( + self, monkeypatch, tmp_path + ): monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-expired-env-token") monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False) cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": { - "accessToken": "expired-claude-creds-token", - "refreshToken": "valid-refresh", - "expiresAt": int(time.time() * 1000) - 3600_000, - } - })) + cred_file.write_text( + json.dumps( + { + "claudeAiOauth": { + "accessToken": "expired-claude-creds-token", + "refreshToken": "valid-refresh", + "expiresAt": int(time.time() * 1000) - 3600_000, + } + } + ) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) - with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"): + with patch( + "agent.anthropic_adapter._refresh_oauth_token", + return_value="refreshed-token", + ): result = resolve_anthropic_token() assert result == "refreshed-token" @@ -400,13 +444,17 @@ class TestRunOauthSetupToken: # Pre-create credential files that will be found after subprocess cred_file = tmp_path / ".claude" / ".credentials.json" cred_file.parent.mkdir(parents=True) - cred_file.write_text(json.dumps({ - "claudeAiOauth": { - "accessToken": "from-cred-file", - "refreshToken": "refresh", - "expiresAt": int(time.time() * 1000) + 3600_000, - } - })) + cred_file.write_text( + json.dumps( + { + "claudeAiOauth": { + "accessToken": "from-cred-file", + "refreshToken": "refresh", + "expiresAt": int(time.time() * 1000) + 3600_000, + } + } + ) + ) monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) with patch("subprocess.run") as mock_run: @@ -459,27 +507,45 @@ class TestRunOauthSetupToken: class TestNormalizeModelName: def test_strips_anthropic_prefix(self): - assert normalize_model_name("anthropic/claude-sonnet-4-20250514") == "claude-sonnet-4-20250514" + assert ( + normalize_model_name("anthropic/claude-sonnet-4-20250514") + == "claude-sonnet-4-20250514" + ) def test_leaves_bare_name(self): - assert normalize_model_name("claude-sonnet-4-20250514") == "claude-sonnet-4-20250514" + assert ( + normalize_model_name("claude-sonnet-4-20250514") + == "claude-sonnet-4-20250514" + ) def test_converts_dots_to_hyphens(self): """OpenRouter uses dots (4.6), Anthropic uses hyphens (4-6).""" assert normalize_model_name("anthropic/claude-opus-4.6") == "claude-opus-4-6" - assert normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5" + assert ( + normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5" + ) assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6" def test_already_hyphenated_unchanged(self): """Names already in Anthropic format should pass through.""" assert normalize_model_name("claude-opus-4-6") == "claude-opus-4-6" - assert normalize_model_name("claude-opus-4-5-20251101") == "claude-opus-4-5-20251101" + assert ( + normalize_model_name("claude-opus-4-5-20251101") + == "claude-opus-4-5-20251101" + ) def test_preserve_dots_for_alibaba_dashscope(self): """Alibaba/DashScope use dots in model names (e.g. qwen3.5-plus). Fixes #1739.""" - assert normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus" - assert normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus" - assert normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash" + assert ( + normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus" + ) + assert ( + normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True) + == "qwen3.5-plus" + ) + assert ( + normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash" + ) # --------------------------------------------------------------------------- @@ -536,7 +602,10 @@ class TestConvertMessages: "role": "user", "content": [ {"type": "text", "text": "Can you see this?"}, - {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/cat.png"}, + }, ], } ] @@ -548,7 +617,10 @@ class TestConvertMessages: "role": "user", "content": [ {"type": "text", "text": "Can you see this?"}, - {"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}}, + { + "type": "image", + "source": {"type": "url", "url": "https://example.com/cat.png"}, + }, ], } ] @@ -613,7 +685,10 @@ class TestConvertMessages: "role": "assistant", "content": "", "tool_calls": [ - {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}}, + { + "id": "tc_1", + "function": {"name": "test_tool", "arguments": "{}"}, + }, ], }, {"role": "tool", "tool_call_id": "tc_1", "content": "result data"}, @@ -678,10 +753,9 @@ class TestConvertMessages: # tc_gone has no matching tool_use — its tool_result should be stripped for m in result: if m["role"] == "user" and isinstance(m["content"], list): - assert all( - b.get("type") != "tool_result" - for b in m["content"] - ), "Orphaned tool_result should have been stripped" + assert all(b.get("type") != "tool_result" for b in m["content"]), ( + "Orphaned tool_result should have been stripped" + ) def test_strips_orphaned_tool_result_preserves_valid(self): """Orphaned tool_results are stripped while valid ones survive.""" @@ -690,7 +764,10 @@ class TestConvertMessages: "role": "assistant", "content": "", "tool_calls": [ - {"id": "tc_valid", "function": {"name": "search", "arguments": "{}"}}, + { + "id": "tc_valid", + "function": {"name": "search", "arguments": "{}"}, + }, ], }, {"role": "tool", "tool_call_id": "tc_valid", "content": "good result"}, @@ -709,7 +786,11 @@ class TestConvertMessages: { "role": "system", "content": [ - {"type": "text", "text": "System prompt", "cache_control": {"type": "ephemeral"}}, + { + "type": "text", + "text": "System prompt", + "cache_control": {"type": "ephemeral"}, + }, ], }, {"role": "user", "content": "Hi"}, @@ -720,10 +801,12 @@ class TestConvertMessages: assert system[0]["cache_control"] == {"type": "ephemeral"} def test_assistant_cache_control_blocks_are_preserved(self): - messages = apply_anthropic_cache_control([ - {"role": "system", "content": "System prompt"}, - {"role": "assistant", "content": "Hello from assistant"}, - ]) + messages = apply_anthropic_cache_control( + [ + {"role": "system", "content": "System prompt"}, + {"role": "assistant", "content": "Hello from assistant"}, + ] + ) _, result = convert_messages_to_anthropic(messages) assistant_blocks = result[0]["content"] @@ -733,17 +816,23 @@ class TestConvertMessages: assert assistant_blocks[0]["cache_control"] == {"type": "ephemeral"} def test_tool_cache_control_is_preserved_on_tool_result_block(self): - messages = apply_anthropic_cache_control([ - {"role": "system", "content": "System prompt"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}}, - ], - }, - {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, - ], native_anthropic=True) + messages = apply_anthropic_cache_control( + [ + {"role": "system", "content": "System prompt"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "tc_1", + "function": {"name": "test_tool", "arguments": "{}"}, + }, + ], + }, + {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, + ], + native_anthropic=True, + ) _, result = convert_messages_to_anthropic(messages) user_msg = [m for m in result if m["role"] == "user"][0] @@ -760,7 +849,10 @@ class TestConvertMessages: "role": "assistant", "content": "", "tool_calls": [ - {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}}, + { + "id": "tc_1", + "function": {"name": "test_tool", "arguments": "{}"}, + }, ], "reasoning_details": [ { @@ -774,10 +866,14 @@ class TestConvertMessages: ] _, result = convert_messages_to_anthropic(messages) - assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")["content"] + assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")[ + "content" + ] assert assistant_blocks[0]["type"] == "thinking" - assert assistant_blocks[0]["thinking"] == "Need to inspect the tool result first." + assert ( + assistant_blocks[0]["thinking"] == "Need to inspect the tool result first." + ) assert assistant_blocks[0]["signature"] == "sig_123" assert assistant_blocks[1]["type"] == "tool_use" @@ -832,25 +928,33 @@ class TestConvertMessages: } def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self): - messages = apply_anthropic_cache_control([ - {"role": "system", "content": "System prompt"}, - {"role": "user", "content": "Find the skill"}, - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"id": "tc_1", "function": {"name": "skill_view", "arguments": "{}"}}, - ], - }, - {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, - ]) + messages = apply_anthropic_cache_control( + [ + {"role": "system", "content": "System prompt"}, + {"role": "user", "content": "Find the skill"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "tc_1", + "function": {"name": "skill_view", "arguments": "{}"}, + }, + ], + }, + {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, + ] + ) _, result = convert_messages_to_anthropic(messages) assistant_turn = next(msg for msg in result if msg["role"] == "assistant") assistant_blocks = assistant_turn["content"] - assert all(not (b.get("type") == "text" and b.get("text") == "") for b in assistant_blocks) + assert all( + not (b.get("type") == "text" and b.get("text") == "") + for b in assistant_blocks + ) assert any(b.get("type") == "tool_use" for b in assistant_blocks) def test_empty_user_message_string_gets_placeholder(self): @@ -888,7 +992,13 @@ class TestConvertMessages: def test_user_message_with_empty_text_blocks_gets_placeholder(self): """User message with only empty text blocks should get placeholder.""" messages = [ - {"role": "user", "content": [{"type": "text", "text": ""}, {"type": "text", "text": " "}]}, + { + "role": "user", + "content": [ + {"type": "text", "text": ""}, + {"type": "text", "text": " "}, + ], + }, ] _, result = convert_messages_to_anthropic(messages) assert result[0]["role"] == "user" @@ -1085,35 +1195,43 @@ class TestBuildAnthropicKwargs: class TestGetAnthropicMaxOutput: def test_opus_4_6(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-opus-4-6") == 128_000 def test_opus_4_6_variant(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-opus-4-6:1m:fast") == 128_000 def test_sonnet_4_6(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000 def test_sonnet_4_date_stamped(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-sonnet-4-20250514") == 64_000 def test_claude_3_5_sonnet(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192 def test_claude_3_opus(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-3-opus-20240229") == 4_096 def test_unknown_future_model(self): from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("claude-ultra-5-20260101") == 128_000 def test_longest_prefix_wins(self): """'claude-3-5-sonnet' should match before 'claude-3-5'.""" from agent.anthropic_adapter import _get_anthropic_max_output + # claude-3-5-sonnet (8192) should win over a hypothetical shorter match assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192 @@ -1218,7 +1336,9 @@ class TestNormalizeResponse: msg, reason = normalize_anthropic_response(self._make_response(blocks)) assert msg.content == "The answer is 42." assert msg.reasoning == "Let me reason about this..." - assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}] + assert msg.reasoning_details == [ + {"type": "thinking", "thinking": "Let me reason about this..."} + ] def test_thinking_response_preserves_signature(self): blocks = [ @@ -1235,15 +1355,9 @@ class TestNormalizeResponse: def test_stop_reason_mapping(self): block = SimpleNamespace(type="text", text="x") - _, r1 = normalize_anthropic_response( - self._make_response([block], "end_turn") - ) - _, r2 = normalize_anthropic_response( - self._make_response([block], "tool_use") - ) - _, r3 = normalize_anthropic_response( - self._make_response([block], "max_tokens") - ) + _, r1 = normalize_anthropic_response(self._make_response([block], "end_turn")) + _, r2 = normalize_anthropic_response(self._make_response([block], "tool_use")) + _, r3 = normalize_anthropic_response(self._make_response([block], "max_tokens")) assert r1 == "stop" assert r2 == "tool_calls" assert r3 == "length" @@ -1306,7 +1420,11 @@ class TestThinkingBlockSignatureManagement: {"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}}, ], "reasoning_details": [ - {"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"}, + { + "type": "thinking", + "thinking": "Old reasoning.", + "signature": "sig_old", + }, ], }, {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"}, @@ -1317,7 +1435,11 @@ class TestThinkingBlockSignatureManagement: {"id": "tc_2", "function": {"name": "tool2", "arguments": "{}"}}, ], "reasoning_details": [ - {"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"}, + { + "type": "thinking", + "thinking": "Latest reasoning.", + "signature": "sig_new", + }, ], }, {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"}, @@ -1348,7 +1470,11 @@ class TestThinkingBlockSignatureManagement: "role": "assistant", "content": "The answer is 42.", "reasoning_details": [ - {"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"}, + { + "type": "thinking", + "thinking": "Deep thought.", + "signature": "sig_valid", + }, ], }, ] @@ -1445,14 +1571,22 @@ class TestThinkingBlockSignatureManagement: "role": "assistant", "content": "First response.", "reasoning_details": [ - {"type": "thinking", "thinking": "First thought.", "signature": "sig_1"}, + { + "type": "thinking", + "thinking": "First thought.", + "signature": "sig_1", + }, ], }, { "role": "assistant", "content": "Second response.", "reasoning_details": [ - {"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"}, + { + "type": "thinking", + "thinking": "Second thought.", + "signature": "sig_2", + }, ], }, ] @@ -1532,12 +1666,57 @@ class TestThinkingBlockSignatureManagement: # Last one: thinking preserved last_thinking = [ - b for b in assistants[2]["content"] + b + for b in assistants[2]["content"] if isinstance(b, dict) and b.get("type") == "thinking" ] assert len(last_thinking) == 1 assert last_thinking[0]["signature"] == "sig_3" + def test_third_party_downgrades_thinking_to_text(self): + """Third-party Anthropic-compatible endpoints get plain text thinking.""" + messages = [ + { + "role": "assistant", + "content": "Visible answer.", + "reasoning_details": [ + { + "type": "thinking", + "thinking": "Third-party-safe reasoning.", + "signature": "sig", + }, + {"type": "redacted_thinking", "data": "opaque"}, + ], + } + ] + _, result = convert_messages_to_anthropic( + messages, + base_url="https://api.z.ai/api/paas/v4", + ) + blocks = result[0]["content"] + assert not any(b.get("type") == "thinking" for b in blocks) + assert not any(b.get("type") == "redacted_thinking" for b in blocks) + text_blocks = [b.get("text", "") for b in blocks if b.get("type") == "text"] + assert "Third-party-safe reasoning." in text_blocks + assert "Visible answer." in text_blocks + + def test_third_party_thinking_only_content_gets_placeholder(self): + """If third-party turn only has redacted_thinking, use placeholder text.""" + messages = [ + { + "role": "assistant", + "content": "", + "reasoning_details": [ + {"type": "redacted_thinking", "data": "opaque"}, + ], + } + ] + _, result = convert_messages_to_anthropic( + messages, + base_url="https://api.minimax.io/anthropic", + ) + assert result[0]["content"] == [{"type": "text", "text": "(thinking elided)"}] + # --------------------------------------------------------------------------- # Tool choice diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index d71e6a62554..d013ca65109 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -124,7 +124,8 @@ def test_aiagent_reuses_existing_errors_log_handler(): ) matching_handlers = [ - handler for handler in root_logger.handlers + handler + for handler in root_logger.handlers if isinstance(handler, RotatingFileHandler) and error_log_path == Path(handler.baseFilename).resolve() ] @@ -142,7 +143,8 @@ class TestProviderModelNormalization: def test_aiagent_strips_matching_native_provider_prefix(self): with ( patch( - "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search") + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), ), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI"), @@ -162,7 +164,8 @@ class TestProviderModelNormalization: def test_aiagent_keeps_aggregator_vendor_slug(self): with ( patch( - "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search") + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), ), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI"), @@ -304,7 +307,9 @@ class TestStripThinkBlocks: def test_thought_block_removed(self, agent): """Gemma 4 uses <thought> tags for inline reasoning.""" - result = agent._strip_think_blocks("<thought>internal reasoning</thought> answer") + result = agent._strip_think_blocks( + "<thought>internal reasoning</thought> answer" + ) assert "internal reasoning" not in result assert "<thought>" not in result assert "answer" in result @@ -669,12 +674,18 @@ class TestBuildSystemPrompt: assert "Conversation started:" in prompt def test_includes_nous_subscription_prompt(self, agent, monkeypatch): - monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK") + monkeypatch.setattr( + run_agent, + "build_nous_subscription_prompt", + lambda tool_names: "NOUS SUBSCRIPTION BLOCK", + ) prompt = agent._build_system_prompt() assert "NOUS SUBSCRIPTION BLOCK" in prompt def test_skills_prompt_derives_available_toolsets_from_loaded_tools(self): - tools = _make_tool_defs("web_search", "skills_list", "skill_view", "skill_manage") + tools = _make_tool_defs( + "web_search", "skills_list", "skill_view", "skill_manage" + ) toolset_map = { "web_search": "web", "skills_list": "skills", @@ -688,8 +699,14 @@ class TestBuildSystemPrompt: "run_agent.check_toolset_requirements", side_effect=AssertionError("should not re-check toolset requirements"), ), - patch("run_agent.get_toolset_for_tool", create=True, side_effect=toolset_map.get), - patch("run_agent.build_skills_system_prompt", return_value="SKILLS_PROMPT") as mock_skills, + patch( + "run_agent.get_toolset_for_tool", + create=True, + side_effect=toolset_map.get, + ), + patch( + "run_agent.build_skills_system_prompt", return_value="SKILLS_PROMPT" + ) as mock_skills, patch("run_agent.OpenAI"), ): agent = AIAgent( @@ -735,54 +752,71 @@ class TestToolUseEnforcementConfig: def test_auto_injects_for_gpt(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="auto") prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt def test_auto_injects_for_codex(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent(model="openai/codex-mini", tool_use_enforcement="auto") prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt def test_auto_skips_for_claude(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE - agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="auto") + + agent = self._make_agent( + model="anthropic/claude-sonnet-4", tool_use_enforcement="auto" + ) prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt def test_true_forces_for_all_models(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE - agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True) + + agent = self._make_agent( + model="anthropic/claude-sonnet-4", tool_use_enforcement=True + ) prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt def test_string_true_forces_for_all_models(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE - agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="true") + + agent = self._make_agent( + model="anthropic/claude-sonnet-4", tool_use_enforcement="true" + ) prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt def test_always_forces_for_all_models(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE - agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="always") + + agent = self._make_agent( + model="deepseek/deepseek-r1", tool_use_enforcement="always" + ) prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt def test_false_disables_for_gpt(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement=False) prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt def test_string_false_disables(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="off") prompt = agent._build_system_prompt() assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt def test_custom_list_matches(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent( model="deepseek/deepseek-r1", tool_use_enforcement=["deepseek", "gemini"], @@ -792,6 +826,7 @@ class TestToolUseEnforcementConfig: def test_custom_list_no_match(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent( model="anthropic/claude-sonnet-4", tool_use_enforcement=["deepseek", "gemini"], @@ -801,6 +836,7 @@ class TestToolUseEnforcementConfig: def test_custom_list_case_insensitive(self): from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + agent = self._make_agent( model="openai/GPT-4.1", tool_use_enforcement=["GPT", "Codex"], @@ -811,6 +847,7 @@ class TestToolUseEnforcementConfig: def test_no_tools_never_injects(self): """Even with enforcement=true, no injection when agent has no tools.""" from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE + with ( patch("run_agent.get_tool_definitions", return_value=[]), patch("run_agent.check_toolset_requirements", return_value={}), @@ -941,7 +978,9 @@ class TestBuildApiKwargs: assert kwargs["metadata"]["sessionId"] == "sess-123" assert kwargs["extra_body"]["vl_high_resolution_images"] is True assert isinstance(kwargs["messages"][0]["content"], list) - assert kwargs["messages"][0]["content"][0]["cache_control"] == {"type": "ephemeral"} + assert kwargs["messages"][0]["content"][0]["cache_control"] == { + "type": "ephemeral" + } assert kwargs["messages"][2]["content"][0]["text"] == "hi" def test_qwen_portal_normalizes_bare_string_content_parts(self, agent): @@ -970,7 +1009,10 @@ class TestBuildApiKwargs: agent.base_url = "https://portal.qwen.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.max_tokens = 4096 - messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}] + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hi"}, + ] kwargs = agent._build_api_kwargs(messages) assert kwargs["max_tokens"] == 4096 @@ -980,7 +1022,10 @@ class TestBuildApiKwargs: agent.base_url = "https://portal.qwen.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.max_tokens = None - messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}] + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hi"}, + ] kwargs = agent._build_api_kwargs(messages) assert kwargs["max_tokens"] == 65536 @@ -1125,7 +1170,10 @@ class TestExecuteToolCalls: agent._execute_tool_calls(mock_msg, messages, "task-1") # Content should be replaced with persisted-output or truncation assert len(messages[0]["content"]) < 150_000 - assert ("Truncated" in messages[0]["content"] or "<persisted-output>" in messages[0]["content"]) + assert ( + "Truncated" in messages[0]["content"] + or "<persisted-output>" in messages[0]["content"] + ) def test_quiet_tool_output_suppressed_when_progress_callback_present(self, agent): tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1") @@ -1133,8 +1181,10 @@ class TestExecuteToolCalls: messages = [] agent.tool_progress_callback = lambda *args, **kwargs: None - with patch("run_agent.handle_function_call", return_value="search result"), \ - patch.object(agent, "_safe_print") as mock_print: + with ( + patch("run_agent.handle_function_call", return_value="search result"), + patch.object(agent, "_safe_print") as mock_print, + ): agent._execute_tool_calls(mock_msg, messages, "task-1") mock_print.assert_not_called() @@ -1147,8 +1197,10 @@ class TestExecuteToolCalls: messages = [] agent.tool_progress_callback = None - with patch("run_agent.handle_function_call", return_value="search result"), \ - patch.object(agent, "_safe_print") as mock_print: + with ( + patch("run_agent.handle_function_call", return_value="search result"), + patch.object(agent, "_safe_print") as mock_print, + ): agent._execute_tool_calls(mock_msg, messages, "task-1") mock_print.assert_called_once() @@ -1165,7 +1217,9 @@ class TestExecuteToolCalls: mock_print.assert_not_called() - def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode(self, agent): + def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode( + self, agent + ): class _RateLimitError(Exception): status_code = 429 @@ -1215,8 +1269,10 @@ class TestConcurrentToolExecution: def test_clarify_forces_sequential(self, agent): """Batch containing clarify should use sequential path.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="clarify", arguments='{"question":"ok?"}', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call( + name="clarify", arguments='{"question":"ok?"}', call_id="c2" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: @@ -1227,8 +1283,10 @@ class TestConcurrentToolExecution: def test_multiple_tools_uses_concurrent_path(self, agent): """Multiple read-only tools should use concurrent path.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call( + name="read_file", arguments='{"path":"x.py"}', call_id="c2" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: @@ -1239,8 +1297,10 @@ class TestConcurrentToolExecution: def test_terminal_batch_forces_sequential(self, agent): """Stateful tools should not share the concurrent execution path.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="terminal", arguments='{"command":"pwd"}', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call( + name="terminal", arguments='{"command":"pwd"}', call_id="c2" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: @@ -1251,8 +1311,14 @@ class TestConcurrentToolExecution: def test_write_batch_forces_sequential(self, agent): """File mutations should stay ordered within a turn.""" - tc1 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c1") - tc2 = _mock_tool_call(name="write_file", arguments='{"path":"x.py","content":"print(1)"}', call_id="c2") + tc1 = _mock_tool_call( + name="read_file", arguments='{"path":"x.py"}', call_id="c1" + ) + tc2 = _mock_tool_call( + name="write_file", + arguments='{"path":"x.py","content":"print(1)"}', + call_id="c2", + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: @@ -1303,7 +1369,7 @@ class TestConcurrentToolExecution: def test_malformed_json_args_forces_sequential(self, agent): """Unparseable tool arguments should fall back to sequential.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") tc2 = _mock_tool_call(name="web_search", arguments="NOT JSON {{{", call_id="c2") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] @@ -1315,8 +1381,10 @@ class TestConcurrentToolExecution: def test_non_dict_args_forces_sequential(self, agent): """Tool arguments that parse to a non-dict type should fall back to sequential.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="web_search", arguments='"just a string"', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call( + name="web_search", arguments='"just a string"', call_id="c2" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: @@ -1327,9 +1395,13 @@ class TestConcurrentToolExecution: def test_concurrent_executes_all_tools(self, agent): """Concurrent path should execute all tools and append results in order.""" - tc1 = _mock_tool_call(name="web_search", arguments='{"q":"alpha"}', call_id="c1") + tc1 = _mock_tool_call( + name="web_search", arguments='{"q":"alpha"}', call_id="c1" + ) tc2 = _mock_tool_call(name="web_search", arguments='{"q":"beta"}', call_id="c2") - tc3 = _mock_tool_call(name="web_search", arguments='{"q":"gamma"}', call_id="c3") + tc3 = _mock_tool_call( + name="web_search", arguments='{"q":"gamma"}', call_id="c3" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2, tc3]) messages = [] @@ -1379,12 +1451,13 @@ class TestConcurrentToolExecution: def test_concurrent_handles_tool_error(self, agent): """If one tool raises, others should still complete.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call(name="web_search", arguments="{}", call_id="c2") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] call_count = [0] + def fake_handle(name, args, task_id, **kwargs): call_count[0] += 1 if call_count[0] == 1: @@ -1402,8 +1475,8 @@ class TestConcurrentToolExecution: def test_concurrent_interrupt_before_start(self, agent): """If interrupt is requested before concurrent execution, all tools are skipped.""" - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="read_file", arguments='{}', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call(name="read_file", arguments="{}", call_id="c2") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] @@ -1412,15 +1485,21 @@ class TestConcurrentToolExecution: agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1") assert len(messages) == 2 - assert "cancelled" in messages[0]["content"].lower() or "skipped" in messages[0]["content"].lower() - assert "cancelled" in messages[1]["content"].lower() or "skipped" in messages[1]["content"].lower() + assert ( + "cancelled" in messages[0]["content"].lower() + or "skipped" in messages[0]["content"].lower() + ) + assert ( + "cancelled" in messages[1]["content"].lower() + or "skipped" in messages[1]["content"].lower() + ) def test_concurrent_truncates_large_results(self, agent, tmp_path, monkeypatch): """Concurrent path should save oversized results to file.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) (tmp_path / ".hermes").mkdir() - tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") - tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2") + tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + tc2 = _mock_tool_call(name="web_search", arguments="{}", call_id="c2") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] big_result = "x" * 150_000 @@ -1431,14 +1510,16 @@ class TestConcurrentToolExecution: assert len(messages) == 2 for m in messages: assert len(m["content"]) < 150_000 - assert ("Truncated" in m["content"] or "<persisted-output>" in m["content"]) + assert "Truncated" in m["content"] or "<persisted-output>" in m["content"] def test_invoke_tool_dispatches_to_handle_function_call(self, agent): """_invoke_tool should route regular tools through handle_function_call.""" with patch("run_agent.handle_function_call", return_value="result") as mock_hfc: result = agent._invoke_tool("web_search", {"q": "test"}, "task-1") mock_hfc.assert_called_once_with( - "web_search", {"q": "test"}, "task-1", + "web_search", + {"q": "test"}, + "task-1", tool_call_id=None, session_id=agent.session_id, enabled_tools=list(agent.valid_tool_names), @@ -1447,31 +1528,57 @@ class TestConcurrentToolExecution: assert result == "result" def test_sequential_tool_callbacks_fire_in_order(self, agent): - tool_call = _mock_tool_call(name="web_search", arguments='{"query":"hello"}', call_id="c1") + tool_call = _mock_tool_call( + name="web_search", arguments='{"query":"hello"}', call_id="c1" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call]) messages = [] starts = [] completes = [] - agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args)) - agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result)) + agent.tool_start_callback = lambda tool_call_id, function_name, function_args: ( + starts.append((tool_call_id, function_name, function_args)) + ) + agent.tool_complete_callback = ( + lambda tool_call_id, function_name, function_args, function_result: ( + completes.append( + (tool_call_id, function_name, function_args, function_result) + ) + ) + ) with patch("run_agent.handle_function_call", return_value='{"success": true}'): agent._execute_tool_calls_sequential(mock_msg, messages, "task-1") assert starts == [("c1", "web_search", {"query": "hello"})] - assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')] + assert completes == [ + ("c1", "web_search", {"query": "hello"}, '{"success": true}') + ] def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent): - tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1") - tc2 = _mock_tool_call(name="web_search", arguments='{"query":"two"}', call_id="c2") + tc1 = _mock_tool_call( + name="web_search", arguments='{"query":"one"}', call_id="c1" + ) + tc2 = _mock_tool_call( + name="web_search", arguments='{"query":"two"}', call_id="c2" + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) messages = [] starts = [] completes = [] - agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append((tool_call_id, function_name, function_args)) - agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append((tool_call_id, function_name, function_args, function_result)) + agent.tool_start_callback = lambda tool_call_id, function_name, function_args: ( + starts.append((tool_call_id, function_name, function_args)) + ) + agent.tool_complete_callback = ( + lambda tool_call_id, function_name, function_args, function_result: ( + completes.append( + (tool_call_id, function_name, function_args, function_result) + ) + ) + ) - with patch("run_agent.handle_function_call", side_effect=['{"id":1}', '{"id":2}']): + with patch( + "run_agent.handle_function_call", side_effect=['{"id":1}', '{"id":2}'] + ): agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1") assert starts == [ @@ -1484,18 +1591,24 @@ class TestConcurrentToolExecution: def test_invoke_tool_handles_agent_level_tools(self, agent): """_invoke_tool should handle todo tool directly.""" - with patch("tools.todo_tool.todo_tool", return_value='{"ok":true}') as mock_todo: + with patch( + "tools.todo_tool.todo_tool", return_value='{"ok":true}' + ) as mock_todo: result = agent._invoke_tool("todo", {"todos": []}, "task-1") mock_todo.assert_called_once() assert "ok" in result - def test_invoke_tool_blocked_returns_error_and_skips_execution(self, agent, monkeypatch): + def test_invoke_tool_blocked_returns_error_and_skips_execution( + self, agent, monkeypatch + ): """_invoke_tool should return error JSON when a plugin blocks the tool.""" monkeypatch.setattr( "hermes_cli.plugins.get_pre_tool_call_block_message", lambda *args, **kwargs: "Blocked by test policy", ) - with patch("tools.todo_tool.todo_tool", side_effect=AssertionError("should not run")) as mock_todo: + with patch( + "tools.todo_tool.todo_tool", side_effect=AssertionError("should not run") + ) as mock_todo: result = agent._invoke_tool("todo", {"todos": []}, "task-1") assert json.loads(result) == {"error": "Blocked by test policy"} @@ -1507,16 +1620,23 @@ class TestConcurrentToolExecution: "hermes_cli.plugins.get_pre_tool_call_block_message", lambda *args, **kwargs: "Blocked", ) - with patch("run_agent.handle_function_call", side_effect=AssertionError("should not run")): + with patch( + "run_agent.handle_function_call", + side_effect=AssertionError("should not run"), + ): result = agent._invoke_tool("web_search", {"q": "test"}, "task-1") assert json.loads(result) == {"error": "Blocked"} - def test_sequential_blocked_tool_skips_checkpoints_and_callbacks(self, agent, monkeypatch): + def test_sequential_blocked_tool_skips_checkpoints_and_callbacks( + self, agent, monkeypatch + ): """Sequential path: blocked tool should not trigger checkpoints or start callbacks.""" - tool_call = _mock_tool_call(name="write_file", - arguments='{"path":"test.txt","content":"hello"}', - call_id="c1") + tool_call = _mock_tool_call( + name="write_file", + arguments='{"path":"test.txt","content":"hello"}', + call_id="c1", + ) mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call]) messages = [] @@ -1532,7 +1652,10 @@ class TestConcurrentToolExecution: starts = [] agent.tool_start_callback = lambda *a: starts.append(a) - with patch("run_agent.handle_function_call", side_effect=AssertionError("should not run")): + with patch( + "run_agent.handle_function_call", + side_effect=AssertionError("should not run"), + ): agent._execute_tool_calls_sequential(mock_msg, messages, "task-1") agent._checkpoint_mgr.ensure_checkpoint.assert_not_called() @@ -1548,9 +1671,14 @@ class TestConcurrentToolExecution: "hermes_cli.plugins.get_pre_tool_call_block_message", lambda *args, **kwargs: "Blocked", ) - with patch("tools.memory_tool.memory_tool", side_effect=AssertionError("should not run")): + with patch( + "tools.memory_tool.memory_tool", + side_effect=AssertionError("should not run"), + ): result = agent._invoke_tool( - "memory", {"action": "add", "target": "memory", "content": "x"}, "task-1", + "memory", + {"action": "add", "target": "memory", "content": "x"}, + "task-1", ) assert json.loads(result) == {"error": "Blocked"} @@ -1562,36 +1690,45 @@ class TestPathsOverlap: def test_same_path_overlaps(self): from run_agent import _paths_overlap + assert _paths_overlap(Path("src/a.py"), Path("src/a.py")) def test_siblings_do_not_overlap(self): from run_agent import _paths_overlap + assert not _paths_overlap(Path("src/a.py"), Path("src/b.py")) def test_parent_child_overlap(self): from run_agent import _paths_overlap + assert _paths_overlap(Path("src"), Path("src/sub/a.py")) def test_different_roots_do_not_overlap(self): from run_agent import _paths_overlap + assert not _paths_overlap(Path("src/a.py"), Path("other/a.py")) def test_nested_vs_flat_do_not_overlap(self): from run_agent import _paths_overlap + assert not _paths_overlap(Path("src/sub/a.py"), Path("src/a.py")) def test_empty_paths_do_not_overlap(self): from run_agent import _paths_overlap + assert not _paths_overlap(Path(""), Path("")) def test_one_empty_path_does_not_overlap(self): from run_agent import _paths_overlap + assert not _paths_overlap(Path(""), Path("src/a.py")) assert not _paths_overlap(Path("src/a.py"), Path("")) class TestParallelScopePathNormalization: - def test_extract_parallel_scope_path_normalizes_relative_to_cwd(self, tmp_path, monkeypatch): + def test_extract_parallel_scope_path_normalizes_relative_to_cwd( + self, tmp_path, monkeypatch + ): from run_agent import _extract_parallel_scope_path monkeypatch.chdir(tmp_path) @@ -1600,7 +1737,9 @@ class TestParallelScopePathNormalization: assert scoped == tmp_path / "notes.txt" - def test_extract_parallel_scope_path_treats_relative_and_absolute_same_file_as_same_scope(self, tmp_path, monkeypatch): + def test_extract_parallel_scope_path_treats_relative_and_absolute_same_file_as_same_scope( + self, tmp_path, monkeypatch + ): from run_agent import _extract_parallel_scope_path, _paths_overlap monkeypatch.chdir(tmp_path) @@ -1612,12 +1751,22 @@ class TestParallelScopePathNormalization: assert rel_scoped == abs_scoped assert _paths_overlap(rel_scoped, abs_scoped) - def test_should_parallelize_tool_batch_rejects_same_file_with_mixed_path_spellings(self, tmp_path, monkeypatch): + def test_should_parallelize_tool_batch_rejects_same_file_with_mixed_path_spellings( + self, tmp_path, monkeypatch + ): from run_agent import _should_parallelize_tool_batch monkeypatch.chdir(tmp_path) - tc1 = _mock_tool_call(name="write_file", arguments='{"path":"notes.txt","content":"one"}', call_id="c1") - tc2 = _mock_tool_call(name="write_file", arguments=f'{{"path":"{tmp_path / "notes.txt"}","content":"two"}}', call_id="c2") + tc1 = _mock_tool_call( + name="write_file", + arguments='{"path":"notes.txt","content":"one"}', + call_id="c1", + ) + tc2 = _mock_tool_call( + name="write_file", + arguments=f'{{"path":"{tmp_path / "notes.txt"}","content":"two"}}', + call_id="c2", + ) assert not _should_parallelize_tool_batch([tc1, tc2]) @@ -1692,7 +1841,9 @@ class TestRunConversation: resp2 = _mock_response(content="Done searching", finish_reason="stop") agent.client.chat.completions.create.side_effect = [resp1, resp2] with ( - patch("run_agent.handle_function_call", return_value="search result") as mock_handle_function_call, + patch( + "run_agent.handle_function_call", return_value="search result" + ) as mock_handle_function_call, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), patch.object(agent, "_cleanup_task_resources"), @@ -1701,7 +1852,9 @@ class TestRunConversation: assert result["final_response"] == "Done searching" assert result["api_calls"] == 2 assert mock_handle_function_call.call_args.kwargs["tool_call_id"] == "c1" - assert mock_handle_function_call.call_args.kwargs["session_id"] == agent.session_id + assert ( + mock_handle_function_call.call_args.kwargs["session_id"] == agent.session_id + ) def test_request_scoped_api_hooks_fire_for_each_api_call(self, agent): self._setup_agent(agent) @@ -1727,13 +1880,17 @@ class TestRunConversation: assert result["final_response"] == "Done searching" pre_request_calls = [kw for name, kw in hook_calls if name == "pre_api_request"] - post_request_calls = [kw for name, kw in hook_calls if name == "post_api_request"] + post_request_calls = [ + kw for name, kw in hook_calls if name == "post_api_request" + ] assert len(pre_request_calls) == 2 assert len(post_request_calls) == 2 assert [call["api_call_count"] for call in pre_request_calls] == [1, 2] assert [call["api_call_count"] for call in post_request_calls] == [1, 2] assert all(call["session_id"] == agent.session_id for call in pre_request_calls) - assert all("message_count" in c and "messages" not in c for c in pre_request_calls) + assert all( + "message_count" in c and "messages" not in c for c in pre_request_calls + ) assert all("usage" in c and "response" not in c for c in post_request_calls) def test_interrupt_breaks_loop(self, agent): @@ -1791,7 +1948,9 @@ class TestRunConversation: # 6 responses: original + 2 prefill + 3 retries after prefill exhaustion with ( - patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6), + patch.object( + agent, "_interruptible_api_call", side_effect=[empty_resp] * 6 + ), patch.object(agent, "_compress_context") as mock_compress, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1859,7 +2018,10 @@ class TestRunConversation: empty_resp = _mock_response(content=None, finish_reason="stop") # 4 responses: 1 original + 3 nudge retries, all empty agent.client.chat.completions.create.side_effect = [ - empty_resp, empty_resp, empty_resp, empty_resp, + empty_resp, + empty_resp, + empty_resp, + empty_resp, ] with ( patch.object(agent, "_persist_session"), @@ -1897,7 +2059,9 @@ class TestRunConversation: self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" # Configure a fallback chain - agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}] + agent._fallback_chain = [ + {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"} + ] agent._fallback_index = 0 agent._fallback_activated = False @@ -1905,7 +2069,11 @@ class TestRunConversation: content_resp = _mock_response(content="Fallback answer.", finish_reason="stop") # 4 empty (1 orig + 3 retries), then fallback model answers agent.client.chat.completions.create.side_effect = [ - empty_resp, empty_resp, empty_resp, empty_resp, content_resp, + empty_resp, + empty_resp, + empty_resp, + empty_resp, + content_resp, ] fallback_called = {"called": False} @@ -1935,7 +2103,9 @@ class TestRunConversation: """If fallback also returns empty, final response is (empty).""" self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" - agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}] + agent._fallback_chain = [ + {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"} + ] agent._fallback_index = 0 agent._fallback_activated = False @@ -1943,8 +2113,14 @@ class TestRunConversation: # 4 empty from primary (1 + 3 retries), fallback activated, # then 4 more empty from fallback (1 + 3 retries), no more fallbacks agent.client.chat.completions.create.side_effect = [ - empty_resp, empty_resp, empty_resp, empty_resp, # primary exhausted - empty_resp, empty_resp, empty_resp, empty_resp, # fallback exhausted + empty_resp, + empty_resp, + empty_resp, + empty_resp, # primary exhausted + empty_resp, + empty_resp, + empty_resp, + empty_resp, # fallback exhausted ] def _mock_fallback(): @@ -1974,7 +2150,10 @@ class TestRunConversation: empty_resp = _mock_response(content=None, finish_reason="stop") # 4 empty: 1 original + 3 retries, all empty, no fallback agent.client.chat.completions.create.side_effect = [ - empty_resp, empty_resp, empty_resp, empty_resp, + empty_resp, + empty_resp, + empty_resp, + empty_resp, ] status_messages = [] @@ -1993,9 +2172,17 @@ class TestRunConversation: assert result["final_response"] == "(empty)" # Should have emitted retry statuses (3 retries) + final failure retry_msgs = [m for m in status_messages if "retrying" in m.lower()] - assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}" - failure_msgs = [m for m in status_messages if "no content" in m.lower() or "no fallback" in m.lower()] - assert len(failure_msgs) >= 1, f"Expected at least 1 failure status, got: {status_messages}" + assert len(retry_msgs) == 3, ( + f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}" + ) + failure_msgs = [ + m + for m in status_messages + if "no content" in m.lower() or "no fallback" in m.lower() + ] + assert len(failure_msgs) >= 1, ( + f"Expected at least 1 failure status, got: {status_messages}" + ) def test_partial_stream_recovery_uses_streamed_content(self, agent): """When streaming fails after partial delivery, recovered partial content becomes final response.""" @@ -2007,7 +2194,9 @@ class TestRunConversation: ) agent.client.chat.completions.create.return_value = partial_resp # Simulate that streaming had already delivered this text - agent._current_streamed_assistant_text = "Here is the partial answer that was stream" + agent._current_streamed_assistant_text = ( + "Here is the partial answer that was stream" + ) with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -2028,7 +2217,9 @@ class TestRunConversation: def _fake_api_call(api_kwargs): # Simulate what streaming does: accumulate text before returning # a stub with no content (connection died mid-stream) - agent._current_streamed_assistant_text = "The answer to your question is that" + agent._current_streamed_assistant_text = ( + "The answer to your question is that" + ) return empty_stub status_messages = [] @@ -2049,11 +2240,17 @@ class TestRunConversation: assert result["final_response"] == "The answer to your question is that" assert result["api_calls"] == 1 # No wasted retries # Should emit the stream-interrupted status, NOT the empty-retry status - recovery_msgs = [m for m in status_messages if "stream interrupted" in m.lower()] - assert len(recovery_msgs) >= 1, f"Expected stream recovery status, got: {status_messages}" + recovery_msgs = [ + m for m in status_messages if "stream interrupted" in m.lower() + ] + assert len(recovery_msgs) >= 1, ( + f"Expected stream recovery status, got: {status_messages}" + ) # Should NOT have retry statuses retry_msgs = [m for m in status_messages if "retrying" in m.lower()] - assert len(retry_msgs) == 0, f"Should not retry when stream content exists: {status_messages}" + assert len(retry_msgs) == 0, ( + f"Should not retry when stream content exists: {status_messages}" + ) def test_partial_stream_recovery_preempts_prior_turn_fallback(self, agent): """Partial streamed content takes priority over _last_content_with_tools fallback.""" @@ -2065,7 +2262,9 @@ class TestRunConversation: def _fake_api_call(api_kwargs): # Simulate partial streaming before connection death - agent._current_streamed_assistant_text = "Fresh partial content from this turn" + agent._current_streamed_assistant_text = ( + "Fresh partial content from this turn" + ) return empty_stub with ( @@ -2157,7 +2356,9 @@ class TestRunConversation: "Error code: 400 - {'error': {'code': '1261', 'message': 'Prompt exceeds max length'}}" ) err_400.status_code = 400 - ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop") + ok_resp = _mock_response( + content="Recovered after compression", finish_reason="stop" + ) agent.client.chat.completions.create.side_effect = [err_400, ok_resp] prefill = [ {"role": "user", "content": "previous question"}, @@ -2198,9 +2399,14 @@ class TestRunConversation: assert result["api_calls"] == 2 assert result["final_response"] == "Part 1 Part 2" - second_call_messages = agent.client.chat.completions.create.call_args_list[1].kwargs["messages"] + second_call_messages = agent.client.chat.completions.create.call_args_list[ + 1 + ].kwargs["messages"] assert second_call_messages[-1]["role"] == "user" - assert "truncated by the output length limit" in second_call_messages[-1]["content"] + assert ( + "truncated by the output length limit" + in second_call_messages[-1]["content"] + ) def test_length_thinking_exhausted_skips_continuation(self, agent): """When finish_reason='length' but content is only thinking, skip retries.""" @@ -2247,7 +2453,9 @@ class TestRunConversation: assert result["api_calls"] == 3 assert result["completed"] is False - def test_length_with_tool_calls_returns_partial_without_executing_tools(self, agent): + def test_length_with_tool_calls_returns_partial_without_executing_tools( + self, agent + ): self._setup_agent(agent) bad_tc = _mock_tool_call( name="write_file", @@ -2281,7 +2489,9 @@ class TestRunConversation: call_id="c1", ) truncated_resp = _mock_response( - content="", finish_reason="length", tool_calls=[bad_tc], + content="", + finish_reason="length", + tool_calls=[bad_tc], ) good_tc = _mock_tool_call( name="write_file", @@ -2289,10 +2499,14 @@ class TestRunConversation: call_id="c2", ) good_resp = _mock_response( - content="", finish_reason="stop", tool_calls=[good_tc], + content="", + finish_reason="stop", + tool_calls=[good_tc], ) with ( - patch("run_agent.handle_function_call", return_value='{"success":true}') as mock_hfc, + patch( + "run_agent.handle_function_call", return_value='{"success":true}' + ) as mock_hfc, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), patch.object(agent, "_cleanup_task_resources"), @@ -2301,7 +2515,9 @@ class TestRunConversation: # Third: final text response. final_resp = _mock_response(content="Done!", finish_reason="stop") agent.client.chat.completions.create.side_effect = [ - truncated_resp, good_resp, final_resp, + truncated_resp, + good_resp, + final_resp, ] result = agent.run_conversation("write the report") @@ -2321,7 +2537,9 @@ class TestRunConversation: call_id="c1", ) resp = _mock_response( - content="", finish_reason="tool_calls", tool_calls=[bad_tc], + content="", + finish_reason="tool_calls", + tool_calls=[bad_tc], ) agent.client.chat.completions.create.return_value = resp @@ -2417,7 +2635,9 @@ class TestRetryExhaustion: """ self._setup_agent(agent) with ( - patch.object(agent, "_build_api_kwargs", side_effect=ValueError("bad messages")), + patch.object( + agent, "_build_api_kwargs", side_effect=ValueError("bad messages") + ), patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), patch.object(agent, "_cleanup_task_resources"), @@ -2461,7 +2681,9 @@ class TestFlushSentinelNotLeaked: agent.client.chat.completions.create.return_value = mock_response # Bypass auxiliary client so flush uses agent.client directly - with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")): + with patch( + "agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider") + ): agent.flush_memories(messages, min_turns=0) # Check what was actually sent to the API @@ -2591,7 +2813,9 @@ class TestCredentialPoolRecovery: assert retry_same is False agent._swap_credential.assert_called_once_with(next_entry) - def test_recover_with_pool_rotates_on_billing_reason_even_with_http_400(self, agent): + def test_recover_with_pool_rotates_on_billing_reason_even_with_http_400( + self, agent + ): next_entry = SimpleNamespace(label="secondary") class _Pool: @@ -2645,7 +2869,6 @@ class TestCredentialPoolRecovery: assert retry_same is False agent._swap_credential.assert_called_once_with(next_entry) - def test_recover_with_pool_refreshes_on_401(self, agent): """401 with successful refresh should swap to refreshed credential.""" refreshed_entry = SimpleNamespace(label="refreshed-primary", id="abc") @@ -2750,7 +2973,10 @@ class TestCredentialPoolRecovery: recovered, retry_same = agent._recover_with_credential_pool( status_code=429, has_retried_429=True, - error_context={"reason": "device_code_exhausted", "reset_at": "2026-04-12T10:30:00Z"}, + error_context={ + "reason": "device_code_exhausted", + "reset_at": "2026-04-12T10:30:00Z", + }, ) assert recovered is True @@ -2787,6 +3013,7 @@ class TestMaxTokensParam: # System prompt stability for prompt caching # --------------------------------------------------------------------------- + class TestSystemPromptStability: """Verify that the system prompt stays stable across turns for cache hits.""" @@ -2882,6 +3109,7 @@ class TestSystemPromptStability: # Empty string is falsy, so should fall through to fresh build assert "Hermes Agent" in agent._cached_system_prompt + class TestBudgetPressure: """Budget exhaustion grace call system.""" @@ -2898,6 +3126,7 @@ class TestSafeWriter: """When stdout is healthy, _SafeWriter is transparent.""" from run_agent import _SafeWriter from io import StringIO + inner = StringIO() writer = _SafeWriter(inner) writer.write("hello") @@ -2907,6 +3136,7 @@ class TestSafeWriter: """OSError on write is silently caught, returns len(data).""" from run_agent import _SafeWriter from unittest.mock import MagicMock + inner = MagicMock() inner.write.side_effect = OSError(5, "Input/output error") writer = _SafeWriter(inner) @@ -2917,6 +3147,7 @@ class TestSafeWriter: """OSError on flush is silently caught.""" from run_agent import _SafeWriter from unittest.mock import MagicMock + inner = MagicMock() inner.flush.side_effect = OSError(5, "Input/output error") writer = _SafeWriter(inner) @@ -2927,6 +3158,7 @@ class TestSafeWriter: import sys from run_agent import _SafeWriter from unittest.mock import MagicMock + broken = MagicMock() broken.write.side_effect = OSError(5, "Input/output error") original = sys.stdout @@ -2940,6 +3172,7 @@ class TestSafeWriter: """run_conversation installs _SafeWriter on stdio.""" import sys from run_agent import _SafeWriter + resp = _mock_response(content="Done", finish_reason="stop") agent.client.chat.completions.create.return_value = resp original_stdout = sys.stdout @@ -2965,6 +3198,7 @@ class TestSafeWriter: import sys from run_agent import _SafeWriter from io import StringIO + inner = StringIO() wrapped = _SafeWriter(inner) # isinstance check should prevent double-wrapping @@ -3009,15 +3243,30 @@ class TestBuildApiKwargsAnthropicMaxTokens: agent.reasoning_config = None with patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build: - mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096} + mock_build.return_value = { + "model": "claude-sonnet-4-20250514", + "messages": [], + "max_tokens": 4096, + } agent._build_api_kwargs([{"role": "user", "content": "test"}]) _, kwargs = mock_build.call_args if not kwargs: - kwargs = dict(zip( - ["model", "messages", "tools", "max_tokens", "reasoning_config"], - mock_build.call_args[0], - )) - assert kwargs.get("max_tokens") == 4096 or mock_build.call_args[1].get("max_tokens") == 4096 + kwargs = dict( + zip( + [ + "model", + "messages", + "tools", + "max_tokens", + "reasoning_config", + ], + mock_build.call_args[0], + ) + ) + assert ( + kwargs.get("max_tokens") == 4096 + or mock_build.call_args[1].get("max_tokens") == 4096 + ) def test_max_tokens_none_when_unset(self, agent): agent.api_mode = "anthropic_messages" @@ -3025,7 +3274,11 @@ class TestBuildApiKwargsAnthropicMaxTokens: agent.reasoning_config = None with patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build: - mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 16384} + mock_build.return_value = { + "model": "claude-sonnet-4-20250514", + "messages": [], + "max_tokens": 16384, + } agent._build_api_kwargs([{"role": "user", "content": "test"}]) call_args = mock_build.call_args # max_tokens should be None (let adapter use its default) @@ -3040,32 +3293,55 @@ class TestAnthropicImageFallback: agent.api_mode = "anthropic_messages" agent.reasoning_config = None - api_messages = [{ - "role": "user", - "content": [ - {"type": "text", "text": "Can you see this now?"}, - {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}, - ], - }] + api_messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Can you see this now?"}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/cat.png"}, + }, + ], + } + ] with ( - patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=json.dumps({"success": True, "analysis": "A cat sitting on a chair."}))), + patch( + "tools.vision_tools.vision_analyze_tool", + new=AsyncMock( + return_value=json.dumps( + {"success": True, "analysis": "A cat sitting on a chair."} + ) + ), + ), patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build, ): - mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096} + mock_build.return_value = { + "model": "claude-sonnet-4-20250514", + "messages": [], + "max_tokens": 4096, + } agent._build_api_kwargs(api_messages) - kwargs = mock_build.call_args.kwargs or dict(zip( - ["model", "messages", "tools", "max_tokens", "reasoning_config"], - mock_build.call_args.args, - )) + kwargs = mock_build.call_args.kwargs or dict( + zip( + ["model", "messages", "tools", "max_tokens", "reasoning_config"], + mock_build.call_args.args, + ) + ) transformed = kwargs["messages"] assert isinstance(transformed[0]["content"], str) assert "A cat sitting on a chair." in transformed[0]["content"] assert "Can you see this now?" in transformed[0]["content"] - assert "vision_analyze with image_url: https://example.com/cat.png" in transformed[0]["content"] + assert ( + "vision_analyze with image_url: https://example.com/cat.png" + in transformed[0]["content"] + ) - def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images(self, agent): + def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images( + self, agent + ): agent.api_mode = "anthropic_messages" agent.reasoning_config = None data_url = "data:image/png;base64,QUFBQQ==" @@ -3087,12 +3363,20 @@ class TestAnthropicImageFallback: }, ] - mock_vision = AsyncMock(return_value=json.dumps({"success": True, "analysis": "A small test image."})) + mock_vision = AsyncMock( + return_value=json.dumps( + {"success": True, "analysis": "A small test image."} + ) + ) with ( patch("tools.vision_tools.vision_analyze_tool", new=mock_vision), patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build, ): - mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096} + mock_build.return_value = { + "model": "claude-sonnet-4-20250514", + "messages": [], + "max_tokens": 4096, + } agent._build_api_kwargs(api_messages) assert mock_vision.await_count == 1 @@ -3103,7 +3387,10 @@ class TestFallbackAnthropicProvider: def test_fallback_to_anthropic_sets_api_mode(self, agent): agent._fallback_activated = False - agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"} + agent._fallback_model = { + "provider": "anthropic", + "model": "claude-sonnet-4-20250514", + } agent._fallback_chain = [agent._fallback_model] agent._fallback_index = 0 @@ -3112,7 +3399,10 @@ class TestFallbackAnthropicProvider: mock_client.api_key = "sk-ant-api03-test" with ( - patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)), + patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None), + ), patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None), ): @@ -3126,7 +3416,10 @@ class TestFallbackAnthropicProvider: def test_fallback_to_anthropic_enables_prompt_caching(self, agent): agent._fallback_activated = False - agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"} + agent._fallback_model = { + "provider": "anthropic", + "model": "claude-sonnet-4-20250514", + } agent._fallback_chain = [agent._fallback_model] agent._fallback_index = 0 @@ -3135,8 +3428,14 @@ class TestFallbackAnthropicProvider: mock_client.api_key = "sk-ant-api03-test" with ( - patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)), - patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None), + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None), ): agent._try_activate_fallback() @@ -3145,7 +3444,10 @@ class TestFallbackAnthropicProvider: def test_fallback_to_openrouter_uses_openai_client(self, agent): agent._fallback_activated = False - agent._fallback_model = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"} + agent._fallback_model = { + "provider": "openrouter", + "model": "anthropic/claude-sonnet-4", + } agent._fallback_chain = [agent._fallback_model] agent._fallback_index = 0 @@ -3153,7 +3455,10 @@ class TestFallbackAnthropicProvider: mock_client.base_url = "https://openrouter.ai/api/v1" mock_client.api_key = "sk-or-test" - with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + with patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None), + ): result = agent._try_activate_fallback() assert result is True @@ -3163,7 +3468,9 @@ class TestFallbackAnthropicProvider: def test_aiagent_uses_copilot_acp_client(): with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch( + "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search") + ), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI") as mock_openai, patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, @@ -3250,8 +3557,13 @@ def test_is_openai_client_closed_falls_back_to_http_client(): def __init__(self, http_closed: bool): self._client = SimpleNamespace(is_closed=http_closed) - assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=False)) is False - assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=True)) is True + assert ( + AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=False)) + is False + ) + assert ( + AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=True)) is True + ) class TestAnthropicBaseUrlPassthrough: @@ -3259,7 +3571,10 @@ class TestAnthropicBaseUrlPassthrough: def test_custom_proxy_base_url_passed_through(self): with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch( + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), + ), patch("run_agent.check_toolset_requirements", return_value={}), patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, ): @@ -3278,7 +3593,10 @@ class TestAnthropicBaseUrlPassthrough: def test_none_base_url_passed_as_none(self): with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch( + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), + ), patch("run_agent.check_toolset_requirements", return_value={}), patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, ): @@ -3299,7 +3617,10 @@ class TestAnthropicBaseUrlPassthrough: class TestAnthropicCredentialRefresh: def test_try_refresh_anthropic_client_credentials_rebuilds_client(self): with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch( + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), + ), patch("run_agent.check_toolset_requirements", return_value={}), patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, ): @@ -3320,21 +3641,37 @@ class TestAnthropicCredentialRefresh: agent.provider = "anthropic" with ( - patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-fresh-token"), - patch("agent.anthropic_adapter.build_anthropic_client", return_value=new_client) as rebuild, + patch( + "agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-oat01-fresh-token", + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=new_client, + ) as rebuild, ): assert agent._try_refresh_anthropic_client_credentials() is True old_client.close.assert_called_once() - rebuild.assert_called_once_with("sk-ant-oat01-fresh-token", "https://api.anthropic.com") + rebuild.assert_called_once_with( + "sk-ant-oat01-fresh-token", "https://api.anthropic.com" + ) assert agent._anthropic_client is new_client assert agent._anthropic_api_key == "sk-ant-oat01-fresh-token" - def test_try_refresh_anthropic_client_credentials_returns_false_when_token_unchanged(self): + def test_try_refresh_anthropic_client_credentials_returns_false_when_token_unchanged( + self, + ): with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch( + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), + ), patch("run_agent.check_toolset_requirements", return_value={}), - patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), ): agent = AIAgent( api_key="sk-ant-oat01-same-token", @@ -3349,7 +3686,10 @@ class TestAnthropicCredentialRefresh: agent._anthropic_api_key = "sk-ant-oat01-same-token" with ( - patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-same-token"), + patch( + "agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-oat01-same-token", + ), patch("agent.anthropic_adapter.build_anthropic_client") as rebuild, ): assert agent._try_refresh_anthropic_client_credentials() is False @@ -3359,9 +3699,15 @@ class TestAnthropicCredentialRefresh: def test_anthropic_messages_create_preflights_refresh(self): with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch( + "run_agent.get_tool_definitions", + return_value=_make_tool_defs("web_search"), + ), patch("run_agent.check_toolset_requirements", return_value={}), - patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), ): agent = AIAgent( api_key="sk-ant-oat01-current-token", @@ -3375,11 +3721,17 @@ class TestAnthropicCredentialRefresh: agent._anthropic_client = MagicMock() agent._anthropic_client.messages.create.return_value = response - with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=True) as refresh: - result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"}) + with patch.object( + agent, "_try_refresh_anthropic_client_credentials", return_value=True + ) as refresh: + result = agent._anthropic_messages_create( + {"model": "claude-sonnet-4-20250514"} + ) refresh.assert_called_once_with() - agent._anthropic_client.messages.create.assert_called_once_with(model="claude-sonnet-4-20250514") + agent._anthropic_client.messages.create.assert_called_once_with( + model="claude-sonnet-4-20250514" + ) assert result is response @@ -3387,6 +3739,7 @@ class TestAnthropicCredentialRefresh: # _streaming_api_call tests # =================================================================== + def _make_chunk(content=None, tool_calls=None, finish_reason=None, model="test/model"): """Build a SimpleNamespace mimicking an OpenAI streaming chunk.""" delta = SimpleNamespace(content=content, tool_calls=tool_calls) @@ -3441,8 +3794,8 @@ class TestStreamingApiCall: def test_multiple_tool_calls(self, agent): chunks = [ - _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{}')]), - _make_chunk(tool_calls=[_make_tc_delta(1, "call_b", "read", '{}')]), + _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", "{}")]), + _make_chunk(tool_calls=[_make_tc_delta(1, "call_b", "read", "{}")]), _make_chunk(finish_reason="tool_calls"), ] agent.client.chat.completions.create.return_value = iter(chunks) @@ -3456,7 +3809,13 @@ class TestStreamingApiCall: def test_truncated_tool_call_args_upgrade_finish_reason_to_length(self, agent): chunks = [ - _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "write_file", '{"path":"x.txt","content":"hel')]), + _make_chunk( + tool_calls=[ + _make_tc_delta( + 0, "call_1", "write_file", '{"path":"x.txt","content":"hel' + ) + ] + ), ] agent.client.chat.completions.create.return_value = iter(chunks) @@ -3474,9 +3833,13 @@ class TestStreamingApiCall: Without the fix, names and arguments get concatenated into one slot. """ chunks = [ - _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]), + _make_chunk( + tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')] + ), # Second tool call at the SAME index 0, but different id - _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]), + _make_chunk( + tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')] + ), _make_chunk(finish_reason="tool_calls"), ] agent.client.chat.completions.create.return_value = iter(chunks) @@ -3484,7 +3847,9 @@ class TestStreamingApiCall: resp = agent._interruptible_streaming_api_call({"messages": []}) tc = resp.choices[0].message.tool_calls - assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}" + assert len(tc) == 2, ( + f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}" + ) assert tc[0].function.name == "search" assert tc[0].function.arguments == '{"q":"hello"}' assert tc[0].id == "call_a" @@ -3498,7 +3863,7 @@ class TestStreamingApiCall: _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]), _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]), # New tool call, same index 0 - _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]), + _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", "{}")]), _make_chunk(finish_reason="tool_calls"), ] agent.client.chat.completions.create.return_value = iter(chunks) @@ -3510,12 +3875,12 @@ class TestStreamingApiCall: assert tc[0].function.name == "search" assert tc[0].function.arguments == '{"q":"hello"}' assert tc[1].function.name == "read" - assert tc[1].function.arguments == '{}' + assert tc[1].function.arguments == "{}" def test_content_and_tool_calls_together(self, agent): chunks = [ _make_chunk(content="I'll search"), - _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "search", '{}')]), + _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "search", "{}")]), _make_chunk(finish_reason="tool_calls"), ] agent.client.chat.completions.create.return_value = iter(chunks) @@ -3565,7 +3930,10 @@ class TestStreamingApiCall: agent._interruptible_streaming_api_call({"messages": [], "model": "test"}) call_kwargs = agent.client.chat.completions.create.call_args - assert call_kwargs[1].get("stream") is True or call_kwargs.kwargs.get("stream") is True + assert ( + call_kwargs[1].get("stream") is True + or call_kwargs.kwargs.get("stream") is True + ) def test_api_exception_propagates_no_non_streaming_fallback(self, agent): """When streaming fails before any deltas, error propagates to the main retry loop.""" @@ -3611,6 +3979,7 @@ class TestInterruptVprintForceTrue: def test_all_interrupt_vprint_have_force_true(self): """Scan source for _vprint calls containing 'Interrupt' — each must have force=True.""" import inspect + source = inspect.getsource(AIAgent) lines = source.split("\n") violations = [] @@ -3620,8 +3989,7 @@ class TestInterruptVprintForceTrue: if "force=True" not in stripped: violations.append(f"line {i}: {stripped}") assert not violations, ( - f"Interrupt _vprint calls missing force=True:\n" - + "\n".join(violations) + f"Interrupt _vprint calls missing force=True:\n" + "\n".join(violations) ) @@ -3636,23 +4004,29 @@ class TestAnthropicInterruptHandler: def test_interruptible_has_anthropic_branch(self): """The interrupt handler must check api_mode == 'anthropic_messages'.""" import inspect + source = inspect.getsource(AIAgent._interruptible_api_call) - assert "anthropic_messages" in source, \ + assert "anthropic_messages" in source, ( "_interruptible_api_call must handle Anthropic interrupt (api_mode check)" + ) def test_interruptible_rebuilds_anthropic_client(self): """After interrupting, the Anthropic client should be rebuilt.""" import inspect + source = inspect.getsource(AIAgent._interruptible_api_call) - assert "build_anthropic_client" in source, \ + assert "build_anthropic_client" in source, ( "_interruptible_api_call must rebuild Anthropic client after interrupt" + ) def test_streaming_has_anthropic_branch(self): """_streaming_api_call must also handle Anthropic interrupt.""" import inspect + source = inspect.getsource(AIAgent._interruptible_streaming_api_call) - assert "anthropic_messages" in source, \ + assert "anthropic_messages" in source, ( "_streaming_api_call must handle Anthropic interrupt" + ) # --------------------------------------------------------------------------- @@ -3668,11 +4042,18 @@ class TestStreamCallbackNonStreamingProvider: """For chat_completions-shaped responses, callback gets content.""" agent.api_mode = "anthropic_messages" mock_response = SimpleNamespace( - choices=[SimpleNamespace( - message=SimpleNamespace(content="Hello", tool_calls=None, reasoning_content=None), - finish_reason="stop", index=0, - )], - usage=None, model="test", id="test-id", + choices=[ + SimpleNamespace( + message=SimpleNamespace( + content="Hello", tool_calls=None, reasoning_content=None + ), + finish_reason="stop", + index=0, + ) + ], + usage=None, + model="test", + id="test-id", ) agent._interruptible_api_call = MagicMock(return_value=mock_response) @@ -3686,8 +4067,10 @@ class TestStreamCallbackNonStreamingProvider: try: if agent.api_mode == "anthropic_messages": text_parts = [ - block.text for block in getattr(response, "content", []) - if getattr(block, "type", None) == "text" and getattr(block, "text", None) + block.text + for block in getattr(response, "content", []) + if getattr(block, "type", None) == "text" + and getattr(block, "text", None) ] content = " ".join(text_parts) if text_parts else None else: @@ -3729,8 +4112,10 @@ class TestStreamCallbackNonStreamingProvider: try: if agent.api_mode == "anthropic_messages": text_parts = [ - block.text for block in getattr(mock_response, "content", []) - if getattr(block, "type", None) == "text" and getattr(block, "text", None) + block.text + for block in getattr(mock_response, "content", []) + if getattr(block, "type", None) == "text" + and getattr(block, "text", None) ] content = " ".join(text_parts) if text_parts else None else: @@ -3879,10 +4264,14 @@ class TestOAuthFlagAfterCredentialRefresh: agent._is_anthropic_oauth = False with ( - patch("agent.anthropic_adapter.resolve_anthropic_token", - return_value="sk-ant-setup-oauth-token"), - patch("agent.anthropic_adapter.build_anthropic_client", - return_value=MagicMock()), + patch( + "agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-setup-oauth-token", + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), ): result = agent._try_refresh_anthropic_client_credentials() @@ -3898,10 +4287,14 @@ class TestOAuthFlagAfterCredentialRefresh: agent._is_anthropic_oauth = True with ( - patch("agent.anthropic_adapter.resolve_anthropic_token", - return_value="sk-ant-api03-new-key"), - patch("agent.anthropic_adapter.build_anthropic_client", - return_value=MagicMock()), + patch( + "agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-api03-new-key", + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), ): result = agent._try_refresh_anthropic_client_credentials() @@ -3923,12 +4316,15 @@ class TestFallbackSetsOAuthFlag: mock_client.api_key = "sk-ant-setup-oauth-token" with ( - patch("agent.auxiliary_client.resolve_provider_client", - return_value=(mock_client, None)), - patch("agent.anthropic_adapter.build_anthropic_client", - return_value=MagicMock()), - patch("agent.anthropic_adapter.resolve_anthropic_token", - return_value=None), + patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None), + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None), ): result = agent._try_activate_fallback() @@ -3946,12 +4342,15 @@ class TestFallbackSetsOAuthFlag: mock_client.api_key = "sk-ant-api03-regular-key" with ( - patch("agent.auxiliary_client.resolve_provider_client", - return_value=(mock_client, None)), - patch("agent.anthropic_adapter.build_anthropic_client", - return_value=MagicMock()), - patch("agent.anthropic_adapter.resolve_anthropic_token", - return_value=None), + patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None), + ), + patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock(), + ), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None), ): result = agent._try_activate_fallback() @@ -3966,8 +4365,11 @@ class TestMemoryNudgeCounterPersistence: """Counters must exist on the agent after __init__.""" with patch("run_agent.get_tool_definitions", return_value=[]): a = AIAgent( - model="test", api_key="test-key", provider="openrouter", - skip_context_files=True, skip_memory=True, + model="test", + api_key="test-key", + provider="openrouter", + skip_context_files=True, + skip_memory=True, ) assert hasattr(a, "_turns_since_memory") assert hasattr(a, "_iters_since_skill") @@ -3977,6 +4379,7 @@ class TestMemoryNudgeCounterPersistence: def test_counters_not_reset_in_preamble(self): """The run_conversation preamble must not zero the nudge counters.""" import inspect + src = inspect.getsource(AIAgent.run_conversation) # The preamble resets many fields (retry counts, budget, etc.) # before the main loop. Find that reset block and verify our @@ -3992,6 +4395,7 @@ class TestDeadRetryCode: def test_no_unreachable_max_retries_after_backoff(self): import inspect + source = inspect.getsource(AIAgent.run_conversation) occurrences = source.count("if retry_count >= max_retries:") assert occurrences == 2, (