diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000..1a71cb0776
Binary files /dev/null and b/.DS_Store differ
diff --git a/.env.example b/.env.example
index 38804aa120..4c13afec63 100644
--- a/.env.example
+++ b/.env.example
@@ -1,12 +1,42 @@
 # Hermes Agent Environment Configuration
 # Copy this file to .env and fill in your API keys
 
+# =============================================================================
+# CORE SETTINGS
+# =============================================================================
+# Agent backend:
+#   - openai  : default Hermes-Agent loop (OpenAI function calling via the OpenAI SDK)
+#   - atropos : atroposlib ServerManager/ManagedServer-backed loop (training/env integration)
+HERMES_BACKEND=openai
+
+# Debugging (prints to stdout; use with care)
+# HERMES_DEBUG_ATROPOS_REQUEST=1
+# HERMES_DEBUG_ATROPOS_RESPONSE=1
+# HERMES_DEBUG_OPENAI_REQUEST=1
+# HERMES_DEBUG_OPENAI_RESPONSE=1
+
+# =============================================================================
+# LOCAL / SELF-HOSTED OPENAI-COMPATIBLE ENDPOINTS (vLLM, SGLang, llama.cpp, etc.)
+# =============================================================================
+# If you set ATROPOS_SERVER_BASE_URL or OPENAI_BASE_URL, Hermes will use it instead
+# of OpenRouter.
+#
+# Local server convenience (base URL without /v1):
+# ATROPOS_SERVER_BASE_URL=http://localhost:11434
+# ATROPOS_SERVER_MODEL=glm-4.7-flash
+# ATROPOS_SERVER_API_KEY=local
+#
+# Generic OpenAI-compatible (base URL should include /v1):
+# OPENAI_BASE_URL=http://localhost:11434/v1
+# OPENAI_API_KEY=local
+
 # =============================================================================
 # LLM PROVIDER (OpenRouter)
 # =============================================================================
 # OpenRouter provides access to many models through one API
 # All LLM calls go through OpenRouter - no direct provider keys needed
 # Get your key at: https://openrouter.ai/keys
+OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 OPENROUTER_API_KEY=
 
 # Default model to use (OpenRouter format: provider/model)
diff --git a/atropos_compatible_agent.py b/atropos_compatible_agent.py
index e0ba3402cb..a9102a03c4 100644
--- a/atropos_compatible_agent.py
+++ b/atropos_compatible_agent.py
@@ -19,6 +19,8 @@ import asyncio
 import json
 import re
 import time
+import warnings
+import os
 from contextlib import asynccontextmanager
 from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
 
@@ -75,8 +77,8 @@ class AtroposAIAgent(AIAgent):
         log_prefix_chars: int = 100,
         log_prefix: str = "",
         session_id: Optional[str] = None,
-        temperature: float = 0.7,
-        max_tokens: int = 4096,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
     ):
         # Call parent init mainly to reuse tool selection + trajectory saving utilities.
         super().__init__(
@@ -104,8 +106,14 @@ class AtroposAIAgent(AIAgent):
     @asynccontextmanager
     async def _managed(self) -> AsyncGenerator[Any, None]:
         if hasattr(self.server, "managed_server"):
-            async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
-                yield managed
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    message=r"Using OpenAIServer with managed_server does not allow for state tracking",
+                    category=UserWarning,
+                )
+                async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
+                    yield managed
             return
         # Fall back to directly wrapping a single server object.
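
For reference, the warning filter in `_managed` above can be exercised in isolation. A minimal sketch, assuming a hypothetical `noisy()` stand-in for entering `managed_server()`; only the atroposlib state-tracking message is matched:

    import warnings

    def noisy() -> None:
        # Hypothetical stand-in for the warning emitted inside managed_server().
        warnings.warn(
            "Using OpenAIServer with managed_server does not allow for state tracking",
            UserWarning,
        )
        warnings.warn("an unrelated warning", UserWarning)

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message=r"Using OpenAIServer with managed_server does not allow for state tracking",
            category=UserWarning,
        )
        noisy()  # only "an unrelated warning" is emitted

The `message` argument is a regex matched against the start of the warning text, so the filter stays scoped to this one known-noisy warning instead of silencing all UserWarnings.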
@@ -206,20 +214,106 @@ class AtroposAIAgent(AIAgent):
             if active_system_prompt:
                 api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
 
-            response = await managed.chat_completion(
-                messages=api_messages,
-                n=1,
-                max_tokens=self.max_tokens,
-                temperature=self.temperature,
-            )
+            chat_kwargs: Dict[str, Any] = {"messages": api_messages, "n": 1}
+            if self.max_tokens is not None:
+                chat_kwargs["max_tokens"] = self.max_tokens
+            if self.temperature is not None:
+                chat_kwargs["temperature"] = self.temperature
+
+            # Prefer OpenAI tool calling when the backend supports it:
+            # - Many providers normalize Hermes-style tags into tool_calls when `tools` is provided.
+            # - ManagedServer (atroposlib) does prompt->completion conversion and does not support `tools`.
+            # Only pass `tools` when we're calling an OpenAI-compatible chat endpoint directly.
+            tool_schemas = self.tools if self.tools else None
+            managed_cls = type(managed).__name__
+            if tool_schemas and managed_cls != "ManagedServer":
+                chat_kwargs["tools"] = tool_schemas
+
+            if os.getenv("HERMES_DEBUG_ATROPOS_REQUEST") == "1":
+                meta = {
+                    "managed_type": managed_cls,
+                    "model": getattr(getattr(managed, "config", None), "model_name", self.model),
+                    "base_url": getattr(getattr(managed, "config", None), "base_url", None),
+                    "kwargs": chat_kwargs,
+                }
+                # Avoid dumping megabytes of data accidentally.
+                # (Messages can be large; this is still "full" but bounded.)
+                print("\n=== HERMES_DEBUG_ATROPOS_REQUEST ===", flush=True)
+                print(json.dumps(meta, ensure_ascii=False, indent=2)[:200_000], flush=True)
+
+            response = await managed.chat_completion(**chat_kwargs)
+
+            if os.getenv("HERMES_DEBUG_ATROPOS_RESPONSE") == "1":
+                try:
+                    dumped = response.model_dump()  # openai pydantic model
+                except Exception:
+                    dumped = getattr(response, "__dict__", {"repr": repr(response)})
+                print("\n=== HERMES_DEBUG_ATROPOS_RESPONSE: ChatCompletion (raw) ===", flush=True)
+                print(json.dumps(dumped, ensure_ascii=False, indent=2), flush=True)
 
             if hasattr(managed, "get_state"):
                 managed_state = managed.get_state()
 
-            assistant_content = response.choices[0].message.content or ""
-            messages.append({"role": "assistant", "content": assistant_content})
+            msg = response.choices[0].message
+            assistant_content = (msg.content or "")
+            msg_reasoning = getattr(msg, "reasoning", None)
 
-            tool_calls, parse_errors = self._parse_tool_calls(assistant_content)
+            # Use tool_calls if the backend provides them (preferred).
+            structured_tool_calls = getattr(msg, "tool_calls", None)
+
+            # If the backend emits content="" but includes useful text in reasoning,
+            # use it for parsing *only if needed* (e.g. tool tags).
+            if assistant_content == "" and isinstance(msg_reasoning, str) and msg_reasoning:
+                if os.getenv("HERMES_DEBUG_ATROPOS_RESPONSE") == "1":
+                    print("\n=== HERMES_DEBUG_ATROPOS_RESPONSE: message.reasoning present (content empty) ===", flush=True)
+                    print(msg_reasoning, flush=True)
+
+            assistant_msg: Dict[str, Any] = {"role": "assistant", "content": assistant_content}
+            if structured_tool_calls:
+                # Preserve tool_calls so the next request is consistent with the OpenAI protocol.
+                try:
+                    assistant_msg["tool_calls"] = [
+                        {
+                            "id": tc.id,
+                            "type": tc.type,
+                            "function": {"name": tc.function.name, "arguments": tc.function.arguments},
+                        }
+                        for tc in structured_tool_calls
+                    ]
+                except Exception:
+                    # Best-effort; keep the conversation moving.
+                    pass
+            messages.append(assistant_msg)
+
+            # Mode A: OpenAI tool calling (preferred when supported).
+            if structured_tool_calls:
+                for tc in structured_tool_calls:
+                    tool_start = time.time()
+                    try:
+                        tool_args = json.loads(tc.function.arguments or "{}")
+                    except Exception:
+                        tool_args = {}
+                    tool_result = handle_function_call(tc.function.name, tool_args, effective_task_id)
+                    tool_duration = time.time() - tool_start
+
+                    # Keep the raw tool result as tool content (the OpenAI protocol expects role=tool).
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tc.id,
+                            "content": tool_result,
+                        }
+                    )
+
+                    if self.tool_delay and self.tool_delay > 0:
+                        await asyncio.sleep(self.tool_delay)
+
+                # Continue the outer loop after tool execution.
+                continue
+
+            # Mode B: Hermes XML tool tags in assistant text (fallback).
+            parse_source = assistant_content or (msg_reasoning or "")
+            tool_calls, parse_errors = self._parse_tool_calls(parse_source)
 
             if parse_errors and not tool_calls:
                 # Ask the model to retry with valid tool JSON.
@@ -237,7 +331,7 @@ class AtroposAIAgent(AIAgent):
 
             if not tool_calls:
                 # No tool calls: treat as final answer.
-                final_response = assistant_content
+                final_response = (assistant_content or "").strip()
                 completed = True
                 break
diff --git a/cli.py b/cli.py
index c4f436b0e8..c1d0b9f880 100755
--- a/cli.py
+++ b/cli.py
@@ -24,6 +24,8 @@ from typing import List, Dict, Any, Optional
 # Suppress startup messages for clean CLI experience
 os.environ["MSWEA_SILENT_STARTUP"] = "1"  # mini-swe-agent
 os.environ["HERMES_QUIET"] = "1"  # Our own modules
+os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
+os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
 
 import yaml
 
@@ -451,6 +453,7 @@ class HermesCLI:
         max_turns: int = 20,
         verbose: bool = False,
         compact: bool = False,
+        interactive: bool = True,
     ):
         """
         Initialize the Hermes CLI.
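
The next hunk resolves the model, base URL, and API key with `or`-chains, so the first non-empty source wins. A small sketch of that precedence behavior (env var names are from the diff; the values are hypothetical):

    import os

    os.environ["LLM_MODEL"] = ""                  # empty -> falls through
    os.environ["ATROPOS_SERVER_MODEL"] = "my-local-model"

    model = (
        None                                      # CLI arg not given
        or os.getenv("LLM_MODEL")                 # "" is falsy -> skipped
        or os.getenv("ATROPOS_SERVER_MODEL")      # first truthy value wins
        or "default-model"                        # config-file fallback
    )
    assert model == "my-local-model"

One subtlety the `or`-chain inherits: an environment variable set to the empty string is treated the same as unset, which is usually the desired behavior for .env templates with blank placeholder values.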
@@ -471,9 +474,38 @@ class HermesCLI:
         self.verbose = verbose if verbose is not None else CLI_CONFIG["agent"].get("verbose", False)
 
         # Configuration - priority: CLI args > env vars > config file
-        self.model = model or os.getenv("LLM_MODEL", CLI_CONFIG["model"]["default"])
-        self.base_url = base_url or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
-        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
+        self.model = (
+            model
+            or os.getenv("LLM_MODEL")
+            or os.getenv("ATROPOS_SERVER_MODEL")
+            or CLI_CONFIG["model"]["default"]
+        )
+
+        env_openai_base_url = os.getenv("OPENAI_BASE_URL")
+        if env_openai_base_url:
+            env_openai_base_url = env_openai_base_url.rstrip("/")
+            if not env_openai_base_url.endswith("/v1"):
+                env_openai_base_url = f"{env_openai_base_url}/v1"
+
+        env_atropos_base_url = os.getenv("ATROPOS_SERVER_BASE_URL")
+        if env_atropos_base_url:
+            env_atropos_base_url = env_atropos_base_url.rstrip("/")
+            if not env_atropos_base_url.endswith("/v1"):
+                env_atropos_base_url = f"{env_atropos_base_url}/v1"
+
+        self.base_url = (
+            base_url
+            or env_atropos_base_url
+            or env_openai_base_url
+            or os.getenv("OPENROUTER_BASE_URL")
+            or CLI_CONFIG["model"]["base_url"]
+        )
+        self.api_key = (
+            api_key
+            or os.getenv("ATROPOS_SERVER_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+            or os.getenv("OPENROUTER_API_KEY")
+        )
 
         self.max_turns = max_turns if max_turns != 20 else CLI_CONFIG["agent"].get("max_turns", 20)
         self.backend = (backend or os.getenv("HERMES_BACKEND") or "openai").strip().lower()
@@ -507,9 +539,11 @@ class HermesCLI:
         timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
         short_uuid = uuid.uuid4().hex[:6]
         self.session_id = f"{timestamp_str}_{short_uuid}"
-
-        # Setup prompt_toolkit session with history
-        self._setup_prompt_session()
+
+        self.interactive = interactive
+        if self.interactive:
+            # Setup prompt_toolkit session with history
+            self._setup_prompt_session()
 
     def _setup_prompt_session(self):
         """Setup prompt_toolkit session with history and styling."""
@@ -574,6 +608,9 @@ class HermesCLI:
                 quiet_mode=True,  # Suppress verbose output for clean CLI
                 ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
                 session_id=self.session_id,
+                # Do not force max_tokens/temperature by default; let the backend decide.
+                max_tokens=None,
+                temperature=None,
             )
         else:
             self.agent = AIAgent(
@@ -967,7 +1004,7 @@ class HermesCLI:
 
         return True
 
-    def chat(self, message: str) -> Optional[str]:
+    def chat(self, message: str, *, render: bool = True) -> Optional[str]:
         """
         Send a message to the agent and get a response.
 
@@ -984,8 +1021,9 @@ class HermesCLI:
         # Add user message to history
         self.conversation_history.append({"role": "user", "content": message})
 
-        # Visual separator after user input
-        print("─" * 60, flush=True)
+        if render:
+            # Visual separator after user input
+            print("─" * 60, flush=True)
 
         try:
             # Run the conversation
@@ -1000,7 +1038,7 @@ class HermesCLI:
 
         # Get the final response
         response = result.get("final_response", "")
-        if response:
+        if response and render:
             # Use simple print for compatibility with prompt_toolkit's patch_stdout
             print()
             print("╭" + "─" * 58 + "╮")
@@ -1130,12 +1168,21 @@ def main(
         python cli.py -p "What is Python?"    # Single query mode (alias)
         python cli.py --list-tools            # List tools and exit
     """
-    # Signal to terminal_tool that we're in interactive mode
-    # This enables interactive sudo password prompts with timeout
-    os.environ["HERMES_INTERACTIVE"] = "1"
-
-    # Handle query shorthand
-    query = query or q or prompt or p
+    # Resolve prompt modes:
+    # - query/-q : single-shot, but keep the normal banner UX
+    # - prompt/-p: single-shot, NO TUI/banner (for wrapper scripts)
+    query_text = query or q
+    prompt_text = prompt or p
+
+    # Signal to terminal_tool whether we can prompt the user (e.g. for a sudo password).
+    os.environ["HERMES_INTERACTIVE"] = "0" if prompt_text else "1"
+    if prompt_text:
+        # Wrapper mode should not emit spinners / interactive UX noise.
+        os.environ["HERMES_DISABLE_SPINNER"] = "1"
+
+    # Optional debug dump of the full model response objects:
+    # - HERMES_DEBUG_ATROPOS_RESPONSE=1 dumps the Atropos backend ChatCompletion
+    # - HERMES_DEBUG_OPENAI_RESPONSE=1 dumps the OpenAI backend ChatCompletion
 
     # Parse toolsets - handle both string and tuple/list inputs
     toolsets_list = None
@@ -1161,6 +1208,7 @@ def main(
         max_turns=max_turns,
         verbose=verbose,
         compact=compact,
+        interactive=not bool(prompt_text),
     )
 
     # Handle list commands (don't init agent for these)
@@ -1174,11 +1222,18 @@ def main(
         cli.show_toolsets()
         sys.exit(0)
 
+    # Handle wrapper-friendly prompt mode (no banner/TUI)
+    if prompt_text:
+        response = cli.chat(prompt_text, render=False)
+        if response is not None:
+            print(response)
+        return
+
     # Handle single query mode
-    if query:
+    if query_text:
         cli.show_banner()
-        cli.console.print(f"[bold blue]Query:[/] {query}")
-        cli.chat(query)
+        cli.console.print(f"[bold blue]Query:[/] {query_text}")
+        cli.chat(query_text)
         return
 
     # Run interactive mode
diff --git a/hermes b/hermes
index f0feeb2bad..58ef5cc483 100755
--- a/hermes
+++ b/hermes
@@ -7,6 +7,40 @@ Usage: ./hermes [options]
 """
 
 if __name__ == "__main__":
+    """
+    Fire (google/python-fire) does not support POSIX-style short flags like `-p`.
+    We translate the most common shorthands to their long equivalents so wrapper
+    scripts can reliably use:
+      - `-p "..."` -> `--prompt "..."` (no TUI/banner; print result and exit)
+      - `-q "..."` -> `--query "..."`  (single-shot with banner UX)
+    """
+
+    import sys
+
+    def _rewrite_short_flags(argv: list[str]) -> list[str]:
+        rewritten: list[str] = []
+        i = 0
+        while i < len(argv):
+            arg = argv[i]
+            if arg == "-p":
+                rewritten.append("--prompt")
+                if i + 1 < len(argv):
+                    rewritten.append(argv[i + 1])
+                i += 2
+                continue
+            if arg == "-q":
+                rewritten.append("--query")
+                if i + 1 < len(argv):
+                    rewritten.append(argv[i + 1])
+                i += 2
+                continue
+            rewritten.append(arg)
+            i += 1
+        return rewritten
+
+    sys.argv = [sys.argv[0]] + _rewrite_short_flags(sys.argv[1:])
+
     from cli import main
     import fire
+
     fire.Fire(main)
diff --git a/local_server.py b/local_server.py
index d2467a8bed..71ea093313 100644
--- a/local_server.py
+++ b/local_server.py
@@ -1,7 +1,7 @@
 """
 Local OpenAI-compatible server implementation for Hermes-Agent (Atropos integration).
 
-Extends the Atropos APIServer to work with local OpenAI-compatible APIs (e.g. Ollama),
+Extends the Atropos APIServer to work with local OpenAI-compatible APIs (e.g. vLLM, SGLang),
 providing tokens_and_logprobs_completion support via client-side tokenization.
 """
 
@@ -104,27 +104,35 @@ class LocalServer(APIServer):
         Create a LocalServer from environment variables (or explicit overrides).
 
         Env vars (checked in order):
-        - base URL: LOCAL_LLM_BASE_URL, LLM_BASE_URL, OLLAMA_BASE_URL
-        - model:    LOCAL_LLM_MODEL, LLM_MODEL, OLLAMA_MODEL
+        - base URL: ATROPOS_SERVER_BASE_URL, OPENAI_BASE_URL, LOCAL_LLM_BASE_URL, LLM_BASE_URL
+        - model:    ATROPOS_SERVER_MODEL, LLM_MODEL, LOCAL_LLM_MODEL
+        - api key:  ATROPOS_SERVER_API_KEY, OPENAI_API_KEY, LOCAL_LLM_API_KEY, LLM_API_KEY
         """
         from dotenv import load_dotenv
 
         load_dotenv()
 
         base_url = (
             base_url
+            or os.getenv("ATROPOS_SERVER_BASE_URL")
+            or os.getenv("OPENAI_BASE_URL")
             or os.getenv("LOCAL_LLM_BASE_URL")
             or os.getenv("LLM_BASE_URL")
-            or os.getenv("OLLAMA_BASE_URL")
             or "http://localhost:11434"
         )
         model = (
             model
-            or os.getenv("LOCAL_LLM_MODEL")
+            or os.getenv("ATROPOS_SERVER_MODEL")
             or os.getenv("LLM_MODEL")
-            or os.getenv("OLLAMA_MODEL")
+            or os.getenv("LOCAL_LLM_MODEL")
             or "hermes3:8b"
         )
-        api_key = api_key or os.getenv("LOCAL_LLM_API_KEY") or os.getenv("LLM_API_KEY") or os.getenv("OLLAMA_API_KEY")
+        api_key = (
+            api_key
+            or os.getenv("ATROPOS_SERVER_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+            or os.getenv("LOCAL_LLM_API_KEY")
+            or os.getenv("LLM_API_KEY")
+        )
 
         config = APIServerConfig(
             model_name=model,
@@ -173,7 +181,7 @@ class LocalServer(APIServer):
 
         n = kwargs.get("n", 1)
 
-        # Ollama doesn't support n > 1, so we make multiple requests
+        # Some OpenAI-compatible servers don't support n > 1, so we make multiple requests.
         if n > 1:
             completion_list = await asyncio.gather(
                 *[self.openai.chat.completions.create(**{**kwargs, "n": 1}) for _ in range(n)]
@@ -197,7 +205,7 @@ class LocalServer(APIServer):
 
         n = kwargs.get("n", 1)
 
-        # Ollama doesn't support n > 1
+        # Some OpenAI-compatible servers don't support n > 1.
         if n > 1:
             completion_list = await asyncio.gather(
                 *[self.openai.completions.create(**{**kwargs, "n": 1}) for _ in range(n)]
@@ -283,7 +291,7 @@ class LocalServer(APIServer):
 
         # Tokenize output
         output_tokens = self.tokenizer.encode(text, add_special_tokens=False)
 
-        # Placeholder logprobs (Ollama doesn't provide per-token logprobs easily)
+        # Placeholder logprobs (many local servers don't provide per-token logprobs).
         # In production, use vLLM/SGLang which return real logprobs
         output_logprobs = [0.0] * len(output_tokens)
diff --git a/run_agent.py b/run_agent.py
index 0aba84a5f9..fb594d8caa 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -910,6 +910,16 @@ class AIAgent:
         if active_system_prompt:
             # Insert system message at the beginning
             api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
+
+        if os.getenv("HERMES_DEBUG_OPENAI_REQUEST") == "1":
+            meta = {
+                "model": self.model,
+                "base_url": self.base_url,
+                "messages": api_messages,
+                "tools": self.tools if self.tools else None,
+            }
+            print("\n=== HERMES_DEBUG_OPENAI_REQUEST ===", flush=True)
+            print(json.dumps(meta, ensure_ascii=False, indent=2)[:200_000], flush=True)
 
         # Calculate approximate request size for logging
         total_chars = sum(len(str(msg)) for msg in api_messages)
@@ -923,12 +933,13 @@ class AIAgent:
             print(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
             print(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
         else:
-            # Animated thinking spinner in quiet mode
-            face = random.choice(KawaiiSpinner.KAWAII_THINKING)
-            verb = random.choice(KawaiiSpinner.THINKING_VERBS)
-            spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
-            thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
-            thinking_spinner.start()
+            # Animated thinking spinner in quiet mode (disable for wrappers/non-TTY usage)
+            if os.getenv("HERMES_DISABLE_SPINNER") != "1":
+                face = random.choice(KawaiiSpinner.KAWAII_THINKING)
+                verb = random.choice(KawaiiSpinner.THINKING_VERBS)
+                spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
+                thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
+                thinking_spinner.start()
 
         # Log request details if verbose
         if self.verbose_logging:
@@ -979,6 +990,14 @@ class AIAgent:
             api_kwargs["extra_body"] = extra_body
 
         response = self.client.chat.completions.create(**api_kwargs)
+
+        if os.getenv("HERMES_DEBUG_OPENAI_RESPONSE") == "1":
+            try:
+                dumped = response.model_dump()
+            except Exception:
+                dumped = getattr(response, "__dict__", {"repr": repr(response)})
+            print("\n=== HERMES_DEBUG_OPENAI_RESPONSE: ChatCompletion (raw) ===", flush=True)
+            print(json.dumps(dumped, ensure_ascii=False, indent=2), flush=True)
 
         api_duration = time.time() - api_start_time
 
@@ -1294,7 +1313,7 @@ class AIAgent:
         tool_start_time = time.time()
 
         # Execute the tool - with animated spinner in quiet mode
-        if self.quiet_mode:
+        if self.quiet_mode and os.getenv("HERMES_DISABLE_SPINNER") != "1":
             # Tool-specific spinner animations
             tool_spinners = {
                 'web_search': ('arrows', ['🔍', '🌐', '📡', '🔎']),
@@ -1324,6 +1343,9 @@ class AIAgent:
             tool_duration = time.time() - tool_start_time
             cute_msg = self._get_cute_tool_message(function_name, function_args, tool_duration)
             spinner.stop(cute_msg)
+        elif self.quiet_mode:
+            function_result = handle_function_call(function_name, function_args, effective_task_id)
+            tool_duration = time.time() - tool_start_time
         else:
             function_result = handle_function_call(function_name, function_args, effective_task_id)
             tool_duration = time.time() - tool_start_time
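
The debug dumps above rely on `model_dump()` with a `__dict__` fallback. The same pattern can be exercised standalone; a sketch, where `FakeResponse` is a hypothetical stand-in for a non-pydantic SDK object and `default=repr` is an extra safety net beyond what the diff itself does:

    import json

    class FakeResponse:
        # Hypothetical response object without model_dump().
        def __init__(self) -> None:
            self.id = "resp_1"
            self.choices = []

    def dump_response(response) -> str:
        try:
            dumped = response.model_dump()  # pydantic v2 models (openai SDK types)
        except Exception:
            dumped = getattr(response, "__dict__", {"repr": repr(response)})
        return json.dumps(dumped, ensure_ascii=False, indent=2, default=repr)

    print(dump_response(FakeResponse()))

Catching `Exception` rather than `AttributeError` keeps the debug path from ever breaking the agent loop, at the cost of hiding pydantic serialization errors; `default=repr` covers values (timestamps, enums) that `json.dumps` cannot serialize natively.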