From 9b4d9452ba117a44aaaee897dd685462d2ae334d Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sun, 1 Feb 2026 18:01:31 -0800
Subject: [PATCH 01/48] Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.
---
 .env.example            |  10 +
 README.md               |  40 ++++
 TODO.md                 |  19 +-
 cli-config.yaml.example |  27 +++
 cli.py                  |  17 ++
 docs/cli.md             |  32 +++
 run_agent.py            | 481 +++++++++++++++++++++++++++++++++++++++-
 7 files changed, 614 insertions(+), 12 deletions(-)

diff --git a/.env.example b/.env.example
index 38804aa120..9c73f74e9b 100644
--- a/.env.example
+++ b/.env.example
@@ -154,3 +154,13 @@ WEB_TOOLS_DEBUG=false
 VISION_TOOLS_DEBUG=false
 MOA_TOOLS_DEBUG=false
 IMAGE_TOOLS_DEBUG=false
+
+# =============================================================================
+# CONTEXT COMPRESSION (Auto-shrinks long conversations)
+# =============================================================================
+# When conversation approaches model's context limit, middle turns are
+# automatically summarized to free up space.
+#
+# CONTEXT_COMPRESSION_ENABLED=true        # Enable auto-compression (default: true)
+# CONTEXT_COMPRESSION_THRESHOLD=0.85      # Compress at 85% of context limit
+# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001  # Fast model for summaries
diff --git a/README.md b/README.md
index 001728a33e..309c7c6892 100644
--- a/README.md
+++ b/README.md
@@ -290,6 +290,41 @@ logs/
 - **Trajectory Format**: Uses the same format as batch processing for consistency
 - **Git Ignored**: `logs/` is in `.gitignore` so logs aren't committed
 
+## Context Compression
+
+Long conversations can exceed the model's context limit. Hermes Agent automatically compresses context when approaching the limit:
+
+**How it works:**
+1. Tracks actual token usage from API responses (`usage.prompt_tokens`)
+2. When tokens reach 85% of model's context limit, triggers compression
+3. Protects first 3 turns (system prompt, initial request, first response)
+4. Protects last 4 turns (recent context is most relevant)
+5. Summarizes middle turns using a fast/cheap model (Gemini Flash)
+6. Inserts summary as a user message, conversation continues seamlessly
+
+**Configuration (`cli-config.yaml`):**
+```yaml
+compression:
+  enabled: true                    # Enable auto-compression (default)
+  threshold: 0.85                  # Compress at 85% of context limit
+  summary_model: "google/gemini-2.0-flash-001"
+```
+
+**Or via environment variables:**
+```bash
+CONTEXT_COMPRESSION_ENABLED=true
+CONTEXT_COMPRESSION_THRESHOLD=0.85
+CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001
+```
+
+**When compression triggers, you'll see:**
+```
+📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
+   📊 Model context limit: 200,000 tokens (85% = 170,000)
+   🗜️  Summarizing turns 4-15 (12 turns)
+   ✅ Compressed: 19 → 8 messages (~45,000 tokens saved)
+```
+
 ## Interactive CLI
 
 The CLI provides a rich interactive experience for working with the agent.
@@ -579,6 +614,11 @@ All environment variables can be configured in the `.env` file (copy from `.env.
 - `TERMINAL_SSH_PORT`: SSH port (default: `22`)
 - `TERMINAL_SSH_KEY`: Path to SSH private key (optional, uses ssh-agent if not set)
 
+**Context Compression (auto-shrinks long conversations):**
+- `CONTEXT_COMPRESSION_ENABLED`: Enable auto-compression (default: `true`)
+- `CONTEXT_COMPRESSION_THRESHOLD`: Compress at this fraction of the context limit (default: `0.85`, i.e. 85%)
+- `CONTEXT_COMPRESSION_MODEL`: Model for generating summaries (default: `google/gemini-2.0-flash-001`)
+
 **Browser Tool Configuration (agent-browser + Browserbase):**
 - `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
 - `BROWSERBASE_PROJECT_ID`: Browserbase project ID
diff --git a/TODO.md b/TODO.md
index 1d10fdcdc3..9b35e3b803 100644
--- a/TODO.md
+++ b/TODO.md
@@ -47,7 +47,24 @@ These items need to be addressed ASAP:
   - Structured JSON format for easy parsing and replay
   - Automatic on CLI runs (configurable)
 
-### 4. Stream Thinking Summaries in Real-Time 💭 ⏸️ DEFERRED
+### 4. Automatic Context Compression 🗜️ ✅ COMPLETE
+- [x] **Problem:** Long conversations exceed model context limits, causing errors
+- [x] **Solution:** Auto-compress middle turns when approaching limit
+- [x] **Implementation:**
+  - Fetches model context lengths from OpenRouter `/api/v1/models` API (cached 1hr)
+  - Tracks actual token usage from API responses (`usage.prompt_tokens`)
+  - Triggers at 85% of model's context limit (configurable)
+  - Protects first 3 turns (system, initial request, first response)
+  - Protects last 4 turns (recent context most relevant)
+  - Summarizes middle turns using fast model (Gemini Flash)
+  - Inserts summary as user message, conversation continues seamlessly
+  - If context error occurs, attempts compression before failing
+- [x] **Configuration (cli-config.yaml / env vars):**
+  - `CONTEXT_COMPRESSION_ENABLED` (default: true)
+  - `CONTEXT_COMPRESSION_THRESHOLD` (default: 0.85 = 85%)
+  - `CONTEXT_COMPRESSION_MODEL` (default: google/gemini-2.0-flash-001)
+
+### 5. Stream Thinking Summaries in Real-Time 💭 ⏸️ DEFERRED
 - [ ] **Problem:** Thinking/reasoning summaries not shown while streaming
 - [ ] **Complexity:** This is a significant refactor - leaving for later
 
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 432e11189e..947fa11af9 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -112,6 +112,33 @@ browser:
   # after this period of no activity between agent loops (default: 120 = 2 minutes)
   inactivity_timeout: 120
 
+# =============================================================================
+# Context Compression (Auto-shrinks long conversations)
+# =============================================================================
+# When conversation approaches model's context limit, middle turns are
+# automatically summarized to free up space while preserving important context.
+#
+# HOW IT WORKS:
+# 1. Tracks actual token usage from API responses (not estimates)
+# 2. When prompt_tokens >= threshold% of model's context_length, triggers compression
+# 3. Protects first 3 turns (system prompt, initial request, first response)
+# 4. Protects last 4 turns (recent context is most relevant)
+# 5. Summarizes middle turns using a fast/cheap model
+# 6. Inserts summary as a user message, continues conversation seamlessly
+#
+compression:
+  # Enable automatic context compression (default: true)
+  # Set to false if you prefer to manage context manually or want errors on overflow
+  enabled: true
+  
+  # Trigger compression at this % of model's context limit (default: 0.85 = 85%)
+  # Lower values = more aggressive compression, higher values = compress later
+  threshold: 0.85
+  
+  # Model to use for generating summaries (fast/cheap recommended)
+  # This model compresses the middle turns into a concise summary
+  summary_model: "google/gemini-2.0-flash-001"
+
 # =============================================================================
 # Agent Behavior
 # =============================================================================
diff --git a/cli.py b/cli.py
index f3977b07ae..d73e10112e 100755
--- a/cli.py
+++ b/cli.py
@@ -71,6 +71,11 @@ def load_cli_config() -> Dict[str, Any]:
         "browser": {
             "inactivity_timeout": 120,  # Auto-cleanup inactive browser sessions after 2 min
         },
+        "compression": {
+            "enabled": True,      # Auto-compress when approaching context limit
+            "threshold": 0.85,    # Compress at 85% of model's context limit
+            "summary_model": "google/gemini-2.0-flash-001",  # Fast/cheap model for summaries
+        },
         "agent": {
             "max_turns": 20,
             "verbose": False,
@@ -154,6 +159,18 @@ def load_cli_config() -> Dict[str, Any]:
         if config_key in browser_config:
             os.environ[env_var] = str(browser_config[config_key])
     
+    # Apply compression config to environment variables
+    compression_config = defaults.get("compression", {})
+    compression_env_mappings = {
+        "enabled": "CONTEXT_COMPRESSION_ENABLED",
+        "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
+        "summary_model": "CONTEXT_COMPRESSION_MODEL",
+    }
+    
+    for config_key, env_var in compression_env_mappings.items():
+        if config_key in compression_config:
+            os.environ[env_var] = str(compression_config[config_key])
+    
     return defaults
 
 # Load configuration at module startup
diff --git a/docs/cli.md b/docs/cli.md
index f544257e7e..eb13b068d4 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -250,6 +250,38 @@ This is useful for:
 - Replaying conversations
 - Training data inspection
 
+### Context Compression
+
+Long conversations can exceed model context limits. The CLI automatically compresses context when approaching the limit:
+
+```yaml
+# In cli-config.yaml
+compression:
+  enabled: true                    # Enable auto-compression
+  threshold: 0.85                  # Compress at 85% of context limit  
+  summary_model: "google/gemini-2.0-flash-001"
+```
+
+**How it works:**
+1. Tracks actual token usage from each API response
+2. When tokens reach threshold, middle turns are summarized
+3. First 3 and last 4 turns are always protected
+4. Conversation continues seamlessly after compression
+
+**When compression triggers:**
+```
+📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
+   📊 Model context limit: 200,000 tokens (85% = 170,000)
+   🗜️  Summarizing turns 4-15 (12 turns)
+   ✅ Compressed: 19 → 8 messages (~45,000 tokens saved)
+```
+
+To disable compression:
+```yaml
+compression:
+  enabled: false
+```
+
 ## Quiet Mode
 
 The CLI runs in "quiet mode" (`HERMES_QUIET=1`), which:
diff --git a/run_agent.py b/run_agent.py
index 10b6e26a01..2bd68d3218 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -51,6 +51,410 @@ from model_tools import get_tool_definitions, handle_function_call, check_toolse
 from tools.terminal_tool import cleanup_vm
 from tools.browser_tool import cleanup_browser
 
+import requests
+
+# =============================================================================
+# Model Context Management
+# =============================================================================
+
+# Module-level cache of OpenRouter model metadata, filled by fetch_model_metadata()
+_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
+_model_metadata_cache_time: float = 0
+_MODEL_CACHE_TTL = 3600  # seconds (1 hour) a fetched result is considered fresh
+
+# Fallback context lengths in tokens; get_model_context_length() matches these keys by substring
+DEFAULT_CONTEXT_LENGTHS = {
+    "anthropic/claude-opus-4": 200000,
+    "anthropic/claude-opus-4.5": 200000,
+    "anthropic/claude-sonnet-4": 200000,
+    "anthropic/claude-sonnet-4-20250514": 200000,
+    "anthropic/claude-haiku-4.5": 200000,
+    "openai/gpt-4o": 128000,
+    "openai/gpt-4-turbo": 128000,
+    "openai/gpt-4o-mini": 128000,
+    "google/gemini-2.0-flash": 1048576,
+    "google/gemini-2.5-pro": 1048576,
+    "meta-llama/llama-3.3-70b-instruct": 131072,
+    "deepseek/deepseek-chat-v3": 65536,
+    "qwen/qwen-2.5-72b-instruct": 32768,
+}
+
+
+def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
+    """
+    Fetch model metadata from OpenRouter's /api/v1/models endpoint.
+    Results are cached for 1 hour; JSON nulls are replaced by the defaults below.
+    
+    Returns:
+        Dict mapping model_id to metadata; the stale cache (or {}) if the fetch fails
+    """
+    global _model_metadata_cache, _model_metadata_cache_time
+    
+    # Return cached data if fresh
+    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
+        return _model_metadata_cache
+    
+    try:
+        response = requests.get(
+            "https://openrouter.ai/api/v1/models",
+            timeout=10
+        )
+        response.raise_for_status()
+        data = response.json()
+        
+        # Use `or` rather than .get() defaults: a key can be present with a null value
+        cache = {}
+        for model in data.get("data") or []:
+            model_id = model.get("id") or ""
+            cache[model_id] = {
+                "context_length": model.get("context_length") or 128000,
+                "max_completion_tokens": (model.get("top_provider") or {}).get("max_completion_tokens") or 4096,
+                "name": model.get("name") or model_id,
+                "pricing": model.get("pricing") or {},
+            }
+            # Also cache by canonical slug if different
+            canonical = model.get("canonical_slug") or ""
+            if canonical and canonical != model_id:
+                cache[canonical] = cache[model_id]
+        
+        _model_metadata_cache = cache
+        _model_metadata_cache_time = time.time()
+        
+        if not os.getenv("HERMES_QUIET"):
+            logging.debug(f"Fetched metadata for {len(cache)} models from OpenRouter")
+        
+        return cache
+        
+    except Exception as e:
+        logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
+        # Return cached data even if stale, or empty dict
+        return _model_metadata_cache or {}
+
+
+def get_model_context_length(model: str) -> int:
+    """
+    Get the context length for a specific model.
+    
+    Args:
+        model: Model identifier (e.g., "anthropic/claude-sonnet-4")
+        
+    Returns:
+        Context length in tokens (128000 if unknown, empty, or reported as null)
+    """
+    # Try to get from OpenRouter API; a cached null/0 length falls back to the default
+    metadata = fetch_model_metadata()
+    if model in metadata:
+        return metadata[model].get("context_length") or 128000
+    
+    # Check default fallbacks (substring match); "" would match every key, so skip it
+    for default_model, length in DEFAULT_CONTEXT_LENGTHS.items():
+        if model and (default_model in model or model in default_model):
+            return length
+    
+    # Default when nothing matched
+    return 128000
+
+
+def estimate_tokens_rough(text: str) -> int:
+    """
+    Approximate the token count of a string without calling a tokenizer.
+    
+    Assumes roughly four characters per token, so the value is only suitable
+    for pre-flight checks made before an API call. Exact counts come from
+    `usage.prompt_tokens` in API responses.
+    
+    Args:
+        text: Text to estimate tokens for (an empty/falsy value counts as zero)
+        
+    Returns:
+        Estimated token count, rounded down
+    """
+    char_count = len(text) if text else 0
+    return char_count // 4
+
+
+def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int:
+    """
+    Approximate the token count of a message list (pre-flight check only).
+    
+    Each message is measured by the length of its `str()` form at ~4 chars per token.
+    
+    Args:
+        messages: List of message dicts
+        
+    Returns:
+        Estimated token count; exact counts come from `usage.prompt_tokens`
+    """
+    rendered_lengths = map(len, map(str, messages))
+    return sum(rendered_lengths) // 4
+
+
+class ContextCompressor:
+    """
+    Compresses conversation context when approaching model's context limit.
+    
+    Uses similar logic to trajectory_compressor but operates in real-time:
+    1. Protects first few turns (system, initial user, first assistant response)
+    2. Protects last N turns (recent context is most relevant)
+    3. Summarizes middle turns when threshold is reached
+    
+    Protected regions are widened so "tool" results stay with their tool calls.
+    Token tracking uses actual counts from API responses (usage.prompt_tokens).
+    """
+    
+    def __init__(
+        self,
+        model: str,
+        threshold_percent: float = 0.85,
+        summary_model: str = "google/gemini-2.0-flash-001",
+        protect_first_n: int = 3,
+        protect_last_n: int = 4,
+        summary_target_tokens: int = 500,
+        quiet_mode: bool = False,
+    ):
+        """
+        Initialize the context compressor.
+        
+        Args:
+            model: The main model being used (to determine context limit)
+            threshold_percent: Trigger compression at this % of context (default 85%)
+            summary_model: Model to use for generating summaries (cheap/fast)
+            protect_first_n: Number of initial turns to always keep
+            protect_last_n: Number of recent turns to always keep
+            summary_target_tokens: Target token count for summaries
+            quiet_mode: Suppress compression notifications
+        """
+        self.model = model
+        self.threshold_percent = threshold_percent
+        self.summary_model = summary_model
+        self.protect_first_n = protect_first_n
+        self.protect_last_n = protect_last_n
+        self.summary_target_tokens = summary_target_tokens
+        self.quiet_mode = quiet_mode
+        
+        self.context_length = get_model_context_length(model)
+        self.threshold_tokens = int(self.context_length * threshold_percent)
+        self.compression_count = 0
+        
+        # Track actual token usage from API responses
+        self.last_prompt_tokens = 0
+        self.last_completion_tokens = 0
+        self.last_total_tokens = 0
+        
+        # Initialize OpenRouter client for summarization
+        api_key = os.getenv("OPENROUTER_API_KEY", "")
+        self.client = OpenAI(
+            api_key=api_key,
+            base_url="https://openrouter.ai/api/v1"
+        ) if api_key else None
+    
+    def update_from_response(self, usage: Dict[str, Any]):
+        """
+        Update tracked token usage from API response.
+        
+        Args:
+            usage: The usage dict from response (contains prompt_tokens, completion_tokens, total_tokens)
+        """
+        self.last_prompt_tokens = usage.get("prompt_tokens", 0)
+        self.last_completion_tokens = usage.get("completion_tokens", 0)
+        self.last_total_tokens = usage.get("total_tokens", 0)
+    
+    def should_compress(self, prompt_tokens: int = None) -> bool:
+        """
+        Check if context exceeds the compression threshold.
+        
+        Uses actual token count from API response for accuracy.
+        
+        Args:
+            prompt_tokens: Actual prompt tokens from last API response.
+                          If None, uses last tracked value.
+            
+        Returns:
+            True if compression should be triggered
+        """
+        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
+        return tokens >= self.threshold_tokens
+    
+    def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
+        """
+        Quick pre-flight check using rough estimate (before API call).
+        
+        Use this to avoid making an API call that would fail due to context overflow.
+        For post-response compression decisions, use should_compress() with actual tokens.
+        
+        Args:
+            messages: Current conversation messages
+            
+        Returns:
+            True if compression is likely needed
+        """
+        rough_estimate = estimate_messages_tokens_rough(messages)
+        return rough_estimate >= self.threshold_tokens
+    
+    def get_status(self) -> Dict[str, Any]:
+        """
+        Get current compression status for display/logging.
+        
+        Returns:
+            Dict with token usage and threshold info
+        """
+        return {
+            "last_prompt_tokens": self.last_prompt_tokens,
+            "threshold_tokens": self.threshold_tokens,
+            "context_length": self.context_length,
+            "usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
+            "compression_count": self.compression_count,
+        }
+    
+    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str:
+        """
+        Generate a concise summary of conversation turns using a fast model.
+        
+        Args:
+            turns_to_summarize: List of message dicts to summarize
+            
+        Returns:
+            Summary string (a generic placeholder if no client is set or the call fails)
+        """
+        if not self.client:
+            # Fallback if no API key
+            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."
+        
+        # Format turns for summarization
+        parts = []
+        for msg in turns_to_summarize:
+            role = msg.get("role") or "unknown"
+            # content is None on assistant tool-call messages and may be a list of parts
+            content = msg.get("content") or ""
+            content = content if isinstance(content, str) else str(content)
+            # Truncate very long content
+            if len(content) > 2000:
+                content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
+            # Include tool call info if present
+            tool_calls = msg.get("tool_calls", [])
+            if tool_calls:
+                tool_names = [tc.get("function", {}).get("name", "?") for tc in tool_calls if isinstance(tc, dict)]
+                content += f"\n[Tool calls: {', '.join(tool_names)}]"
+            
+            parts.append(f"[{role.upper()}]: {content}")
+        
+        content_to_summarize = "\n\n".join(parts)
+        
+        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
+
+Write from a neutral perspective describing:
+1. What actions were taken (tool calls, searches, file operations)
+2. Key information or results obtained
+3. Important decisions or findings
+4. Relevant data, file names, or outputs
+
+Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
+
+---
+TURNS TO SUMMARIZE:
+{content_to_summarize}
+---
+
+Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
+
+        try:
+            response = self.client.chat.completions.create(
+                model=self.summary_model,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=0.3,
+                max_tokens=self.summary_target_tokens * 2,
+                timeout=30.0,
+            )
+            
+            summary = response.choices[0].message.content.strip()
+            if not summary.startswith("[CONTEXT SUMMARY]:"):
+                summary = "[CONTEXT SUMMARY]: " + summary
+            
+            return summary
+            
+        except Exception as e:
+            logging.warning(f"Failed to generate context summary: {e}")
+            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses."
+    
+    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
+        """
+        Compress conversation messages by summarizing middle turns.
+        
+        Algorithm:
+        1. Keep first N turns (system prompt, initial context)
+        2. Keep last N turns (recent/relevant context)
+        3. Summarize everything in between
+        4. Insert summary as a user message
+        
+        Args:
+            messages: Current conversation messages
+            current_tokens: Actual token count from API (for logging). If None, uses estimate.
+            
+        Returns:
+            Compressed message list (the input list itself when nothing can be compressed)
+        """
+        n_messages = len(messages)
+        
+        # Not enough messages to compress
+        if n_messages <= self.protect_first_n + self.protect_last_n + 1:
+            if not self.quiet_mode:
+                print(f"⚠️  Cannot compress: only {n_messages} messages (need > {self.protect_first_n + self.protect_last_n + 1})")
+            return messages
+        
+        # Determine compression boundaries
+        compress_start = self.protect_first_n
+        compress_end = n_messages - self.protect_last_n
+        # Widen the protected head/tail so an assistant tool_calls message is never
+        # separated from its "tool" results (chat APIs reject orphaned tool messages)
+        while compress_start < compress_end and messages[compress_start].get("role") == "tool":
+            compress_start += 1
+        while compress_start < compress_end < n_messages and messages[compress_end].get("role") == "tool":
+            compress_end -= 1
+        
+        # Nothing to compress
+        if compress_start >= compress_end:
+            return messages
+        
+        # Extract turns to summarize
+        turns_to_summarize = messages[compress_start:compress_end]
+        
+        # Use actual token count if provided, otherwise estimate
+        display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
+        
+        if not self.quiet_mode:
+            print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
+            print(f"   📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
+            print(f"   🗜️  Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
+        
+        # Generate summary
+        summary = self._generate_summary(turns_to_summarize)
+        
+        # Build compressed messages, starting with copies of the protected head turns
+        compressed = []
+        for i, original in enumerate(messages[:compress_start]):
+            msg = original.copy()
+            # Add notice to system message on first compression
+            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
+                msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
+            compressed.append(msg)
+        
+        # Add summary as user message
+        compressed.append({"role": "user", "content": summary})
+        
+        # Keep protected tail turns
+        compressed.extend(msg.copy() for msg in messages[compress_end:])
+        
+        self.compression_count += 1
+        
+        if not self.quiet_mode:
+            # Estimate new size (actual will be known after next API call); never report a negative saving
+            new_estimate = estimate_messages_tokens_rough(compressed)
+            saved_estimate = max(display_tokens - new_estimate, 0)
+            print(f"   ✅ Compressed: {n_messages} → {len(compressed)} messages (~{saved_estimate:,} tokens saved)")
+            print(f"   💡 Compression #{self.compression_count} complete")
+        
+        return compressed
+
 
 # =============================================================================
 # Default System Prompt Components
@@ -364,6 +768,30 @@ class AIAgent:
         
         # Track conversation messages for session logging
         self._session_messages: List[Dict[str, Any]] = []
+        
+        # Initialize context compressor for automatic context management
+        # Compresses conversation when approaching model's context limit
+        # Configuration via environment variables (can be set in .env or cli-config.yaml)
+        compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
+        compression_model = os.getenv("CONTEXT_COMPRESSION_MODEL", "google/gemini-2.0-flash-001")
+        compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
+        
+        self.context_compressor = ContextCompressor(
+            model=self.model,
+            threshold_percent=compression_threshold,
+            summary_model=compression_model,
+            protect_first_n=3,  # Keep system, first user, first assistant
+            protect_last_n=4,   # Keep recent context
+            summary_target_tokens=500,
+            quiet_mode=self.quiet_mode,
+        )
+        self.compression_enabled = compression_enabled
+        
+        if not self.quiet_mode:
+            if compression_enabled:
+                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})")
+            else:
+                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
     
     # Pools of kawaii faces for random selection
     KAWAII_SEARCH = [
@@ -1105,6 +1533,18 @@ class AIAgent:
                                 "error": "First response truncated due to output length limit"
                             }
                     
+                    # Track actual token usage from response for context management
+                    if hasattr(response, 'usage') and response.usage:
+                        usage_dict = {
+                            "prompt_tokens": getattr(response.usage, 'prompt_tokens', 0),
+                            "completion_tokens": getattr(response.usage, 'completion_tokens', 0),
+                            "total_tokens": getattr(response.usage, 'total_tokens', 0),
+                        }
+                        self.context_compressor.update_from_response(usage_dict)
+                        
+                        if self.verbose_logging:
+                            logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
+                    
                     break  # Success, exit retry loop
 
                 except Exception as api_error:
@@ -1132,17 +1572,28 @@ class AIAgent:
                     ])
                     
                     if is_context_length_error:
-                        print(f"{self.log_prefix}❌ Context length exceeded - this error cannot be resolved by retrying.")
-                        print(f"{self.log_prefix}   💡 The conversation has accumulated too much content from tool responses.")
-                        logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot continue.")
-                        # Return a partial result instead of crashing
-                        return {
-                            "messages": messages,
-                            "completed": False,
-                            "api_calls": api_call_count,
-                            "error": f"Context length exceeded ({approx_tokens:,} tokens). Conversation terminated early.",
-                            "partial": True
-                        }
+                        print(f"{self.log_prefix}⚠️  Context length exceeded - attempting compression...")
+                        
+                        # Try to compress and retry
+                        original_len = len(messages)
+                        messages = self.context_compressor.compress(messages, current_tokens=approx_tokens)
+                        
+                        if len(messages) < original_len:
+                            # Compression was possible, retry
+                            print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
+                            continue  # Retry with compressed messages
+                        else:
+                            # Can't compress further
+                            print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.")
+                            print(f"{self.log_prefix}   💡 The conversation has accumulated too much content.")
+                            logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
+                            return {
+                                "messages": messages,
+                                "completed": False,
+                                "api_calls": api_call_count,
+                                "error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
+                                "partial": True
+                            }
                     
                     if retry_count > max_retries:
                         print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
@@ -1351,6 +1802,14 @@ class AIAgent:
                         if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
                             time.sleep(self.tool_delay)
                     
+                    # Check if context compression is needed before next API call
+                    # Uses actual token count from last API response
+                    if self.compression_enabled and self.context_compressor.should_compress():
+                        messages = self.context_compressor.compress(
+                            messages, 
+                            current_tokens=self.context_compressor.last_prompt_tokens
+                        )
+                    
                     # Continue loop for next response
                     continue
                 

From e114f09f70be8048cdb8e124fed5ff7c4b69c963 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sun, 1 Feb 2026 22:48:18 -0800
Subject: [PATCH 02/48] Implement reasoning extraction and enhance assistant
 message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.
---
 TODO.md      |  94 +++++++++++++++++++++-----------------------
 run_agent.py | 107 ++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 128 insertions(+), 73 deletions(-)

diff --git a/TODO.md b/TODO.md
index 9b35e3b803..ed58a7b2ff 100644
--- a/TODO.md
+++ b/TODO.md
@@ -177,56 +177,48 @@ These items need to be addressed ASAP:
 
 ---
 
-## 2. Context Management (complements Subagents)
+## 2. Planning & Task Management 📋
 
-**Problem:** Context grows unbounded during long conversations. Trajectory compression exists for training data post-hoc, but live conversations lack intelligent context management.
+**Problem:** Agent handles tasks reactively without explicit planning. Complex multi-step tasks lack structure, progress tracking, and the ability to decompose work into manageable chunks.
 
 **Ideas:**
-- [ ] **Incremental summarization** - Compress old tool outputs on-the-fly during conversations
-  - Trigger when context exceeds threshold (e.g., 80% of max tokens)
-  - Preserve recent turns fully, summarize older tool responses
-  - Could reuse logic from `trajectory_compressor.py`
+- [ ] **Task decomposition tool** - Break complex requests into subtasks:
+  ```
+  User: "Set up a new Python project with FastAPI, tests, and Docker"
   
-- [ ] **Semantic memory retrieval** - Vector store for long conversation recall
-  - Embed important facts/findings as conversation progresses
-  - Retrieve relevant memories when needed instead of keeping everything in context
-  - Consider lightweight solutions: ChromaDB, FAISS, or even a simple embedding cache
+  Agent creates plan:
+  ├── 1. Create project structure and requirements.txt
+  ├── 2. Implement FastAPI app skeleton
+  ├── 3. Add pytest configuration and initial tests
+  ├── 4. Create Dockerfile and docker-compose.yml
+  └── 5. Verify everything works together
+  ```
+  - Each subtask becomes a trackable unit
+  - Agent can report progress: "Completed 3/5 tasks"
   
-- [ ] **Working vs. episodic memory** distinction
-  - Working memory: Current task state, recent tool results (always in context)
-  - Episodic memory: Past findings, tried approaches (retrieved on demand)
-  - Clear eviction policies for each
+- [ ] **Progress checkpoints** - Periodic self-assessment:
+  - After N tool calls or time elapsed, pause to evaluate
+  - "What have I accomplished? What remains? Am I on track?"
+  - Detect if stuck in loops or making no progress
+  - Could trigger replanning if approach isn't working
+  
+- [ ] **Explicit plan storage** - Persist plan in conversation:
+  - Store as structured data (not just in context)
+  - Update status as tasks complete
+  - User can ask "What's the plan?" or "What's left?"
+  - Survives context compression (plans are protected)
 
-**Files to modify:** `run_agent.py` (add memory manager), possibly new `tools/memory_tool.py`
+- [ ] **Failure recovery with replanning** - When things go wrong:
+  - Record what failed and why
+  - Revise plan to work around the issue
+  - "Step 3 failed because X, adjusting approach to Y"
+  - Prevents repeating failed strategies
+
+**Files to modify:** `run_agent.py` (add planning hooks), new `tools/planning_tool.py`
 
 ---
 
-## 3. Self-Reflection & Course Correction 🔄
-
-**Problem:** Current retry logic handles malformed outputs but not semantic failures. Agent doesn't reason about *why* something failed.
-
-**Ideas:**
-- [ ] **Meta-reasoning after failures** - When a tool returns an error or unexpected result:
-  ```
-  Tool failed → Reflect: "Why did this fail? What assumptions were wrong?"
-  → Adjust approach → Retry with new strategy
-  ```
-  - Could be a lightweight LLM call or structured self-prompt
-  
-- [ ] **Planning/replanning module** - For complex multi-step tasks:
-  - Generate plan before execution
-  - After each step, evaluate: "Am I on track? Should I revise the plan?"
-  - Store plan in working memory, update as needed
-  
-- [ ] **Approach memory** - Remember what didn't work:
-  - "I tried X for this type of problem and it failed because Y"
-  - Prevents repeating failed strategies in the same conversation
-
-**Files to modify:** `run_agent.py` (add reflection hooks in tool loop), new `tools/reflection_tool.py`
-
----
-
-## 4. Tool Composition & Learning 🔧
+## 3. Tool Composition & Learning 🔧
 
 **Problem:** Tools are atomic. Complex tasks require repeated manual orchestration of the same tool sequences.
 
@@ -257,7 +249,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 5. Dynamic Skills Expansion 📚
+## 4. Dynamic Skills Expansion 📚
 
 **Problem:** Skills system is elegant but static. Skills must be manually created and added.
 
@@ -286,7 +278,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 6. Task Continuation Hints 🎯
+## 5. Task Continuation Hints 🎯
 
 **Problem:** Could be more helpful by suggesting logical next steps.
 
@@ -336,7 +328,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 8. Resource Awareness & Efficiency 💰
+## 6. Resource Awareness & Efficiency 💰
 
 **Problem:** No awareness of costs, time, or resource usage. Could be smarter about efficiency.
 
@@ -373,7 +365,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 10. Project-Local Context 💾
+## 7. Project-Local Context 💾
 
 **Problem:** Valuable context lost between sessions.
 
@@ -393,7 +385,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 11. Graceful Degradation & Robustness 🛡️
+## 8. Graceful Degradation & Robustness 🛡️
 
 **Problem:** When things go wrong, recovery is limited. Should fail gracefully.
 
@@ -414,7 +406,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 12. Tools & Skills Wishlist 🧰
+## 9. Tools & Skills Wishlist 🧰
 
 *Things that would need new tool implementations (can't do well with current tools):*
 
@@ -481,7 +473,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 13. Messaging Platform Integrations 💬
+## 10. Messaging Platform Integrations 💬
 
 **Problem:** Agent currently only works via `cli.py` which requires direct terminal access. Users may want to interact via messaging apps from their phone or other devices.
 
@@ -525,7 +517,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 14. Scheduled Tasks / Cron Jobs ⏰
+## 11. Scheduled Tasks / Cron Jobs ⏰
 
 **Problem:** Agent only runs on-demand. Some tasks benefit from scheduled execution (daily summaries, monitoring, reminders).
 
@@ -570,7 +562,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 15. Text-to-Speech (TTS) 🔊
+## 12. Text-to-Speech (TTS) 🔊
 
 **Problem:** Agent can only respond with text. Some users prefer audio responses (accessibility, hands-free use, podcasts).
 
@@ -601,7 +593,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 16. Speech-to-Text / Audio Transcription 🎤
+## 13. Speech-to-Text / Audio Transcription 🎤
 
 **Problem:** Users may want to send voice memos instead of typing. Agent is blind to audio content.
 
diff --git a/run_agent.py b/run_agent.py
index 2bd68d3218..963a9db4f8 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -980,6 +980,49 @@ class AIAgent:
         # Check if there's any non-whitespace content remaining
         return bool(cleaned.strip())
     
+    def _extract_reasoning(self, assistant_message) -> Optional[str]:
+        """
+        Collect reasoning/thinking text from an assistant message into one string.
+        
+        Up to three attributes are read, in this order; exact duplicate strings are skipped:
+        1. message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.)
+        2. message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.)
+        3. message.reasoning_details - List of dicts (OpenRouter unified); uses summary, else content, else text
+        
+        Args:
+            assistant_message: The assistant message object from the API response
+            
+        Returns:
+            The collected parts joined by a blank line, or None if no reasoning found
+        """
+        reasoning_parts = []
+        
+        # Direct reasoning field: missing attribute or falsy value means "not present"
+        if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
+            reasoning_parts.append(assistant_message.reasoning)
+        
+        # Check reasoning_content field (alternative name used by some providers)
+        if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
+            # Skip it when it is identical to a part already collected (i.e. `reasoning`)
+            if assistant_message.reasoning_content not in reasoning_parts:
+                reasoning_parts.append(assistant_message.reasoning_content)
+        
+        # Check reasoning_details array (OpenRouter unified format); non-dict entries are ignored
+        # Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
+        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+            for detail in assistant_message.reasoning_details:
+                if isinstance(detail, dict):
+                    # Take the first truthy value among the 'summary', 'content' and 'text' keys
+                    summary = detail.get('summary') or detail.get('content') or detail.get('text')
+                    if summary and summary not in reasoning_parts:
+                        reasoning_parts.append(summary)
+        
+        # Join the parts in collection order, separated by a blank line
+        if reasoning_parts:
+            return "\n\n".join(reasoning_parts)
+        
+        return None
+    
     def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
         """
         Get messages up to (but not including) the last assistant turn.
@@ -1318,22 +1361,20 @@ class AIAgent:
             for msg in messages:
                 api_msg = msg.copy()
                 
-                # For assistant messages with tool_calls, providers require 'reasoning_content' field
-                # Extract reasoning from our stored 'reasoning' field and add it as 'reasoning_content'
-                if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                # For ALL assistant messages, pass reasoning back to the API
+                # This ensures multi-turn reasoning context is preserved
+                if msg.get("role") == "assistant":
                     reasoning_text = msg.get("reasoning")
                     if reasoning_text:
-                        # Add reasoning_content for API compatibility (Moonshot AI, Novita, etc.)
+                        # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter)
                         api_msg["reasoning_content"] = reasoning_text
                 
                 # Remove 'reasoning' field - it's for trajectory storage only
-                # The reasoning is already in the content via <think> tags AND
-                # we've added reasoning_content for API compatibility above
+                # We've copied it to 'reasoning_content' for the API above
                 if "reasoning" in api_msg:
                     api_msg.pop("reasoning")
-                # Remove 'reasoning_details' if present - we use reasoning_content instead
-                if "reasoning_details" in api_msg:
-                    api_msg.pop("reasoning_details")
+                # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
+                # The signature field helps maintain reasoning continuity
                 api_messages.append(api_msg)
             
             if active_system_prompt:
@@ -1694,14 +1735,16 @@ class AIAgent:
                     # Reset retry counter on successful JSON validation
                     self._invalid_json_retries = 0
                     
-                    # Extract reasoning from response if available (for reasoning models like minimax, kimi, etc.)
-                    # Extract reasoning from response for storage
-                    # The reasoning_content field will be added when preparing API messages
-                    reasoning_text = None
-                    if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
-                        reasoning_text = assistant_message.reasoning
-                    elif hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
-                        reasoning_text = assistant_message.reasoning_content
+                    # Extract reasoning from response if available
+                    # OpenRouter can return reasoning in multiple formats:
+                    # 1. message.reasoning - direct reasoning field
+                    # 2. message.reasoning_content - alternative field (some providers)
+                    # 3. message.reasoning_details - array with {summary: "..."} objects
+                    reasoning_text = self._extract_reasoning(assistant_message)
+                    
+                    if reasoning_text and self.verbose_logging:
+                        preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
+                        logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
                     
                     # Build assistant message with tool calls
                     # Content stays as-is; reasoning is stored separately and will be passed
@@ -1723,6 +1766,14 @@ class AIAgent:
                         ]
                     }
                     
+                    # Store reasoning_details for multi-turn reasoning context (OpenRouter)
+                    if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+                        assistant_msg["reasoning_details"] = [
+                            {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")}
+                            for d in assistant_message.reasoning_details
+                            if isinstance(d, dict)
+                        ]
+                    
                     messages.append(assistant_msg)
                     
                     # Execute each tool call
@@ -1810,6 +1861,10 @@ class AIAgent:
                             current_tokens=self.context_compressor.last_prompt_tokens
                         )
                     
+                    # Save session log incrementally (so progress is visible even if interrupted)
+                    self._session_messages = messages
+                    self._save_session_log(messages)
+                    
                     # Continue loop for next response
                     continue
                 
@@ -1865,11 +1920,11 @@ class AIAgent:
                         self._empty_content_retries = 0
                     
                     # Extract reasoning from response if available
-                    reasoning_text = None
-                    if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
-                        reasoning_text = assistant_message.reasoning
-                    elif hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
-                        reasoning_text = assistant_message.reasoning_content
+                    reasoning_text = self._extract_reasoning(assistant_message)
+                    
+                    if reasoning_text and self.verbose_logging:
+                        preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
+                        logging.debug(f"Captured final reasoning ({len(reasoning_text)} chars): {preview}")
                     
                     # Build final assistant message
                     # Content stays as-is; reasoning stored separately for trajectory extraction
@@ -1879,6 +1934,14 @@ class AIAgent:
                         "reasoning": reasoning_text  # Stored for trajectory extraction
                     }
                     
+                    # Store reasoning_details for multi-turn reasoning context (OpenRouter)
+                    if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+                        final_msg["reasoning_details"] = [
+                            {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")}
+                            for d in assistant_message.reasoning_details
+                            if isinstance(d, dict)
+                        ]
+                    
                     messages.append(final_msg)
                     
                     if not self.quiet_mode:

From c935a604f87622e3af91b4c8b9e22ce4b6c7cd6f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 01:25:03 -0800
Subject: [PATCH 03/48] Refactor TODO.md to reorganize task sections and update
 descriptions

- Renamed and reordered sections in the TODO list for clarity, moving "Interactive Clarifying Questions Tool" to section 5 and "Collaborative Problem Solving" to section 6.
- Removed outdated ideas related to task continuation hints and resource awareness, streamlining the focus on current development priorities.
- Enhanced the overall structure of the TODO list to better reflect ongoing and future tasks.
---
 TODO.md | 36 ++----------------------------------
 1 file changed, 2 insertions(+), 34 deletions(-)

diff --git a/TODO.md b/TODO.md
index ed58a7b2ff..a94ae89f74 100644
--- a/TODO.md
+++ b/TODO.md
@@ -278,21 +278,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 5. Task Continuation Hints 🎯
-
-**Problem:** Could be more helpful by suggesting logical next steps.
-
-**Ideas:**
-- [ ] **Suggest next steps** - At end of a task, suggest logical continuations:
-  - "Code is written. Want me to also write tests / docs / deploy?"
-  - Based on common workflows for task type
-  - Non-intrusive, just offer options
-
-**Files to modify:** `run_agent.py`, response generation logic
-
----
-
-## 7. Interactive Clarifying Questions Tool ❓
+## 5. Interactive Clarifying Questions Tool ❓
 
 **Problem:** Agent sometimes makes assumptions or guesses when it should ask the user. Currently can only ask via text, which gets lost in long outputs.
 
@@ -328,25 +314,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 6. Resource Awareness & Efficiency 💰
-
-**Problem:** No awareness of costs, time, or resource usage. Could be smarter about efficiency.
-
-**Ideas:**
-- [ ] **Tool result caching** - Don't repeat identical operations:
-  - Cache web searches, extractions within a session
-  - Invalidation based on time-sensitivity of query
-  - Hash-based lookup: same input → cached output
-
-- [ ] **Lazy evaluation** - Don't fetch everything upfront:
-  - Get summaries first, full content only if needed
-  - "I found 5 relevant pages. Want me to deep-dive on any?"
-
-**Files to modify:** `model_tools.py`, new `resource_tracker.py`
-
----
-
-## 9. Collaborative Problem Solving 🤝
+## 6. Collaborative Problem Solving 🤝
 
 **Problem:** Interaction is command/response. Complex problems benefit from dialogue.
 

From a3ba41fce21e546c011bd830f816c0aaff16c7cd Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 08:26:42 -0800
Subject: [PATCH 04/48] Implement cron job management system for scheduled
 tasks (similar to OpenAI's Pulse but the AI can also schedule jobs)

- Introduced a new cron job system allowing users to schedule automated tasks via the CLI, supporting one-time reminders and recurring jobs.
- Added commands for managing cron jobs: `/cron` to list jobs, `/cron add` to create new jobs, and `/cron remove` to delete jobs.
- Implemented job storage in `~/.hermes/cron/jobs.json` with output saved to `~/.hermes/cron/output/{job_id}/{timestamp}.md`.
- Enhanced the CLI and README documentation to include detailed usage instructions and examples for cron job management.
- Integrated cron job tools into the hermes-cli toolset, ensuring they are only available in interactive CLI mode.
- Added support for cron expression parsing with the `croniter` package, enabling flexible scheduling options.
---
 README.md              |  72 ++++++++
 TODO.md                |  73 +++++---
 cli.py                 | 167 ++++++++++++++++++
 cron/__init__.py       |  36 ++++
 cron/jobs.py           | 372 +++++++++++++++++++++++++++++++++++++++++
 cron/scheduler.py      | 188 +++++++++++++++++++++
 model_tools.py         | 109 +++++++++++-
 requirements.txt       |   4 +-
 tools/__init__.py      |  21 +++
 tools/cronjob_tools.py | 341 +++++++++++++++++++++++++++++++++++++
 toolsets.py            |  36 ++++
 11 files changed, 1384 insertions(+), 35 deletions(-)
 create mode 100644 cron/__init__.py
 create mode 100644 cron/jobs.py
 create mode 100644 cron/scheduler.py
 create mode 100644 tools/cronjob_tools.py

diff --git a/README.md b/README.md
index 309c7c6892..af28483717 100644
--- a/README.md
+++ b/README.md
@@ -325,6 +325,77 @@ CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001
    ✅ Compressed: 20 → 9 messages (~45,000 tokens saved)
 ```
 
+## Scheduled Tasks (Cron Jobs)
+
+Hermes Agent can schedule automated tasks to run in the future - either one-time reminders or recurring jobs.
+
+### CLI Commands
+
+```bash
+# List scheduled jobs
+/cron
+
+# Add a one-shot reminder (runs once in 30 minutes)
+/cron add 30m Remind me to check the build status
+
+# Add a recurring job (every 2 hours)
+/cron add "every 2h" Check server status at 192.168.1.100 and report any issues
+
+# Add a cron expression (daily at 9am)
+/cron add "0 9 * * *" Generate a morning briefing summarizing GitHub notifications
+
+# Remove a job
+/cron remove abc123def456
+```
+
+### Agent Self-Scheduling
+
+The agent can also schedule its own follow-up tasks using tools:
+
+```python
+# Available when using hermes-cli toolset (default for CLI)
+schedule_cronjob(prompt="...", schedule="30m", repeat=1)  # One-shot
+schedule_cronjob(prompt="...", schedule="every 2h")       # Recurring
+list_cronjobs()                                            # View all jobs
+remove_cronjob(job_id="...")                              # Cancel a job
+```
+
+**⚠️ Important:** Cronjobs run in **isolated sessions with NO prior context**. The prompt must be completely self-contained with all necessary information (file paths, URLs, server addresses, etc.). The future agent will not remember anything from the current conversation.
+
+### Schedule Formats
+
+| Format | Example | Description |
+|--------|---------|-------------|
+| Duration | `30m`, `2h`, `1d` | One-shot delay from now |
+| Interval | `every 30m`, `every 2h` | Recurring at fixed intervals |
+| Cron | `0 9 * * *` | Cron expression (requires `croniter`) |
+| Timestamp | `2026-02-03T14:00` | One-shot at specific time |
+
+### Repeat Options
+
+| repeat | Behavior |
+|--------|----------|
+| (omitted) | One-shot schedules run once; intervals/cron run forever |
+| `1` | Run once then auto-delete |
+| `N` | Run N times then auto-delete |
+
+### Running the Cron Daemon
+
+Jobs are stored in `~/.hermes/cron/jobs.json` and executed by a scheduler:
+
+```bash
+# Option 1: Built-in daemon (checks every 60 seconds)
+python cli.py --cron-daemon
+
+# Option 2: System cron integration (run once per minute)
+# Add to crontab: crontab -e
+*/1 * * * * cd ~/hermes-agent && python cli.py --cron-tick-once >> ~/.hermes/cron/cron.log 2>&1
+```
+
+### Job Output
+
+Job outputs are saved to `~/.hermes/cron/output/{job_id}/{timestamp}.md` for review.
+
 ## Interactive CLI
 
 The CLI provides a rich interactive experience for working with the agent.
@@ -357,6 +428,7 @@ The CLI provides a rich interactive experience for working with the agent.
 | `/history` | Show conversation history |
 | `/save` | Save current conversation to file |
 | `/config` | Show current configuration |
+| `/cron` | Manage scheduled tasks (list, add, remove) |
 | `/quit` | Exit the CLI |
 
 ### Configuration
diff --git a/TODO.md b/TODO.md
index a94ae89f74..7a68afadee 100644
--- a/TODO.md
+++ b/TODO.md
@@ -485,48 +485,67 @@ These items need to be addressed ASAP:
 
 ---
 
-## 11. Scheduled Tasks / Cron Jobs ⏰
+## 11. Scheduled Tasks / Cron Jobs ⏰ ✅ COMPLETE
 
 **Problem:** Agent only runs on-demand. Some tasks benefit from scheduled execution (daily summaries, monitoring, reminders).
 
-**Ideas:**
-- [ ] **Cron-style scheduler** - Run agent turns on a schedule
-  - Store jobs in `~/.hermes/cron/jobs.json`
-  - Each job: `{ id, schedule, prompt, session_mode, delivery }`
-  - Uses APScheduler or similar Python library
+**Solution Implemented:**
+
+- [x] **Cron-style scheduler** - Run agent turns on a schedule
+  - Jobs stored in `~/.hermes/cron/jobs.json`
+  - Each job: `{ id, name, prompt, schedule, repeat, enabled, next_run_at, ... }`
+  - Built-in scheduler daemon or system cron integration
   
-- [ ] **Session modes:**
-  - `isolated` - Fresh session each run (no history, clean context)
-  - `main` - Append to main session (agent remembers previous scheduled runs)
+- [x] **Schedule formats:**
+  - Duration: `30m`, `2h`, `1d` (one-shot delay)
+  - Interval: `every 30m`, `every 2h` (recurring)
+  - Cron expression: `0 9 * * *` (requires `croniter` package)
+  - ISO timestamp: `2026-02-03T14:00:00` (one-shot at specific time)
+
+- [x] **Repeat options:**
+  - `repeat=None` (or omit): One-shot schedules run once; intervals/cron run forever
+  - `repeat=1`: Run once then auto-delete
+  - `repeat=N`: Run exactly N times then auto-delete
   
-- [ ] **Delivery options:**
-  - Write output to file (`~/.hermes/cron/output/{job_id}/{timestamp}.md`)
-  - Send to messaging channel (if integrations enabled)
-  - Both
-  
-- [ ] **CLI interface:**
+- [x] **CLI interface:**
   ```bash
   # List scheduled jobs
-  python cli.py --cron list
+  /cron
+  /cron list
   
-  # Add a job (runs daily at 9am)
-  python cli.py --cron add "Summarize my email inbox" --schedule "0 9 * * *"
+  # Add a one-shot job (runs once in 30 minutes)
+  /cron add 30m "Remind me to check the build status"
   
-  # Quick syntax for simple intervals  
-  python cli.py --cron add "Check server status" --every 30m
+  # Add a recurring job (every 2 hours)
+  /cron add "every 2h" "Check server status at 192.168.1.100"
+  
+  # Add a cron expression (daily at 9am)
+  /cron add "0 9 * * *" "Generate morning briefing"
   
   # Remove a job
-  python cli.py --cron remove <job_id>
+  /cron remove <job_id>
   ```
 
-- [ ] **Agent self-scheduling** - Let the agent create its own cron jobs
-  - New tool: `schedule_task(prompt, schedule, session_mode)`
-  - "Remind me to check the deployment tomorrow at 9am"
-  - Agent can set follow-up tasks for itself
+- [x] **Agent self-scheduling tools** (hermes-cli toolset):
+  - `schedule_cronjob(prompt, schedule, name?, repeat?)` - Create a scheduled task
+  - `list_cronjobs()` - View all scheduled jobs
+  - `remove_cronjob(job_id)` - Cancel a job
+  - Tool descriptions emphasize: **cronjobs run in isolated sessions with NO context**
 
-- [ ] **In-chat command:** `/cronjob {prompt} {frequency}` when using messaging integrations
+- [x] **Daemon modes:**
+  ```bash
+  # Built-in daemon (checks every 60 seconds)
+  python cli.py --cron-daemon
+  
+  # Single tick for system cron integration
+  python cli.py --cron-tick-once
+  ```
 
-**Files to create:** `cron/scheduler.py`, `cron/jobs.py`, `tools/schedule_tool.py`
+- [x] **Output storage:** `~/.hermes/cron/output/{job_id}/{timestamp}.md`
+
+**Files created:** `cron/__init__.py`, `cron/jobs.py`, `cron/scheduler.py`, `tools/cronjob_tools.py`
+
+**Toolset:** `hermes-cli` (default for CLI) includes cronjob tools; not in batch runner toolsets
 
 ---
 
diff --git a/cli.py b/cli.py
index d73e10112e..210e069df2 100755
--- a/cli.py
+++ b/cli.py
@@ -192,6 +192,9 @@ from run_agent import AIAgent
 from model_tools import get_tool_definitions, get_all_tool_names, get_toolset_for_tool, get_available_toolsets
 from toolsets import get_all_toolsets, get_toolset_info, resolve_toolset, validate_toolset
 
+# Cron job system for scheduled tasks
+from cron import create_job, list_jobs, remove_job, get_job, run_daemon as run_cron_daemon, tick as cron_tick
+
 # ============================================================================
 # ASCII Art & Branding
 # ============================================================================
@@ -402,6 +405,7 @@ COMMANDS = {
     "/reset": "Reset conversation only (keep screen)",
     "/save": "Save the current conversation",
     "/config": "Show current configuration",
+    "/cron": "Manage scheduled tasks (list, add, remove)",
     "/quit": "Exit the CLI (also: /exit, /q)",
 }
 
@@ -878,6 +882,142 @@ class HermesCLI:
             print("  Usage: /personality <name>")
             print()
     
+    def _handle_cron_command(self, cmd: str):
+        """Handle the /cron command to manage scheduled tasks."""
+        parts = cmd.split(maxsplit=2)
+        
+        if len(parts) == 1:
+            # /cron - show help and list
+            print()
+            print("+" + "-" * 60 + "+")
+            print("|" + " " * 18 + "(^_^) Scheduled Tasks" + " " * 19 + "|")
+            print("+" + "-" * 60 + "+")
+            print()
+            print("  Commands:")
+            print("    /cron                     - List scheduled jobs")
+            print("    /cron list                - List scheduled jobs")
+            print('    /cron add <schedule> <prompt>  - Add a new job')
+            print("    /cron remove <job_id>     - Remove a job")
+            print()
+            print("  Schedule formats:")
+            print("    30m, 2h, 1d              - One-shot delay")
+            print('    "every 30m", "every 2h"  - Recurring interval')
+            print('    "0 9 * * *"              - Cron expression')
+            print()
+            
+            # Show current jobs
+            jobs = list_jobs()
+            if jobs:
+                print("  Current Jobs:")
+                print("  " + "-" * 55)
+                for job in jobs:
+                    # Format repeat status
+                    times = job["repeat"].get("times")
+                    completed = job["repeat"].get("completed", 0)
+                    if times is None:
+                        repeat_str = "forever"
+                    else:
+                        repeat_str = f"{completed}/{times}"
+                    
+                    print(f"    {job['id'][:12]:<12} | {job['schedule_display']:<15} | {repeat_str:<8}")
+                    prompt_preview = job['prompt'][:45] + "..." if len(job['prompt']) > 45 else job['prompt']
+                    print(f"      {prompt_preview}")
+                    if job.get("next_run_at"):
+                        from datetime import datetime
+                        next_run = datetime.fromisoformat(job["next_run_at"])
+                        print(f"      Next: {next_run.strftime('%Y-%m-%d %H:%M')}")
+                    print()
+            else:
+                print("  No scheduled jobs. Use '/cron add' to create one.")
+            print()
+            return
+        
+        subcommand = parts[1].lower()
+        
+        if subcommand == "list":
+            # /cron list - just show jobs
+            jobs = list_jobs()
+            if not jobs:
+                print("(._.) No scheduled jobs.")
+                return
+            
+            print()
+            print("Scheduled Jobs:")
+            print("-" * 70)
+            for job in jobs:
+                times = job["repeat"].get("times")
+                completed = job["repeat"].get("completed", 0)
+                repeat_str = "forever" if times is None else f"{completed}/{times}"
+                
+                print(f"  ID: {job['id']}")
+                print(f"  Name: {job['name']}")
+                print(f"  Schedule: {job['schedule_display']} ({repeat_str})")
+                print(f"  Next run: {job.get('next_run_at', 'N/A')}")
+                print(f"  Prompt: {job['prompt'][:80]}{'...' if len(job['prompt']) > 80 else ''}")
+                if job.get("last_run_at"):
+                    print(f"  Last run: {job['last_run_at']} ({job.get('last_status', '?')})")
+                print()
+        
+        elif subcommand == "add":
+            # /cron add <schedule> <prompt>
+            if len(parts) < 3:
+                print("(._.) Usage: /cron add <schedule> <prompt>")
+                print("  Example: /cron add 30m Remind me to take a break")
+                print('  Example: /cron add "every 2h" Check server status at 192.168.1.1')
+                return
+            
+            # Parse schedule and prompt
+            rest = parts[2].strip()
+            
+            # Handle quoted schedule (e.g., "every 30m" or "0 9 * * *")
+            if rest.startswith('"'):
+                # Find closing quote
+                close_quote = rest.find('"', 1)
+                if close_quote == -1:
+                    print("(._.) Unmatched quote in schedule")
+                    return
+                schedule = rest[1:close_quote]
+                prompt = rest[close_quote + 1:].strip()
+            else:
+                # First word is schedule
+                schedule_parts = rest.split(maxsplit=1)
+                schedule = schedule_parts[0]
+                prompt = schedule_parts[1] if len(schedule_parts) > 1 else ""
+            
+            if not prompt:
+                print("(._.) Please provide a prompt for the job")
+                return
+            
+            try:
+                job = create_job(prompt=prompt, schedule=schedule)
+                print(f"(^_^)b Created job: {job['id']}")
+                print(f"  Schedule: {job['schedule_display']}")
+                print(f"  Next run: {job['next_run_at']}")
+            except Exception as e:
+                print(f"(x_x) Failed to create job: {e}")
+        
+        elif subcommand == "remove" or subcommand == "rm" or subcommand == "delete":
+            # /cron remove <job_id>
+            if len(parts) < 3:
+                print("(._.) Usage: /cron remove <job_id>")
+                return
+            
+            job_id = parts[2].strip()
+            job = get_job(job_id)
+            
+            if not job:
+                print(f"(._.) Job not found: {job_id}")
+                return
+            
+            if remove_job(job_id):
+                print(f"(^_^)b Removed job: {job['name']} ({job_id})")
+            else:
+                print(f"(x_x) Failed to remove job: {job_id}")
+        
+        else:
+            print(f"(._.) Unknown cron command: {subcommand}")
+            print("  Available: list, add, remove")
+    
     def process_command(self, command: str) -> bool:
         """
         Process a slash command.
@@ -933,6 +1073,8 @@ class HermesCLI:
             self._handle_personality_command(cmd)
         elif cmd == "/save":
             self.save_conversation()
+        elif cmd.startswith("/cron"):
+            self._handle_cron_command(command)  # Use original command for proper parsing
         else:
             self.console.print(f"[bold red]Unknown command: {cmd}[/]")
             self.console.print("[dim #B8860B]Type /help for available commands[/]")
@@ -1072,6 +1214,8 @@ def main(
     compact: bool = False,
     list_tools: bool = False,
     list_toolsets: bool = False,
+    cron_daemon: bool = False,
+    cron_tick_once: bool = False,
 ):
     """
     Hermes Agent CLI - Interactive AI Assistant
@@ -1088,21 +1232,41 @@ def main(
         compact: Use compact display mode
         list_tools: List available tools and exit
         list_toolsets: List available toolsets and exit
+        cron_daemon: Run as cron daemon (check and execute due jobs continuously)
+        cron_tick_once: Run due cron jobs once and exit (for system cron integration)
     
     Examples:
         python cli.py                            # Start interactive mode
         python cli.py --toolsets web,terminal    # Use specific toolsets
         python cli.py -q "What is Python?"       # Single query mode
         python cli.py --list-tools               # List tools and exit
+        python cli.py --cron-daemon              # Run cron scheduler daemon
+        python cli.py --cron-tick-once           # Check and run due jobs once
     """
     # Signal to terminal_tool that we're in interactive mode
     # This enables interactive sudo password prompts with timeout
     os.environ["HERMES_INTERACTIVE"] = "1"
     
+    # Handle cron daemon mode (runs before CLI initialization)
+    if cron_daemon:
+        print("Starting Hermes Cron Daemon...")
+        print("Jobs will be checked every 60 seconds.")
+        print("Press Ctrl+C to stop.\n")
+        run_cron_daemon(check_interval=60, verbose=True)
+        return
+    
+    # Handle cron tick (single run for system cron integration)
+    if cron_tick_once:
+        jobs_run = cron_tick(verbose=True)
+        if jobs_run:
+            print(f"Executed {jobs_run} job(s)")
+        return
+    
     # Handle query shorthand
     query = query or q
     
     # Parse toolsets - handle both string and tuple/list inputs
+    # Default to hermes-cli toolset which includes cronjob management tools
     toolsets_list = None
     if toolsets:
         if isinstance(toolsets, str):
@@ -1115,6 +1279,9 @@ def main(
                     toolsets_list.extend([x.strip() for x in t.split(",")])
                 else:
                     toolsets_list.append(str(t))
+    else:
+        # Default: use hermes-cli toolset for full CLI functionality including cronjob tools
+        toolsets_list = ["hermes-cli"]
     
     # Create CLI instance
     cli = HermesCLI(
diff --git a/cron/__init__.py b/cron/__init__.py
new file mode 100644
index 0000000000..446187c7b4
--- /dev/null
+++ b/cron/__init__.py
@@ -0,0 +1,36 @@
+"""
+Cron job scheduling system for Hermes Agent.
+
+This module provides scheduled task execution, allowing the agent to:
+- Run automated tasks on schedules (cron expressions, intervals, one-shot)
+- Self-schedule reminders and follow-up tasks
+- Execute tasks in isolated sessions (no prior context)
+
+Usage:
+    # Run due jobs (for system cron integration)
+    python -c "from cron import tick; tick()"
+    
+    # Or via CLI
+    python cli.py --cron-daemon
+"""
+
+from cron.jobs import (
+    create_job,
+    get_job,
+    list_jobs,
+    remove_job,
+    update_job,
+    JOBS_FILE,
+)
+from cron.scheduler import tick, run_daemon
+
+__all__ = [
+    "create_job",
+    "get_job", 
+    "list_jobs",
+    "remove_job",
+    "update_job",
+    "tick",
+    "run_daemon",
+    "JOBS_FILE",
+]
diff --git a/cron/jobs.py b/cron/jobs.py
new file mode 100644
index 0000000000..9f7ff47c03
--- /dev/null
+++ b/cron/jobs.py
@@ -0,0 +1,372 @@
+"""
+Cron job storage and management.
+
+Jobs are stored in ~/.hermes/cron/jobs.json
+Output is saved to ~/.hermes/cron/output/{job_id}/{timestamp}.md
+"""
+
+import json
+import os
+import re
+import uuid
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, List, Any
+
+try:
+    from croniter import croniter
+    HAS_CRONITER = True
+except ImportError:
+    HAS_CRONITER = False
+
+# =============================================================================
+# Configuration
+# =============================================================================
+
+HERMES_DIR = Path.home() / ".hermes"
+CRON_DIR = HERMES_DIR / "cron"
+JOBS_FILE = CRON_DIR / "jobs.json"
+OUTPUT_DIR = CRON_DIR / "output"
+
+
+def ensure_dirs():
+    """Ensure cron directories exist."""
+    CRON_DIR.mkdir(parents=True, exist_ok=True)
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+
+# =============================================================================
+# Schedule Parsing
+# =============================================================================
+
+def parse_duration(s: str) -> int:
+    """
+    Parse duration string into minutes.
+    
+    Examples:
+        "30m" → 30
+        "2h" → 120
+        "1d" → 1440
+    """
+    s = s.strip().lower()
+    match = re.match(r'^(\d+)\s*(m|min|mins|minute|minutes|h|hr|hrs|hour|hours|d|day|days)$', s)
+    if not match:
+        raise ValueError(f"Invalid duration: '{s}'. Use format like '30m', '2h', or '1d'")
+    
+    value = int(match.group(1))
+    unit = match.group(2)[0]  # First char: m, h, or d
+    
+    multipliers = {'m': 1, 'h': 60, 'd': 1440}
+    return value * multipliers[unit]
+
+
+def parse_schedule(schedule: str) -> Dict[str, Any]:
+    """
+    Parse schedule string into structured format.
+    
+    Returns dict with:
+        - kind: "once" | "interval" | "cron"
+        - For "once": "run_at" (ISO timestamp, naive local time)
+        - For "interval": "minutes" (int)
+        - For "cron": "expr" (cron expression)
+    
+    Raises ValueError if the schedule is invalid or unsupported.
+    
+    Examples:
+        "30m", "2h"         → once in 30 minutes / 2 hours
+        "every 30m"         → recurring every 30 minutes
+        "0 9 * * *"         → cron expression
+        "2026-02-03T14:00Z" → once at that instant (stored as local time)
+    """
+    schedule = schedule.strip()
+    original = schedule
+    schedule_lower = schedule.lower()
+    
+    # "every X" pattern → recurring interval
+    if schedule_lower.startswith("every "):
+        duration_str = schedule[6:].strip()
+        minutes = parse_duration(duration_str)
+        return {
+            "kind": "interval",
+            "minutes": minutes,
+            "display": f"every {minutes}m"
+        }
+    
+    # Check for cron expression (5 or 6 space-separated fields)
+    # Cron fields: minute hour day month weekday [year]
+    parts = schedule.split()
+    if len(parts) >= 5 and all(
+        re.match(r'^[\d\*\-,/]+$', p) for p in parts[:5]
+    ):
+        if not HAS_CRONITER:
+            raise ValueError("Cron expressions require 'croniter' package. Install with: pip install croniter")
+        # Validate cron expression
+        try:
+            croniter(schedule)
+        except Exception as e:
+            raise ValueError(f"Invalid cron expression '{schedule}': {e}") from e
+        return {
+            "kind": "cron",
+            "expr": schedule,
+            "display": schedule
+        }
+    
+    # ISO timestamp; aware values are stored as naive local time to match datetime.now()
+    if 'T' in schedule or re.match(r'^\d{4}-\d{2}-\d{2}', schedule):
+        try:
+            dt = datetime.fromisoformat(schedule.replace('Z', '+00:00'))
+            dt = dt if dt.tzinfo is None else dt.astimezone().replace(tzinfo=None)
+            return {
+                "kind": "once",
+                "run_at": dt.isoformat(),
+                "display": f"once at {dt.strftime('%Y-%m-%d %H:%M')}"
+            }
+        except ValueError as e:
+            raise ValueError(f"Invalid timestamp '{schedule}': {e}") from e
+    
+    # Duration like "30m", "2h", "1d" → one-shot from now
+    try:
+        minutes = parse_duration(schedule)
+        run_at = datetime.now() + timedelta(minutes=minutes)
+        return {
+            "kind": "once",
+            "run_at": run_at.isoformat(),
+            "display": f"once in {original}"
+        }
+    except ValueError:
+        pass
+    
+    raise ValueError(
+        f"Invalid schedule '{original}'. Use:\n"
+        f"  - Duration: '30m', '2h', '1d' (one-shot)\n"
+        f"  - Interval: 'every 30m', 'every 2h' (recurring)\n"
+        f"  - Cron: '0 9 * * *' (cron expression)\n"
+        f"  - Timestamp: '2026-02-03T14:00:00' (one-shot at time)"
+    )
+
+
+def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:
+    """
+    Compute the next run time for a schedule.
+    
+    Returns ISO timestamp string, or None if no more runs.
+    """
+    now = datetime.now()
+    
+    if schedule["kind"] == "once":
+        run_at = datetime.fromisoformat(schedule["run_at"])
+        # If in the future, return it; if in the past, no more runs
+        return schedule["run_at"] if run_at > now else None
+    
+    elif schedule["kind"] == "interval":
+        minutes = schedule["minutes"]
+        if last_run_at:
+            # Next run is last_run + interval
+            last = datetime.fromisoformat(last_run_at)
+            next_run = last + timedelta(minutes=minutes)
+        else:
+            # First run is now + interval
+            next_run = now + timedelta(minutes=minutes)
+        return next_run.isoformat()
+    
+    elif schedule["kind"] == "cron":
+        if not HAS_CRONITER:
+            return None
+        cron = croniter(schedule["expr"], now)
+        next_run = cron.get_next(datetime)
+        return next_run.isoformat()
+    
+    return None
+
+
+# =============================================================================
+# Job CRUD Operations
+# =============================================================================
+
+def load_jobs() -> List[Dict[str, Any]]:
+    """Load all jobs from storage; return [] when the file is missing or unreadable."""
+    ensure_dirs()
+    if not JOBS_FILE.exists():
+        return []
+
+    try:
+        return json.loads(JOBS_FILE.read_text(encoding='utf-8')).get("jobs", [])
+    except (ValueError, OSError, AttributeError) as e:
+        # Surface corruption instead of hiding it: the next save_jobs() rewrites this file.
+        print(f"[cron] Warning: could not load {JOBS_FILE}: {e}")
+        return []
+
+
+def save_jobs(jobs: List[Dict[str, Any]]):
+    """Save all jobs to storage."""
+    ensure_dirs()
+    with open(JOBS_FILE, 'w', encoding='utf-8') as f:
+        json.dump({"jobs": jobs, "updated_at": datetime.now().isoformat()}, f, indent=2)
+
+
+def create_job(
+    prompt: str,
+    schedule: str,
+    name: Optional[str] = None,
+    repeat: Optional[int] = None
+) -> Dict[str, Any]:
+    """
+    Create a new cron job.
+    
+    Args:
+        prompt: The prompt to run (must be self-contained)
+        schedule: Schedule string (see parse_schedule)
+        name: Optional friendly name
+        repeat: How many times to run (None = forever, 1 = once)
+    
+    Returns:
+        The created job dict
+    """
+    parsed_schedule = parse_schedule(schedule)
+    
+    # Auto-set repeat=1 for one-shot schedules if not specified
+    if parsed_schedule["kind"] == "once" and repeat is None:
+        repeat = 1
+    
+    job_id = uuid.uuid4().hex[:12]
+    now = datetime.now().isoformat()
+    
+    job = {
+        "id": job_id,
+        "name": name or prompt[:50].strip(),
+        "prompt": prompt,
+        "schedule": parsed_schedule,
+        "schedule_display": parsed_schedule.get("display", schedule),
+        "repeat": {
+            "times": repeat,  # None = forever
+            "completed": 0
+        },
+        "enabled": True,
+        "created_at": now,
+        "next_run_at": compute_next_run(parsed_schedule),
+        "last_run_at": None,
+        "last_status": None,
+        "last_error": None
+    }
+    
+    jobs = load_jobs()
+    jobs.append(job)
+    save_jobs(jobs)
+    
+    return job
+
+
+def get_job(job_id: str) -> Optional[Dict[str, Any]]:
+    """Get a job by ID."""
+    jobs = load_jobs()
+    for job in jobs:
+        if job["id"] == job_id:
+            return job
+    return None
+
+
+def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]:
+    """List all jobs, optionally including disabled ones."""
+    jobs = load_jobs()
+    if not include_disabled:
+        jobs = [j for j in jobs if j.get("enabled", True)]
+    return jobs
+
+
+def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Update a job by ID."""
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] == job_id:
+            jobs[i] = {**job, **updates}
+            save_jobs(jobs)
+            return jobs[i]
+    return None
+
+
+def remove_job(job_id: str) -> bool:
+    """Remove a job by ID."""
+    jobs = load_jobs()
+    original_len = len(jobs)
+    jobs = [j for j in jobs if j["id"] != job_id]
+    if len(jobs) < original_len:
+        save_jobs(jobs)
+        return True
+    return False
+
+
+def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
+    """
+    Mark a job as having been run.
+    
+    Updates last_run_at/last_status/last_error, increments the completed count, recomputes
+    next_run_at, and deletes the job once its repeat limit is reached. Unknown IDs are a no-op.
+    """
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] == job_id:
+            now = datetime.now().isoformat()
+            job["last_run_at"] = now
+            job["last_status"] = "ok" if success else "error"
+            job["last_error"] = error if not success else None
+            
+            # Increment completed count
+            if job.get("repeat"):
+                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+                
+                # Check if we've hit the repeat limit
+                times = job["repeat"].get("times")
+                completed = job["repeat"]["completed"]
+                if times is not None and completed >= times:
+                    # Remove the job (limit reached)
+                    jobs.pop(i)
+                    save_jobs(jobs)
+                    return
+            
+            # Compute next run
+            job["next_run_at"] = compute_next_run(job["schedule"], now)
+            
+            # If no next run (one-shot completed), disable
+            if job["next_run_at"] is None:
+                job["enabled"] = False
+            
+            save_jobs(jobs)
+            return
+    
+    # Unknown job_id (e.g. removed while it was running): nothing changed, so don't rewrite jobs.json.
+
+
+def get_due_jobs() -> List[Dict[str, Any]]:
+    """Get all jobs that are due to run now."""
+    now = datetime.now()
+    jobs = load_jobs()
+    due = []
+    
+    for job in jobs:
+        if not job.get("enabled", True):
+            continue
+        
+        next_run = job.get("next_run_at")
+        if not next_run:
+            continue
+        
+        next_run_dt = datetime.fromisoformat(next_run)
+        if next_run_dt <= now:
+            due.append(job)
+    
+    return due
+
+
+def save_job_output(job_id: str, output: str):
+    """Save job output to file."""
+    ensure_dirs()
+    job_output_dir = OUTPUT_DIR / job_id
+    job_output_dir.mkdir(parents=True, exist_ok=True)
+    
+    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    output_file = job_output_dir / f"{timestamp}.md"
+    
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(output)
+    
+    return output_file
diff --git a/cron/scheduler.py b/cron/scheduler.py
new file mode 100644
index 0000000000..ea8f1c40e6
--- /dev/null
+++ b/cron/scheduler.py
@@ -0,0 +1,188 @@
+"""
+Cron job scheduler - executes due jobs.
+
+This module provides:
+- tick(): Run all due jobs once (for system cron integration)
+- run_daemon(): Run continuously, checking every 60 seconds
+"""
+
+import os
+import sys
+import time
+import traceback
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from cron.jobs import get_due_jobs, mark_job_run, save_job_output
+
+
+def run_job(job: dict) -> tuple[bool, str, Optional[str]]:
+    """
+    Execute a single cron job.
+    
+    Returns:
+        Tuple of (success, output, error_message)
+    """
+    from run_agent import AIAgent
+    
+    job_id = job["id"]
+    job_name = job["name"]
+    prompt = job["prompt"]
+    
+    print(f"[cron] Running job '{job_name}' (ID: {job_id})")
+    print(f"[cron] Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
+    
+    try:
+        # Create agent with default settings
+        # Jobs run in isolated sessions (no prior context)
+        agent = AIAgent(
+            model=os.getenv("HERMES_MODEL", "anthropic/claude-sonnet-4"),
+            quiet_mode=True,
+            session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        )
+        
+        # Run the conversation
+        result = agent.run_conversation(prompt)
+        
+        # Extract final response
+        final_response = result.get("final_response", "")
+        if not final_response:
+            final_response = "(No response generated)"
+        
+        # Build output document
+        output = f"""# Cron Job: {job_name}
+
+**Job ID:** {job_id}
+**Run Time:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+**Schedule:** {job.get('schedule_display', 'N/A')}
+
+## Prompt
+
+{prompt}
+
+## Response
+
+{final_response}
+"""
+        
+        print(f"[cron] Job '{job_name}' completed successfully")
+        return True, output, None
+        
+    except Exception as e:
+        error_msg = f"{type(e).__name__}: {str(e)}"
+        print(f"[cron] Job '{job_name}' failed: {error_msg}")
+        
+        # Build error output
+        output = f"""# Cron Job: {job_name} (FAILED)
+
+**Job ID:** {job_id}
+**Run Time:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+**Schedule:** {job.get('schedule_display', 'N/A')}
+
+## Prompt
+
+{prompt}
+
+## Error
+
+```
+{error_msg}
+
+{traceback.format_exc()}
+```
+"""
+        return False, output, error_msg
+
+
+def tick(verbose: bool = True) -> int:
+    """
+    Check and run all due jobs.
+    
+    This is designed to be called by system cron every minute:
+        */1 * * * * cd ~/hermes-agent && python -c "from cron import tick; tick()"
+    
+    Args:
+        verbose: Whether to print status messages
+    
+    Returns:
+        Number of jobs executed
+    """
+    due_jobs = get_due_jobs()
+    
+    if verbose and not due_jobs:
+        print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - No jobs due")
+        return 0
+    
+    if verbose:
+        print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - {len(due_jobs)} job(s) due")
+    
+    executed = 0
+    for job in due_jobs:
+        try:
+            success, output, error = run_job(job)
+            
+            # Save output to file
+            output_file = save_job_output(job["id"], output)
+            if verbose:
+                print(f"[cron] Output saved to: {output_file}")
+            
+            # Mark job as run (handles repeat counting, next_run computation)
+            mark_job_run(job["id"], success, error)
+            executed += 1
+            
+        except Exception as e:
+            print(f"[cron] Error processing job {job['id']}: {e}")
+            mark_job_run(job["id"], False, str(e))
+    
+    return executed
+
+
+def run_daemon(check_interval: int = 60, verbose: bool = True):
+    """
+    Run the cron daemon continuously.
+    
+    Checks for due jobs every `check_interval` seconds.
+    
+    Args:
+        check_interval: Seconds between checks (default: 60)
+        verbose: Whether to print status messages
+    """
+    print(f"[cron] Starting daemon (checking every {check_interval}s)")
+    print(f"[cron] Press Ctrl+C to stop")
+    print()
+    
+    try:
+        while True:
+            try:
+                tick(verbose=verbose)
+            except Exception as e:
+                print(f"[cron] Tick error: {e}")
+            
+            time.sleep(check_interval)
+            
+    except KeyboardInterrupt:
+        print("\n[cron] Daemon stopped")
+
+
+if __name__ == "__main__":
+    # Allow running directly: python cron/scheduler.py [daemon|tick]
+    import argparse
+    
+    parser = argparse.ArgumentParser(description="Hermes Cron Scheduler")
+    parser.add_argument("mode", choices=["daemon", "tick"], default="tick", nargs="?",
+                        help="Mode: 'tick' to run once, 'daemon' to run continuously")
+    parser.add_argument("--interval", type=int, default=60,
+                        help="Check interval in seconds for daemon mode")
+    parser.add_argument("--quiet", "-q", action="store_true",
+                        help="Suppress status messages")
+    
+    args = parser.parse_args()
+    
+    if args.mode == "daemon":
+        run_daemon(check_interval=args.interval, verbose=not args.quiet)
+    else:
+        tick(verbose=not args.quiet)
diff --git a/model_tools.py b/model_tools.py
index 7f752318aa..9878951d32 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -38,6 +38,17 @@ from tools.vision_tools import vision_analyze_tool, check_vision_requirements
 from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
 from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
 from tools.skills_tool import skills_categories, skills_list, skill_view, check_skills_requirements, SKILLS_TOOL_DESCRIPTION
+# Cronjob management tools (CLI-only)
+from tools.cronjob_tools import (
+    schedule_cronjob,
+    list_cronjobs,
+    remove_cronjob,
+    check_cronjob_requirements,
+    get_cronjob_tool_definitions,
+    SCHEDULE_CRONJOB_SCHEMA,
+    LIST_CRONJOBS_SCHEMA,
+    REMOVE_CRONJOB_SCHEMA
+)
 # Browser automation tools (agent-browser + Browserbase)
 from tools.browser_tool import (
     browser_navigate,
@@ -313,6 +324,22 @@ def get_browser_tool_definitions() -> List[Dict[str, Any]]:
     return [{"type": "function", "function": schema} for schema in BROWSER_TOOL_SCHEMAS]
 
 
+def get_cronjob_tool_definitions_formatted() -> List[Dict[str, Any]]:
+    """
+    Get tool definitions for cronjob management tools in OpenAI's expected format.
+    
+    These tools are only available in the hermes-cli toolset (interactive CLI mode).
+    
+    Returns:
+        List[Dict]: List of cronjob tool definitions compatible with OpenAI API
+    """
+    return [{"type": "function", "function": schema} for schema in [
+        SCHEDULE_CRONJOB_SCHEMA,
+        LIST_CRONJOBS_SCHEMA,
+        REMOVE_CRONJOB_SCHEMA
+    ]]
+
+
 def get_all_tool_names() -> List[str]:
     """
     Get the names of all available tools across all toolsets.
@@ -355,6 +382,12 @@ def get_all_tool_names() -> List[str]:
             "browser_vision"
         ])
     
+    # Cronjob management tools (CLI-only, checked at runtime)
+    if check_cronjob_requirements():
+        tool_names.extend([
+            "schedule_cronjob", "list_cronjobs", "remove_cronjob"
+        ])
+    
     return tool_names
 
 
@@ -389,7 +422,11 @@ def get_toolset_for_tool(tool_name: str) -> str:
         "browser_press": "browser_tools",
         "browser_close": "browser_tools",
         "browser_get_images": "browser_tools",
-        "browser_vision": "browser_tools"
+        "browser_vision": "browser_tools",
+        # Cronjob management tools
+        "schedule_cronjob": "cronjob_tools",
+        "list_cronjobs": "cronjob_tools",
+        "remove_cronjob": "cronjob_tools"
     }
     
     return toolset_mapping.get(tool_name, "unknown")
@@ -462,6 +499,11 @@ def get_tool_definitions(
         for tool in get_browser_tool_definitions():
             all_available_tools_map[tool["function"]["name"]] = tool
     
+    # Cronjob management tools (CLI-only)
+    if check_cronjob_requirements():
+        for tool in get_cronjob_tool_definitions_formatted():
+            all_available_tools_map[tool["function"]["name"]] = tool
+    
     # Determine which tools to include based on toolsets
     tools_to_include = set()
     
@@ -474,7 +516,7 @@ def get_tool_definitions(
                 print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
             else:
                 # Try legacy compatibility
-                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools"]:
+                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools", "cronjob_tools"]:
                     # Map legacy names to new system
                     legacy_map = {
                         "web_tools": ["web_search", "web_extract"],
@@ -488,7 +530,8 @@ def get_tool_definitions(
                             "browser_type", "browser_scroll", "browser_back",
                             "browser_press", "browser_close", "browser_get_images",
                             "browser_vision"
-                        ]
+                        ],
+                        "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.update(legacy_tools)
@@ -516,7 +559,7 @@ def get_tool_definitions(
                 print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
             else:
                 # Try legacy compatibility
-                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools"]:
+                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools", "cronjob_tools"]:
                     legacy_map = {
                         "web_tools": ["web_search", "web_extract"],
                         "terminal_tools": ["terminal"],
@@ -529,7 +572,8 @@ def get_tool_definitions(
                             "browser_type", "browser_scroll", "browser_back",
                             "browser_press", "browser_close", "browser_get_images",
                             "browser_vision"
-                        ]
+                        ],
+                        "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.difference_update(legacy_tools)
@@ -792,6 +836,48 @@ def handle_browser_function_call(
     return json.dumps({"error": f"Unknown browser function: {function_name}"}, ensure_ascii=False)
 
 
+def handle_cronjob_function_call(
+    function_name: str,
+    function_args: Dict[str, Any],
+    task_id: Optional[str] = None
+) -> str:
+    """
+    Dispatch a cronjob tool call to schedule_cronjob, list_cronjobs, or remove_cronjob.
+    
+    These tools are only available in interactive CLI mode (hermes-cli toolset).
+    
+    Args:
+        function_name (str): Name of the cronjob function to call
+        function_args (Dict): Arguments for the function; absent keys fall back to the .get() defaults below
+        task_id (str): Task identifier, forwarded unchanged to the selected tool function
+    
+    Returns:
+        str: The tool's JSON string, or a JSON error object for an unknown function name
+    """
+    if function_name == "schedule_cronjob":
+        return schedule_cronjob(
+            prompt=function_args.get("prompt", ""),
+            schedule=function_args.get("schedule", ""),
+            name=function_args.get("name"),
+            repeat=function_args.get("repeat"),
+            task_id=task_id
+        )
+    
+    elif function_name == "list_cronjobs":
+        return list_cronjobs(
+            include_disabled=function_args.get("include_disabled", False),
+            task_id=task_id
+        )
+    
+    elif function_name == "remove_cronjob":
+        return remove_cronjob(
+            job_id=function_args.get("job_id", ""),
+            task_id=task_id
+        )
+    
+    return json.dumps({"error": f"Unknown cronjob function: {function_name}"}, ensure_ascii=False)
+
+
 def handle_function_call(
     function_name: str, 
     function_args: Dict[str, Any], 
@@ -851,6 +937,10 @@ def handle_function_call(
         ]:
             return handle_browser_function_call(function_name, function_args, task_id, user_task)
 
+        # Route cronjob management tools
+        elif function_name in ["schedule_cronjob", "list_cronjobs", "remove_cronjob"]:
+            return handle_cronjob_function_call(function_name, function_args, task_id)
+
         else:
             error_msg = f"Unknown function: {function_name}"
             print(f"❌ {error_msg}")
@@ -916,6 +1006,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
             ],
             "description": "Browser automation for web interaction using agent-browser CLI with Browserbase cloud execution",
             "requirements": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID", "agent-browser npm package"]
+        },
+        "cronjob_tools": {
+            "available": check_cronjob_requirements(),
+            "tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
+            "description": "Schedule and manage automated tasks (cronjobs) - only available in interactive CLI mode",
+            "requirements": ["HERMES_INTERACTIVE=1 (set automatically by cli.py)"]
         }
     }
     
@@ -935,7 +1031,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
         "moa_tools": check_moa_requirements(),
         "image_tools": check_image_generation_requirements(),
         "skills_tools": check_skills_requirements(),
-        "browser_tools": check_browser_requirements()
+        "browser_tools": check_browser_requirements(),
+        "cronjob_tools": check_cronjob_requirements()
     }
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 828aeaba22..4bc28b6db8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,5 +30,5 @@ platformdirs
 # modal
 # boto3
 
-# Optional: Legacy Hecate terminal backend
-# git+ssh://git@github.com/NousResearch/hecate.git
+# Optional: For cron expression parsing (cronjob scheduling)
+croniter
\ No newline at end of file
diff --git a/tools/__init__.py b/tools/__init__.py
index 8d2ee3b400..3365dab441 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -83,6 +83,18 @@ from .browser_tool import (
     BROWSER_TOOL_SCHEMAS
 )
 
+# Cronjob management tools (CLI-only, hermes-cli toolset)
+from .cronjob_tools import (
+    schedule_cronjob,
+    list_cronjobs,
+    remove_cronjob,
+    check_cronjob_requirements,
+    get_cronjob_tool_definitions,
+    SCHEDULE_CRONJOB_SCHEMA,
+    LIST_CRONJOBS_SCHEMA,
+    REMOVE_CRONJOB_SCHEMA
+)
+
 __all__ = [
     # Web tools
     'web_search_tool',
@@ -131,5 +143,14 @@ __all__ = [
     'get_active_browser_sessions',
     'check_browser_requirements',
     'BROWSER_TOOL_SCHEMAS',
+    # Cronjob management tools (CLI-only)
+    'schedule_cronjob',
+    'list_cronjobs',
+    'remove_cronjob',
+    'check_cronjob_requirements',
+    'get_cronjob_tool_definitions',
+    'SCHEDULE_CRONJOB_SCHEMA',
+    'LIST_CRONJOBS_SCHEMA',
+    'REMOVE_CRONJOB_SCHEMA',
 ]
 
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
new file mode 100644
index 0000000000..f5573082d6
--- /dev/null
+++ b/tools/cronjob_tools.py
@@ -0,0 +1,341 @@
+"""
+Cron job management tools for Hermes Agent.
+
+These tools allow the agent to schedule, list, and remove automated tasks.
+Only available when running via CLI (hermes-cli toolset).
+
+IMPORTANT: Cronjobs run in isolated sessions with NO prior context.
+The prompt must contain ALL necessary information.
+"""
+
+import json
+import os
+from typing import Optional
+
+# Put the repository root on sys.path so the top-level `cron` package resolves
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from cron.jobs import create_job, get_job, list_jobs, remove_job
+
+
+# =============================================================================
+# Tool: schedule_cronjob
+# =============================================================================
+
+def schedule_cronjob(
+    prompt: str,
+    schedule: str,
+    name: Optional[str] = None,
+    repeat: Optional[int] = None,
+    task_id: str = None
+) -> str:
+    """
+    Schedule an automated task to run the agent on a schedule.
+    
+    IMPORTANT: When the cronjob runs, it starts a COMPLETELY FRESH session.
+    The agent will have NO memory of this conversation or any prior context.
+    Therefore, the prompt MUST contain ALL necessary information:
+    - Full context of what needs to be done
+    - Specific file paths, URLs, or identifiers
+    - Clear success criteria
+    - Any relevant background information
+    
+    BAD prompt:  "Check on that server issue"
+    GOOD prompt: "SSH into server 192.168.1.100 as user 'deploy', check if nginx 
+                  is running with 'systemctl status nginx', and verify the site 
+                  https://example.com returns HTTP 200. Report any issues found."
+    
+    Args:
+        prompt: Complete, self-contained instructions for the future agent.
+                Must include ALL context needed - the agent won't remember anything.
+        schedule: When to run. Either:
+                  - Duration for one-shot: "30m", "2h", "1d" (runs once)
+                  - Interval: "every 30m", "every 2h" (recurring)
+                  - Cron expression: "0 9 * * *" (daily at 9am)
+                  - ISO timestamp: "2026-02-03T14:00:00" (one-shot at specific time)
+        name: Optional human-friendly name for the job (for listing/management)
+        repeat: How many times to run. Omit for default behavior (one-shot
+                schedules run once, intervals/cron run forever); e.g. set
+                repeat=5 to run 5 times then auto-delete.
+        task_id: Accepted so the tool dispatcher can pass it; not used in this function.
+    
+    Returns:
+        JSON string: success=true with job_id, next_run_at and message, or success=false with error
+    """
+    try:
+        job = create_job(
+            prompt=prompt,
+            schedule=schedule,
+            name=name,
+            repeat=repeat
+        )
+        
+        # Repeat display text: no limit -> "forever", 1 -> "once", N -> "N times"
+        times = job["repeat"].get("times")
+        if times is None:
+            repeat_display = "forever"
+        elif times == 1:
+            repeat_display = "once"
+        else:
+            repeat_display = f"{times} times"
+        
+        return json.dumps({
+            "success": True,
+            "job_id": job["id"],
+            "name": job["name"],
+            "schedule": job["schedule_display"],
+            "repeat": repeat_display,
+            "next_run_at": job["next_run_at"],
+            "message": f"Cronjob '{job['name']}' created. It will run {repeat_display}, next at {job['next_run_at']}."
+        }, indent=2)
+        
+    except Exception as e:  # any failure is returned as a JSON error instead of being raised
+        return json.dumps({
+            "success": False,
+            "error": str(e)
+        }, indent=2)
+
+
+SCHEDULE_CRONJOB_SCHEMA = {  # tool schema for schedule_cronjob: name, description, JSON-Schema-style parameters
+    "name": "schedule_cronjob",
+    "description": """Schedule an automated task to run the agent on a schedule.
+
+⚠️ CRITICAL: The cronjob runs in a FRESH SESSION with NO CONTEXT from this conversation.
+The prompt must be COMPLETELY SELF-CONTAINED with ALL necessary information including:
+- Full context and background
+- Specific file paths, URLs, server addresses
+- Clear instructions and success criteria
+- Any credentials or configuration details
+
+The future agent will NOT remember anything from the current conversation.
+
+SCHEDULE FORMATS:
+- One-shot: "30m", "2h", "1d" (runs once after delay)
+- Interval: "every 30m", "every 2h" (recurring)  
+- Cron: "0 9 * * *" (cron expression for precise scheduling)
+- Timestamp: "2026-02-03T14:00:00" (specific date/time)
+
+REPEAT BEHAVIOR:
+- One-shot schedules: run once by default
+- Intervals/cron: run forever by default
+- Set repeat=N to run exactly N times then auto-delete
+
+Use for: reminders, periodic checks, scheduled reports, automated maintenance.""",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "prompt": {
+                "type": "string",
+                "description": "Complete, self-contained instructions. Must include ALL context - the future agent will have NO memory of this conversation."
+            },
+            "schedule": {
+                "type": "string",
+                "description": "When to run: '30m' (once in 30min), 'every 30m' (recurring), '0 9 * * *' (cron), or ISO timestamp"
+            },
+            "name": {
+                "type": "string",
+                "description": "Optional human-friendly name for the job"
+            },
+            "repeat": {
+                "type": "integer",
+                "description": "How many times to run. Omit for default (once for one-shot, forever for recurring). Set to N for exactly N runs."
+            }
+        },
+        "required": ["prompt", "schedule"]
+    }
+}
+
+
+# =============================================================================
+# Tool: list_cronjobs
+# =============================================================================
+
+def list_cronjobs(include_disabled: bool = False, task_id: str = None) -> str:
+    """
+    List all scheduled cronjobs.
+    
+    Returns information about each job including:
+    - Job ID (needed for removal)
+    - Name and a prompt preview (first 100 characters, then "...")
+    - Schedule (human-readable)
+    - Repeat status (completed/total or 'forever')
+    - Next scheduled run time
+    - Last run time and status (if any), plus the enabled flag
+    
+    Args:
+        include_disabled: Whether to include disabled/completed jobs (task_id is accepted but unused)
+    
+    Returns:
+        JSON object with success, count and a jobs array; success=false with error on failure
+    """
+    try:
+        jobs = list_jobs(include_disabled=include_disabled)
+        
+        formatted_jobs = []
+        for job in jobs:
+            # Repeat status: "forever" when no limit is set, otherwise "completed/times"
+            times = job["repeat"].get("times")
+            completed = job["repeat"].get("completed", 0)
+            if times is None:
+                repeat_status = "forever"
+            else:
+                repeat_status = f"{completed}/{times}"
+            
+            formatted_jobs.append({
+                "job_id": job["id"],
+                "name": job["name"],
+                "prompt_preview": job["prompt"][:100] + "..." if len(job["prompt"]) > 100 else job["prompt"],
+                "schedule": job["schedule_display"],
+                "repeat": repeat_status,
+                "next_run_at": job.get("next_run_at"),
+                "last_run_at": job.get("last_run_at"),
+                "last_status": job.get("last_status"),
+                "enabled": job.get("enabled", True)
+            })
+        
+        return json.dumps({
+            "success": True,
+            "count": len(formatted_jobs),
+            "jobs": formatted_jobs
+        }, indent=2)
+        
+    except Exception as e:  # any failure is returned as a JSON error instead of being raised
+        return json.dumps({
+            "success": False,
+            "error": str(e)
+        }, indent=2)
+
+
+LIST_CRONJOBS_SCHEMA = {  # tool schema for list_cronjobs; its only parameter is optional
+    "name": "list_cronjobs",
+    "description": """List all scheduled cronjobs with their IDs, schedules, and status.
+
+Use this to:
+- See what jobs are currently scheduled
+- Find job IDs for removal with remove_cronjob
+- Check job status and next run times
+
+Returns job_id, name, schedule, repeat status, next/last run times.""",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "include_disabled": {
+                "type": "boolean",
+                "description": "Include disabled/completed jobs in the list (default: false)"
+            }
+        },
+        "required": []
+    }
+}
+
+
+# =============================================================================
+# Tool: remove_cronjob
+# =============================================================================
+
+def remove_cronjob(job_id: str, task_id: str = None) -> str:
+    """
+    Remove a scheduled cronjob by its ID.
+    
+    Use list_cronjobs first to find the job_id of the job you want to remove.
+    
+    Args:
+        job_id: The ID of the job to remove (from list_cronjobs output); task_id is accepted but unused
+    
+    Returns:
+        JSON confirmation of removal, or success=false with error if the job is missing or removal fails
+    """
+    try:
+        job = get_job(job_id)  # fetched first so name/schedule can be echoed back after removal
+        if not job:
+            return json.dumps({
+                "success": False,
+                "error": f"Job with ID '{job_id}' not found. Use list_cronjobs to see available jobs."
+            }, indent=2)
+        
+        removed = remove_job(job_id)
+        if removed:
+            return json.dumps({
+                "success": True,
+                "message": f"Cronjob '{job['name']}' (ID: {job_id}) has been removed.",
+                "removed_job": {
+                    "id": job_id,
+                    "name": job["name"],
+                    "schedule": job["schedule_display"]
+                }
+            }, indent=2)
+        else:
+            return json.dumps({
+                "success": False,
+                "error": f"Failed to remove job '{job_id}'"
+            }, indent=2)
+            
+    except Exception as e:  # any failure is returned as a JSON error instead of being raised
+        return json.dumps({
+            "success": False,
+            "error": str(e)
+        }, indent=2)
+
+
+REMOVE_CRONJOB_SCHEMA = {  # tool schema for remove_cronjob; job_id is the only (required) parameter
+    "name": "remove_cronjob",
+    "description": """Remove a scheduled cronjob by its ID.
+
+Use list_cronjobs first to find the job_id of the job you want to remove.
+Jobs that have completed their repeat count are auto-removed, but you can
+use this to cancel a job before it completes.""",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "job_id": {
+                "type": "string",
+                "description": "The ID of the cronjob to remove (from list_cronjobs output)"
+            }
+        },
+        "required": ["job_id"]
+    }
+}
+
+
+# =============================================================================
+# Requirements check
+# =============================================================================
+
+def check_cronjob_requirements() -> bool:
+    """
+    Check if cronjob tools can be used.
+    
+    Returns True only when the HERMES_INTERACTIVE environment variable is exactly "1" (interactive CLI mode).
+    """
+    return os.getenv("HERMES_INTERACTIVE") == "1"
+
+
+# =============================================================================
+# Exports
+# =============================================================================
+
+def get_cronjob_tool_definitions():
+    """Return the schedule, list and remove cronjob tool schemas as a list, in that order."""
+    return [
+        SCHEDULE_CRONJOB_SCHEMA,
+        LIST_CRONJOBS_SCHEMA,
+        REMOVE_CRONJOB_SCHEMA
+    ]
+
+
+# Manual smoke test, run only when this file is executed directly
+if __name__ == "__main__":
+    # NOTE: this goes through the real create_job() path, so a job named "Test Job" is actually created
+    print("Testing schedule_cronjob:")
+    result = schedule_cronjob(
+        prompt="Test prompt for cron job",
+        schedule="5m",
+        name="Test Job"
+    )
+    print(result)
+    
+    print("\nTesting list_cronjobs:")
+    result = list_cronjobs()
+    print(result)
diff --git a/toolsets.py b/toolsets.py
index 0390c02e47..b74b2fd386 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -84,6 +84,12 @@ TOOLSETS = {
         "includes": []
     },
     
+    "cronjob": {
+        "description": "Cronjob management tools - schedule, list, and remove automated tasks (CLI-only)",
+        "tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
+        "includes": []
+    },
+    
     # Scenario-specific toolsets
     
     "debugging": {
@@ -96,6 +102,36 @@ TOOLSETS = {
         "description": "Safe toolkit without terminal access",
         "tools": ["mixture_of_agents"],
         "includes": ["web", "vision", "creative"]
+    },
+    
+    # ==========================================================================
+    # CLI-specific toolsets (only available when running via cli.py)
+    # ==========================================================================
+    
+    "hermes-cli": {
+        "description": "Full interactive CLI toolset - all default tools plus cronjob management",
+        "tools": [
+            # Web tools
+            "web_search", "web_extract",
+            # Terminal
+            "terminal",
+            # Vision
+            "vision_analyze",
+            # Image generation
+            "image_generate",
+            # MoA
+            "mixture_of_agents",
+            # Skills
+            "skills_categories", "skills_list", "skill_view",
+            # Browser
+            "browser_navigate", "browser_snapshot", "browser_click",
+            "browser_type", "browser_scroll", "browser_back",
+            "browser_press", "browser_close", "browser_get_images",
+            "browser_vision",
+            # Cronjob management (CLI-only)
+            "schedule_cronjob", "list_cronjobs", "remove_cronjob"
+        ],
+        "includes": []
     }
 }
 

From 619c72e566fa4f79e6792c3fab71d08794292872 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:01:51 -0800
Subject: [PATCH 05/48] Enhance CLI with multi-platform messaging integration
 and configuration management

- Updated CLI to load configuration from user-specific and project-specific YAML files, prioritizing user settings.
- Introduced a new command `/platforms` to display the status of connected messaging platforms (Telegram, Discord, WhatsApp).
- Implemented a gateway system for handling messaging interactions, including session management and delivery routing for cron job outputs.
- Added support for environment variable configuration and a dedicated gateway configuration file for advanced settings.
- Enhanced documentation in README.md and added a new messaging.md file to guide users on platform integrations and setup.
- Updated toolsets to include platform-specific capabilities for Telegram, Discord, and WhatsApp, ensuring secure and tailored interactions.
---
 README.md                                  | 982 ++++++++-------------
 TODO.md                                    |  35 +-
 cli.py                                     |  93 +-
 cron/jobs.py                               |  15 +-
 docs/messaging.md                          | 461 ++++++++++
 gateway/__init__.py                        |  35 +
 gateway/config.py                          | 333 +++++++
 gateway/delivery.py                        | 318 +++++++
 gateway/platforms/__init__.py              |  17 +
 gateway/platforms/base.py                  | 274 ++++++
 gateway/platforms/discord.py               | 297 +++++++
 gateway/platforms/telegram.py              | 284 ++++++
 gateway/platforms/whatsapp.py              | 327 +++++++
 gateway/run.py                             | 375 ++++++++
 gateway/session.py                         | 522 +++++++++++
 hermes_agent.egg-info/PKG-INFO             | 868 ++++++++++++++++++
 hermes_agent.egg-info/SOURCES.txt          |  47 +
 hermes_agent.egg-info/dependency_links.txt |   1 +
 hermes_agent.egg-info/entry_points.txt     |   3 +
 hermes_agent.egg-info/requires.txt         |  35 +
 hermes_agent.egg-info/top_level.txt        |  11 +
 hermes_cli/__init__.py                     |  14 +
 hermes_cli/config.py                       | 392 ++++++++
 hermes_cli/cron.py                         | 131 +++
 hermes_cli/doctor.py                       | 278 ++++++
 hermes_cli/gateway.py                      | 371 ++++++++
 hermes_cli/main.py                         | 432 +++++++++
 hermes_cli/setup.py                        | 448 ++++++++++
 hermes_cli/status.py                       | 239 +++++
 pyproject.toml                             |   8 +-
 requirements.txt                           |  12 +-
 scripts/hermes-gateway                     | 414 +++++++++
 scripts/install.ps1                        | 371 ++++++++
 scripts/install.sh                         | 520 +++++++++++
 setup-hermes.sh                            | 259 +++---
 tools/cronjob_tools.py                     |  37 +-
 toolsets.py                                |  57 ++
 37 files changed, 8559 insertions(+), 757 deletions(-)
 create mode 100644 docs/messaging.md
 create mode 100644 gateway/__init__.py
 create mode 100644 gateway/config.py
 create mode 100644 gateway/delivery.py
 create mode 100644 gateway/platforms/__init__.py
 create mode 100644 gateway/platforms/base.py
 create mode 100644 gateway/platforms/discord.py
 create mode 100644 gateway/platforms/telegram.py
 create mode 100644 gateway/platforms/whatsapp.py
 create mode 100644 gateway/run.py
 create mode 100644 gateway/session.py
 create mode 100644 hermes_agent.egg-info/PKG-INFO
 create mode 100644 hermes_agent.egg-info/SOURCES.txt
 create mode 100644 hermes_agent.egg-info/dependency_links.txt
 create mode 100644 hermes_agent.egg-info/entry_points.txt
 create mode 100644 hermes_agent.egg-info/requires.txt
 create mode 100644 hermes_agent.egg-info/top_level.txt
 create mode 100644 hermes_cli/__init__.py
 create mode 100644 hermes_cli/config.py
 create mode 100644 hermes_cli/cron.py
 create mode 100644 hermes_cli/doctor.py
 create mode 100644 hermes_cli/gateway.py
 create mode 100644 hermes_cli/main.py
 create mode 100644 hermes_cli/setup.py
 create mode 100644 hermes_cli/status.py
 create mode 100755 scripts/hermes-gateway
 create mode 100644 scripts/install.ps1
 create mode 100755 scripts/install.sh

diff --git a/README.md b/README.md
index af28483717..aa46830319 100644
--- a/README.md
+++ b/README.md
@@ -1,724 +1,488 @@
-# Hermes Agent
+# Hermes Agent 🦋
 
-An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.
+An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system, messaging integrations, and scheduled tasks.
+
+## Quick Install
+
+**Linux/macOS:**
+```bash
+curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
+```
+
+**Windows (PowerShell):**
+```powershell
+irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex
+```
+
+The installer will:
+- Clone to `~/.hermes-agent`
+- Create a virtual environment
+- Install all dependencies
+- Run the interactive setup wizard
+- Add `hermes` to your PATH
+
+After installation, reload your shell and run:
+```bash
+hermes setup    # Configure API keys (if you skipped during install)
+hermes          # Start chatting!
+```
+
+---
+
+## Configuration
+
+All your settings are stored in `~/.hermes/` for easy access:
+
+```
+~/.hermes/
+├── config.yaml     # Settings (model, terminal, compression, etc.)
+├── .env            # API keys and secrets
+├── cron/           # Scheduled jobs
+├── sessions/       # Gateway sessions
+└── logs/           # Logs
+```
+
+### Managing Configuration
+
+```bash
+hermes config              # View current configuration
+hermes config edit         # Open config.yaml in your editor
+hermes config set KEY VAL  # Set a specific value
+
+# Examples:
+hermes config set model anthropic/claude-opus-4
+hermes config set terminal.backend docker
+hermes config set OPENROUTER_API_KEY sk-or-...  # Saves to .env
+```
+
+### Required API Keys
+
+You need at least one LLM provider:
+
+| Provider | Get Key | Env Variable |
+|----------|---------|--------------|
+| **OpenRouter** (recommended) | [openrouter.ai/keys](https://openrouter.ai/keys) | `OPENROUTER_API_KEY` |
+| Anthropic | [console.anthropic.com](https://console.anthropic.com/) | `ANTHROPIC_API_KEY` |
+| OpenAI | [platform.openai.com](https://platform.openai.com/api-keys) | `OPENAI_API_KEY` |
+
+### Optional API Keys
+
+| Feature | Provider | Env Variable |
+|---------|----------|--------------|
+| Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
+| Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
+| Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |
+| Messaging | Telegram, Discord | `TELEGRAM_BOT_TOKEN`, `DISCORD_BOT_TOKEN` |
+
+---
+
+## Commands
+
+```bash
+hermes                    # Interactive chat (default)
+hermes chat -q "Hello"    # Single query mode
+hermes setup              # Configure API keys and settings
+hermes config             # View/edit configuration
+hermes status             # Show configuration status
+hermes doctor             # Diagnose issues
+hermes update             # Update to latest version
+hermes gateway            # Start messaging gateway
+hermes cron list          # View scheduled jobs
+hermes version            # Show version info
+```
+
+### CLI Commands (inside chat)
+
+| Command | Description |
+|---------|-------------|
+| `/help` | Show available commands |
+| `/tools` | List available tools |
+| `/model [name]` | Show or change model |
+| `/personality [name]` | Set personality (kawaii, pirate, etc.) |
+| `/clear` | Clear screen and reset |
+| `/cron` | Manage scheduled tasks |
+| `/config` | Show current configuration |
+| `/quit` | Exit |
+
+---
 
 ## Features
 
-- **Interactive CLI**: Beautiful terminal interface with animated feedback, personalities, and session management
-- **Web Tools**: Search, extract content, and crawl websites
-- **Terminal Tools**: Execute commands via local, Docker, Singularity, Modal, or SSH backends
-- **Browser Tools**: Automate web browsers to navigate, click, type, and extract content
-- **Vision Tools**: Analyze images from URLs
-- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
-- **Creative Tools**: Generate images from text prompts
-- **Skills Tools**: On-demand knowledge documents with progressive disclosure
-- **Toolsets System**: Organize tools into logical groups for different scenarios
-- **Batch Processing**: Process datasets in parallel with checkpointing and statistics tracking
-- **Ephemeral System Prompts**: Guide model behavior without polluting training datasets
+### 🛠️ Tools & Toolsets
 
-## Quick Start (CLI)
+Tools are organized into logical **toolsets**:
 
 ```bash
-# After setup (see below), just run:
-./hermes
+# Use specific toolsets
+hermes --toolsets "web,terminal"
 
-# Or with options:
-./hermes --model "anthropic/claude-sonnet-4" --toolsets "web,terminal"
+# List all toolsets
+hermes --list-tools
 ```
 
-The CLI provides:
-- Animated spinners during thinking and tool execution
-- Kawaii-style feedback messages
-- `/commands` for configuration, history, and session management
-- Customizable personalities (`/personality kawaii`, `/personality pirate`, etc.)
-- Persistent configuration via `cli-config.yaml`
+**Available toolsets:** `web`, `terminal`, `browser`, `vision`, `creative`, `reasoning`, `skills`, `cronjob`, and more.
 
-## Setup
+### 🖥️ Terminal Backend
 
-### 1. Clone the Repository
-```bash
-# Clone with submodules (recommended)
-git clone --recurse-submodules https://github.com/NousResearch/Hermes-Agent.git
-cd Hermes-Agent
+The terminal tool can execute commands in different environments:
 
-# Or if already cloned without submodules:
-git submodule update --init --recursive
+| Backend | Description | Use Case |
+|---------|-------------|----------|
+| `local` | Run on your machine (default) | Development, trusted tasks |
+| `docker` | Isolated containers | Security, reproducibility |
+| `ssh` | Remote server | Sandboxing, keep agent away from its own code |
+| `singularity` | HPC containers | Cluster computing, rootless |
+| `modal` | Cloud execution | Serverless, scale |
+
+**Configure in `~/.hermes/config.yaml`:**
+```yaml
+terminal:
+  backend: local    # or: docker, ssh, singularity, modal
+  cwd: "."          # Working directory ("." = current dir)
+  timeout: 180      # Command timeout in seconds
 ```
 
-### 2. Install Dependencies
-```bash
-# Create and activate virtual environment (recommended)
-python3 -m venv venv
-source venv/bin/activate  # On Windows: venv\Scripts\activate
-
-# Install Python packages
-pip install -r requirements.txt
-
-# Install mini-swe-agent for terminal tools
-pip install -e ./mini-swe-agent
-
-# Install Node.js dependencies for browser tools (requires Node.js)
-npm install
+**Docker Backend:**
+```yaml
+terminal:
+  backend: docker
+  docker_image: python:3.11-slim
 ```
 
-### 3. Configure Environment Variables
-```bash
-# Copy the example environment file
-cp .env.example .env
-
-# Edit .env and add your API keys
-nano .env  # or use your preferred editor
+**SSH Backend** (recommended for security - agent can't modify its own code):
+```yaml
+terminal:
+  backend: ssh
 ```
-
-**Required API Keys:**
-- `OPENROUTER_API_KEY` - LLM access via OpenRouter (get at: https://openrouter.ai/keys)
-- `FIRECRAWL_API_KEY` - Web tools (get at: https://firecrawl.dev/)
-- `NOUS_API_KEY` - Vision & reasoning tools (get at: https://inference-api.nousresearch.com/)
-- `FAL_KEY` - Image generation (get at: https://fal.ai/)
-
-**Optional API Keys (for specific features):**
-- `BROWSERBASE_API_KEY` - Browser automation (get at: https://browserbase.com/)
-- `BROWSERBASE_PROJECT_ID` - From Browserbase dashboard
-- `MORPH_API_KEY` - For legacy Hecate terminal backend (get at: https://morph.so/)
-
-### 4. Configure Terminal Backend
-
-The terminal tool uses **mini-swe-agent** environments. Configure in `.env` or `cli-config.yaml`:
-
 ```bash
-# Backend: "local", "docker", "singularity", "modal", or "ssh"
-TERMINAL_ENV=local          # Default: runs on host machine (no isolation)
-TERMINAL_ENV=ssh            # Remote execution via SSH (agent code stays local)
-TERMINAL_ENV=singularity    # Recommended for HPC: Apptainer/Singularity containers
-TERMINAL_ENV=docker         # Isolated Docker containers
-TERMINAL_ENV=modal          # Cloud execution via Modal
-
-# Container image (for docker/singularity/modal backends)
-TERMINAL_DOCKER_IMAGE=python:3.11-slim
-TERMINAL_SINGULARITY_IMAGE=docker://python:3.11-slim
-TERMINAL_TIMEOUT=60
-
-# SSH backend (for ssh)
+# Set credentials in ~/.hermes/.env
 TERMINAL_SSH_HOST=my-server.example.com
 TERMINAL_SSH_USER=myuser
-TERMINAL_SSH_KEY=~/.ssh/id_rsa  # Optional, uses ssh-agent if not set
+TERMINAL_SSH_KEY=~/.ssh/id_rsa
 ```
 
-**Backend Requirements:**
-- **local**: No extra setup (runs directly on your machine, no isolation)
-- **ssh**: SSH access to remote machine (great for sandboxing - agent can't touch its own code)
-- **singularity**: Requires Apptainer or Singularity installed (common on HPC clusters, no root needed)
-- **docker**: Requires Docker installed and user in `docker` group
-- **modal**: Requires Modal account (see setup below)
-
-### Singularity/Apptainer Setup (Recommended for HPC)
-
-Singularity/Apptainer provides rootless container execution, ideal for HPC clusters:
-
+**Singularity/Apptainer** (for HPC clusters):
 ```bash
-# 1. Verify Apptainer is installed
-apptainer --version  # or: singularity --version
+# Pre-build SIF for parallel workers
+apptainer build ~/python.sif docker://python:3.11-slim
 
-# 2. Set up cache directories (important for parallel workers)
-# Use /scratch if available (HPC), otherwise /tmp
-export APPTAINER_CACHEDIR=/scratch/$USER/.apptainer
-export APPTAINER_TMPDIR=/scratch/$USER/.apptainer/tmp
-mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
-
-# 3. Pre-build SIF image (recommended for parallel batch processing)
-# This avoids race conditions when multiple workers start simultaneously
-apptainer build $APPTAINER_CACHEDIR/python-nodejs.sif docker://nikolaik/python-nodejs:python3.11-nodejs20
-
-# 4. Configure .env to use the local SIF
-TERMINAL_ENV=singularity
-TERMINAL_SINGULARITY_IMAGE=/scratch/$USER/.apptainer/python-nodejs.sif
+# Configure
+hermes config set terminal.backend singularity
+hermes config set terminal.singularity_image ~/python.sif
 ```
 
-**Tip:** The batch scripts in `configs/` automatically handle SIF pre-building if `/scratch` is available.
-
-### Modal Cloud Backend Setup
-
-[Modal](https://modal.com) provides serverless cloud compute for running sandboxed environments at scale.
-
+**Modal** (serverless cloud):
 ```bash
-# 1. Install Modal and dependencies
 pip install modal boto3
-
-# 2. Authenticate with Modal (opens browser)
-modal setup
-
-# 3. Set terminal backend to modal in .env
-TERMINAL_ENV=modal
+modal setup  # Authenticate
+hermes config set terminal.backend modal
 ```
 
-Modal uses CLI-based authentication (stored in `~/.modal/`), so no API key is needed in `.env`. After running `modal setup`, commands will automatically execute in Modal's cloud sandboxes.
+**Sudo Support:** If a command needs sudo, you'll be prompted for your password (cached for the session). Alternatively, set `SUDO_PASSWORD` in `~/.hermes/.env`.
 
-### Browser Tools Setup
+### 📱 Messaging Gateway
 
-Browser tools enable the agent to navigate websites, fill forms, click buttons, and extract content. They use [agent-browser](https://github.com/vercel-labs/agent-browser) CLI with [Browserbase](https://browserbase.com) cloud execution.
+Chat with Hermes from Telegram, Discord, or WhatsApp:
 
 ```bash
-# 1. Install Node.js (if not already installed)
-# Use nvm (recommended) or your package manager
+# Configure your bot token
+hermes config set TELEGRAM_BOT_TOKEN "your_token"
 
-# 2. Install agent-browser CLI (choose one option):
-npm install -g agent-browser     # Option A: Global install (recommended)
-npm install                      # Option B: Local install (uses npx fallback)
+# Start the gateway
+hermes gateway
 
-# 3. Get Browserbase credentials
-# Sign up at https://browserbase.com/ and get your:
-# - API Key (from Settings → API Keys)
-# - Project ID (from your project dashboard)
-
-# 4. Add to your .env file:
-BROWSERBASE_API_KEY=your_api_key_here
-BROWSERBASE_PROJECT_ID=your_project_id_here
+# Or install as a service
+hermes gateway install
+hermes gateway start
 ```
 
-**Available Browser Tools:**
+See [docs/messaging.md](docs/messaging.md) for full setup.
 
-| Tool | Description |
-|------|-------------|
-| `browser_navigate` | Navigate to a URL |
-| `browser_snapshot` | Get text-based page snapshot with element refs |
-| `browser_click` | Click an element by ref (e.g., `@e5`) |
-| `browser_type` | Type text into an input field |
-| `browser_scroll` | Scroll up or down |
-| `browser_back` | Go back in browser history |
-| `browser_press` | Press a keyboard key (Enter, Tab, etc.) |
-| `browser_close` | Close the browser session |
-| `browser_get_images` | Get list of images on the page |
+### ⏰ Scheduled Tasks (Cron)
+
+Schedule tasks to run automatically:
 
-**Example Usage:**
 ```bash
-# Use browser tools with web search and vision
-python run_agent.py \
-  --query "Go to amazon.com and find the price of the latest Kindle" \
-  --enabled_toolsets=browser,web,vision
-
-# Use browser-focused distribution
-python batch_runner.py \
-  --dataset_file=browser_tasks.jsonl \
-  --distribution=browser_use \
-  --run_name=browser_run
+# In the CLI
+/cron add 30m "Remind me to check the build"
+/cron add "every 2h" "Check server status"
+/cron add "0 9 * * *" "Morning briefing"
+/cron list
+/cron remove <job_id>
 ```
 
-See `.env.example` for all available configuration options including debug settings.
+The agent can also self-schedule using the `schedule_cronjob` tool.
 
-### Skills Tools
-
-Skills are on-demand knowledge documents the agent can load when needed. They follow a **progressive disclosure** pattern to minimize token usage:
-
-```
-skills/
-├── mlops/                    # Category folder
-│   ├── axolotl/             # Skill folder
-│   │   ├── SKILL.md         # Main instructions (required)
-│   │   ├── references/      # Additional docs, API specs
-│   │   └── templates/       # Output formats, configs
-│   └── vllm/
-│       └── SKILL.md
-```
-
-**Available Skills Tools:**
-
-| Tool | Description |
-|------|-------------|
-| `skills_categories` | List available skill categories (~50 tokens) |
-| `skills_list` | List skills with name + description (~3k tokens for 40 skills) |
-| `skill_view` | Load full skill content, tags, and linked files |
-
-**Example Usage:**
+**Run the scheduler:**
 ```bash
-# Use skills tools
-python run_agent.py \
-  --query "What skills do you have for fine-tuning? Show me the axolotl skill." \
-  --enabled_toolsets=skills
+hermes cron daemon         # Built-in daemon
+# Or add to system cron for reliability
+```
+
+### 🗜️ Context Compression
+
+Long conversations are automatically summarized when approaching context limits:
+
+```yaml
+# In ~/.hermes/config.yaml
+compression:
+  enabled: true
+  threshold: 0.85    # Compress at 85% of limit
+```
+
+### 📝 Session Logging
+
+Every conversation is logged to `~/.hermes-agent/logs/` for debugging:
+
+```
+logs/
+├── session_20260201_143052_a1b2c3.json
+└── ...
+```
+
+### 🌐 Browser Automation
+
+Browser tools let the agent navigate websites, fill forms, click buttons, and extract content using [Browserbase](https://browserbase.com/).
+
+**Setup:**
+```bash
+# 1. Get credentials from browserbase.com
+hermes config set BROWSERBASE_API_KEY your_api_key
+hermes config set BROWSERBASE_PROJECT_ID your_project_id
+
+# 2. Install Node.js dependencies (if not already installed)
+cd ~/.hermes-agent && npm install
+```
+
+**Available tools:** `browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_scroll`, `browser_back`, `browser_press`, `browser_close`, `browser_get_images`
+
+**Example:**
+```bash
+hermes --toolsets browser -q "Go to amazon.com and find the price of the latest Kindle"
+```
+
+### 📚 Skills System
+
+Skills are on-demand knowledge documents the agent can load when needed. They follow a **progressive disclosure** pattern to minimize token usage.
+
+**Using Skills:**
+```bash
+hermes --toolsets skills -q "What skills do you have?"
+hermes --toolsets skills -q "Show me the axolotl skill"
 ```
 
 **Creating Skills:**
 
-Skills use YAML frontmatter for metadata:
-```yaml
+Create `skills/category/skill-name/SKILL.md`:
+```markdown
 ---
 name: my-skill
 description: Brief description shown in skills_list
-tags: [tag1, tag2]
-related_skills: [other-skill]
+tags: [python, automation]
 version: 1.0.0
 ---
+
 # Skill Content
 
 Instructions, examples, and guidelines here...
 ```
 
-Skills can include:
-- `references/` - Additional documentation, API specs, examples
-- `templates/` - Output formats, config files, boilerplate code
-- `scripts/` - Executable helpers (Python, shell scripts)
-
-## Session Logging
-
-Every conversation is automatically logged to `logs/` for debugging and inspection:
-
+**Skill Structure:**
 ```
-logs/
-├── session_20260201_143052_a1b2c3.json
-├── session_20260201_150217_d4e5f6.json
-└── ...
+skills/
+├── mlops/
+│   ├── axolotl/
+│   │   ├── SKILL.md          # Main instructions (required)
+│   │   ├── references/       # Additional docs
+│   │   └── templates/        # Output formats
+│   └── vllm/
+│       └── SKILL.md
 ```
 
-**Log Format:**
-```json
-{
-  "session_id": "20260201_143052_a1b2c3",
-  "model": "anthropic/claude-sonnet-4",
-  "session_start": "2026-02-01T14:30:52.123456",
-  "last_updated": "2026-02-01T14:35:12.789012",
-  "message_count": 8,
-  "conversations": [
-    {"from": "system", "value": "..."},
-    {"from": "human", "value": "..."},
-    {"from": "gpt", "value": "..."},
-    {"from": "tool", "value": "..."}
-  ]
-}
-```
+---
 
-- **Automatic**: Logs are created and updated automatically after each conversation turn
-- **Session ID in Banner**: The CLI displays the session ID in the welcome banner
-- **Trajectory Format**: Uses the same format as batch processing for consistency
-- **Git Ignored**: `logs/` is in `.gitignore` so logs aren't committed
+## Manual Installation
 
-## Context Compression
-
-Long conversations can exceed the model's context limit. Hermes Agent automatically compresses context when approaching the limit:
-
-**How it works:**
-1. Tracks actual token usage from API responses (`usage.prompt_tokens`)
-2. When tokens reach 85% of model's context limit, triggers compression
-3. Protects first 3 turns (system prompt, initial request, first response)
-4. Protects last 4 turns (recent context is most relevant)
-5. Summarizes middle turns using a fast/cheap model (Gemini Flash)
-6. Inserts summary as a user message, conversation continues seamlessly
-
-**Configuration (`cli-config.yaml`):**
-```yaml
-compression:
-  enabled: true                    # Enable auto-compression (default)
-  threshold: 0.85                  # Compress at 85% of context limit
-  summary_model: "google/gemini-2.0-flash-001"
-```
-
-**Or via environment variables:**
-```bash
-CONTEXT_COMPRESSION_ENABLED=true
-CONTEXT_COMPRESSION_THRESHOLD=0.85
-CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001
-```
-
-**When compression triggers, you'll see:**
-```
-📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
-   📊 Model context limit: 200,000 tokens (85% = 170,000)
-   🗜️  Summarizing turns 4-15 (12 turns)
-   ✅ Compressed: 20 → 9 messages (~45,000 tokens saved)
-```
-
-## Scheduled Tasks (Cron Jobs)
-
-Hermes Agent can schedule automated tasks to run in the future - either one-time reminders or recurring jobs.
-
-### CLI Commands
+If you prefer not to use the installer:
 
 ```bash
-# List scheduled jobs
-/cron
+# Clone the repository
+git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
 
-# Add a one-shot reminder (runs once in 30 minutes)
-/cron add 30m Remind me to check the build status
+# Run setup script
+./setup-hermes.sh
 
-# Add a recurring job (every 2 hours)
-/cron add "every 2h" Check server status at 192.168.1.100 and report any issues
-
-# Add a cron expression (daily at 9am)
-/cron add "0 9 * * *" Generate a morning briefing summarizing GitHub notifications
-
-# Remove a job
-/cron remove abc123def456
+# Or manually:
+python3 -m venv venv
+source venv/bin/activate
+pip install -e ".[all]"
+hermes setup
 ```
 
-### Agent Self-Scheduling
-
-The agent can also schedule its own follow-up tasks using tools:
-
-```python
-# Available when using hermes-cli toolset (default for CLI)
-schedule_cronjob(prompt="...", schedule="30m", repeat=1)  # One-shot
-schedule_cronjob(prompt="...", schedule="every 2h")       # Recurring
-list_cronjobs()                                            # View all jobs
-remove_cronjob(job_id="...")                              # Cancel a job
-```
-
-**⚠️ Important:** Cronjobs run in **isolated sessions with NO prior context**. The prompt must be completely self-contained with all necessary information (file paths, URLs, server addresses, etc.). The future agent will not remember anything from the current conversation.
-
-### Schedule Formats
-
-| Format | Example | Description |
-|--------|---------|-------------|
-| Duration | `30m`, `2h`, `1d` | One-shot delay from now |
-| Interval | `every 30m`, `every 2h` | Recurring at fixed intervals |
-| Cron | `0 9 * * *` | Cron expression (requires `croniter`) |
-| Timestamp | `2026-02-03T14:00` | One-shot at specific time |
-
-### Repeat Options
-
-| repeat | Behavior |
-|--------|----------|
-| (omitted) | One-shot schedules run once; intervals/cron run forever |
-| `1` | Run once then auto-delete |
-| `N` | Run N times then auto-delete |
-
-### Running the Cron Daemon
-
-Jobs are stored in `~/.hermes/cron/jobs.json` and executed by a scheduler:
-
-```bash
-# Option 1: Built-in daemon (checks every 60 seconds)
-python cli.py --cron-daemon
-
-# Option 2: System cron integration (run once per minute)
-# Add to crontab: crontab -e
-*/1 * * * * cd ~/hermes-agent && python cli.py --cron-tick-once >> ~/.hermes/cron/cron.log 2>&1
-```
-
-### Job Output
-
-Job outputs are saved to `~/.hermes/cron/output/{job_id}/{timestamp}.md` for review.
-
-## Interactive CLI
-
-The CLI provides a rich interactive experience for working with the agent.
-
-### Running the CLI
-
-```bash
-# Basic usage
-./hermes
-
-# With specific model
-./hermes --model "anthropic/claude-sonnet-4"
-
-# With specific toolsets
-./hermes --toolsets "web,terminal,skills"
-```
-
-### CLI Commands
-
-| Command | Description |
-|---------|-------------|
-| `/help` | Show available commands |
-| `/tools` | List available tools by toolset |
-| `/toolsets` | List available toolsets |
-| `/model [name]` | Show or change the current model |
-| `/prompt [text]` | View/set custom system prompt |
-| `/personality [name]` | Set a predefined personality |
-| `/clear` | Clear screen and reset conversation |
-| `/reset` | Reset conversation only |
-| `/history` | Show conversation history |
-| `/save` | Save current conversation to file |
-| `/config` | Show current configuration |
-| `/cron` | Manage scheduled tasks (list, add, remove) |
-| `/quit` | Exit the CLI |
-
-### Configuration
-
-Copy `cli-config.yaml.example` to `cli-config.yaml` and customize:
-
-```yaml
-# Model settings
-model:
-  default: "anthropic/claude-sonnet-4"
-
-# Terminal backend (local, docker, singularity, modal, or ssh)
-terminal:
-  env_type: "local"
-  cwd: "."  # Use current directory
-
-# Or use SSH for remote execution (keeps agent code isolated)
-# terminal:
-#   env_type: "ssh"
-#   ssh_host: "my-server.example.com"
-#   ssh_user: "myuser"
-#   ssh_key: "~/.ssh/id_rsa"
-#   cwd: "/home/myuser/project"
-
-# Enable specific toolsets
-toolsets:
-  - all  # or: web, terminal, browser, vision, etc.
-
-# Custom personalities (use with /personality command)
-agent:
-  personalities:
-    helpful: "You are a helpful assistant."
-    kawaii: "You are a kawaii assistant! Use cute expressions..."
-```
-
-### Personalities
-
-Built-in personalities available via `/personality`:
-- `helpful`, `concise`, `technical`, `creative`, `teacher`
-- `kawaii`, `catgirl`, `pirate`, `shakespeare`, `surfer`
-- `noir`, `uwu`, `philosopher`, `hype`
-
-## Toolsets System
-
-The agent uses a toolsets system for organizing and managing tools. All tools must be part of a toolset to be accessible - individual tool selection is not supported. This ensures consistent and logical grouping of capabilities.
-
-### Key Concepts
-
-- **Toolsets**: Logical groups of tools for specific use cases (e.g., "research", "development", "debugging")
-- **Composition**: Toolsets can include other toolsets for powerful combinations
-- **Custom Toolsets**: Create your own toolsets at runtime or by editing `toolsets.py`
-- **Toolset-Only Access**: Tools are only accessible through toolsets, not individually
-
-### Available Toolsets
-
-See `toolsets.py` for the complete list of predefined toolsets including:
-- Basic toolsets (web, terminal, vision, creative, reasoning)
-- Composite toolsets (research, development, analysis, etc.)
-- Scenario-specific toolsets (debugging, documentation, API testing, etc.)
-- Special toolsets (safe mode without terminal, minimal, offline)
-
-### Using Toolsets
-
-```bash
-# Use a predefined toolset
-python run_agent.py --enabled_toolsets=research --query "Find latest AI papers"
-
-# Combine multiple toolsets
-python run_agent.py --enabled_toolsets=web,vision --query "Analyze this website"
-
-# Enable all toolsets explicitly (same as omitting the flag)
-python run_agent.py --enabled_toolsets=all --query "Do web research and run commands if helpful"
-
-# Safe mode (no terminal access)
-python run_agent.py --enabled_toolsets=safe --query "Help without running commands"
-
-# List all available toolsets and tools
-python run_agent.py --list_tools
-```
-
-See `toolsets.py` for the complete list of available toolsets and how to create custom ones.
-
-## Basic Usage
-
-### Default (all tools enabled)
-```bash
-# Uses OpenRouter by default - just set OPENROUTER_API_KEY in .env
-python run_agent.py \
-  --query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \
-  --max_turns 20 \
-  --model anthropic/claude-sonnet-4-20250514
-```
-
-### With specific toolset
-```bash
-python run_agent.py \
-  --query "Debug this Python error" \
-  --enabled_toolsets=debugging \
-  --model anthropic/claude-sonnet-4-20250514
-```
-
-### Python API
-```python
-from run_agent import AIAgent
-
-# Uses OpenRouter by default (reads OPENROUTER_API_KEY from .env)
-agent = AIAgent(
-    model="anthropic/claude-sonnet-4-20250514",
-    enabled_toolsets=["research"]
-)
-response = agent.chat("Find information about quantum computing")
-
-# Create custom toolset at runtime
-from toolsets import create_custom_toolset
-
-create_custom_toolset(
-    name="my_tools",
-    description="My custom toolkit",
-    tools=["web_search"],
-    includes=["terminal", "vision"]
-)
-
-agent = AIAgent(enabled_toolsets=["my_tools"])
-```
+---
 
 ## Batch Processing
 
-Process multiple prompts from a dataset in parallel with automatic checkpointing and statistics tracking:
+Process multiple prompts in parallel with automatic checkpointing:
 
 ```bash
-# Basic batch processing
 python batch_runner.py \
   --dataset_file=prompts.jsonl \
   --batch_size=20 \
-  --run_name=my_run
-
-# With specific distribution
-python batch_runner.py \
-  --dataset_file=prompts.jsonl \
-  --batch_size=20 \
-  --run_name=image_run \
-  --distribution=image_gen \
-  --num_workers=4
+  --run_name=my_run \
+  --num_workers=4 \
+  --distribution=default
 ```
 
-**Key Features:**
-- Parallel processing with configurable workers
-- Toolset distributions for varied data generation
-- Automatic checkpointing and resume capability
-- Combined output in `data/<run_name>/trajectories.jsonl`
-- Tool usage statistics and success rates
+**Key Options:**
+| Flag | Description |
+|------|-------------|
+| `--dataset_file` | JSONL file with prompts |
+| `--batch_size` | Prompts per batch |
+| `--run_name` | Name for output/checkpoints |
+| `--num_workers` | Parallel workers (default: 4) |
+| `--distribution` | Toolset distribution |
+| `--resume` | Resume from checkpoint |
+| `--ephemeral_system_prompt` | Guide behavior without saving to trajectories |
+| `--list_distributions` | Show available distributions |
 
-Use `--list_distributions` to see available toolset distributions for varied data generation.
+**Output:** `data/<run_name>/trajectories.jsonl`
 
 ### Trajectory Compression
 
-Post-process trajectories to fit within token budgets for training:
+Compress trajectories to fit token budgets for training:
 
 ```bash
-# Compress a directory of JSONL files
+# Compress a directory
 python trajectory_compressor.py --input=data/my_run
 
-# Compress a single JSONL file
-python trajectory_compressor.py --input=data/trajectories.jsonl
+# Compress with sampling
+python trajectory_compressor.py --input=data/my_run --sample_percent=15
 
-# Compress a 15% sample (useful for creating smaller training sets)
-python trajectory_compressor.py --input=data/trajectories.jsonl --sample_percent=15
-
-# Custom output and token target
-python trajectory_compressor.py \
-  --input=data/trajectories.jsonl \
-  --output=data/compressed.jsonl \
-  --target_max_tokens=16000
+# Custom token target
+python trajectory_compressor.py --input=data/my_run --target_max_tokens=16000
 ```
 
-**Features:**
-- Protects first turns (system, human, first GPT response, first tool call)
-- Protects last N turns (configurable)
-- Summarizes middle turns using LLM to fit target token budget
-- Supports both directory and single file input
-- Optional random sampling with `--sample_percent`
+**Features:**
+- Protects first/last turns
+- Summarizes middle turns via LLM
 - Configurable via `configs/trajectory_compression.yaml`
 
-### Ephemeral System Prompts
+---
 
-The ephemeral system prompt feature allows you to guide the model's behavior during batch processing **without** saving that prompt to the training dataset trajectories. This is useful for:
+## Python API
 
-- Guiding model behavior during data collection
-- Adding task-specific instructions 
-- Keeping saved trajectories clean and focused on tool-calling format
+```python
+from run_agent import AIAgent
 
-**Example:**
-```bash
-python batch_runner.py \
-  --dataset_file=prompts.jsonl \
-  --batch_size=10 \
-  --run_name=my_run \
-  --ephemeral_system_prompt="You are a helpful assistant focused on image generation."
+agent = AIAgent(
+    model="anthropic/claude-sonnet-4",
+    enabled_toolsets=["web", "terminal"]
+)
+
+result = agent.run_conversation("Search for the latest Python news")
+print(result["final_response"])
 ```
 
-The ephemeral prompt will influence the model's behavior during execution, but **only the standard tool-calling system prompt** will be saved in the trajectory files.
+---
 
-The ephemeral prompt influences model behavior during execution, but **only the standard tool-calling system prompt** is saved in trajectory files.
+## Environment Variables Reference
 
-## Command Line Arguments
+All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set them.
 
-**Single Agent (`run_agent.py`):**
-- `--query`: The question or task for the agent
-- `--model`: Model to use (default: claude-opus-4-20250514)
-- `--api_key`: API key for authentication
-- `--base_url`: API endpoint URL
-- `--max_turns`: Maximum number of tool-calling iterations
-- `--enabled_toolsets`: Comma-separated list of toolsets to enable. Use `all` (or `*`) to enable everything. If omitted, all toolsets are enabled by default.
-- `--disabled_toolsets`: Comma-separated list of toolsets to disable
-- `--list_tools`: List all available toolsets and tools
-- `--save_trajectories`: Save conversation trajectories to JSONL files
+**LLM Providers:**
+| Variable | Description |
+|----------|-------------|
+| `OPENROUTER_API_KEY` | OpenRouter API key (recommended) |
+| `ANTHROPIC_API_KEY` | Direct Anthropic access |
+| `OPENAI_API_KEY` | Direct OpenAI access |
 
-**Batch Processing (`batch_runner.py`):**
-- `--dataset_file`: Path to JSONL file with prompts
-- `--batch_size`: Number of prompts per batch
-- `--run_name`: Name for this run (for output/checkpointing)
-- `--distribution`: Toolset distribution to use (default: "default")
-- `--num_workers`: Number of parallel workers (default: 4)
-- `--resume`: Resume from checkpoint if interrupted
-- `--ephemeral_system_prompt`: System prompt used during execution but NOT saved to trajectories
-- `--list_distributions`: List available toolset distributions
+**Tool APIs:**
+| Variable | Description |
+|----------|-------------|
+| `FIRECRAWL_API_KEY` | Web scraping (firecrawl.dev) |
+| `BROWSERBASE_API_KEY` | Browser automation |
+| `BROWSERBASE_PROJECT_ID` | Browserbase project |
+| `FAL_KEY` | Image generation (fal.ai) |
 
-## Environment Variables
+**Terminal Backend:**
+| Variable | Description |
+|----------|-------------|
+| `TERMINAL_ENV` | Backend: `local`, `docker`, `ssh`, `singularity`, `modal` |
+| `TERMINAL_DOCKER_IMAGE` | Docker image (default: `python:3.11-slim`) |
+| `TERMINAL_SINGULARITY_IMAGE` | Singularity image or `.sif` path |
+| `TERMINAL_TIMEOUT` | Command timeout in seconds |
+| `TERMINAL_CWD` | Working directory |
+| `SUDO_PASSWORD` | Enable sudo (stored plaintext - be careful!) |
 
-All environment variables can be configured in the `.env` file (copy from `.env.example`).
+**SSH Backend:**
+| Variable | Description |
+|----------|-------------|
+| `TERMINAL_SSH_HOST` | Remote server hostname |
+| `TERMINAL_SSH_USER` | SSH username |
+| `TERMINAL_SSH_PORT` | SSH port (default: 22) |
+| `TERMINAL_SSH_KEY` | Path to private key |
 
-**LLM Provider (OpenRouter):**
-- `OPENROUTER_API_KEY`: Primary LLM access via OpenRouter (supports Claude, GPT-4, Gemini, etc.)
-- `LLM_MODEL`: Default model (e.g., `anthropic/claude-sonnet-4`, `openai/gpt-4o`)
+**Messaging:**
+| Variable | Description |
+|----------|-------------|
+| `TELEGRAM_BOT_TOKEN` | Telegram bot token (@BotFather) |
+| `TELEGRAM_HOME_CHANNEL` | Default channel for cron delivery |
+| `DISCORD_BOT_TOKEN` | Discord bot token |
+| `DISCORD_HOME_CHANNEL` | Default channel for cron delivery |
 
-**Tool API Keys:**
-- `FIRECRAWL_API_KEY`: Web tools (search, extract, crawl)
-- `NOUS_API_KEY`: Vision and reasoning tools
-- `FAL_KEY`: Image generation tools
+**Context Compression:**
+| Variable | Description |
+|----------|-------------|
+| `CONTEXT_COMPRESSION_ENABLED` | Enable auto-compression (default: true) |
+| `CONTEXT_COMPRESSION_THRESHOLD` | Trigger at this fraction of the context limit (default: 0.85, i.e. 85%) |
+| `CONTEXT_COMPRESSION_MODEL` | Model for summaries |
 
-**Terminal Tool Configuration (mini-swe-agent backend):**
-- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, `modal`, or `ssh` (default: `local`)
-- `TERMINAL_DOCKER_IMAGE`: Docker image for docker backend (default: `python:3.11-slim`)
-- `TERMINAL_SINGULARITY_IMAGE`: Singularity/Apptainer image (can be `docker://...` URL or local `.sif` path)
-- `TERMINAL_TIMEOUT`: Command timeout in seconds (default: `60`)
-- `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
-- `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
-- `TERMINAL_SCRATCH_DIR`: Custom scratch directory for sandbox storage (optional, auto-detects `/scratch`)
-- `SUDO_PASSWORD`: Enable sudo commands by piping password via `sudo -S` (works with all backends)
-  - If unset in CLI mode, you'll be prompted interactively when sudo is needed (45s timeout)
+---
 
-**SSH Backend Configuration (for remote execution):**
-- `TERMINAL_SSH_HOST`: Remote server hostname or IP
-- `TERMINAL_SSH_USER`: SSH username
-- `TERMINAL_SSH_PORT`: SSH port (default: `22`)
-- `TERMINAL_SSH_KEY`: Path to SSH private key (optional, uses ssh-agent if not set)
+## File Structure
 
-**Context Compression (auto-shrinks long conversations):**
-- `CONTEXT_COMPRESSION_ENABLED`: Enable auto-compression (default: `true`)
-- `CONTEXT_COMPRESSION_THRESHOLD`: Compress at this % of context limit (default: `0.85`)
-- `CONTEXT_COMPRESSION_MODEL`: Model for generating summaries (default: `google/gemini-2.0-flash-001`)
+| Path | Description |
+|------|-------------|
+| `~/.hermes/config.yaml` | Your settings |
+| `~/.hermes/.env` | API keys and secrets |
+| `~/.hermes/cron/` | Scheduled jobs data |
+| `~/.hermes/sessions/` | Gateway session data |
+| `~/.hermes-agent/` | Installation directory |
+| `~/.hermes-agent/logs/` | Session logs |
+| `hermes_cli/` | CLI implementation |
+| `tools/` | Tool implementations |
+| `skills/` | Knowledge documents |
+| `gateway/` | Messaging platform adapters |
+| `cron/` | Scheduler implementation |
 
-**Browser Tool Configuration (agent-browser + Browserbase):**
-- `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
-- `BROWSERBASE_PROJECT_ID`: Browserbase project ID
-- `BROWSER_SESSION_TIMEOUT`: Session timeout in seconds (default: `300`)
+---
 
-**Legacy Hecate Terminal Backend (optional):**
-- `MORPH_API_KEY`: For Hecate/MorphCloud terminal backend
-- `HECATE_VM_LIFETIME_SECONDS`: VM lifetime (default: 300)
-- `HECATE_DEFAULT_SNAPSHOT_ID`: Default snapshot (default: snapshot_p5294qxt)
+## Troubleshooting
 
-**Debug Options:**
-- `WEB_TOOLS_DEBUG`, `VISION_TOOLS_DEBUG`, `MOA_TOOLS_DEBUG`, `IMAGE_TOOLS_DEBUG`: Enable debug logging
+```bash
+hermes doctor    # Run diagnostics
+hermes status    # Check configuration
+hermes config    # View current settings
+```
 
-## Key Files
+Common issues:
+- **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
+- **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
+- **Gateway won't start**: Check `hermes gateway status` and logs
 
-| File | Purpose |
-|------|---------|
-| `hermes` | CLI launcher script (run with `./hermes`) |
-| `cli.py` | Interactive CLI implementation |
-| `cli-config.yaml` | CLI configuration (copy from `.example`) |
-| `run_agent.py` | Main agent runner - single query execution |
-| `batch_runner.py` | Parallel batch processing with checkpointing |
-| `model_tools.py` | Core tool definitions and handlers |
-| `toolsets.py` | Toolset definitions and composition |
-| `toolset_distributions.py` | Probability distributions for data generation |
-| `trajectory_compressor.py` | Post-process trajectories for training |
-| `tools/` | Individual tool implementations |
-| `tools/skills_tool.py` | Skills system with progressive disclosure |
-| `skills/` | On-demand knowledge documents |
-| `docs/` | Documentation |
-| `configs/` | Example batch run scripts |
+---
+
+## Contributing
+
+1. Fork the repository
+2. Create a feature branch
+3. Make your changes
+4. Submit a pull request
+
+---
+
+## License
+
+MIT License - see [LICENSE](LICENSE) for details.
diff --git a/TODO.md b/TODO.md
index 7a68afadee..e25eed631e 100644
--- a/TODO.md
+++ b/TODO.md
@@ -441,7 +441,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 10. Messaging Platform Integrations 💬
+## 10. Messaging Platform Integrations 💬 ✅ COMPLETE
 
 **Problem:** Agent currently only works via `cli.py` which requires direct terminal access. Users may want to interact via messaging apps from their phone or other devices.
 
@@ -462,26 +462,37 @@ These items need to be addressed ASAP:
 ```
 
 **Platform support (each user sets up their own credentials):**
-- [ ] **Telegram** - via `python-telegram-bot` or `grammy` equivalent
+- [x] **Telegram** - via `python-telegram-bot`
   - Bot token from @BotFather
   - Easiest to set up, good for personal use
-- [ ] **Discord** - via `discord.py`
+- [x] **Discord** - via `discord.py`
   - Bot token from Discord Developer Portal
   - Can work in servers (group sessions) or DMs
-- [ ] **WhatsApp** - via `baileys` (WhatsApp Web protocol)
-  - QR code scan to authenticate
+- [x] **WhatsApp** - via Node.js bridge (whatsapp-web.js/baileys)
+  - Requires Node.js bridge setup
   - More complex, but reaches most people
 
 **Session management:**
-- [ ] **Session store** - JSONL persistence per session key
-  - `~/.hermes/sessions/{session_key}.jsonl`
-  - Session keys: `telegram:dm:{user_id}`, `discord:channel:{id}`, etc.
-- [ ] **Session expiry** - Configurable reset policies
-  - Daily reset (default 4am) OR idle timeout (e.g., 2 hours)
+- [x] **Session store** - JSONL persistence per session key
+  - `~/.hermes/sessions/{session_id}.jsonl`
+  - Session keys: `agent:main:telegram:dm`, `agent:main:discord:group:123`, etc.
+- [x] **Session expiry** - Configurable reset policies
+  - Daily reset (default 4am) OR idle timeout (default 2 hours)
   - Manual reset via `/reset` or `/new` command in chat
-- [ ] **Session continuity** - Conversations persist across messages until reset
+  - Per-platform and per-type overrides
+- [x] **Session continuity** - Conversations persist across messages until reset
 
-**Files to create:** `monitors/telegram_monitor.py`, `monitors/discord_monitor.py`, `monitors/session_store.py`
+**Files created:** `gateway/`, `gateway/platforms/`, `gateway/config.py`, `gateway/session.py`, `gateway/delivery.py`, `gateway/run.py`
+
+**Configuration:**
+- Environment variables: `TELEGRAM_BOT_TOKEN`, `DISCORD_BOT_TOKEN`, etc.
+- Config file: `~/.hermes/gateway.json`
+- CLI commands: `/platforms` to check status, `--gateway` to start
+
+**Dynamic context injection:**
+- Agent knows its source platform and chat
+- Agent knows connected platforms and home channels
+- Agent can deliver cron outputs to specific platforms
 
 ---
 
diff --git a/cli.py b/cli.py
index 210e069df2..bd06439fa9 100755
--- a/cli.py
+++ b/cli.py
@@ -46,12 +46,30 @@ if env_path.exists():
 
 def load_cli_config() -> Dict[str, Any]:
     """
-    Load CLI configuration from cli-config.yaml.
+    Load CLI configuration from config files.
+    
+    Config lookup order:
+    1. ~/.hermes/config.yaml (user config - preferred)
+    2. ./cli-config.yaml (project config - fallback)
     
     Environment variables take precedence over config file values.
-    Returns default values if config file doesn't exist.
+    Returns default values if no config file exists.
     """
-    config_path = Path(__file__).parent / 'cli-config.yaml'
+    # Check user config first (~/.hermes/config.yaml)
+    user_config_path = Path.home() / '.hermes' / 'config.yaml'
+    project_config_path = Path(__file__).parent / 'cli-config.yaml'
+    
+    # Use user config if it exists, otherwise project config
+    if user_config_path.exists():
+        config_path = user_config_path
+    else:
+        config_path = project_config_path
+    
+    # Also load .env from ~/.hermes/.env if it exists
+    user_env_path = Path.home() / '.hermes' / '.env'
+    if user_env_path.exists():
+        from dotenv import load_dotenv
+        load_dotenv(dotenv_path=user_env_path, override=True)
     
     # Default configuration
     defaults = {
@@ -406,6 +424,7 @@ COMMANDS = {
     "/save": "Save the current conversation",
     "/config": "Show current configuration",
     "/cron": "Manage scheduled tasks (list, add, remove)",
+    "/platforms": "Show gateway/messaging platform status",
     "/quit": "Exit the CLI (also: /exit, /q)",
 }
 
@@ -1018,6 +1037,63 @@ class HermesCLI:
             print(f"(._.) Unknown cron command: {subcommand}")
             print("  Available: list, add, remove")
     
+    def _show_gateway_status(self):
+        """Show status of the gateway and connected messaging platforms."""
+        from gateway.config import load_gateway_config, Platform
+        
+        print()
+        print("+" + "-" * 60 + "+")
+        print("|" + " " * 15 + "(✿◠‿◠) Gateway Status" + " " * 17 + "|")
+        print("+" + "-" * 60 + "+")
+        print()
+        
+        try:
+            config = load_gateway_config()
+            connected = config.get_connected_platforms()
+            
+            print("  Messaging Platform Configuration:")
+            print("  " + "-" * 55)
+            
+            platform_status = {
+                Platform.TELEGRAM: ("Telegram", "TELEGRAM_BOT_TOKEN"),
+                Platform.DISCORD: ("Discord", "DISCORD_BOT_TOKEN"),
+                Platform.WHATSAPP: ("WhatsApp", "WHATSAPP_ENABLED"),
+            }
+            
+            for platform, (name, env_var) in platform_status.items():
+                pconfig = config.platforms.get(platform)
+                if pconfig and pconfig.enabled:
+                    home = config.get_home_channel(platform)
+                    home_str = f" → {home.name}" if home else ""
+                    print(f"    ✓ {name:<12} Enabled{home_str}")
+                else:
+                    print(f"    ○ {name:<12} Not configured ({env_var})")
+            
+            print()
+            print("  Session Reset Policy:")
+            print("  " + "-" * 55)
+            policy = config.default_reset_policy
+            print(f"    Mode: {policy.mode}")
+            print(f"    Daily reset at: {policy.at_hour}:00")
+            print(f"    Idle timeout: {policy.idle_minutes} minutes")
+            
+            print()
+            print("  To start the gateway:")
+            print("    python cli.py --gateway")
+            print()
+            print("  Configuration file: ~/.hermes/gateway.json")
+            print()
+            
+        except Exception as e:
+            print(f"  Error loading gateway config: {e}")
+            print()
+            print("  To configure the gateway:")
+            print("    1. Set environment variables:")
+            print("       TELEGRAM_BOT_TOKEN=your_token")
+            print("       DISCORD_BOT_TOKEN=your_token")
+            print("    2. Or create ~/.hermes/gateway.json")
+            print()
+    
     def process_command(self, command: str) -> bool:
         """
         Process a slash command.
@@ -1075,6 +1151,8 @@ class HermesCLI:
             self.save_conversation()
         elif cmd.startswith("/cron"):
             self._handle_cron_command(command)  # Use original command for proper parsing
+        elif cmd == "/platforms" or cmd == "/gateway":
+            self._show_gateway_status()
         else:
             self.console.print(f"[bold red]Unknown command: {cmd}[/]")
             self.console.print("[dim #B8860B]Type /help for available commands[/]")
@@ -1216,6 +1294,7 @@ def main(
     list_toolsets: bool = False,
     cron_daemon: bool = False,
     cron_tick_once: bool = False,
+    gateway: bool = False,
 ):
     """
     Hermes Agent CLI - Interactive AI Assistant
@@ -1262,6 +1341,14 @@ def main(
             print(f"Executed {jobs_run} job(s)")
         return
     
+    # Handle gateway mode (messaging platforms)
+    if gateway:
+        import asyncio
+        from gateway.run import start_gateway
+        print("Starting Hermes Gateway (messaging platforms)...")
+        asyncio.run(start_gateway())
+        return
+    
     # Handle query shorthand
     query = query or q
     
diff --git a/cron/jobs.py b/cron/jobs.py
index 9f7ff47c03..eb8f56b3dc 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -208,7 +208,9 @@ def create_job(
     prompt: str,
     schedule: str,
     name: Optional[str] = None,
-    repeat: Optional[int] = None
+    repeat: Optional[int] = None,
+    deliver: Optional[str] = None,
+    origin: Optional[Dict[str, Any]] = None
 ) -> Dict[str, Any]:
     """
     Create a new cron job.
@@ -218,6 +220,8 @@ def create_job(
         schedule: Schedule string (see parse_schedule)
         name: Optional friendly name
         repeat: How many times to run (None = forever, 1 = once)
+        deliver: Where to deliver output ("origin", "local", "telegram", etc.)
+        origin: Source info where job was created (for "origin" delivery)
     
     Returns:
         The created job dict
@@ -228,6 +232,10 @@ def create_job(
     if parsed_schedule["kind"] == "once" and repeat is None:
         repeat = 1
     
+    # Default delivery to origin if available, otherwise local
+    if deliver is None:
+        deliver = "origin" if origin else "local"
+    
     job_id = uuid.uuid4().hex[:12]
     now = datetime.now().isoformat()
     
@@ -246,7 +254,10 @@ def create_job(
         "next_run_at": compute_next_run(parsed_schedule),
         "last_run_at": None,
         "last_status": None,
-        "last_error": None
+        "last_error": None,
+        # Delivery configuration
+        "deliver": deliver,
+        "origin": origin,  # Tracks where job was created for "origin" delivery
     }
     
     jobs = load_jobs()
diff --git a/docs/messaging.md b/docs/messaging.md
new file mode 100644
index 0000000000..5059401c21
--- /dev/null
+++ b/docs/messaging.md
@@ -0,0 +1,461 @@
+# Messaging Platform Integrations (Gateway)
+
+Hermes Agent can connect to messaging platforms like Telegram, Discord, and WhatsApp to serve as a conversational AI assistant.
+
+## Quick Start
+
+```bash
+# 1. Set your bot token(s) in .env file
+echo 'TELEGRAM_BOT_TOKEN="your_telegram_bot_token"' >> .env
+echo 'DISCORD_BOT_TOKEN="your_discord_bot_token"' >> .env
+
+# 2. Test the gateway (foreground)
+./scripts/hermes-gateway run
+
+# 3. Install as a system service (runs in background)
+./scripts/hermes-gateway install
+
+# 4. Manage the service
+./scripts/hermes-gateway start
+./scripts/hermes-gateway stop
+./scripts/hermes-gateway restart
+./scripts/hermes-gateway status
+```
+
+**Quick test (without service install):**
+```bash
+python cli.py --gateway  # Runs in foreground, useful for debugging
+```
+
+## Architecture Overview
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                      Hermes Gateway                             │
+├─────────────────────────────────────────────────────────────────┤
+│                                                                 │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐          │
+│  │   Telegram   │  │   Discord    │  │   WhatsApp   │          │
+│  │   Adapter    │  │   Adapter    │  │   Adapter    │          │
+│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘          │
+│         │                 │                 │                   │
+│         └─────────────────┼─────────────────┘                   │
+│                           │                                     │
+│                  ┌────────▼────────┐                            │
+│                  │  Session Store  │                            │
+│                  │  (per-chat)     │                            │
+│                  └────────┬────────┘                            │
+│                           │                                     │
+│                  ┌────────▼────────┐                            │
+│                  │   AIAgent       │                            │
+│                  │   (run_agent)   │                            │
+│                  └─────────────────┘                            │
+│                                                                 │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+## Session Management
+
+### Session Persistence
+
+Sessions persist across messages until they reset. The agent remembers your conversation context.
+
+### Reset Policies
+
+Sessions reset based on configurable policies:
+
+| Policy | Default | Description |
+|--------|---------|-------------|
+| Daily | 4:00 AM | Reset at a specific hour each day |
+| Idle | 120 min | Reset after N minutes of inactivity |
+| Both | (combined) | Whichever triggers first |
+
+### Manual Reset
+
+Send `/new` or `/reset` as a message to start fresh.
+
+### Per-Platform Overrides
+
+Configure different reset policies per platform:
+
+```json
+{
+  "reset_by_platform": {
+    "telegram": { "mode": "idle", "idle_minutes": 240 },
+    "discord": { "mode": "idle", "idle_minutes": 60 }
+  }
+}
+```
+
+## Platform Setup
+
+### Telegram
+
+1. **Create a bot** via [@BotFather](https://t.me/BotFather)
+2. **Get your token** (looks like `123456789:ABCdefGHIjklMNOpqrsTUVwxyz`)
+3. **Set environment variable:**
+   ```bash
+   export TELEGRAM_BOT_TOKEN="your_token_here"
+   ```
+4. **Optional: Set home channel** for cron job delivery:
+   ```bash
+   export TELEGRAM_HOME_CHANNEL="-1001234567890"
+   export TELEGRAM_HOME_CHANNEL_NAME="My Notes"
+   ```
+
+**Requirements:**
+```bash
+pip install "python-telegram-bot>=20.0"
+```
+
+### Discord
+
+1. **Create an application** at [Discord Developer Portal](https://discord.com/developers/applications)
+2. **Create a bot** under your application
+3. **Get the bot token**
+4. **Enable required intents:**
+   - Message Content Intent
+   - Server Members Intent (optional)
+5. **Invite to your server** using OAuth2 URL generator (scopes: `bot`, `applications.commands`)
+6. **Set environment variable:**
+   ```bash
+   export DISCORD_BOT_TOKEN="your_token_here"
+   ```
+7. **Optional: Set home channel:**
+   ```bash
+   export DISCORD_HOME_CHANNEL="123456789012345678"
+   export DISCORD_HOME_CHANNEL_NAME="#bot-updates"
+   ```
+
+**Requirements:**
+```bash
+pip install "discord.py>=2.0"
+```
+
+### WhatsApp
+
+WhatsApp integration is more complex due to the lack of a simple bot API.
+
+**Options:**
+1. **WhatsApp Business API** (requires Meta verification)
+2. **whatsapp-web.js** via Node.js bridge (for personal accounts)
+
+**Bridge Setup:**
+1. Install Node.js
+2. Set up the bridge script (see `scripts/whatsapp-bridge/` for reference)
+3. Configure in gateway:
+   ```json
+   {
+     "platforms": {
+       "whatsapp": {
+         "enabled": true,
+         "extra": {
+           "bridge_script": "/path/to/bridge.js",
+           "bridge_port": 3000
+         }
+       }
+     }
+   }
+   ```
+
+## Configuration
+
+There are **two ways** to configure the gateway (highest precedence first):
+
+### 1. Environment Variables (`.env` file) - Recommended for Quick Setup
+
+Add to your `.env` file in the project root:
+
+```bash
+# =============================================================================
+# MESSAGING PLATFORM TOKENS
+# =============================================================================
+
+# Telegram - get from @BotFather on Telegram
+TELEGRAM_BOT_TOKEN=your_telegram_bot_token
+
+# Optional: Default channel for cron job delivery
+TELEGRAM_HOME_CHANNEL=-1001234567890
+TELEGRAM_HOME_CHANNEL_NAME="My Notes"
+
+# Discord - get from Discord Developer Portal
+DISCORD_BOT_TOKEN=your_discord_bot_token
+
+# Optional: Default channel for cron job delivery
+DISCORD_HOME_CHANNEL=123456789012345678
+DISCORD_HOME_CHANNEL_NAME="#bot-updates"
+
+# WhatsApp - requires Node.js bridge setup
+WHATSAPP_ENABLED=true
+
+# =============================================================================
+# SESSION SETTINGS
+# =============================================================================
+
+# Reset sessions after N minutes of inactivity (default: 120)
+SESSION_IDLE_MINUTES=120
+
+# Daily reset hour in 24h format (default: 4 = 4am)
+SESSION_RESET_HOUR=4
+```
+
+### 2. Gateway Config File (`~/.hermes/gateway.json`) - Full Control
+
+For advanced configuration, create `~/.hermes/gateway.json`:
+
+```json
+{
+  "platforms": {
+    "telegram": {
+      "enabled": true,
+      "token": "your_telegram_token",
+      "home_channel": {
+        "platform": "telegram",
+        "chat_id": "-1001234567890",
+        "name": "My Notes"
+      }
+    },
+    "discord": {
+      "enabled": true,
+      "token": "your_discord_token",
+      "home_channel": {
+        "platform": "discord",
+        "chat_id": "123456789012345678",
+        "name": "#bot-updates"
+      }
+    }
+  },
+  "default_reset_policy": {
+    "mode": "both",
+    "at_hour": 4,
+    "idle_minutes": 120
+  },
+  "reset_by_platform": {
+    "discord": {
+      "mode": "idle",
+      "idle_minutes": 60
+    }
+  },
+  "always_log_local": true
+}
+```
+
+## Platform-Specific Toolsets
+
+Each platform has its own toolset for security:
+
+| Platform | Toolset | Capabilities |
+|----------|---------|--------------|
+| CLI | `hermes-cli` | Full access (terminal, browser, etc.) |
+| Telegram | `hermes-telegram` | Web, vision, skills, cronjobs |
+| Discord | `hermes-discord` | Web search, vision, skills, cronjobs |
+| WhatsApp | `hermes-whatsapp` | Web, terminal, vision, skills, cronjobs |
+
+Discord has a more limited toolset because it often runs in public servers.
+
+## Cron Job Delivery
+
+When scheduling cron jobs, you can specify where the output should be delivered:
+
+```
+User: "Remind me to check the server in 30 minutes"
+
+Agent uses: schedule_cronjob(
+  prompt="Check server status...",
+  schedule="30m",
+  deliver="origin"  # Back to this chat
+)
+```
+
+### Delivery Options
+
+| Option | Description |
+|--------|-------------|
+| `"origin"` | Back to where the job was created |
+| `"local"` | Save to local files only |
+| `"telegram"` | Telegram home channel |
+| `"discord"` | Discord home channel |
+| `"telegram:123456"` | Specific Telegram chat |
+
+## Dynamic Context Injection
+
+The agent knows where it is via injected context:
+
+```
+## Current Session Context
+
+**Source:** Telegram (group: Dev Team, ID: -1001234567890)
+**Connected Platforms:** local, telegram, discord
+
+**Home Channels:**
+  - telegram: My Notes (ID: -1001234567890)
+  - discord: #bot-updates (ID: 123456789012345678)
+
+**Delivery options for scheduled tasks:**
+- "origin" → Back to this chat (Dev Team)
+- "local" → Save to local files only
+- "telegram" → Home channel (My Notes)
+- "discord" → Home channel (#bot-updates)
+```
+
+## CLI Commands
+
+| Command | Description |
+|---------|-------------|
+| `/platforms` | Show gateway configuration and status |
+| `--gateway` | Start the gateway (CLI flag) |
+
+## Troubleshooting
+
+### "python-telegram-bot not installed"
+
+```bash
+pip install "python-telegram-bot>=20.0"
+```
+
+### "discord.py not installed"
+
+```bash
+pip install "discord.py>=2.0"
+```
+
+### "No platforms connected"
+
+1. Check your environment variables are set
+2. Check your tokens are valid
+3. Try `/platforms` to see configuration status
+
+### Session not persisting
+
+1. Check `~/.hermes/sessions/` exists
+2. Check session policies aren't too aggressive
+3. Verify no errors in gateway logs
+
+## Adding a New Platform
+
+To add a new messaging platform:
+
+### 1. Create the adapter
+
+Create `gateway/platforms/your_platform.py`:
+
+```python
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.config import Platform, PlatformConfig
+
+class YourPlatformAdapter(BasePlatformAdapter):
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.YOUR_PLATFORM)
+    
+    async def connect(self) -> bool:
+        # Connect to the platform
+        ...
+    
+    async def disconnect(self) -> None:
+        # Disconnect
+        ...
+    
+    async def send(self, chat_id: str, content: str, ...) -> SendResult:
+        # Send a message
+        ...
+    
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        # Get chat information
+        ...
+```
+
+### 2. Register the platform
+
+Add to `gateway/config.py`:
+
+```python
+class Platform(Enum):
+    # ... existing ...
+    YOUR_PLATFORM = "your_platform"
+```
+
+### 3. Add to gateway runner
+
+Update `gateway/run.py` `_create_adapter()`:
+
+```python
+elif platform == Platform.YOUR_PLATFORM:
+    from gateway.platforms.your_platform import YourPlatformAdapter
+    return YourPlatformAdapter(config)
+```
+
+### 4. Create a toolset (optional)
+
+Add to `toolsets.py`:
+
+```python
+"hermes-your-platform": {
+    "description": "Your platform toolset",
+    "tools": [...],
+    "includes": []
+}
+```
+
+### 5. Configure
+
+Add environment variables to `.env`:
+
+```bash
+YOUR_PLATFORM_TOKEN=...
+YOUR_PLATFORM_HOME_CHANNEL=...
+```
+
+## Service Management
+
+### Linux (systemd)
+
+```bash
+# Install as user service
+./scripts/hermes-gateway install
+
+# Manage
+systemctl --user start hermes-gateway
+systemctl --user stop hermes-gateway
+systemctl --user restart hermes-gateway
+systemctl --user status hermes-gateway
+
+# View logs
+journalctl --user -u hermes-gateway -f
+
+# Enable lingering (keeps running after logout)
+sudo loginctl enable-linger $USER
+```
+
+### macOS (launchd)
+
+```bash
+# Install
+./scripts/hermes-gateway install
+
+# Manage
+launchctl start ai.hermes.gateway
+launchctl stop ai.hermes.gateway
+
+# View logs
+tail -f ~/.hermes/logs/gateway.log
+```
+
+### Manual (any platform)
+
+```bash
+# Run in foreground (for testing/debugging)
+./scripts/hermes-gateway run
+
+# Or via CLI (also foreground)
+python cli.py --gateway
+```
+
+## Storage Locations
+
+| Path | Purpose |
+|------|---------|
+| `~/.hermes/gateway.json` | Gateway configuration |
+| `~/.hermes/sessions/sessions.json` | Session index |
+| `~/.hermes/sessions/{id}.jsonl` | Conversation transcripts |
+| `~/.hermes/cron/output/` | Cron job outputs |
+| `~/.hermes/logs/gateway.log` | Gateway logs (macOS launchd) |
diff --git a/gateway/__init__.py b/gateway/__init__.py
new file mode 100644
index 0000000000..8b6d988934
--- /dev/null
+++ b/gateway/__init__.py
@@ -0,0 +1,35 @@
+"""
+Hermes Gateway - Multi-platform messaging integration.
+
+This module provides a unified gateway for connecting the Hermes agent
+to various messaging platforms (Telegram, Discord, WhatsApp) with:
+- Session management (persistent conversations with reset policies)
+- Dynamic context injection (agent knows where messages come from)
+- Delivery routing (cron job outputs to appropriate channels)
+- Platform-specific toolsets (different capabilities per platform)
+"""
+
+from .config import GatewayConfig, PlatformConfig, HomeChannel, load_gateway_config
+from .session import (
+    SessionContext,
+    SessionStore,
+    SessionResetPolicy,
+    build_session_context_prompt,
+)
+from .delivery import DeliveryRouter, DeliveryTarget
+
+__all__ = [
+    # Config
+    "GatewayConfig",
+    "PlatformConfig", 
+    "HomeChannel",
+    "load_gateway_config",
+    # Session
+    "SessionContext",
+    "SessionStore",
+    "SessionResetPolicy",
+    "build_session_context_prompt",
+    # Delivery
+    "DeliveryRouter",
+    "DeliveryTarget",
+]
diff --git a/gateway/config.py b/gateway/config.py
new file mode 100644
index 0000000000..302ba53619
--- /dev/null
+++ b/gateway/config.py
@@ -0,0 +1,333 @@
+"""
+Gateway configuration management.
+
+Handles loading and validating configuration for:
+- Connected platforms (Telegram, Discord, WhatsApp)
+- Home channels for each platform
+- Session reset policies
+- Delivery preferences
+"""
+
+import os
+import json
+from pathlib import Path
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Any
+from enum import Enum
+
+
+class Platform(Enum):
+    """Supported messaging platforms."""
+    LOCAL = "local"
+    TELEGRAM = "telegram"
+    DISCORD = "discord"
+    WHATSAPP = "whatsapp"
+
+
+@dataclass
+class HomeChannel:
+    """
+    Default destination for a platform.
+    
+    When a cron job specifies deliver="telegram" without a specific chat ID,
+    messages are sent to this home channel.
+    """
+    platform: Platform
+    chat_id: str
+    name: str  # Human-readable name for display
+    
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "platform": self.platform.value,
+            "chat_id": self.chat_id,
+            "name": self.name,
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "HomeChannel":
+        return cls(
+            platform=Platform(data["platform"]),
+            chat_id=str(data["chat_id"]),
+            name=data.get("name", "Home"),
+        )
+
+
+@dataclass
+class SessionResetPolicy:
+    """
+    Controls when sessions reset (lose context).
+    
+    Modes:
+    - "daily": Reset at a specific hour each day
+    - "idle": Reset after N minutes of inactivity
+    - "both": Whichever triggers first (daily boundary OR idle timeout)
+    """
+    mode: str = "both"  # "daily", "idle", or "both"
+    at_hour: int = 4  # Hour for daily reset (0-23, local time)
+    idle_minutes: int = 120  # Minutes of inactivity before reset
+    
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "mode": self.mode,
+            "at_hour": self.at_hour,
+            "idle_minutes": self.idle_minutes,
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "SessionResetPolicy":
+        return cls(
+            mode=data.get("mode", "both"),
+            at_hour=data.get("at_hour", 4),
+            idle_minutes=data.get("idle_minutes", 120),
+        )
+
+
+@dataclass
+class PlatformConfig:
+    """Configuration for a single messaging platform."""
+    enabled: bool = False
+    token: Optional[str] = None  # Bot token (Telegram, Discord)
+    api_key: Optional[str] = None  # API key if different from token
+    home_channel: Optional[HomeChannel] = None
+    
+    # Platform-specific settings
+    extra: Dict[str, Any] = field(default_factory=dict)
+    
+    def to_dict(self) -> Dict[str, Any]:
+        result = {
+            "enabled": self.enabled,
+            "extra": self.extra,
+        }
+        if self.token:
+            result["token"] = self.token
+        if self.api_key:
+            result["api_key"] = self.api_key
+        if self.home_channel:
+            result["home_channel"] = self.home_channel.to_dict()
+        return result
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "PlatformConfig":
+        home_channel = None
+        if "home_channel" in data:
+            home_channel = HomeChannel.from_dict(data["home_channel"])
+        
+        return cls(
+            enabled=data.get("enabled", False),
+            token=data.get("token"),
+            api_key=data.get("api_key"),
+            home_channel=home_channel,
+            extra=data.get("extra", {}),
+        )
+
+
+@dataclass
+class GatewayConfig:
+    """
+    Main gateway configuration.
+    
+    Manages all platform connections, session policies, and delivery settings.
+    """
+    # Platform configurations
+    platforms: Dict[Platform, PlatformConfig] = field(default_factory=dict)
+    
+    # Session reset policies by type
+    default_reset_policy: SessionResetPolicy = field(default_factory=SessionResetPolicy)
+    reset_by_type: Dict[str, SessionResetPolicy] = field(default_factory=dict)
+    reset_by_platform: Dict[Platform, SessionResetPolicy] = field(default_factory=dict)
+    
+    # Reset trigger commands
+    reset_triggers: List[str] = field(default_factory=lambda: ["/new", "/reset"])
+    
+    # Storage paths
+    sessions_dir: Path = field(default_factory=lambda: Path.home() / ".hermes" / "sessions")
+    
+    # Delivery settings
+    always_log_local: bool = True  # Always save cron outputs to local files
+    
+    def get_connected_platforms(self) -> List[Platform]:
+        """Return list of platforms that are enabled and configured."""
+        connected = []
+        for platform, config in self.platforms.items():
+            if config.enabled and (config.token or config.api_key):
+                connected.append(platform)
+        return connected
+    
+    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
+        """Get the home channel for a platform."""
+        config = self.platforms.get(platform)
+        if config:
+            return config.home_channel
+        return None
+    
+    def get_reset_policy(
+        self, 
+        platform: Optional[Platform] = None,
+        session_type: Optional[str] = None
+    ) -> SessionResetPolicy:
+        """
+        Get the appropriate reset policy for a session.
+        
+        Priority: platform override > type override > default
+        """
+        # Platform-specific override takes precedence
+        if platform and platform in self.reset_by_platform:
+            return self.reset_by_platform[platform]
+        
+        # Type-specific override (dm, group, thread)
+        if session_type and session_type in self.reset_by_type:
+            return self.reset_by_type[session_type]
+        
+        return self.default_reset_policy
+    
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "platforms": {
+                p.value: c.to_dict() for p, c in self.platforms.items()
+            },
+            "default_reset_policy": self.default_reset_policy.to_dict(),
+            "reset_by_type": {
+                k: v.to_dict() for k, v in self.reset_by_type.items()
+            },
+            "reset_by_platform": {
+                p.value: v.to_dict() for p, v in self.reset_by_platform.items()
+            },
+            "reset_triggers": self.reset_triggers,
+            "sessions_dir": str(self.sessions_dir),
+            "always_log_local": self.always_log_local,
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GatewayConfig":
+        platforms = {}
+        for platform_name, platform_data in data.get("platforms", {}).items():
+            try:
+                platform = Platform(platform_name)
+                platforms[platform] = PlatformConfig.from_dict(platform_data)
+            except ValueError:
+                pass  # Skip unknown platforms
+        
+        reset_by_type = {}
+        for type_name, policy_data in data.get("reset_by_type", {}).items():
+            reset_by_type[type_name] = SessionResetPolicy.from_dict(policy_data)
+        
+        reset_by_platform = {}
+        for platform_name, policy_data in data.get("reset_by_platform", {}).items():
+            try:
+                platform = Platform(platform_name)
+                reset_by_platform[platform] = SessionResetPolicy.from_dict(policy_data)
+            except ValueError:
+                pass
+        
+        default_policy = SessionResetPolicy()
+        if "default_reset_policy" in data:
+            default_policy = SessionResetPolicy.from_dict(data["default_reset_policy"])
+        
+        sessions_dir = Path.home() / ".hermes" / "sessions"
+        if "sessions_dir" in data:
+            sessions_dir = Path(data["sessions_dir"])
+        
+        return cls(
+            platforms=platforms,
+            default_reset_policy=default_policy,
+            reset_by_type=reset_by_type,
+            reset_by_platform=reset_by_platform,
+            reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
+            sessions_dir=sessions_dir,
+            always_log_local=data.get("always_log_local", True),
+        )
+
+
+def load_gateway_config() -> GatewayConfig:
+    """
+    Load gateway configuration from multiple sources.
+    
+    Priority (highest to lowest):
+    1. Environment variables
+    2. ~/.hermes/gateway.json
+    3. Built-in GatewayConfig defaults
+    (cli-config.yaml is not read by this function.)
+    """
+    config = GatewayConfig()
+    
+    # Try loading from ~/.hermes/gateway.json
+    gateway_config_path = Path.home() / ".hermes" / "gateway.json"
+    if gateway_config_path.exists():
+        try:
+            with open(gateway_config_path, "r") as f:
+                data = json.load(f)
+                config = GatewayConfig.from_dict(data)
+        except Exception as e:
+            print(f"[gateway] Warning: Failed to load {gateway_config_path}: {e}")
+    
+    # Override with environment variables
+    _apply_env_overrides(config)
+    
+    return config
+
+
+def _apply_env_overrides(config: GatewayConfig) -> None:
+    """Apply environment variable overrides to config."""
+    
+    # Telegram
+    telegram_token = os.getenv("TELEGRAM_BOT_TOKEN")
+    if telegram_token:
+        if Platform.TELEGRAM not in config.platforms:
+            config.platforms[Platform.TELEGRAM] = PlatformConfig()
+        config.platforms[Platform.TELEGRAM].enabled = True
+        config.platforms[Platform.TELEGRAM].token = telegram_token
+    
+    telegram_home = os.getenv("TELEGRAM_HOME_CHANNEL")
+    if telegram_home and Platform.TELEGRAM in config.platforms:
+        config.platforms[Platform.TELEGRAM].home_channel = HomeChannel(
+            platform=Platform.TELEGRAM,
+            chat_id=telegram_home,
+            name=os.getenv("TELEGRAM_HOME_CHANNEL_NAME", "Home"),
+        )
+    
+    # Discord
+    discord_token = os.getenv("DISCORD_BOT_TOKEN")
+    if discord_token:
+        if Platform.DISCORD not in config.platforms:
+            config.platforms[Platform.DISCORD] = PlatformConfig()
+        config.platforms[Platform.DISCORD].enabled = True
+        config.platforms[Platform.DISCORD].token = discord_token
+    
+    discord_home = os.getenv("DISCORD_HOME_CHANNEL")
+    if discord_home and Platform.DISCORD in config.platforms:
+        config.platforms[Platform.DISCORD].home_channel = HomeChannel(
+            platform=Platform.DISCORD,
+            chat_id=discord_home,
+            name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"),
+        )
+    
+    # WhatsApp (typically uses different auth mechanism)
+    whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes")
+    if whatsapp_enabled:
+        if Platform.WHATSAPP not in config.platforms:
+            config.platforms[Platform.WHATSAPP] = PlatformConfig()
+        config.platforms[Platform.WHATSAPP].enabled = True
+    
+    # Session settings
+    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
+    if idle_minutes:
+        try:
+            config.default_reset_policy.idle_minutes = int(idle_minutes)
+        except ValueError:
+            pass
+    
+    reset_hour = os.getenv("SESSION_RESET_HOUR")
+    if reset_hour:
+        try:
+            config.default_reset_policy.at_hour = int(reset_hour)
+        except ValueError:
+            pass
+
+
+def save_gateway_config(config: GatewayConfig) -> None:
+    """Save gateway configuration to ~/.hermes/gateway.json."""
+    gateway_config_path = Path.home() / ".hermes" / "gateway.json"
+    gateway_config_path.parent.mkdir(parents=True, exist_ok=True)
+    
+    with open(gateway_config_path, "w") as f:
+        json.dump(config.to_dict(), f, indent=2)
diff --git a/gateway/delivery.py b/gateway/delivery.py
new file mode 100644
index 0000000000..04c55f0bac
--- /dev/null
+++ b/gateway/delivery.py
@@ -0,0 +1,318 @@
+"""
+Delivery routing for cron job outputs and agent responses.
+
+Routes messages to the appropriate destination based on:
+- Explicit targets (e.g., "telegram:123456789")
+- Platform home channels (e.g., "telegram" → home channel)
+- Origin (back to where the job was created)
+- Local (always saved to files)
+"""
+
+import json
+from pathlib import Path
+from datetime import datetime
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Any, Union
+from enum import Enum
+
+from .config import Platform, GatewayConfig, HomeChannel
+from .session import SessionSource
+
+
+@dataclass
+class DeliveryTarget:
+    """
+    A single delivery target.
+
+    Represents where a message should be sent:
+    - "origin" → back to source
+    - "local" → save to local files
+    - "telegram" → Telegram home channel
+    - "telegram:123456" → specific Telegram chat
+    """
+    platform: Platform
+    chat_id: Optional[str] = None  # None means use home channel
+    is_origin: bool = False
+    is_explicit: bool = False  # True if chat_id was explicitly specified
+
+    @classmethod
+    def parse(cls, target: str, origin: Optional[SessionSource] = None) -> "DeliveryTarget":
+        """
+        Parse a delivery target string (formats are listed on the class).
+
+        Keywords and platform names match case-insensitively; a chat ID keeps
+        its case because it is an opaque platform identifier. Unknown platforms
+        fall back to local, and an empty chat ID ("telegram:") means home channel.
+        """
+        target = target.strip()
+        keyword = target.lower()
+
+        if keyword == "origin":
+            if origin:
+                return cls(
+                    platform=origin.platform,
+                    chat_id=origin.chat_id,
+                    is_origin=True,
+                )
+            else:
+                # Fallback to local if no origin
+                return cls(platform=Platform.LOCAL, is_origin=True)
+
+        if keyword == "local":
+            return cls(platform=Platform.LOCAL)
+
+        # Check for platform:chat_id format (only the platform part is normalized)
+        if ":" in target:
+            platform_str, chat_id = target.split(":", 1)
+            chat_id = chat_id.strip() or None
+            try:
+                platform = Platform(platform_str.strip().lower())
+                return cls(platform=platform, chat_id=chat_id, is_explicit=chat_id is not None)
+            except ValueError:
+                # Unknown platform, treat as local
+                return cls(platform=Platform.LOCAL)
+
+        # Just a platform name (use home channel)
+        try:
+            platform = Platform(keyword)
+            return cls(platform=platform)
+        except ValueError:
+            # Unknown platform, treat as local
+            return cls(platform=Platform.LOCAL)
+
+    def to_string(self) -> str:
+        """Convert back to string format; origin and local targets render as keywords."""
+        if self.is_origin:
+            return "origin"
+        if self.platform == Platform.LOCAL:
+            return "local"
+        if self.chat_id:
+            return f"{self.platform.value}:{self.chat_id}"
+        return self.platform.value
+
+
+class DeliveryRouter:
+    """
+    Routes messages to appropriate destinations.
+
+    Handles the logic of resolving delivery targets and dispatching
+    messages to the right platform adapters.
+    """
+
+    def __init__(self, config: GatewayConfig, adapters: Optional[Dict[Platform, Any]] = None):
+        """
+        Initialize the delivery router.
+
+        Args:
+            config: Gateway configuration
+            adapters: Dict mapping platforms to their adapter instances (kept by reference)
+        """
+        self.config = config
+        self.adapters = adapters if adapters is not None else {}  # keep a shared (even empty) dict
+        self.output_dir = Path.home() / ".hermes" / "cron" / "output"
+
+    def resolve_targets(
+        self,
+        deliver: Union[str, List[str]],
+        origin: Optional[SessionSource] = None
+    ) -> List[DeliveryTarget]:
+        """
+        Resolve delivery specification to concrete targets.
+
+        Args:
+            deliver: Delivery spec - "origin", "telegram", ["local", "discord"], etc.
+            origin: The source where the request originated (for "origin" target)
+
+        Returns:
+            List of resolved, de-duplicated targets; platforms without a home channel are skipped
+        """
+        if isinstance(deliver, str):
+            deliver = [deliver]
+
+        targets = []
+        seen_platforms = set()
+
+        for target_str in deliver:
+            target = DeliveryTarget.parse(target_str, origin)
+
+            # Resolve home channel if needed
+            if target.chat_id is None and target.platform != Platform.LOCAL:
+                home = self.config.get_home_channel(target.platform)
+                if home:
+                    target.chat_id = home.chat_id
+                else:
+                    # No home channel configured, skip this platform
+                    continue
+
+            # Deduplicate on (platform, chat_id)
+            key = (target.platform, target.chat_id)
+            if key not in seen_platforms:
+                seen_platforms.add(key)
+                targets.append(target)
+
+        # Always include local if configured
+        if self.config.always_log_local:
+            local_key = (Platform.LOCAL, None)
+            if local_key not in seen_platforms:
+                targets.append(DeliveryTarget(platform=Platform.LOCAL))
+
+        return targets
+
+    async def deliver(
+        self,
+        content: str,
+        targets: List[DeliveryTarget],
+        job_id: Optional[str] = None,
+        job_name: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Deliver content to all specified targets.
+
+        Args:
+            content: The message/output to deliver
+            targets: List of delivery targets
+            job_id: Optional job ID (for cron jobs)
+            job_name: Optional job name
+            metadata: Additional metadata to include
+
+        Returns:
+            Dict keyed by target string; a failure on one target never aborts the others
+        """
+        results = {}
+
+        for target in targets:
+            try:
+                if target.platform == Platform.LOCAL:
+                    result = self._deliver_local(content, job_id, job_name, metadata)
+                else:
+                    result = await self._deliver_to_platform(target, content, metadata)
+
+                results[target.to_string()] = {
+                    "success": True,
+                    "result": result
+                }
+            except Exception as e:
+                results[target.to_string()] = {
+                    "success": False,
+                    "error": str(e)
+                }
+
+        return results
+
+    def _deliver_local(
+        self,
+        content: str,
+        job_id: Optional[str],
+        job_name: Optional[str],
+        metadata: Optional[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Save content as a markdown file under output_dir and return its path and timestamp."""
+        now = datetime.now()  # single reading so file name and header agree
+        timestamp = now.strftime("%Y%m%d_%H%M%S")
+        if job_id:
+            output_path = self.output_dir / job_id / f"{timestamp}.md"
+        else:
+            output_path = self.output_dir / "misc" / f"{timestamp}.md"
+
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Build the output document
+        lines = []
+        if job_name:
+            lines.append(f"# {job_name}")
+        else:
+            lines.append("# Delivery Output")
+
+        lines.append("")
+        lines.append(f"**Timestamp:** {now.strftime('%Y-%m-%d %H:%M:%S')}")
+
+        if job_id:
+            lines.append(f"**Job ID:** {job_id}")
+
+        if metadata:
+            for key, value in metadata.items():
+                lines.append(f"**{key}:** {value}")
+
+        lines.append("")
+        lines.append("---")
+        lines.append("")
+        lines.append(content)
+
+        output_path.write_text("\n".join(lines), encoding="utf-8")
+
+        return {
+            "path": str(output_path),
+            "timestamp": timestamp
+        }
+
+    async def _deliver_to_platform(
+        self,
+        target: DeliveryTarget,
+        content: str,
+        metadata: Optional[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Send content through the target platform's adapter; raise if it cannot be delivered."""
+        adapter = self.adapters.get(target.platform)
+        if not adapter:
+            raise ValueError(f"No adapter configured for {target.platform.value}")
+        if not target.chat_id:
+            raise ValueError(f"No chat ID for {target.platform.value} delivery")
+
+        # Adapters report failure via a result with success=False rather than raising
+        result = await adapter.send(target.chat_id, content, metadata=metadata)
+        if getattr(result, "success", True) is False:
+            raise RuntimeError(getattr(result, "error", None) or "adapter reported a failed send")
+        return result
+
+
+def parse_deliver_spec(
+    deliver: Optional[Union[str, List[str]]],
+    origin: Optional[SessionSource] = None,
+    default: str = "origin"
+) -> Union[str, List[str]]:
+    """
+    Normalize a delivery specification.
+
+    A falsy spec (None, "" or an empty list) is replaced by ``default``; any
+    other value is handed back unchanged. ``origin`` is not consulted here.
+    """
+    # Truthiness covers None and empty containers alike
+    return deliver if deliver else default
+
+
+def build_delivery_context_for_tool(
+    config: GatewayConfig,
+    origin: Optional[SessionSource] = None
+) -> Dict[str, Any]:
+    """
+    Describe the delivery targets available to the schedule_cronjob tool.
+
+    Returns the serialized origin (or None), an "options" map keyed by target
+    name, and the config's always_log_local flag.
+    """
+    options = {
+        "origin": {
+            "description": "Back to where this job was created",
+            "available": origin is not None,
+        },
+        "local": {
+            "description": "Save to local files only",
+            "available": True,
+        }
+    }
+
+    for platform in config.get_connected_platforms():
+        home = config.get_home_channel(platform)
+        label = platform.value
+        options[label] = {
+            "description": f"{label.title()} home channel",
+            "available": True,
+            "home_channel": home.to_dict() if home else None,
+        }
+
+    return {
+        "origin": origin.to_dict() if origin else None,
+        "options": options,
+        "always_log_local": config.always_log_local,
+    }
diff --git a/gateway/platforms/__init__.py b/gateway/platforms/__init__.py
new file mode 100644
index 0000000000..dae74568d0
--- /dev/null
+++ b/gateway/platforms/__init__.py
@@ -0,0 +1,17 @@
+"""
+Platform adapters for messaging integrations.
+
+Each adapter handles:
+- Receiving messages from a platform
+- Sending messages/responses back
+- Platform-specific authentication
+- Message formatting and media handling
+"""
+
+from .base import BasePlatformAdapter, MessageEvent, SendResult
+
+__all__ = [
+    "BasePlatformAdapter",
+    "MessageEvent",
+    "SendResult",
+]
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
new file mode 100644
index 0000000000..e9a5f828e8
--- /dev/null
+++ b/gateway/platforms/base.py
@@ -0,0 +1,274 @@
+"""
+Base platform adapter interface.
+
+All platform adapters (Telegram, Discord, WhatsApp) inherit from this
+and implement the required methods.
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Dict, List, Optional, Any, Callable, Awaitable
+from enum import Enum
+
+import sys
+sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
+
+from gateway.config import Platform, PlatformConfig
+from gateway.session import SessionSource
+
+
+class MessageType(Enum):
+    """Kinds of incoming message; carried on MessageEvent.message_type."""
+    TEXT = "text"
+    PHOTO = "photo"
+    VIDEO = "video"
+    AUDIO = "audio"
+    VOICE = "voice"
+    DOCUMENT = "document"
+    STICKER = "sticker"
+    COMMAND = "command"  # /command style
+
+
+@dataclass
+class MessageEvent:
+    """
+    Incoming message from a platform.
+
+    Normalized representation that all adapters produce.
+    """
+    # Message content
+    text: str
+    message_type: MessageType = MessageType.TEXT
+
+    # Source information (None until an adapter fills it in)
+    source: Optional[SessionSource] = None
+
+    # Original platform data
+    raw_message: Any = None
+    message_id: Optional[str] = None
+
+    # Media attachments
+    media_urls: List[str] = field(default_factory=list)
+    media_types: List[str] = field(default_factory=list)
+
+    # Reply context
+    reply_to_message_id: Optional[str] = None
+
+    # Timestamps
+    timestamp: datetime = field(default_factory=datetime.now)
+
+    def is_command(self) -> bool:
+        """Check if this is a command message (e.g., /new, /reset)."""
+        return self.text.startswith("/")
+
+    def get_command(self) -> Optional[str]:
+        """Return the lower-cased command name without its slash, or None for non-commands."""
+        if not self.is_command():
+            return None
+        # A command starts with "/", so the split always yields a first token
+        head = self.text.split(maxsplit=1)[0]
+        return head[1:].lower()
+
+    def get_command_args(self) -> str:
+        """Return the text after the command word; non-command text is returned whole."""
+        if not self.is_command():
+            return self.text
+        _, *rest = self.text.split(maxsplit=1)
+        return rest[0] if rest else ""
+
+
+@dataclass
+class SendResult:
+    """Outcome of a send attempt, as returned by BasePlatformAdapter.send."""
+    success: bool
+    message_id: Optional[str] = None  # ID of the sent message, when known
+    error: Optional[str] = None  # failure description when success is False
+    raw_response: Any = None  # adapter-specific payload
+
+
+# Type for message handlers
+MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
+
+
+class BasePlatformAdapter(ABC):
+    """
+    Base class for platform adapters.
+
+    Subclasses implement platform-specific logic for:
+    - Connecting and authenticating
+    - Receiving messages
+    - Sending messages/responses
+    - Handling media
+    """
+
+    def __init__(self, config: PlatformConfig, platform: Platform):
+        self.config = config
+        self.platform = platform
+        self._message_handler: Optional[MessageHandler] = None
+        self._running = False
+
+    @property
+    def name(self) -> str:
+        """Human-readable name for this adapter."""
+        return self.platform.value.title()
+
+    @property
+    def is_connected(self) -> bool:
+        """Check if adapter is currently connected."""
+        return self._running
+
+    def set_message_handler(self, handler: MessageHandler) -> None:
+        """
+        Set the handler for incoming messages.
+
+        The handler receives a MessageEvent and should return
+        an optional response string.
+        """
+        self._message_handler = handler
+
+    @abstractmethod
+    async def connect(self) -> bool:
+        """
+        Connect to the platform and start receiving messages.
+
+        Returns True if connection was successful.
+        """
+        pass
+
+    @abstractmethod
+    async def disconnect(self) -> None:
+        """Disconnect from the platform."""
+        pass
+
+    @abstractmethod
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> SendResult:
+        """
+        Send a message to a chat.
+
+        Args:
+            chat_id: The chat/channel ID to send to
+            content: Message content (may be markdown)
+            reply_to: Optional message ID to reply to
+            metadata: Additional platform-specific options
+
+        Returns:
+            SendResult with success status and message ID
+        """
+        pass
+
+    async def send_typing(self, chat_id: str) -> None:
+        """
+        Send a typing indicator.
+
+        Override in subclasses if the platform supports it.
+        """
+        pass
+
+    async def handle_message(self, event: MessageEvent) -> None:
+        """
+        Process an incoming message.
+
+        Calls the registered handler and sends its response; errors are printed, not raised.
+        """
+        if not self._message_handler:
+            return
+
+        try:
+            # Send typing indicator
+            await self.send_typing(event.source.chat_id)
+
+            # Call the handler
+            response = await self._message_handler(event)
+
+            # Send response if any
+            if response:
+                await self.send(
+                    chat_id=event.source.chat_id,
+                    content=response,
+                    reply_to=event.message_id
+                )
+        except Exception as e:
+            print(f"[{self.name}] Error handling message: {e}")
+
+    def build_source(
+        self,
+        chat_id: str,
+        chat_name: Optional[str] = None,
+        chat_type: str = "dm",
+        user_id: Optional[str] = None,
+        user_name: Optional[str] = None,
+        thread_id: Optional[str] = None
+    ) -> SessionSource:
+        """Helper to build a SessionSource for this platform."""
+        return SessionSource(
+            platform=self.platform,
+            chat_id=str(chat_id),
+            chat_name=chat_name,
+            chat_type=chat_type,
+            user_id=str(user_id) if user_id else None,
+            user_name=user_name,
+            thread_id=str(thread_id) if thread_id else None,
+        )
+
+    @abstractmethod
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """
+        Get information about a chat/channel.
+
+        Returns dict with at least:
+        - name: Chat name
+        - type: "dm", "group", "channel"
+        """
+        pass
+
+    def format_message(self, content: str) -> str:
+        """
+        Format a message for this platform.
+
+        Override in subclasses to handle platform-specific formatting
+        (e.g., Telegram MarkdownV2, Discord markdown).
+
+        Default implementation returns content as-is.
+        """
+        return content
+
+    def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
+        """
+        Split a long message into chunks of at most max_length characters.
+
+        Prefers to break at a newline, then at a space, and otherwise hard-splits.
+        Whitespace-only chunks are dropped so no empty message is ever emitted.
+        Raises ValueError if max_length is not positive.
+        """
+        if max_length <= 0:
+            raise ValueError("max_length must be positive")
+        if len(content) <= max_length:
+            return [content]
+
+        chunks = []
+        while content:
+            if len(content) <= max_length:
+                chunks.append(content)
+                break
+
+            # Try to split at a newline
+            split_idx = content.rfind("\n", 0, max_length)
+            if split_idx == -1:
+                # No newline, split at space
+                split_idx = content.rfind(" ", 0, max_length)
+            if split_idx == -1:
+                # No space either, hard split
+                split_idx = max_length
+
+            chunk = content[:split_idx]
+            if chunk.strip():  # split_idx == 0 (leading whitespace) would yield ""
+                chunks.append(chunk)
+            content = content[split_idx:].lstrip()
+        return chunks
diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
new file mode 100644
index 0000000000..345b198991
--- /dev/null
+++ b/gateway/platforms/discord.py
@@ -0,0 +1,297 @@
+"""
+Discord platform adapter.
+
+Uses discord.py library for:
+- Receiving messages from servers and DMs
+- Sending responses back
+- Handling threads and channels
+"""
+
+import asyncio
+from typing import Dict, List, Optional, Any
+
+try:
+    import discord
+    from discord import Message as DiscordMessage, Intents
+    from discord.ext import commands
+    DISCORD_AVAILABLE = True
+except ImportError:
+    DISCORD_AVAILABLE = False
+    discord = None
+    DiscordMessage = Any
+    Intents = Any
+    commands = None
+
+import sys
+sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+
+def check_discord_requirements() -> bool:
+    """Return True when the optional discord.py import at module load succeeded."""
+    return DISCORD_AVAILABLE
+
+
+class DiscordAdapter(BasePlatformAdapter):
+    """
+    Discord bot adapter.
+
+    Handles:
+    - Receiving messages from servers and DMs
+    - Sending responses with Discord markdown
+    - Thread support
+    - Slash commands (future)
+    """
+
+    # Discord message limits
+    MAX_MESSAGE_LENGTH = 2000
+
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.DISCORD)
+        self._client: Optional[commands.Bot] = None
+        self._ready_event = asyncio.Event()
+        self._start_task: Optional[asyncio.Task] = None  # background client.start() task
+
+    async def connect(self) -> bool:
+        """Connect to Discord and start receiving events; cleans up and returns False on failure."""
+        if not DISCORD_AVAILABLE:
+            print(f"[{self.name}] discord.py not installed. Run: pip install discord.py")
+            return False
+
+        if not self.config.token:
+            print(f"[{self.name}] No bot token configured")
+            return False
+
+        try:
+            # Set up intents
+            intents = Intents.default()
+            intents.message_content = True
+            intents.dm_messages = True
+            intents.guild_messages = True
+
+            # Create bot
+            self._client = commands.Bot(
+                command_prefix="!",  # Not really used, we handle raw messages
+                intents=intents,
+            )
+
+            # Register event handlers
+            @self._client.event
+            async def on_ready():
+                print(f"[{self.name}] Connected as {self._client.user}")
+                self._ready_event.set()
+
+            @self._client.event
+            async def on_message(message: DiscordMessage):
+                # Ignore bot's own messages
+                if message.author == self._client.user:
+                    return
+                await self._handle_message(message)
+
+            # Start the bot in background; hold a reference so the task cannot be garbage-collected
+            self._start_task = asyncio.create_task(self._client.start(self.config.token))
+
+            # Wait for ready
+            await asyncio.wait_for(self._ready_event.wait(), timeout=30)
+
+            self._running = True
+            return True
+
+        except asyncio.TimeoutError:
+            print(f"[{self.name}] Timeout waiting for connection")
+            await self.disconnect()  # do not leave a half-started client running
+            return False
+        except Exception as e:
+            print(f"[{self.name}] Failed to connect: {e}")
+            await self.disconnect()
+            return False
+
+    async def disconnect(self) -> None:
+        """Disconnect from Discord and reset connection state."""
+        if self._client:
+            try:
+                await self._client.close()
+            except Exception as e:
+                print(f"[{self.name}] Error during disconnect: {e}")
+
+        self._running = False
+        self._client = None
+        self._start_task = None
+        self._ready_event.clear()
+        print(f"[{self.name}] Disconnected")
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> SendResult:
+        """Send a message to a Discord channel, split into chunks if it exceeds the limit."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Get the channel (cache first, then API)
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+
+            if not channel:
+                return SendResult(success=False, error=f"Channel {chat_id} not found")
+
+            # Format and split message if needed
+            formatted = self.format_message(content)
+            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+
+            message_ids = []
+            reference = None
+
+            if reply_to:
+                try:
+                    ref_msg = await channel.fetch_message(int(reply_to))
+                    reference = ref_msg
+                except Exception:
+                    pass  # Ignore if we can't find the referenced message
+
+            for i, chunk in enumerate(chunks):
+                msg = await channel.send(
+                    content=chunk,
+                    reference=reference if i == 0 else None,
+                )
+                message_ids.append(str(msg.id))
+
+            return SendResult(
+                success=True,
+                message_id=message_ids[0] if message_ids else None,
+                raw_response={"message_ids": message_ids}
+            )
+
+        except Exception as e:
+            return SendResult(success=False, error=str(e))
+
+    async def send_typing(self, chat_id: str) -> None:
+        """Send typing indicator."""
+        if self._client:
+            try:
+                channel = self._client.get_channel(int(chat_id))
+                if channel:
+                    await channel.typing()
+            except Exception:
+                pass  # Ignore typing indicator failures
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Get information about a Discord channel."""
+        if not self._client:
+            return {"name": "Unknown", "type": "dm"}
+
+        try:
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+
+            if not channel:
+                return {"name": str(chat_id), "type": "dm"}
+
+            # Determine channel type
+            if isinstance(channel, discord.DMChannel):
+                chat_type = "dm"
+                name = channel.recipient.name if channel.recipient else str(chat_id)
+            elif isinstance(channel, discord.Thread):
+                chat_type = "thread"
+                name = channel.name
+            elif isinstance(channel, discord.TextChannel):
+                chat_type = "channel"
+                name = f"#{channel.name}"
+                if channel.guild:
+                    name = f"{channel.guild.name} / {name}"
+            else:
+                chat_type = "channel"
+                name = getattr(channel, "name", str(chat_id))
+
+            return {
+                "name": name,
+                "type": chat_type,
+                "guild_id": str(channel.guild.id) if hasattr(channel, "guild") and channel.guild else None,
+                "guild_name": channel.guild.name if hasattr(channel, "guild") and channel.guild else None,
+            }
+        except Exception as e:
+            return {"name": str(chat_id), "type": "dm", "error": str(e)}
+
+    def format_message(self, content: str) -> str:
+        """Format message for Discord; content is returned unchanged."""
+        # Discord markdown is fairly standard, no special escaping needed
+        return content
+
+    async def _handle_message(self, message: DiscordMessage) -> None:
+        """Convert an incoming Discord message into a MessageEvent and dispatch it."""
+        # Determine message type
+        msg_type = MessageType.TEXT
+        if message.content.startswith("/"):
+            msg_type = MessageType.COMMAND
+        elif message.attachments:
+            # Check attachment types; the first attachment with a content type decides
+            for att in message.attachments:
+                if att.content_type:
+                    if att.content_type.startswith("image/"):
+                        msg_type = MessageType.PHOTO
+                    elif att.content_type.startswith("video/"):
+                        msg_type = MessageType.VIDEO
+                    elif att.content_type.startswith("audio/"):
+                        msg_type = MessageType.AUDIO
+                    else:
+                        msg_type = MessageType.DOCUMENT
+                    break
+
+        # Determine chat type
+        if isinstance(message.channel, discord.DMChannel):
+            chat_type = "dm"
+            chat_name = message.author.name
+        elif isinstance(message.channel, discord.Thread):
+            chat_type = "thread"
+            chat_name = message.channel.name
+        else:
+            chat_type = "group"  # Treat server channels as groups
+            chat_name = getattr(message.channel, "name", str(message.channel.id))
+            if hasattr(message.channel, "guild") and message.channel.guild:
+                chat_name = f"{message.channel.guild.name} / #{chat_name}"
+
+        # Get thread ID if in a thread
+        thread_id = None
+        if isinstance(message.channel, discord.Thread):
+            thread_id = str(message.channel.id)
+
+        # Build source
+        source = self.build_source(
+            chat_id=str(message.channel.id),
+            chat_name=chat_name,
+            chat_type=chat_type,
+            user_id=str(message.author.id),
+            user_name=message.author.display_name,
+            thread_id=thread_id,
+        )
+
+        # Build media URLs
+        media_urls = [att.url for att in message.attachments]
+        media_types = [att.content_type or "unknown" for att in message.attachments]
+
+        event = MessageEvent(
+            text=message.content,
+            message_type=msg_type,
+            source=source,
+            raw_message=message,
+            message_id=str(message.id),
+            media_urls=media_urls,
+            media_types=media_types,
+            reply_to_message_id=str(message.reference.message_id) if message.reference else None,
+            timestamp=message.created_at,
+        )
+
+        await self.handle_message(event)
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
new file mode 100644
index 0000000000..ef46351c6a
--- /dev/null
+++ b/gateway/platforms/telegram.py
@@ -0,0 +1,284 @@
+"""
+Telegram platform adapter.
+
+Uses python-telegram-bot library for:
+- Receiving messages from users/groups
+- Sending responses back
+- Handling media and commands
+"""
+
+import asyncio
+from typing import Dict, List, Optional, Any
+
+try:
+    from telegram import Update, Bot, Message
+    from telegram.ext import (
+        Application,
+        CommandHandler,
+        MessageHandler as TelegramMessageHandler,
+        ContextTypes,
+        filters,
+    )
+    from telegram.constants import ParseMode, ChatType
+    TELEGRAM_AVAILABLE = True
+except ImportError:
+    TELEGRAM_AVAILABLE = False
+    Update = Any
+    Bot = Any
+    Message = Any
+    Application = Any
+    ContextTypes = Any
+
+import sys
+sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+
+def check_telegram_requirements() -> bool:
+    """Report whether the python-telegram-bot import at module load succeeded."""
+    return bool(TELEGRAM_AVAILABLE)
+
+
+class TelegramAdapter(BasePlatformAdapter):
+    """
+    Telegram bot adapter.
+    
+    Handles:
+    - Receiving messages from users and groups
+    - Sending responses with Telegram markdown
+    - Forum topics (thread_id support)
+    - Media messages
+    """
+    
+    # Telegram message limit; send() passes it to truncate_message() as the per-chunk size
+    MAX_MESSAGE_LENGTH = 4096
+    
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.TELEGRAM)
+        self._app: Optional[Application] = None
+        self._bot: Optional[Bot] = None
+    
+    async def connect(self) -> bool:
+        """Build the bot application, register handlers and start polling; returns False on any failure."""
+        if not TELEGRAM_AVAILABLE:
+            print(f"[{self.name}] python-telegram-bot not installed. Run: pip install python-telegram-bot")
+            return False
+        
+        if not self.config.token:
+            print(f"[{self.name}] No bot token configured")
+            return False
+        
+        try:
+            # Build the application
+            self._app = Application.builder().token(self.config.token).build()
+            self._bot = self._app.bot
+            
+            # Register handlers: plain text, commands, then media
+            self._app.add_handler(TelegramMessageHandler(
+                filters.TEXT & ~filters.COMMAND,
+                self._handle_text_message
+            ))
+            self._app.add_handler(TelegramMessageHandler(
+                filters.COMMAND,
+                self._handle_command
+            ))
+            self._app.add_handler(TelegramMessageHandler(
+                filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL,
+                self._handle_media_message
+            ))
+            
+            # Start polling in background
+            await self._app.initialize()
+            await self._app.start()
+            await self._app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
+            
+            self._running = True
+            print(f"[{self.name}] Connected and polling for updates")
+            return True
+            
+        except Exception as e:
+            print(f"[{self.name}] Failed to connect: {e}")
+            return False
+    
+    async def disconnect(self) -> None:
+        """Stop polling and shut the application down; errors are printed, state is always reset."""
+        if self._app:
+            try:
+                await self._app.updater.stop()
+                await self._app.stop()
+                await self._app.shutdown()
+            except Exception as e:
+                print(f"[{self.name}] Error during disconnect: {e}")
+        
+        self._running = False
+        self._app = None
+        self._bot = None
+        print(f"[{self.name}] Disconnected")
+    
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> SendResult:
+        """Send content to a chat in MAX_MESSAGE_LENGTH-limited chunks; only the first chunk is a reply."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+        
+        try:
+            # Format and split message if needed
+            formatted = self.format_message(content)
+            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+            
+            message_ids = []
+            thread_id = metadata.get("thread_id") if metadata else None
+            
+            for i, chunk in enumerate(chunks):
+                msg = await self._bot.send_message(
+                    chat_id=int(chat_id),
+                    text=chunk,
+                    parse_mode=ParseMode.MARKDOWN,
+                    reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                    message_thread_id=int(thread_id) if thread_id else None,
+                )
+                message_ids.append(str(msg.message_id))
+            
+            return SendResult(
+                success=True,
+                message_id=message_ids[0] if message_ids else None,
+                raw_response={"message_ids": message_ids}
+            )
+            
+        except Exception as e:
+            return SendResult(success=False, error=str(e))
+    
+    async def send_typing(self, chat_id: str) -> None:
+        """Send a best-effort typing chat action; failures are ignored."""
+        if self._bot:
+            try:
+                await self._bot.send_chat_action(
+                    chat_id=int(chat_id),
+                    action="typing"
+                )
+            except Exception:
+                pass  # Ignore typing indicator failures
+    
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Look up a chat and map its Telegram type to dm/group/forum/channel (dm on error)."""
+        if not self._bot:
+            return {"name": "Unknown", "type": "dm"}
+        
+        try:
+            chat = await self._bot.get_chat(int(chat_id))
+            
+            chat_type = "dm"
+            if chat.type == ChatType.GROUP:
+                chat_type = "group"
+            elif chat.type == ChatType.SUPERGROUP:
+                chat_type = "group"
+                if chat.is_forum:
+                    chat_type = "forum"
+            elif chat.type == ChatType.CHANNEL:
+                chat_type = "channel"
+            
+            return {
+                "name": chat.title or chat.full_name or str(chat_id),
+                "type": chat_type,
+                "username": chat.username,
+                "is_forum": getattr(chat, "is_forum", False),
+            }
+        except Exception as e:
+            return {"name": str(chat_id), "type": "dm", "error": str(e)}
+    
+    def format_message(self, content: str) -> str:
+        """
+        Format message for Telegram.
+        
+        Telegram uses a subset of markdown. We'll use the simpler
+        Markdown mode (not MarkdownV2) for compatibility.
+        """
+        # No escaping is applied yet: the content is returned unchanged, so any
+        # Markdown markers it contains reach send() (and ParseMode.MARKDOWN) as-is.
+        return content
+    
+    async def _handle_text_message(self, update: Update, context: "ContextTypes.DEFAULT_TYPE") -> None:
+        """Forward a non-command text message to the gateway message handler."""
+        if not update.message or not update.message.text:
+            return
+        # NOTE: the context annotation is quoted because ContextTypes is typing.Any when PTB is missing
+        event = self._build_message_event(update.message, MessageType.TEXT)
+        await self.handle_message(event)
+    
+    async def _handle_command(self, update: Update, context: "ContextTypes.DEFAULT_TYPE") -> None:
+        """Forward a command message (filters.COMMAND) to the gateway message handler."""
+        if not update.message or not update.message.text:
+            return
+        
+        event = self._build_message_event(update.message, MessageType.COMMAND)
+        await self.handle_message(event)
+    
+    async def _handle_media_message(self, update: Update, context: "ContextTypes.DEFAULT_TYPE") -> None:
+        """Forward a photo/video/audio/voice/document message, using its caption as the text."""
+        if not update.message:
+            return
+        
+        msg = update.message
+        
+        # Determine media type (anything that is not photo/video/audio/voice is a document)
+        if msg.photo:
+            msg_type = MessageType.PHOTO
+        elif msg.video:
+            msg_type = MessageType.VIDEO
+        elif msg.audio:
+            msg_type = MessageType.AUDIO
+        elif msg.voice:
+            msg_type = MessageType.VOICE
+        else:
+            msg_type = MessageType.DOCUMENT
+        
+        event = self._build_message_event(msg, msg_type)
+        
+        # Add caption as text
+        if msg.caption:
+            event.text = msg.caption
+        
+        await self.handle_message(event)
+    
+    def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
+        """Translate a Telegram message into a MessageEvent (media URLs are not populated here)."""
+        chat = message.chat
+        user = message.from_user
+        
+        # Determine chat type (supergroups, forums included, are reported as "group")
+        chat_type = "dm"
+        if chat.type in (ChatType.GROUP, ChatType.SUPERGROUP):
+            chat_type = "group"
+        elif chat.type == ChatType.CHANNEL:
+            chat_type = "channel"
+        
+        # Build source
+        source = self.build_source(
+            chat_id=str(chat.id),
+            chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
+            chat_type=chat_type,
+            user_id=str(user.id) if user else None,
+            user_name=user.full_name if user else None,
+            thread_id=str(message.message_thread_id) if message.message_thread_id else None,
+        )
+        
+        return MessageEvent(
+            text=message.text or "",
+            message_type=msg_type,
+            source=source,
+            raw_message=message,
+            message_id=str(message.message_id),
+            timestamp=message.date,
+        )
diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
new file mode 100644
index 0000000000..b737a847d8
--- /dev/null
+++ b/gateway/platforms/whatsapp.py
@@ -0,0 +1,327 @@
+"""
+WhatsApp platform adapter.
+
+WhatsApp integration is more complex than Telegram/Discord because:
+- No official bot API for personal accounts
+- Business API requires Meta Business verification
+- Most solutions use web-based automation
+
+This adapter supports multiple backends:
+1. WhatsApp Business API (requires Meta verification)
+2. whatsapp-web.js (via Node.js subprocess) - for personal accounts
+3. Baileys (via Node.js subprocess) - alternative for personal accounts
+
+For simplicity, we'll implement a generic interface that can work
+with different backends via a bridge pattern.
+"""
+
+import asyncio
+import json
+import subprocess
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+
+import sys
+sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+
+def check_whatsapp_requirements() -> bool:
+    """
+    Return True when a working ``node`` executable can be run.
+    
+    The check runs ``node --version`` (5 second timeout) and succeeds only
+    if the command exits with status 0; any exception counts as "missing".
+    """
+    probe_cmd = ["node", "--version"]
+    try:
+        completed = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=5)
+    except Exception:
+        # Not installed, not executable, timed out, ... - all mean "unavailable"
+        node_ok = False
+    else:
+        # A non-zero exit status also means Node.js is not usable
+        node_ok = completed.returncode == 0
+    return node_ok
+
+
+class WhatsAppAdapter(BasePlatformAdapter):
+    """
+    WhatsApp adapter.
+    
+    This implementation uses a simple HTTP bridge pattern where:
+    1. A Node.js process runs the WhatsApp Web client
+    2. Messages are forwarded via HTTP/IPC to this Python adapter
+    3. Responses are sent back through the bridge
+    
+    The actual Node.js bridge implementation can vary: it may be based on
+    whatsapp-web.js, Baileys, or the Business API.
+    
+    Configuration:
+    - bridge_script: Path to the Node.js bridge script
+    - bridge_port: Port for HTTP communication (default: 3000)
+    - session_path: Path to store WhatsApp session data
+    """
+    
+    # WhatsApp message limits
+    MAX_MESSAGE_LENGTH = 65536  # WhatsApp allows longer messages
+    
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.WHATSAPP)
+        self._bridge_process: Optional[subprocess.Popen] = None
+        self._bridge_port: int = config.extra.get("bridge_port", 3000)
+        self._bridge_script: Optional[str] = config.extra.get("bridge_script")
+        default_session = Path.home() / ".hermes" / "whatsapp" / "session"
+        self._session_path: Path = Path(config.extra.get("session_path", default_session))
+        self._message_queue: asyncio.Queue = asyncio.Queue()
+        # Strong reference to the polling task (the event loop only keeps a weak one)
+        self._poll_task: Optional[asyncio.Task] = None
+    
+    async def connect(self) -> bool:
+        """
+        Start the WhatsApp bridge.
+        
+        This launches the Node.js bridge process and waits for it to be ready.
+        """
+        if not check_whatsapp_requirements():
+            print(f"[{self.name}] Node.js not found. WhatsApp requires Node.js.")
+            return False
+        
+        if not self._bridge_script:
+            print(f"[{self.name}] No bridge script configured.")
+            print(f"[{self.name}] Set 'bridge_script' in whatsapp.extra config.")
+            print(f"[{self.name}] See docs/messaging.md for WhatsApp setup instructions.")
+            return False
+        
+        bridge_path = Path(self._bridge_script)
+        if not bridge_path.exists():
+            print(f"[{self.name}] Bridge script not found: {bridge_path}")
+            return False
+        
+        try:
+            # Ensure session directory exists
+            self._session_path.mkdir(parents=True, exist_ok=True)
+            
+            # Start the bridge process
+            self._bridge_process = subprocess.Popen(
+                [
+                    "node",
+                    str(bridge_path),
+                    "--port", str(self._bridge_port),
+                    "--session", str(self._session_path),
+                ],
+                # stdout/stderr are inherited rather than piped: nothing drains a
+                # pipe here, so a chatty bridge would eventually block on a full
+                # buffer - and the QR code has to reach the operator's console.
+            )
+            
+            # Wait for bridge to be ready (look for ready signal)
+            # This is a simplified version - real implementation would
+            # wait for an HTTP health check or specific stdout message
+            await asyncio.sleep(5)
+            
+            if self._bridge_process.poll() is not None:
+                exit_code = self._bridge_process.returncode
+                print(f"[{self.name}] Bridge process died: exit code {exit_code} (see bridge output above)")
+                return False
+            
+            # Start message polling task (kept on self so it is not garbage-collected)
+            self._poll_task = asyncio.create_task(self._poll_messages())
+            
+            self._running = True
+            print(f"[{self.name}] Bridge started on port {self._bridge_port}")
+            print(f"[{self.name}] Scan QR code if prompted (check bridge output)")
+            return True
+            
+        except Exception as e:
+            print(f"[{self.name}] Failed to start bridge: {e}")
+            return False
+    
+    async def disconnect(self) -> None:
+        """Cancel the polling task, then terminate (and if needed kill) the bridge process."""
+        poll_task, self._poll_task = self._poll_task, None
+        if poll_task and poll_task is not asyncio.current_task():
+            poll_task.cancel()  # never cancel ourselves if called from the poller
+        if self._bridge_process:
+            try:
+                self._bridge_process.terminate()
+                await asyncio.sleep(1)
+                if self._bridge_process.poll() is None:
+                    self._bridge_process.kill()
+            except Exception as e:
+                print(f"[{self.name}] Error stopping bridge: {e}")
+        self._running = False
+        self._bridge_process = None
+        print(f"[{self.name}] Disconnected")
+    
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> SendResult:
+        """POST a message to the bridge's /send endpoint; never raises, errors come back in SendResult."""
+        if not self._running:
+            return SendResult(success=False, error="Not connected")
+        
+        try:
+            import aiohttp
+            
+            async with aiohttp.ClientSession() as session:
+                payload = {
+                    "chatId": chat_id,
+                    "message": content,
+                }
+                if reply_to:
+                    payload["replyTo"] = reply_to
+                
+                async with session.post(
+                    f"http://localhost:{self._bridge_port}/send",
+                    json=payload,
+                    timeout=aiohttp.ClientTimeout(total=30)
+                ) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        return SendResult(
+                            success=True,
+                            message_id=data.get("messageId"),
+                            raw_response=data
+                        )
+                    else:
+                        error = await resp.text()
+                        return SendResult(success=False, error=error)
+                        
+        except ImportError:
+            return SendResult(
+                success=False, 
+                error="aiohttp not installed. Run: pip install aiohttp"
+            )
+        except Exception as e:
+            return SendResult(success=False, error=str(e))
+    
+    async def send_typing(self, chat_id: str) -> None:
+        """POST a best-effort typing indicator to the bridge's /typing endpoint."""
+        if not self._running:
+            return
+        
+        try:
+            import aiohttp
+            
+            async with aiohttp.ClientSession() as session:
+                await session.post(
+                    f"http://localhost:{self._bridge_port}/typing",
+                    json={"chatId": chat_id},
+                    timeout=aiohttp.ClientTimeout(total=5)
+                )
+        except Exception:
+            pass  # Ignore typing indicator failures
+    
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Fetch chat name/type/participants from the bridge; falls back to a plain "dm" record."""
+        if not self._running:
+            return {"name": "Unknown", "type": "dm"}
+        
+        try:
+            import aiohttp
+            
+            async with aiohttp.ClientSession() as session:
+                async with session.get(
+                    f"http://localhost:{self._bridge_port}/chat/{chat_id}",
+                    timeout=aiohttp.ClientTimeout(total=10)
+                ) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        return {
+                            "name": data.get("name", chat_id),
+                            "type": "group" if data.get("isGroup") else "dm",
+                            "participants": data.get("participants", []),
+                        }
+        except Exception:
+            pass
+        
+        return {"name": chat_id, "type": "dm"}
+    
+    async def _poll_messages(self) -> None:
+        """Poll the bridge's /messages endpoint until the adapter stops or the task is cancelled."""
+        try:
+            import aiohttp
+        except ImportError:
+            print(f"[{self.name}] aiohttp not installed, message polling disabled")
+            return
+        # One HTTP session for the poller's whole lifetime instead of one per request
+        async with aiohttp.ClientSession() as session:
+            while self._running:
+                try:
+                    async with session.get(
+                        f"http://localhost:{self._bridge_port}/messages",
+                        timeout=aiohttp.ClientTimeout(total=30)
+                    ) as resp:
+                        if resp.status == 200:
+                            messages = await resp.json()
+                            for msg_data in messages:
+                                event = self._build_message_event(msg_data)
+                                if event:
+                                    await self.handle_message(event)
+                except asyncio.CancelledError:
+                    break
+                except Exception as e:
+                    print(f"[{self.name}] Poll error: {e}")
+                    await asyncio.sleep(5)
+                
+                await asyncio.sleep(1)  # Poll interval
+    
+    def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
+        """Build a MessageEvent from one bridge message dict; returns None if that fails."""
+        try:
+            # Determine message type
+            msg_type = MessageType.TEXT
+            if data.get("hasMedia"):
+                media_type = data.get("mediaType") or ""  # tolerate an explicit null
+                if "image" in media_type:
+                    msg_type = MessageType.PHOTO
+                elif "video" in media_type:
+                    msg_type = MessageType.VIDEO
+                elif "audio" in media_type or "ptt" in media_type:  # ptt = voice note
+                    msg_type = MessageType.VOICE
+                else:
+                    msg_type = MessageType.DOCUMENT
+            
+            # Determine chat type
+            is_group = data.get("isGroup", False)
+            chat_type = "group" if is_group else "dm"
+            
+            # Build source
+            source = self.build_source(
+                chat_id=data.get("chatId", ""),
+                chat_name=data.get("chatName"),
+                chat_type=chat_type,
+                user_id=data.get("senderId"),
+                user_name=data.get("senderName"),
+            )
+            
+            return MessageEvent(
+                text=data.get("body", ""),
+                message_type=msg_type,
+                source=source,
+                raw_message=data,
+                message_id=data.get("messageId"),
+                media_urls=data.get("mediaUrls", []),
+            )
+        except Exception as e:
+            print(f"[{self.name}] Error building event: {e}")
+            return None
+
+
+# Note: A reference Node.js bridge script would be provided in scripts/whatsapp-bridge/
+# It would use whatsapp-web.js or Baileys to:
+# 1. Handle WhatsApp Web authentication (QR code)
+# 2. Listen for incoming messages
+# 3. Expose HTTP endpoints for send/receive/status
diff --git a/gateway/run.py b/gateway/run.py
new file mode 100644
index 0000000000..dfa97c4574
--- /dev/null
+++ b/gateway/run.py
@@ -0,0 +1,375 @@
+"""
+Gateway runner - entry point for messaging platform integrations.
+
+This module provides:
+- start_gateway(): Start all configured platform adapters
+- GatewayRunner: Main class managing the gateway lifecycle
+
+Usage:
+    # Start the gateway
+    python -m gateway.run
+    
+    # Or from CLI
+    python cli.py --gateway
+"""
+
+import asyncio
+import os
+import sys
+import signal
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, Optional, Any, List
+
+# Add parent directory to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from gateway.config import (
+    Platform,
+    GatewayConfig,
+    load_gateway_config,
+)
+from gateway.session import (
+    SessionStore,
+    SessionSource,
+    SessionContext,
+    build_session_context,
+    build_session_context_prompt,
+)
+from gateway.delivery import DeliveryRouter, DeliveryTarget
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent
+
+
+class GatewayRunner:
+    """
+    Main gateway controller.
+    
+    Manages the lifecycle of all platform adapters and routes
+    messages to/from the agent.
+    """
+    
+    def __init__(self, config: Optional[GatewayConfig] = None):
+        """Wire up config, session storage and delivery routing; nothing is connected yet."""
+        self.config = config if config else load_gateway_config()
+        self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        self._running, self._shutdown_event = False, asyncio.Event()
+        self.session_store = SessionStore(self.config.sessions_dir, self.config)
+        self.delivery_router = DeliveryRouter(self.config)
+    
+    async def start(self) -> bool:
+        """
+        Bring up every enabled platform adapter and mark the gateway as running.
+        
+        Returns False when no adapter connected during this call, True otherwise.
+        """
+        print("[gateway] Starting Hermes Gateway...")
+        print(f"[gateway] Session storage: {self.config.sessions_dir}")
+        
+        connected = 0
+        
+        for platform, platform_config in self.config.platforms.items():
+            # Platforms that are configured but disabled are skipped silently
+            if not platform_config.enabled:
+                continue
+            
+            adapter = self._create_adapter(platform, platform_config)
+            if not adapter:
+                print(f"[gateway] No adapter available for {platform.value}")
+                continue
+            
+            # Route the adapter's incoming events into this runner
+            adapter.set_message_handler(self._handle_message)
+            
+            # A platform that fails (or raises) while connecting is reported
+            # and skipped; the remaining platforms are still attempted
+            print(f"[gateway] Connecting to {platform.value}...")
+            try:
+                if await adapter.connect():
+                    self.adapters[platform] = adapter
+                    connected += 1
+                    print(f"[gateway] ✓ {platform.value} connected")
+                else:
+                    print(f"[gateway] ✗ {platform.value} failed to connect")
+            except Exception as e:
+                print(f"[gateway] ✗ {platform.value} error: {e}")
+        
+        if not connected:
+            print("[gateway] No platforms connected. Check your configuration.")
+            return False
+        
+        # Hand the connected adapters to the delivery router
+        self.delivery_router.adapters = self.adapters
+        
+        self._running = True
+        print(f"[gateway] Gateway running with {connected} platform(s)")
+        print("[gateway] Press Ctrl+C to stop")
+        
+        return True
+    
+    async def stop(self) -> None:
+        """Disconnect every adapter (errors are printed, not raised), then set the shutdown event."""
+        print("[gateway] Stopping gateway...")
+        self._running = False
+        
+        for platform in self.adapters:
+            try:
+                await self.adapters[platform].disconnect()
+                print(f"[gateway] ✓ {platform.value} disconnected")
+            except Exception as e:
+                print(f"[gateway] ✗ {platform.value} disconnect error: {e}")
+        
+        self.adapters.clear()
+        self._shutdown_event.set()
+        print("[gateway] Gateway stopped")
+    
+    async def wait_for_shutdown(self) -> None:
+        """Suspend the caller until the shutdown event is set (stop() sets it as its last step)."""
+        await self._shutdown_event.wait()
+    
+    def _create_adapter(
+        self, 
+        platform: Platform, 
+        config: Any
+    ) -> Optional[BasePlatformAdapter]:
+        """Build the adapter for *platform* (module imported lazily); None if unsupported or unmet requirements."""
+        if platform == Platform.TELEGRAM:
+            from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
+            if check_telegram_requirements():
+                return TelegramAdapter(config)
+            print("[gateway] Telegram: python-telegram-bot not installed")
+            return None
+        
+        if platform == Platform.DISCORD:
+            from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
+            if check_discord_requirements():
+                return DiscordAdapter(config)
+            print("[gateway] Discord: discord.py not installed")
+            return None
+        
+        if platform == Platform.WHATSAPP:
+            from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
+            if check_whatsapp_requirements():
+                return WhatsAppAdapter(config)
+            print("[gateway] WhatsApp: Node.js not installed or bridge not configured")
+            return None
+        
+        return None
+    
+    async def _handle_message(self, event: MessageEvent) -> Optional[str]:
+        """
+        Handle an incoming message from any platform.
+        
+        This is the core message processing pipeline:
+        1. Check for commands (/new, /reset, etc.)
+        2. Get or create session
+        3. Build context for agent
+        4. Run agent conversation
+        5. Return response (or an apology string if any step after 3 raises)
+        """
+        source = event.source
+        
+        # Check for reset commands
+        command = event.get_command()
+        if command in ["new", "reset"]:
+            return await self._handle_reset_command(event)
+        
+        if command == "status":
+            return await self._handle_status_command(event)
+        
+        # Get or create session
+        session_entry = self.session_store.get_or_create_session(source)
+        
+        # Build session context
+        context = build_session_context(source, self.config, session_entry)
+        
+        try:
+            # Set environment variables for tools. This happens inside the try so
+            # the finally below clears them even when a later step raises.
+            self._set_session_env(context)
+            
+            # Build the context prompt to inject
+            context_prompt = build_session_context_prompt(context)
+            
+            # Load conversation history from transcript
+            history = self.session_store.load_transcript(session_entry.session_id)
+            
+            # Run the agent
+            response = await self._run_agent(
+                message=event.text,
+                context_prompt=context_prompt,
+                history=history,
+                source=source,
+                session_id=session_entry.session_id
+            )
+            
+            # Append to transcript (only after the agent produced a response)
+            self.session_store.append_to_transcript(
+                session_entry.session_id,
+                {"role": "user", "content": event.text, "timestamp": datetime.now().isoformat()}
+            )
+            self.session_store.append_to_transcript(
+                session_entry.session_id,
+                {"role": "assistant", "content": response, "timestamp": datetime.now().isoformat()}
+            )
+            
+            # Update session
+            self.session_store.update_session(session_entry.session_key)
+            
+            return response
+        except Exception as e:
+            print(f"[gateway] Agent error: {e}")
+            return f"Sorry, I encountered an error: {str(e)}"
+        finally:
+            # Always clear the session env (NOTE: os.environ is process-wide, shared by overlapping sessions)
+            self._clear_session_env()
+    
+    async def _handle_reset_command(self, event: MessageEvent) -> str:
+        """Reset the sender's session for /new or /reset, creating one if none existed."""
+        origin = event.source
+        platform_name = origin.platform.value
+        
+        # DMs use one key per platform; every other chat is keyed by its type and chat id
+        if origin.chat_type == "dm":
+            scope = "dm"
+        else:
+            scope = f"{origin.chat_type}:{origin.chat_id}"
+        session_key = f"agent:main:{platform_name}:{scope}"
+        
+        if self.session_store.reset_session(session_key):
+            return "✨ Session reset! I've started fresh with no memory of our previous conversation."
+        # Nothing was reset, so open a brand-new session instead
+        self.session_store.get_or_create_session(origin, force_new=True)
+        return "✨ New session started!"
+    
+    async def _handle_status_command(self, event: MessageEvent) -> str:
+        """Render the /status reply: session id prefix, timestamps, token count, platforms."""
+        entry = self.session_store.get_or_create_session(event.source)
+        platform_names = ", ".join(p.value for p in self.adapters)
+        stamp = "%Y-%m-%d %H:%M"
+        
+        # The empty strings become blank lines between the title, the session
+        # details and the platform list once everything is joined with "\n"
+        return "\n".join((
+            "📊 **Hermes Gateway Status**",
+            "",
+            f"**Session ID:** `{entry.session_id[:12]}...`",
+            f"**Created:** {entry.created_at.strftime(stamp)}",
+            f"**Last Activity:** {entry.updated_at.strftime(stamp)}",
+            f"**Tokens:** {entry.total_tokens:,}",
+            "",
+            f"**Connected Platforms:** {platform_names}",
+        ))
+        
+    
+    def _set_session_env(self, context: SessionContext) -> None:
+        """Set environment variables for the current session."""
+        os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
+        os.environ["HERMES_SESSION_CHAT_ID"] = context.source.chat_id
+        if context.source.chat_name:
+            os.environ["HERMES_SESSION_CHAT_NAME"] = context.source.chat_name
+    
+    def _clear_session_env(self) -> None:
+        """Clear session environment variables."""
+        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"]:
+            if var in os.environ:
+                del os.environ[var]
+    
+    async def _run_agent(
+        self,
+        message: str,
+        context_prompt: str,
+        history: List[Dict[str, Any]],
+        source: SessionSource,
+        session_id: str
+    ) -> str:
+        """
+        Run the agent on `message` and return its final response text.
+        
+        The blocking agent call runs in the loop's default executor so the event loop stays free.
+        """
+        from run_agent import AIAgent
+        
+        # Determine toolset based on platform (unknown platforms fall back to the Telegram toolset)
+        toolset_map = {
+            Platform.LOCAL: "hermes-cli",
+            Platform.TELEGRAM: "hermes-telegram",
+            Platform.DISCORD: "hermes-discord",
+            Platform.WHATSAPP: "hermes-whatsapp",
+        }
+        toolset = toolset_map.get(source.platform, "hermes-telegram")
+        
+        def run_sync():
+            agent = AIAgent(
+                model=os.getenv("HERMES_MODEL", "anthropic/claude-sonnet-4"),
+                quiet_mode=True,
+                enabled_toolsets=[toolset],
+                ephemeral_system_prompt=context_prompt,
+                session_id=session_id,
+            )
+            
+            # NOTE: `history` is accepted but not yet replayed into the agent;
+            # only the new message is sent.
+            # TODO: Implement proper history restoration
+            
+            result = agent.run_conversation(message)
+            return result.get("final_response", "(No response)")
+        
+        # We are inside a coroutine, so use the running loop (get_event_loop() is deprecated here)
+        loop = asyncio.get_running_loop()
+        response = await loop.run_in_executor(None, run_sync)
+        
+        return response
+
+
+async def start_gateway(config: Optional[GatewayConfig] = None) -> None:
+    """
+    Start the gateway and run until interrupted.
+    
+    Main entry point: installs SIGINT/SIGTERM handlers, starts the runner, waits for shutdown.
+    """
+    runner = GatewayRunner(config)
+    stop_tasks = set()  # strong refs: the event loop only keeps weak references to tasks
+    # Set up signal handlers
+    def signal_handler():
+        stop_tasks.add(asyncio.create_task(runner.stop()))
+    
+    loop = asyncio.get_running_loop()
+    for sig in (signal.SIGINT, signal.SIGTERM):
+        try:
+            loop.add_signal_handler(sig, signal_handler)
+        except NotImplementedError:
+            # Windows doesn't support add_signal_handler
+            pass
+    
+    # Start the gateway
+    success = await runner.start()
+    if not success:
+        return
+    
+    # Wait for shutdown
+    await runner.wait_for_shutdown()
+
+
+def main():
+    """CLI entry point for the gateway: parse arguments, load the optional JSON config, run."""
+    import argparse
+    
+    parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
+    parser.add_argument("--config", "-c", help="Path to gateway config file")
+    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
+    
+    args = parser.parse_args()
+    
+    config = None
+    if args.config:
+        import json
+        with open(args.config, encoding="utf-8") as f:  # JSON is UTF-8; don't depend on the locale
+            data = json.load(f)
+            config = GatewayConfig.from_dict(data)
+    
+    # Run the gateway
+    asyncio.run(start_gateway(config))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/gateway/session.py b/gateway/session.py
new file mode 100644
index 0000000000..d6ab462bb3
--- /dev/null
+++ b/gateway/session.py
@@ -0,0 +1,522 @@
+"""
+Session management for the gateway.
+
+Handles:
+- Session context tracking (where messages come from)
+- Session storage (conversations persisted to disk)
+- Reset policy evaluation (when to start fresh)
+- Dynamic system prompt injection (agent knows its context)
+"""
+
+import os
+import json
+import uuid
+from pathlib import Path
+from datetime import datetime, timedelta
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Any
+
+from .config import (
+    Platform,
+    GatewayConfig,
+    SessionResetPolicy,
+    HomeChannel,
+)
+
+
+@dataclass
+class SessionSource:
+    """
+    Describes where a message originated from.
+    
+    This information is used to:
+    1. Route responses back to the right place
+    2. Inject context into the system prompt
+    3. Track origin for cron job delivery
+    """
+    platform: Platform
+    chat_id: str
+    chat_name: Optional[str] = None
+    chat_type: str = "dm"  # "dm", "group", "channel", "thread"
+    user_id: Optional[str] = None
+    user_name: Optional[str] = None
+    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
+    
+    @property
+    def description(self) -> str:
+        """Human-readable description of the source."""
+        if self.platform == Platform.LOCAL:
+            return "CLI terminal"
+        
+        parts = []
+        if self.chat_type == "dm":
+            parts.append(f"DM with {self.user_name or self.user_id or 'user'}")
+        elif self.chat_type == "group":
+            parts.append(f"group: {self.chat_name or self.chat_id}")
+        elif self.chat_type == "channel":
+            parts.append(f"channel: {self.chat_name or self.chat_id}")
+        else:
+            parts.append(self.chat_name or self.chat_id)
+        
+        if self.thread_id:
+            parts.append(f"thread: {self.thread_id}")
+        
+        return ", ".join(parts)
+    
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "platform": self.platform.value,
+            "chat_id": self.chat_id,
+            "chat_name": self.chat_name,
+            "chat_type": self.chat_type,
+            "user_id": self.user_id,
+            "user_name": self.user_name,
+            "thread_id": self.thread_id,
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
+        return cls(
+            platform=Platform(data["platform"]),
+            chat_id=str(data["chat_id"]),
+            chat_name=data.get("chat_name"),
+            chat_type=data.get("chat_type", "dm"),
+            user_id=data.get("user_id"),
+            user_name=data.get("user_name"),
+            thread_id=data.get("thread_id"),
+        )
+    
+    @classmethod
+    def local_cli(cls) -> "SessionSource":
+        """Create a source representing the local CLI."""
+        return cls(
+            platform=Platform.LOCAL,
+            chat_id="cli",
+            chat_name="CLI terminal",
+            chat_type="dm",
+        )
+
+
+@dataclass
+class SessionContext:
+    """
+    Full context for a session, used for dynamic system prompt injection.
+    
+    The agent receives this information to understand:
+    - Where messages are coming from
+    - What platforms are available
+    - Where it can deliver scheduled task outputs
+    """
+    source: SessionSource
+    connected_platforms: List[Platform]
+    home_channels: Dict[Platform, HomeChannel]
+    
+    # Session metadata
+    session_key: str = ""
+    session_id: str = ""
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "source": self.source.to_dict(),
+            "connected_platforms": [p.value for p in self.connected_platforms],
+            "home_channels": {
+                p.value: hc.to_dict() for p, hc in self.home_channels.items()
+            },
+            "session_key": self.session_key,
+            "session_id": self.session_id,
+            "created_at": self.created_at.isoformat() if self.created_at else None,
+            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
+        }
+
+
+def build_session_context_prompt(context: SessionContext) -> str:
+    """
+    Build the dynamic system prompt section that tells the agent about its context.
+    
+    This is injected into the system prompt so the agent knows:
+    - Where messages are coming from
+    - What platforms are connected
+    - Where it can deliver scheduled task outputs
+    """
+    lines = [
+        "## Current Session Context",
+        "",
+    ]
+    
+    # Source info
+    platform_name = context.source.platform.value.title()
+    if context.source.platform == Platform.LOCAL:
+        lines.append(f"**Source:** {platform_name} (the machine running this agent)")
+    else:
+        lines.append(f"**Source:** {platform_name} ({context.source.description})")
+    
+    # Connected platforms
+    platforms_list = ["local (files on this machine)"]
+    for p in context.connected_platforms:
+        if p != Platform.LOCAL:
+            platforms_list.append(f"{p.value}: Connected ✓")
+    
+    lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
+    
+    # Home channels
+    if context.home_channels:
+        lines.append("")
+        lines.append("**Home Channels (default destinations):**")
+        for platform, home in context.home_channels.items():
+            lines.append(f"  - {platform.value}: {home.name} (ID: {home.chat_id})")
+    
+    # Delivery options for scheduled tasks
+    lines.append("")
+    lines.append("**Delivery options for scheduled tasks:**")
+    
+    # Origin delivery
+    if context.source.platform == Platform.LOCAL:
+        lines.append("- `\"origin\"` → Local output (saved to files)")
+    else:
+        lines.append(f"- `\"origin\"` → Back to this chat ({context.source.chat_name or context.source.chat_id})")
+    
+    # Local always available
+    lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
+    
+    # Platform home channels
+    for platform, home in context.home_channels.items():
+        lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
+    
+    # Note about explicit targeting
+    lines.append("")
+    lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
+    
+    return "\n".join(lines)
+
+
+@dataclass
+class SessionEntry:
+    """
+    Entry in the session store.
+    
+    Maps a session key to its current session ID and metadata.
+    """
+    session_key: str
+    session_id: str
+    created_at: datetime
+    updated_at: datetime
+    
+    # Origin metadata for delivery routing
+    origin: Optional[SessionSource] = None
+    
+    # Display metadata
+    display_name: Optional[str] = None
+    platform: Optional[Platform] = None
+    chat_type: str = "dm"
+    
+    # Token tracking
+    input_tokens: int = 0
+    output_tokens: int = 0
+    total_tokens: int = 0
+    
+    def to_dict(self) -> Dict[str, Any]:
+        result = {
+            "session_key": self.session_key,
+            "session_id": self.session_id,
+            "created_at": self.created_at.isoformat(),
+            "updated_at": self.updated_at.isoformat(),
+            "display_name": self.display_name,
+            "platform": self.platform.value if self.platform else None,
+            "chat_type": self.chat_type,
+            "input_tokens": self.input_tokens,
+            "output_tokens": self.output_tokens,
+            "total_tokens": self.total_tokens,
+        }
+        if self.origin:
+            result["origin"] = self.origin.to_dict()
+        return result
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "SessionEntry":
+        origin = None
+        if "origin" in data and data["origin"]:
+            origin = SessionSource.from_dict(data["origin"])
+        
+        platform = None
+        if data.get("platform"):
+            try:
+                platform = Platform(data["platform"])
+            except ValueError:
+                pass
+        
+        return cls(
+            session_key=data["session_key"],
+            session_id=data["session_id"],
+            created_at=datetime.fromisoformat(data["created_at"]),
+            updated_at=datetime.fromisoformat(data["updated_at"]),
+            origin=origin,
+            display_name=data.get("display_name"),
+            platform=platform,
+            chat_type=data.get("chat_type", "dm"),
+            input_tokens=data.get("input_tokens", 0),
+            output_tokens=data.get("output_tokens", 0),
+            total_tokens=data.get("total_tokens", 0),
+        )
+
+
+class SessionStore:
+    """
+    Manages session storage and retrieval.
+    
+    Sessions are stored in:
+    - sessions.json: Index mapping session keys to session IDs
+    - {session_id}.jsonl: Conversation transcripts
+    """
+    
+    def __init__(self, sessions_dir: Path, config: GatewayConfig):
+        self.sessions_dir = sessions_dir
+        self.config = config
+        self._entries: Dict[str, SessionEntry] = {}
+        self._loaded = False
+    
+    def _ensure_loaded(self) -> None:
+        """Load sessions from disk if not already loaded."""
+        if self._loaded:
+            return
+        
+        self.sessions_dir.mkdir(parents=True, exist_ok=True)
+        sessions_file = self.sessions_dir / "sessions.json"
+        
+        if sessions_file.exists():
+            try:
+                with open(sessions_file, "r") as f:
+                    data = json.load(f)
+                    for key, entry_data in data.items():
+                        self._entries[key] = SessionEntry.from_dict(entry_data)
+            except Exception as e:
+                print(f"[gateway] Warning: Failed to load sessions: {e}")
+        
+        self._loaded = True
+    
+    def _save(self) -> None:
+        """Save sessions index to disk atomically (write a temp file, then rename over the index)."""
+        self.sessions_dir.mkdir(parents=True, exist_ok=True)
+        sessions_file = self.sessions_dir / "sessions.json"
+        tmp_file = self.sessions_dir / "sessions.json.tmp"
+        data = {key: entry.to_dict() for key, entry in self._entries.items()}
+        tmp_file.write_text(json.dumps(data, indent=2))  # a crash here leaves the old index intact
+        os.replace(tmp_file, sessions_file)
+    
+    def _generate_session_key(self, source: SessionSource) -> str:
+        """Generate a session key from a source's platform, chat type and (for non-DMs) chat ID."""
+        platform = source.platform.value
+        
+        if source.chat_type == "dm":
+            # NOTE(review): key omits chat_id/user_id, so every DM on a platform shares one session — confirm intended
+            return f"agent:main:{platform}:dm"
+        else:
+            # Groups/channels get their own keys
+            return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
+    
+    def _should_reset(self, entry: SessionEntry, source: SessionSource) -> bool:
+        """
+        Check if a session should be reset based on policy.
+        
+        Returns True if the session is stale and should start fresh.
+        """
+        policy = self.config.get_reset_policy(
+            platform=source.platform,
+            session_type=source.chat_type
+        )
+        
+        now = datetime.now()
+        
+        # Check idle timeout
+        if policy.mode in ("idle", "both"):
+            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
+            if now > idle_deadline:
+                return True
+        
+        # Check daily reset
+        if policy.mode in ("daily", "both"):
+            # Find the most recent reset boundary
+            today_reset = now.replace(
+                hour=policy.at_hour, 
+                minute=0, 
+                second=0, 
+                microsecond=0
+            )
+            if now.hour < policy.at_hour:
+                # Reset boundary was yesterday
+                today_reset -= timedelta(days=1)
+            
+            if entry.updated_at < today_reset:
+                return True
+        
+        return False
+    
+    def get_or_create_session(
+        self, 
+        source: SessionSource,
+        force_new: bool = False
+    ) -> SessionEntry:
+        """
+        Get an existing session or create a new one.
+        
+        Evaluates reset policy to determine if the existing session is stale.
+        """
+        self._ensure_loaded()
+        
+        session_key = self._generate_session_key(source)
+        now = datetime.now()
+        
+        # Check for existing session
+        if session_key in self._entries and not force_new:
+            entry = self._entries[session_key]
+            
+            # Check if session should be reset
+            if not self._should_reset(entry, source):
+                # Update timestamp and return existing
+                entry.updated_at = now
+                self._save()
+                return entry
+        
+        # Create new session
+        session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+        
+        entry = SessionEntry(
+            session_key=session_key,
+            session_id=session_id,
+            created_at=now,
+            updated_at=now,
+            origin=source,
+            display_name=source.chat_name,
+            platform=source.platform,
+            chat_type=source.chat_type,
+        )
+        
+        self._entries[session_key] = entry
+        self._save()
+        
+        return entry
+    
+    def update_session(
+        self, 
+        session_key: str,
+        input_tokens: int = 0,
+        output_tokens: int = 0
+    ) -> None:
+        """Update a session's metadata after an interaction."""
+        self._ensure_loaded()
+        
+        if session_key in self._entries:
+            entry = self._entries[session_key]
+            entry.updated_at = datetime.now()
+            entry.input_tokens += input_tokens
+            entry.output_tokens += output_tokens
+            entry.total_tokens = entry.input_tokens + entry.output_tokens
+            self._save()
+    
+    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
+        """Force reset a session, creating a new session ID."""
+        self._ensure_loaded()
+        
+        if session_key not in self._entries:
+            return None
+        
+        old_entry = self._entries[session_key]
+        now = datetime.now()
+        session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+        
+        new_entry = SessionEntry(
+            session_key=session_key,
+            session_id=session_id,
+            created_at=now,
+            updated_at=now,
+            origin=old_entry.origin,
+            display_name=old_entry.display_name,
+            platform=old_entry.platform,
+            chat_type=old_entry.chat_type,
+        )
+        
+        self._entries[session_key] = new_entry
+        self._save()
+        
+        return new_entry
+    
+    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
+        """
+        List all sessions, optionally filtered by activity.
+        
+        Args:
+            active_minutes: If provided, only return sessions updated within this many minutes
+        """
+        self._ensure_loaded()
+        
+        entries = list(self._entries.values())
+        
+        if active_minutes is not None:
+            cutoff = datetime.now() - timedelta(minutes=active_minutes)
+            entries = [e for e in entries if e.updated_at >= cutoff]
+        
+        # Sort by most recently updated
+        entries.sort(key=lambda e: e.updated_at, reverse=True)
+        
+        return entries
+    
+    def get_transcript_path(self, session_id: str) -> Path:
+        """Get the path to a session's transcript file."""
+        return self.sessions_dir / f"{session_id}.jsonl"
+    
+    def append_to_transcript(self, session_id: str, message: Dict[str, Any]) -> None:
+        """Append a message to a session's transcript as one UTF-8 JSON line."""
+        transcript_path = self.get_transcript_path(session_id)
+        
+        with open(transcript_path, "a", encoding="utf-8") as f:  # ensure_ascii=False emits raw non-ASCII text
+            f.write(json.dumps(message, ensure_ascii=False) + "\n")
+    
+    def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
+        """Load all messages from a session's transcript (empty list if the file does not exist)."""
+        transcript_path = self.get_transcript_path(session_id)
+        
+        if not transcript_path.exists():
+            return []
+        
+        messages = []
+        with open(transcript_path, "r", encoding="utf-8") as f:  # must match the writer's encoding
+            for line in f:
+                line = line.strip()
+                if line:
+                    messages.append(json.loads(line))
+        
+        return messages
+
+
+def build_session_context(
+    source: SessionSource,
+    config: GatewayConfig,
+    session_entry: Optional[SessionEntry] = None
+) -> SessionContext:
+    """
+    Build a full session context from a source and config.
+    
+    This is used to inject context into the agent's system prompt.
+    """
+    connected = config.get_connected_platforms()
+    
+    home_channels = {}
+    for platform in connected:
+        home = config.get_home_channel(platform)
+        if home:
+            home_channels[platform] = home
+    
+    context = SessionContext(
+        source=source,
+        connected_platforms=connected,
+        home_channels=home_channels,
+    )
+    
+    if session_entry:
+        context.session_key = session_entry.session_key
+        context.session_id = session_entry.session_id
+        context.created_at = session_entry.created_at
+        context.updated_at = session_entry.updated_at
+    
+    return context
diff --git a/hermes_agent.egg-info/PKG-INFO b/hermes_agent.egg-info/PKG-INFO
new file mode 100644
index 0000000000..159a406927
--- /dev/null
+++ b/hermes_agent.egg-info/PKG-INFO
@@ -0,0 +1,868 @@
+Metadata-Version: 2.4
+Name: hermes-agent
+Version: 0.1.0
+Summary: AI agent with advanced tool-calling and toolsets
+Author: Nous Research
+License: MIT
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: openai
+Requires-Dist: python-dotenv
+Requires-Dist: fire
+Requires-Dist: httpx
+Requires-Dist: rich
+Requires-Dist: tenacity
+Requires-Dist: pyyaml
+Requires-Dist: requests
+Requires-Dist: jinja2
+Requires-Dist: pydantic>=2.0
+Requires-Dist: firecrawl-py
+Requires-Dist: fal-client
+Requires-Dist: litellm>=1.75.5
+Requires-Dist: typer
+Requires-Dist: platformdirs
+Provides-Extra: modal
+Requires-Dist: modal; extra == "modal"
+Requires-Dist: boto3; extra == "modal"
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-asyncio; extra == "dev"
+Provides-Extra: messaging
+Requires-Dist: python-telegram-bot>=20.0; extra == "messaging"
+Requires-Dist: discord.py>=2.0; extra == "messaging"
+Provides-Extra: cron
+Requires-Dist: croniter; extra == "cron"
+Provides-Extra: all
+Requires-Dist: croniter; extra == "all"
+Requires-Dist: python-telegram-bot>=20.0; extra == "all"
+Requires-Dist: discord.py>=2.0; extra == "all"
+
+# Hermes Agent
+
+An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.
+
+## Features
+
+- **Interactive CLI**: Beautiful terminal interface with animated feedback, personalities, and session management
+- **Messaging Gateway**: Connect to Telegram, Discord, and WhatsApp for conversational AI anywhere
+- **Web Tools**: Search, extract content, and crawl websites
+- **Terminal Tools**: Execute commands via local, Docker, Singularity, Modal, or SSH backends
+- **Browser Tools**: Automate web browsers to navigate, click, type, and extract content
+- **Vision Tools**: Analyze images from URLs
+- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
+- **Creative Tools**: Generate images from text prompts
+- **Skills Tools**: On-demand knowledge documents with progressive disclosure
+- **Toolsets System**: Organize tools into logical groups for different scenarios
+- **Scheduled Tasks**: Cron jobs for automated agent tasks with delivery to platforms
+- **Context Compression**: Automatic summarization when approaching context limits
+- **Batch Processing**: Process datasets in parallel with checkpointing and statistics tracking
+- **Ephemeral System Prompts**: Guide model behavior without polluting training datasets
+
+## Installation
+
+### Quick Install (Recommended)
+
+**Linux/macOS:**
+```bash
+curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
+```
+
+**Windows (PowerShell):**
+```powershell
+irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex
+```
+
+This installer will:
+- Clone the repository to `~/.hermes-agent`
+- Create a virtual environment and install dependencies
+- Set up the `hermes` command in your PATH
+- Run an interactive setup wizard to configure API keys
+
+### Manual Installation
+
+If you prefer to install manually:
+
+```bash
+# Clone with submodules
+git clone --recurse-submodules https://github.com/NousResearch/Hermes-Agent.git
+cd Hermes-Agent
+
+# Run the setup script
+./setup-hermes.sh
+```
+
+Or step-by-step:
+
+```bash
+# Create and activate virtual environment
+python3 -m venv venv
+source venv/bin/activate  # Windows: venv\Scripts\activate
+
+# Install in editable mode with all extras
+pip install -e ".[all]"
+
+# Or install dependencies manually
+pip install -r requirements.txt
+pip install -e ./mini-swe-agent
+
+# Copy and configure environment
+cp .env.example .env
+# Edit .env with your API keys
+
+# Run the setup wizard
+hermes setup
+```
+
+## Quick Start
+
+Once installed, the `hermes` command is your main entry point:
+
+```bash
+hermes                    # Interactive chat (default)
+hermes chat               # Same as above
+hermes chat -q "Hello"    # Single query, then exit
+hermes setup              # Configure API keys and settings
+hermes status             # Show configuration status
+hermes doctor             # Diagnose issues
+hermes gateway            # Start messaging gateway (Telegram/Discord/WhatsApp)
+hermes cron daemon        # Run cron job scheduler
+hermes version            # Show version info
+```
+
+**Legacy `./hermes` script:**
+```bash
+# The old CLI script still works:
+./hermes
+
+# Or with options:
+./hermes --model "anthropic/claude-sonnet-4" --toolsets "web,terminal"
+```
+
+The CLI provides:
+- Animated spinners during thinking and tool execution
+- Kawaii-style feedback messages
+- `/commands` for configuration, history, and session management
+- Customizable personalities (`/personality kawaii`, `/personality pirate`, etc.)
+- Persistent configuration via `cli-config.yaml`
+
+## Configuration
+
+### Environment Variables
+```bash
+# Copy the example environment file
+cp .env.example .env
+
+# Edit .env and add your API keys
+nano .env  # or use your preferred editor
+```
+
+**Required API Keys:**
+- `OPENROUTER_API_KEY` - LLM access via OpenRouter (get at: https://openrouter.ai/keys)
+- `FIRECRAWL_API_KEY` - Web tools (get at: https://firecrawl.dev/)
+- `NOUS_API_KEY` - Vision & reasoning tools (get at: https://inference-api.nousresearch.com/)
+- `FAL_KEY` - Image generation (get at: https://fal.ai/)
+
+**Optional API Keys (for specific features):**
+- `BROWSERBASE_API_KEY` - Browser automation (get at: https://browserbase.com/)
+- `BROWSERBASE_PROJECT_ID` - From Browserbase dashboard
+- `MORPH_API_KEY` - For legacy Hecate terminal backend (get at: https://morph.so/)
+
+### Configure Terminal Backend
+
+The terminal tool uses **mini-swe-agent** environments. Configure in `.env` or `cli-config.yaml`:
+
+```bash
+# Backend: "local", "docker", "singularity", "modal", or "ssh"
+TERMINAL_ENV=local          # Default: runs on host machine (no isolation)
+TERMINAL_ENV=ssh            # Remote execution via SSH (agent code stays local)
+TERMINAL_ENV=singularity    # Recommended for HPC: Apptainer/Singularity containers
+TERMINAL_ENV=docker         # Isolated Docker containers
+TERMINAL_ENV=modal          # Cloud execution via Modal
+
+# Container image (for docker/singularity/modal backends)
+TERMINAL_DOCKER_IMAGE=python:3.11-slim
+TERMINAL_SINGULARITY_IMAGE=docker://python:3.11-slim
+TERMINAL_TIMEOUT=60
+
+# SSH backend (for ssh)
+TERMINAL_SSH_HOST=my-server.example.com
+TERMINAL_SSH_USER=myuser
+TERMINAL_SSH_KEY=~/.ssh/id_rsa  # Optional, uses ssh-agent if not set
+```
+
+**Backend Requirements:**
+- **local**: No extra setup (runs directly on your machine, no isolation)
+- **ssh**: SSH access to remote machine (great for sandboxing - agent can't touch its own code)
+- **singularity**: Requires Apptainer or Singularity installed (common on HPC clusters, no root needed)
+- **docker**: Requires Docker installed and user in `docker` group
+- **modal**: Requires Modal account (see setup below)
+
+### Singularity/Apptainer Setup (Recommended for HPC)
+
+Singularity/Apptainer provides rootless container execution, ideal for HPC clusters:
+
+```bash
+# 1. Verify Apptainer is installed
+apptainer --version  # or: singularity --version
+
+# 2. Set up cache directories (important for parallel workers)
+# Use /scratch if available (HPC), otherwise /tmp
+export APPTAINER_CACHEDIR=/scratch/$USER/.apptainer
+export APPTAINER_TMPDIR=/scratch/$USER/.apptainer/tmp
+mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
+
+# 3. Pre-build SIF image (recommended for parallel batch processing)
+# This avoids race conditions when multiple workers start simultaneously
+apptainer build $APPTAINER_CACHEDIR/python-nodejs.sif docker://nikolaik/python-nodejs:python3.11-nodejs20
+
+# 4. Configure .env to use the local SIF
+TERMINAL_ENV=singularity
+TERMINAL_SINGULARITY_IMAGE=/scratch/$USER/.apptainer/python-nodejs.sif
+```
+
+**Tip:** The batch scripts in `configs/` automatically handle SIF pre-building if `/scratch` is available.
+
+### Modal Cloud Backend Setup
+
+[Modal](https://modal.com) provides serverless cloud compute for running sandboxed environments at scale.
+
+```bash
+# 1. Install Modal and dependencies
+pip install modal boto3
+
+# 2. Authenticate with Modal (opens browser)
+modal setup
+
+# 3. Set terminal backend to modal in .env
+TERMINAL_ENV=modal
+```
+
+Modal uses CLI-based authentication (stored in `~/.modal/`), so no API key is needed in `.env`. After running `modal setup`, commands will automatically execute in Modal's cloud sandboxes.
+
+### Browser Tools Setup
+
+Browser tools enable the agent to navigate websites, fill forms, click buttons, and extract content. They use [agent-browser](https://github.com/vercel-labs/agent-browser) CLI with [Browserbase](https://browserbase.com) cloud execution.
+
+```bash
+# 1. Install Node.js (if not already installed)
+# Use nvm (recommended) or your package manager
+
+# 2. Install agent-browser CLI (choose one option):
+npm install -g agent-browser     # Option A: Global install (recommended)
+npm install                      # Option B: Local install (uses npx fallback)
+
+# 3. Get Browserbase credentials
+# Sign up at https://browserbase.com/ and get your:
+# - API Key (from Settings → API Keys)
+# - Project ID (from your project dashboard)
+
+# 4. Add to your .env file:
+BROWSERBASE_API_KEY=your_api_key_here
+BROWSERBASE_PROJECT_ID=your_project_id_here
+```
+
+**Available Browser Tools:**
+
+| Tool | Description |
+|------|-------------|
+| `browser_navigate` | Navigate to a URL |
+| `browser_snapshot` | Get text-based page snapshot with element refs |
+| `browser_click` | Click an element by ref (e.g., `@e5`) |
+| `browser_type` | Type text into an input field |
+| `browser_scroll` | Scroll up or down |
+| `browser_back` | Go back in browser history |
+| `browser_press` | Press a keyboard key (Enter, Tab, etc.) |
+| `browser_close` | Close the browser session |
+| `browser_get_images` | Get list of images on the page |
+
+**Example Usage:**
+```bash
+# Use browser tools with web search and vision
+python run_agent.py \
+  --query "Go to amazon.com and find the price of the latest Kindle" \
+  --enabled_toolsets=browser,web,vision
+
+# Use browser-focused distribution
+python batch_runner.py \
+  --dataset_file=browser_tasks.jsonl \
+  --distribution=browser_use \
+  --run_name=browser_run
+```
+
+See `.env.example` for all available configuration options including debug settings.
+
+### Skills Tools
+
+Skills are on-demand knowledge documents the agent can load when needed. They follow a **progressive disclosure** pattern to minimize token usage:
+
+```
+skills/
+├── mlops/                    # Category folder
+│   ├── axolotl/             # Skill folder
+│   │   ├── SKILL.md         # Main instructions (required)
+│   │   ├── references/      # Additional docs, API specs
+│   │   └── templates/       # Output formats, configs
+│   └── vllm/
+│       └── SKILL.md
+```
+
+**Available Skills Tools:**
+
+| Tool | Description |
+|------|-------------|
+| `skills_categories` | List available skill categories (~50 tokens) |
+| `skills_list` | List skills with name + description (~3k tokens for 40 skills) |
+| `skill_view` | Load full skill content, tags, and linked files |
+
+**Example Usage:**
+```bash
+# Use skills tools
+python run_agent.py \
+  --query "What skills do you have for fine-tuning? Show me the axolotl skill." \
+  --enabled_toolsets=skills
+```
+
+**Creating Skills:**
+
+Skills use YAML frontmatter for metadata:
+```yaml
+---
+name: my-skill
+description: Brief description shown in skills_list
+tags: [tag1, tag2]
+related_skills: [other-skill]
+version: 1.0.0
+---
+# Skill Content
+
+Instructions, examples, and guidelines here...
+```
+
+Skills can include:
+- `references/` - Additional documentation, API specs, examples
+- `templates/` - Output formats, config files, boilerplate code
+- `scripts/` - Executable helpers (Python, shell scripts)
+
+## Session Logging
+
+Every conversation is automatically logged to `logs/` for debugging and inspection:
+
+```
+logs/
+├── session_20260201_143052_a1b2c3.json
+├── session_20260201_150217_d4e5f6.json
+└── ...
+```
+
+**Log Format:**
+```json
+{
+  "session_id": "20260201_143052_a1b2c3",
+  "model": "anthropic/claude-sonnet-4",
+  "session_start": "2026-02-01T14:30:52.123456",
+  "last_updated": "2026-02-01T14:35:12.789012",
+  "message_count": 8,
+  "conversations": [
+    {"from": "system", "value": "..."},
+    {"from": "human", "value": "..."},
+    {"from": "gpt", "value": "..."},
+    {"from": "tool", "value": "..."}
+  ]
+}
+```
+
+- **Automatic**: Logs are created and updated automatically after each conversation turn
+- **Session ID in Banner**: The CLI displays the session ID in the welcome banner
+- **Trajectory Format**: Uses the same format as batch processing for consistency
+- **Git Ignored**: `logs/` is in `.gitignore` so logs aren't committed
+
+## Context Compression
+
+Long conversations can exceed the model's context limit. Hermes Agent automatically compresses context when approaching the limit:
+
+**How it works:**
+1. Tracks actual token usage from API responses (`usage.prompt_tokens`)
+2. When tokens reach 85% of the model's context limit, triggers compression
+3. Protects first 3 turns (system prompt, initial request, first response)
+4. Protects last 4 turns (recent context is most relevant)
+5. Summarizes middle turns using a fast/cheap model (Gemini Flash)
+6. Inserts the summary as a user message so the conversation continues seamlessly
+
+**Configuration (`cli-config.yaml`):**
+```yaml
+compression:
+  enabled: true                    # Enable auto-compression (default)
+  threshold: 0.85                  # Compress at 85% of context limit
+  summary_model: "google/gemini-2.0-flash-001"
+```
+
+**Or via environment variables:**
+```bash
+CONTEXT_COMPRESSION_ENABLED=true
+CONTEXT_COMPRESSION_THRESHOLD=0.85
+CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001
+```
+
+**When compression triggers, you'll see:**
+```
+📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
+   📊 Model context limit: 200,000 tokens (85% = 170,000)
+   🗜️  Summarizing turns 4-15 (12 turns)
+   ✅ Compressed: 20 → 9 messages (~45,000 tokens saved)
+```
+
+## Scheduled Tasks (Cron Jobs)
+
+Hermes Agent can schedule automated tasks to run in the future - either one-time reminders or recurring jobs.
+
+### CLI Commands
+
+```bash
+# List scheduled jobs
+/cron
+
+# Add a one-shot reminder (runs once in 30 minutes)
+/cron add 30m Remind me to check the build status
+
+# Add a recurring job (every 2 hours)
+/cron add "every 2h" Check server status at 192.168.1.100 and report any issues
+
+# Add a cron expression (daily at 9am)
+/cron add "0 9 * * *" Generate a morning briefing summarizing GitHub notifications
+
+# Remove a job
+/cron remove abc123def456
+```
+
+### Agent Self-Scheduling
+
+The agent can also schedule its own follow-up tasks using tools:
+
+```python
+# Available when using hermes-cli toolset (default for CLI)
+schedule_cronjob(prompt="...", schedule="30m", repeat=1)  # One-shot
+schedule_cronjob(prompt="...", schedule="every 2h")       # Recurring
+list_cronjobs()                                            # View all jobs
+remove_cronjob(job_id="...")                              # Cancel a job
+```
+
+**⚠️ Important:** Cronjobs run in **isolated sessions with NO prior context**. The prompt must be completely self-contained with all necessary information (file paths, URLs, server addresses, etc.). The future agent will not remember anything from the current conversation.
+
+### Schedule Formats
+
+| Format | Example | Description |
+|--------|---------|-------------|
+| Duration | `30m`, `2h`, `1d` | One-shot delay from now |
+| Interval | `every 30m`, `every 2h` | Recurring at fixed intervals |
+| Cron | `0 9 * * *` | Cron expression (requires `croniter`) |
+| Timestamp | `2026-02-03T14:00` | One-shot at specific time |
+
+### Repeat Options
+
+| repeat | Behavior |
+|--------|----------|
+| (omitted) | One-shot schedules run once; intervals/cron run forever |
+| `1` | Run once then auto-delete |
+| `N` | Run N times then auto-delete |
+
+### Running the Cron Daemon
+
+Jobs are stored in `~/.hermes/cron/jobs.json` and executed by a scheduler:
+
+```bash
+# Option 1: Built-in daemon (checks every 60 seconds)
+python cli.py --cron-daemon
+
+# Option 2: System cron integration (run once per minute)
+# Add to crontab: crontab -e
+*/1 * * * * cd ~/hermes-agent && python cli.py --cron-tick-once >> ~/.hermes/cron/cron.log 2>&1
+```
+
+### Job Output
+
+Job outputs are saved to `~/.hermes/cron/output/{job_id}/{timestamp}.md` for review.
+
+## Messaging Gateway (Telegram, Discord, WhatsApp)
+
+Connect Hermes Agent to messaging platforms so you can chat from anywhere.
+
+### Quick Start
+
+```bash
+# 1. Add your bot token to .env
+echo 'TELEGRAM_BOT_TOKEN="your_token"' >> .env
+
+# 2. Test the gateway (foreground)
+./scripts/hermes-gateway run
+
+# 3. Install as a background service
+./scripts/hermes-gateway install
+
+# 4. Manage the service
+./scripts/hermes-gateway start   # Start
+./scripts/hermes-gateway stop    # Stop
+./scripts/hermes-gateway status  # Check status
+```
+
+### Supported Platforms
+
+| Platform | Setup | Toolset |
+|----------|-------|---------|
+| Telegram | Bot via @BotFather | `hermes-telegram` |
+| Discord | Bot via Developer Portal | `hermes-discord` |
+| WhatsApp | Node.js bridge | `hermes-whatsapp` |
+
+### Session Management
+
+- Sessions persist across messages (agent remembers context)
+- Reset policies: daily (4am), idle (2 hours), or both
+- Manual reset: send `/new` or `/reset`
+
+### Cron Job Delivery
+
+Schedule tasks that deliver to specific platforms:
+
+```python
+schedule_cronjob(
+    prompt="Check server status...",
+    schedule="every 1h",
+    deliver="telegram"  # or "origin", "discord", etc.
+)
+```
+
+### CLI Commands
+
+| Command | Description |
+|---------|-------------|
+| `/platforms` | Show gateway configuration status |
+| `--gateway` | Start the gateway (CLI flag) |
+
+See [docs/messaging.md](docs/messaging.md) for full setup instructions.
+
+## Interactive CLI
+
+The CLI provides a rich interactive experience for working with the agent.
+
+### Running the CLI
+
+```bash
+# Basic usage
+./hermes
+
+# With specific model
+./hermes --model "anthropic/claude-sonnet-4"
+
+# With specific toolsets
+./hermes --toolsets "web,terminal,skills"
+```
+
+### CLI Commands
+
+| Command | Description |
+|---------|-------------|
+| `/help` | Show available commands |
+| `/tools` | List available tools by toolset |
+| `/toolsets` | List available toolsets |
+| `/model [name]` | Show or change the current model |
+| `/prompt [text]` | View/set custom system prompt |
+| `/personality [name]` | Set a predefined personality |
+| `/clear` | Clear screen and reset conversation |
+| `/reset` | Reset conversation only |
+| `/history` | Show conversation history |
+| `/save` | Save current conversation to file |
+| `/config` | Show current configuration |
+| `/cron` | Manage scheduled tasks (list, add, remove) |
+| `/platforms` | Show gateway/messaging platform status |
+| `/quit` | Exit the CLI |
+
+### Configuration
+
+Copy `cli-config.yaml.example` to `cli-config.yaml` and customize:
+
+```yaml
+# Model settings
+model:
+  default: "anthropic/claude-sonnet-4"
+
+# Terminal backend (local, docker, singularity, modal, or ssh)
+terminal:
+  env_type: "local"
+  cwd: "."  # Use current directory
+
+# Or use SSH for remote execution (keeps agent code isolated)
+# terminal:
+#   env_type: "ssh"
+#   ssh_host: "my-server.example.com"
+#   ssh_user: "myuser"
+#   ssh_key: "~/.ssh/id_rsa"
+#   cwd: "/home/myuser/project"
+
+# Enable specific toolsets
+toolsets:
+  - all  # or: web, terminal, browser, vision, etc.
+
+# Custom personalities (use with /personality command)
+agent:
+  personalities:
+    helpful: "You are a helpful assistant."
+    kawaii: "You are a kawaii assistant! Use cute expressions..."
+```
+
+### Personalities
+
+Built-in personalities available via `/personality`:
+- `helpful`, `concise`, `technical`, `creative`, `teacher`
+- `kawaii`, `catgirl`, `pirate`, `shakespeare`, `surfer`
+- `noir`, `uwu`, `philosopher`, `hype`
+
+## Toolsets System
+
+The agent uses a toolsets system for organizing and managing tools. All tools must be part of a toolset to be accessible - individual tool selection is not supported. This ensures consistent and logical grouping of capabilities.
+
+### Key Concepts
+
+- **Toolsets**: Logical groups of tools for specific use cases (e.g., "research", "development", "debugging")
+- **Composition**: Toolsets can include other toolsets for powerful combinations
+- **Custom Toolsets**: Create your own toolsets at runtime or by editing `toolsets.py`
+- **Toolset-Only Access**: Tools are only accessible through toolsets, not individually
+
+### Available Toolsets
+
+See `toolsets.py` for the complete list of predefined toolsets including:
+- Basic toolsets (web, terminal, vision, creative, reasoning)
+- Composite toolsets (research, development, analysis, etc.)
+- Scenario-specific toolsets (debugging, documentation, API testing, etc.)
+- Special toolsets (safe mode without terminal, minimal, offline)
+
+### Using Toolsets
+
+```bash
+# Use a predefined toolset
+python run_agent.py --enabled_toolsets=research --query "Find latest AI papers"
+
+# Combine multiple toolsets
+python run_agent.py --enabled_toolsets=web,vision --query "Analyze this website"
+
+# Enable all toolsets explicitly (same as omitting the flag)
+python run_agent.py --enabled_toolsets=all --query "Do web research and run commands if helpful"
+
+# Safe mode (no terminal access)
+python run_agent.py --enabled_toolsets=safe --query "Help without running commands"
+
+# List all available toolsets and tools
+python run_agent.py --list_tools
+```
+
+See `toolsets.py` for the complete list of available toolsets and how to create custom ones.
+
+## Basic Usage
+
+### Default (all tools enabled)
+```bash
+# Uses OpenRouter by default - just set OPENROUTER_API_KEY in .env
+python run_agent.py \
+  --query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \
+  --max_turns 20 \
+  --model anthropic/claude-sonnet-4-20250514
+```
+
+### With specific toolset
+```bash
+python run_agent.py \
+  --query "Debug this Python error" \
+  --enabled_toolsets=debugging \
+  --model anthropic/claude-sonnet-4-20250514
+```
+
+### Python API
+```python
+from run_agent import AIAgent
+
+# Uses OpenRouter by default (reads OPENROUTER_API_KEY from .env)
+agent = AIAgent(
+    model="anthropic/claude-sonnet-4-20250514",
+    enabled_toolsets=["research"]
+)
+response = agent.chat("Find information about quantum computing")
+
+# Create custom toolset at runtime
+from toolsets import create_custom_toolset
+
+create_custom_toolset(
+    name="my_tools",
+    description="My custom toolkit",
+    tools=["web_search"],
+    includes=["terminal", "vision"]
+)
+
+agent = AIAgent(enabled_toolsets=["my_tools"])
+```
+
+## Batch Processing
+
+Process multiple prompts from a dataset in parallel with automatic checkpointing and statistics tracking:
+
+```bash
+# Basic batch processing
+python batch_runner.py \
+  --dataset_file=prompts.jsonl \
+  --batch_size=20 \
+  --run_name=my_run
+
+# With specific distribution
+python batch_runner.py \
+  --dataset_file=prompts.jsonl \
+  --batch_size=20 \
+  --run_name=image_run \
+  --distribution=image_gen \
+  --num_workers=4
+```
+
+**Key Features:**
+- Parallel processing with configurable workers
+- Toolset distributions for varied data generation
+- Automatic checkpointing and resume capability
+- Combined output in `data/<run_name>/trajectories.jsonl`
+- Tool usage statistics and success rates
+
+Use `--list_distributions` to see available toolset distributions for varied data generation.
+
+### Trajectory Compression
+
+Post-process trajectories to fit within token budgets for training:
+
+```bash
+# Compress a directory of JSONL files
+python trajectory_compressor.py --input=data/my_run
+
+# Compress a single JSONL file
+python trajectory_compressor.py --input=data/trajectories.jsonl
+
+# Compress a 15% sample (useful for creating smaller training sets)
+python trajectory_compressor.py --input=data/trajectories.jsonl --sample_percent=15
+
+# Custom output and token target
+python trajectory_compressor.py \
+  --input=data/trajectories.jsonl \
+  --output=data/compressed.jsonl \
+  --target_max_tokens=16000
+```
+
+**Features:**
+- Protects first turns (system, human, first GPT response, first tool call)
+- Protects last N turns (configurable)
+- Summarizes middle turns using LLM to fit target token budget
+- Supports both directory and single file input
+- Optional random sampling with `--sample_percent`
+- Configurable via `configs/trajectory_compression.yaml`
+
+### Ephemeral System Prompts
+
+The ephemeral system prompt feature allows you to guide the model's behavior during batch processing **without** saving that prompt to the training dataset trajectories. This is useful for:
+
+- Guiding model behavior during data collection
+- Adding task-specific instructions 
+- Keeping saved trajectories clean and focused on tool-calling format
+
+**Example:**
+```bash
+python batch_runner.py \
+  --dataset_file=prompts.jsonl \
+  --batch_size=10 \
+  --run_name=my_run \
+  --ephemeral_system_prompt="You are a helpful assistant focused on image generation."
+```
+
+The ephemeral prompt influences the model's behavior during execution only.
+
+**Only the standard tool-calling system prompt** is saved in the trajectory files; the ephemeral prompt is never written to them.
+
+## Command Line Arguments
+
+**Single Agent (`run_agent.py`):**
+- `--query`: The question or task for the agent
+- `--model`: Model to use (default: claude-opus-4-20250514)
+- `--api_key`: API key for authentication
+- `--base_url`: API endpoint URL
+- `--max_turns`: Maximum number of tool-calling iterations
+- `--enabled_toolsets`: Comma-separated list of toolsets to enable. Use `all` (or `*`) to enable everything. If omitted, all toolsets are enabled by default.
+- `--disabled_toolsets`: Comma-separated list of toolsets to disable
+- `--list_tools`: List all available toolsets and tools
+- `--save_trajectories`: Save conversation trajectories to JSONL files
+
+**Batch Processing (`batch_runner.py`):**
+- `--dataset_file`: Path to JSONL file with prompts
+- `--batch_size`: Number of prompts per batch
+- `--run_name`: Name for this run (for output/checkpointing)
+- `--distribution`: Toolset distribution to use (default: "default")
+- `--num_workers`: Number of parallel workers (default: 4)
+- `--resume`: Resume from checkpoint if interrupted
+- `--ephemeral_system_prompt`: System prompt used during execution but NOT saved to trajectories
+- `--list_distributions`: List available toolset distributions
+
+## Environment Variables
+
+All environment variables can be configured in the `.env` file (copy from `.env.example`).
+
+**LLM Provider (OpenRouter):**
+- `OPENROUTER_API_KEY`: Primary LLM access via OpenRouter (supports Claude, GPT-4, Gemini, etc.)
+- `LLM_MODEL`: Default model (e.g., `anthropic/claude-sonnet-4`, `openai/gpt-4o`)
+
+**Tool API Keys:**
+- `FIRECRAWL_API_KEY`: Web tools (search, extract, crawl)
+- `NOUS_API_KEY`: Vision and reasoning tools
+- `FAL_KEY`: Image generation tools
+
+**Terminal Tool Configuration (mini-swe-agent backend):**
+- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, `modal`, or `ssh` (default: `local`)
+- `TERMINAL_DOCKER_IMAGE`: Docker image for docker backend (default: `python:3.11-slim`)
+- `TERMINAL_SINGULARITY_IMAGE`: Singularity/Apptainer image (can be `docker://...` URL or local `.sif` path)
+- `TERMINAL_TIMEOUT`: Command timeout in seconds (default: `60`)
+- `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
+- `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
+- `TERMINAL_SCRATCH_DIR`: Custom scratch directory for sandbox storage (optional, auto-detects `/scratch`)
+- `SUDO_PASSWORD`: Enable sudo commands by piping password via `sudo -S` (works with all backends)
+  - If unset in CLI mode, you'll be prompted interactively when sudo is needed (45s timeout)
+
+**SSH Backend Configuration (for remote execution):**
+- `TERMINAL_SSH_HOST`: Remote server hostname or IP
+- `TERMINAL_SSH_USER`: SSH username
+- `TERMINAL_SSH_PORT`: SSH port (default: `22`)
+- `TERMINAL_SSH_KEY`: Path to SSH private key (optional, uses ssh-agent if not set)
+
+**Context Compression (auto-shrinks long conversations):**
+- `CONTEXT_COMPRESSION_ENABLED`: Enable auto-compression (default: `true`)
+- `CONTEXT_COMPRESSION_THRESHOLD`: Compress at this % of context limit (default: `0.85`)
+- `CONTEXT_COMPRESSION_MODEL`: Model for generating summaries (default: `google/gemini-2.0-flash-001`)
+
+**Browser Tool Configuration (agent-browser + Browserbase):**
+- `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
+- `BROWSERBASE_PROJECT_ID`: Browserbase project ID
+- `BROWSER_SESSION_TIMEOUT`: Session timeout in seconds (default: `300`)
+
+**Legacy Hecate Terminal Backend (optional):**
+- `MORPH_API_KEY`: For Hecate/MorphCloud terminal backend
+- `HECATE_VM_LIFETIME_SECONDS`: VM lifetime (default: 300)
+- `HECATE_DEFAULT_SNAPSHOT_ID`: Default snapshot (default: snapshot_p5294qxt)
+
+**Debug Options:**
+- `WEB_TOOLS_DEBUG`, `VISION_TOOLS_DEBUG`, `MOA_TOOLS_DEBUG`, `IMAGE_TOOLS_DEBUG`: Enable debug logging
+
+## Key Files
+
+| File | Purpose |
+|------|---------|
+| `hermes` | CLI launcher script (run with `./hermes`) |
+| `cli.py` | Interactive CLI implementation |
+| `cli-config.yaml` | CLI configuration (copy from `.example`) |
+| `run_agent.py` | Main agent runner - single query execution |
+| `batch_runner.py` | Parallel batch processing with checkpointing |
+| `model_tools.py` | Core tool definitions and handlers |
+| `toolsets.py` | Toolset definitions and composition |
+| `toolset_distributions.py` | Probability distributions for data generation |
+| `trajectory_compressor.py` | Post-process trajectories for training |
+| `tools/` | Individual tool implementations |
+| `tools/skills_tool.py` | Skills system with progressive disclosure |
+| `skills/` | On-demand knowledge documents |
+| `docs/` | Documentation |
+| `configs/` | Example batch run scripts |
diff --git a/hermes_agent.egg-info/SOURCES.txt b/hermes_agent.egg-info/SOURCES.txt
new file mode 100644
index 0000000000..d94b105e0a
--- /dev/null
+++ b/hermes_agent.egg-info/SOURCES.txt
@@ -0,0 +1,47 @@
+README.md
+batch_runner.py
+cli.py
+model_tools.py
+pyproject.toml
+run_agent.py
+toolset_distributions.py
+toolsets.py
+trajectory_compressor.py
+cron/__init__.py
+cron/jobs.py
+cron/scheduler.py
+gateway/__init__.py
+gateway/config.py
+gateway/delivery.py
+gateway/run.py
+gateway/session.py
+hermes_agent.egg-info/PKG-INFO
+hermes_agent.egg-info/SOURCES.txt
+hermes_agent.egg-info/dependency_links.txt
+hermes_agent.egg-info/entry_points.txt
+hermes_agent.egg-info/requires.txt
+hermes_agent.egg-info/top_level.txt
+hermes_cli/__init__.py
+hermes_cli/cron.py
+hermes_cli/doctor.py
+hermes_cli/gateway.py
+hermes_cli/main.py
+hermes_cli/setup.py
+hermes_cli/status.py
+tests/test_batch_runner.py
+tests/test_checkpoint_resumption.py
+tests/test_modal_terminal.py
+tests/test_nous_api_limits.py
+tests/test_nous_api_pattern.py
+tests/test_temperature_fix.py
+tests/test_web_tools.py
+tools/__init__.py
+tools/browser_tool.py
+tools/cronjob_tools.py
+tools/image_generation_tool.py
+tools/mixture_of_agents_tool.py
+tools/skills_tool.py
+tools/terminal_hecate.py
+tools/terminal_tool.py
+tools/vision_tools.py
+tools/web_tools.py
\ No newline at end of file
diff --git a/hermes_agent.egg-info/dependency_links.txt b/hermes_agent.egg-info/dependency_links.txt
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/hermes_agent.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/hermes_agent.egg-info/entry_points.txt b/hermes_agent.egg-info/entry_points.txt
new file mode 100644
index 0000000000..504de51c24
--- /dev/null
+++ b/hermes_agent.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+hermes = hermes_cli.main:main
+hermes-agent = run_agent:main
diff --git a/hermes_agent.egg-info/requires.txt b/hermes_agent.egg-info/requires.txt
new file mode 100644
index 0000000000..91036b7cda
--- /dev/null
+++ b/hermes_agent.egg-info/requires.txt
@@ -0,0 +1,35 @@
+openai
+python-dotenv
+fire
+httpx
+rich
+tenacity
+pyyaml
+requests
+jinja2
+pydantic>=2.0
+firecrawl-py
+fal-client
+litellm>=1.75.5
+typer
+platformdirs
+
+[all]
+croniter
+python-telegram-bot>=20.0
+discord.py>=2.0
+
+[cron]
+croniter
+
+[dev]
+pytest
+pytest-asyncio
+
+[messaging]
+python-telegram-bot>=20.0
+discord.py>=2.0
+
+[modal]
+modal
+boto3
diff --git a/hermes_agent.egg-info/top_level.txt b/hermes_agent.egg-info/top_level.txt
new file mode 100644
index 0000000000..a804090242
--- /dev/null
+++ b/hermes_agent.egg-info/top_level.txt
@@ -0,0 +1,11 @@
+batch_runner
+cli
+cron
+gateway
+hermes_cli
+model_tools
+run_agent
+tools
+toolset_distributions
+toolsets
+trajectory_compressor
diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py
new file mode 100644
index 0000000000..d8c95978c4
--- /dev/null
+++ b/hermes_cli/__init__.py
@@ -0,0 +1,14 @@
+"""
+Hermes CLI - Unified command-line interface for Hermes Agent.
+
+Provides subcommands for:
+- hermes chat          - Interactive chat (same as ./hermes)
+- hermes gateway       - Run gateway in foreground
+- hermes gateway start - Start gateway service
+- hermes gateway stop  - Stop gateway service  
+- hermes setup         - Interactive setup wizard
+- hermes status        - Show status of all components
+- hermes cron          - Manage cron jobs
+"""
+
+__version__ = "0.1.0"
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
new file mode 100644
index 0000000000..210473dbba
--- /dev/null
+++ b/hermes_cli/config.py
@@ -0,0 +1,392 @@
+"""
+Configuration management for Hermes Agent.
+
+Config files are stored in ~/.hermes/ for easy access:
+- ~/.hermes/config.yaml  - All settings (model, toolsets, terminal, etc.)
+- ~/.hermes/.env         - API keys and secrets
+
+This module provides:
+- hermes config          - Show current configuration
+- hermes config edit     - Open config in editor
+- hermes config set      - Set a specific value
+- hermes config wizard   - Re-run setup wizard
+"""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+import yaml
+
+# ANSI escape codes for terminal text styling; passed as *codes to color()
+class Colors:
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+    DIM = "\033[2m"
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    BLUE = "\033[34m"
+    MAGENTA = "\033[35m"
+    CYAN = "\033[36m"
+
+def color(text: str, *codes) -> str:
+    """Prefix text with the given ANSI codes and append RESET; plain text when stdout is not a TTY."""
+    use_ansi = sys.stdout.isatty()
+    return "".join(codes) + text + Colors.RESET if use_ansi else text
+
+
+# =============================================================================
+# Config paths
+# =============================================================================
+
+def get_hermes_home() -> Path:
+    """Return the Hermes home directory: $HERMES_HOME when set, otherwise ~/.hermes."""
+    return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+
+def get_config_path() -> Path:
+    """Return the path of the main settings file, config.yaml, inside the Hermes home."""
+    return get_hermes_home().joinpath("config.yaml")
+
+def get_env_path() -> Path:
+    """Return the path of the .env file (API keys) inside the Hermes home."""
+    return get_hermes_home().joinpath(".env")
+
+def get_project_root() -> Path:
+    """Return the resolved directory two levels above this file (the project installation directory)."""
+    return Path(__file__).parent.parent.resolve()
+
+def ensure_hermes_home():
+    """Create the Hermes home directory together with its cron/, sessions/ and logs/ subdirectories."""
+    base_dir = get_hermes_home()
+    # parents=True also creates the home directory itself; exist_ok makes repeat calls harmless.
+    for subdir in ("cron", "sessions", "logs"):
+        (base_dir / subdir).mkdir(parents=True, exist_ok=True)
+
+
+# =============================================================================
+# Config loading/saving
+# =============================================================================
+
+DEFAULT_CONFIG = {
+    "model": "anthropic/claude-sonnet-4",
+    "toolsets": ["hermes-cli"],
+    "max_turns": 100,
+    
+    "terminal": {
+        "backend": "local",  # show_config() also special-cases "docker" and "ssh"
+        "cwd": ".",  # Use current directory
+        "timeout": 180,  # seconds (show_config() prints it with an "s" suffix)
+        "docker_image": "python:3.11-slim",  # displayed only when backend is "docker"
+    },
+    
+    "browser": {
+        "inactivity_timeout": 120,  # presumably seconds - TODO confirm against the browser tool
+    },
+    
+    "compression": {
+        "enabled": True,
+        "threshold": 0.85,  # fraction; show_config() displays it as a percentage
+        "summary_model": "google/gemini-2.0-flash-001",
+    },
+    
+    "display": {
+        "compact": False,
+        "personality": "kawaii",
+    },
+}
+
+
+def load_config() -> Dict[str, Any]:
+    """Load ~/.hermes/config.yaml merged over a private copy of DEFAULT_CONFIG.
+
+    Section dicts are merged key by key, other values replace the default; a load error only prints a warning.
+    """
+    import copy  # function-scope import: the module header does not import copy
+    # Deep copy: a shallow .copy() shares the section dicts, so update() below would edit DEFAULT_CONFIG.
+    config = copy.deepcopy(DEFAULT_CONFIG)
+    config_path = get_config_path()
+    if config_path.exists():
+        try:
+            with open(config_path) as f:
+                user_config = yaml.safe_load(f) or {}
+            for key, value in user_config.items():
+                if isinstance(value, dict) and isinstance(config.get(key), dict):
+                    config[key].update(value)
+                else:
+                    config[key] = value
+        except Exception as e:
+            print(f"Warning: Failed to load config: {e}")
+    return config
+
+
+def save_config(config: Dict[str, Any]):
+    """Write config as block-style YAML to the main config file, creating the Hermes home first."""
+    ensure_hermes_home()
+    target = get_config_path()
+    # sort_keys=False keeps the keys in their insertion order instead of alphabetising them.
+    with open(target, 'w') as out:
+        yaml.dump(config, out, default_flow_style=False, sort_keys=False)
+
+
+def load_env() -> Dict[str, str]:
+    """Parse KEY=VALUE lines of ~/.hermes/.env into a dict, skipping blank and # comment lines."""
+    dotenv_file = get_env_path()
+    parsed = {}
+    if not dotenv_file.exists():
+        return parsed
+    with open(dotenv_file) as handle:
+        for raw in handle:
+            entry = raw.strip()
+            if not entry or entry.startswith('#') or '=' not in entry:
+                continue
+            name, _, raw_value = entry.partition('=')
+            parsed[name.strip()] = raw_value.strip().strip('"\'')
+    return parsed
+
+
+def save_env_value(key: str, value: str):
+    """Save or update key=value in ~/.hermes/.env (first matching line is rewritten, else it is appended)."""
+    ensure_hermes_home()
+    env_path = get_env_path()
+    
+    lines = []
+    if env_path.exists():
+        with open(env_path) as f:
+            lines = f.readlines()
+    
+    for i, line in enumerate(lines):
+        name, sep, _ = line.partition('=')
+        # Compare the stripped name so "KEY = value" also matches, the way load_env() reads it.
+        if sep and name.strip() == key:
+            lines[i] = f"{key}={value}\n"
+            break
+    else:
+        # A final line lacking "\n" would otherwise be glued to the new entry.
+        if lines and not lines[-1].endswith("\n"):
+            lines[-1] += "\n"
+        lines.append(f"{key}={value}\n")
+    
+    with open(env_path, 'w') as f:
+        f.writelines(lines)
+
+
+def get_env_value(key: str) -> Optional[str]:
+    """Look up key in the process environment, falling back to ~/.hermes/.env."""
+    try:
+        # A variable present in the real environment wins, even when it is empty.
+        return os.environ[key]
+    except KeyError:
+        # Not in the environment: consult the .env file (None when absent there too).
+        pass
+    return load_env().get(key)
+
+
+# =============================================================================
+# Config display
+# =============================================================================
+
+def redact_key(key: str) -> str:
+    """Mask an API key for display, keeping only its first and last four characters."""
+    if not key:
+        return color("(not set)", Colors.DIM)
+    # Keys shorter than 12 characters are hidden entirely instead of partly revealed.
+    long_enough = len(key) >= 12
+    return f"{key[:4]}...{key[-4:]}" if long_enough else "***"
+
+
+def show_config():
+    """Print the current configuration: paths, redacted API keys, model, terminal, compression, messaging."""
+    config = load_config()
+    
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
+    print(color("│              🦋 Hermes Configuration                    │", Colors.CYAN))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
+    
+    # Paths
+    print()
+    print(color("◆ Paths", Colors.CYAN, Colors.BOLD))
+    print(f"  Config:       {get_config_path()}")
+    print(f"  Secrets:      {get_env_path()}")
+    print(f"  Install:      {get_project_root()}")
+    
+    # API Keys
+    print()
+    print(color("◆ API Keys", Colors.CYAN, Colors.BOLD))
+    
+    keys = [
+        ("OPENROUTER_API_KEY", "OpenRouter"),
+        ("ANTHROPIC_API_KEY", "Anthropic"),
+        ("OPENAI_API_KEY", "OpenAI"),
+        ("FIRECRAWL_API_KEY", "Firecrawl"),
+        ("BROWSERBASE_API_KEY", "Browserbase"),
+        ("FAL_KEY", "FAL"),
+    ]
+    
+    # get_env_value() checks the process environment first, then ~/.hermes/.env
+    for env_key, name in keys:
+        value = get_env_value(env_key)
+        print(f"  {name:<14} {redact_key(value)}")
+    
+    # Model settings
+    print()
+    print(color("◆ Model", Colors.CYAN, Colors.BOLD))
+    print(f"  Model:        {config.get('model', 'not set')}")
+    print(f"  Max turns:    {config.get('max_turns', 100)}")
+    print(f"  Toolsets:     {', '.join(config.get('toolsets', ['all']))}")
+    
+    # Terminal
+    print()
+    print(color("◆ Terminal", Colors.CYAN, Colors.BOLD))
+    terminal = config.get('terminal', {})
+    print(f"  Backend:      {terminal.get('backend', 'local')}")
+    print(f"  Working dir:  {terminal.get('cwd', '.')}")
+    print(f"  Timeout:      {terminal.get('timeout', 60)}s")
+    
+    if terminal.get('backend') == 'docker':
+        print(f"  Docker image: {terminal.get('docker_image', 'python:3.11-slim')}")
+    elif terminal.get('backend') == 'ssh':
+        ssh_host = get_env_value('TERMINAL_SSH_HOST')
+        ssh_user = get_env_value('TERMINAL_SSH_USER')
+        print(f"  SSH host:     {ssh_host or '(not set)'}")
+        print(f"  SSH user:     {ssh_user or '(not set)'}")
+    
+    # Compression
+    print()
+    print(color("◆ Context Compression", Colors.CYAN, Colors.BOLD))
+    compression = config.get('compression', {})
+    enabled = compression.get('enabled', True)
+    print(f"  Enabled:      {'yes' if enabled else 'no'}")
+    if enabled:
+        print(f"  Threshold:    {compression.get('threshold', 0.85) * 100:.0f}%")
+        print(f"  Model:        {compression.get('summary_model', 'google/gemini-2.0-flash-001')}")
+    
+    # Messaging
+    print()
+    print(color("◆ Messaging Platforms", Colors.CYAN, Colors.BOLD))
+    
+    telegram_token = get_env_value('TELEGRAM_BOT_TOKEN')
+    discord_token = get_env_value('DISCORD_BOT_TOKEN')
+    
+    print(f"  Telegram:     {'configured' if telegram_token else color('not configured', Colors.DIM)}")
+    print(f"  Discord:      {'configured' if discord_token else color('not configured', Colors.DIM)}")
+    
+    print()
+    print(color("─" * 60, Colors.DIM))
+    print(color("  hermes config edit     # Edit config file", Colors.DIM))
+    print(color("  hermes config set KEY VALUE", Colors.DIM))
+    print(color("  hermes setup           # Run setup wizard", Colors.DIM))
+    print()
+
+
+def edit_config():
+    """Open the config file in the user's editor, creating it from defaults first if missing.
+
+    The editor is $EDITOR, else $VISUAL, else the first common editor found on PATH.
+    """
+    import shlex
+    import shutil
+
+    config_path = get_config_path()
+
+    # Ensure config exists
+    if not config_path.exists():
+        save_config(DEFAULT_CONFIG)
+        print(f"Created {config_path}")
+
+    # Find editor, falling back to common ones
+    editor = os.getenv('EDITOR') or os.getenv('VISUAL') or next(
+        (cmd for cmd in ('nano', 'vim', 'vi', 'code', 'notepad') if shutil.which(cmd)), None)
+    if not editor:
+        print("No editor found. Config file is at:")
+        print(f"  {config_path}")
+        return
+
+    print(f"Opening {config_path} in {editor}...")
+    # $EDITOR may carry arguments ("code --wait"); split unless it names one executable
+    argv = [editor] if shutil.which(editor) else shlex.split(editor)
+    subprocess.run(argv + [str(config_path)])
+
+
+def set_config_value(key: str, value: str):
+    """Set a value: secret-style keys go upper-cased to .env, all others to the config file."""
+    # Check if it's an API key (goes to .env)
+    api_keys = {
+        'OPENROUTER_API_KEY', 'ANTHROPIC_API_KEY', 'OPENAI_API_KEY',
+        'FIRECRAWL_API_KEY', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID',
+        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
+        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
+        'SUDO_PASSWORD',
+    }
+    env_key = key.upper()
+    if env_key in api_keys or env_key.startswith('TERMINAL_SSH'):
+        save_env_value(env_key, value)
+        print(f"✓ Set {key} in {get_env_path()}")
+        return
+
+    # Otherwise it goes to config.yaml; dotted keys (e.g. "terminal.backend") address nested sections
+    config = load_config()
+    *parents, leaf = key.split('.')
+    current = config
+    for part in parents:
+        if current.get(part) is None:  # missing, or an empty section that loaded as None
+            current[part] = {}
+        elif not isinstance(current[part], dict):  # refuse to index into an existing scalar
+            print(f"✗ Cannot set {key}: '{part}' is not a section in {get_config_path()}")
+            sys.exit(1)
+        current = current[part]
+
+    # Convert value to appropriate type
+    if value.lower() in ('true', 'yes', 'on'):
+        value = True
+    elif value.lower() in ('false', 'no', 'off'):
+        value = False
+    elif value.isdecimal():  # not isdigit(): "²".isdigit() is True but int("²") raises
+        value = int(value)
+    elif value.replace('.', '', 1).isdecimal():
+        value = float(value)
+
+    current[leaf] = value
+    save_config(config)
+    print(f"✓ Set {key} = {value} in {get_config_path()}")
+
+
+# =============================================================================
+# Command handler
+# =============================================================================
+
+def config_command(args):
+    """Dispatch `hermes config` to show (the default), edit, set, path, or env-path."""
+    subcmd = getattr(args, 'config_command', None)
+
+    if subcmd in (None, "show"):
+        show_config()
+        return
+    if subcmd == "edit":
+        edit_config()
+        return
+    if subcmd == "path":
+        print(get_config_path())
+        return
+    if subcmd == "env-path":
+        print(get_env_path())
+        return
+    if subcmd != "set":
+        print(f"Unknown config command: {subcmd}")
+        sys.exit(1)
+
+    # `set` needs both a non-empty key and a non-empty value.
+    key, value = (getattr(args, name, None) for name in ('key', 'value'))
+    if not (key and value):
+        usage = (
+            "Usage: hermes config set KEY VALUE", "", "Examples:",
+            "  hermes config set model anthropic/claude-sonnet-4",
+            "  hermes config set terminal.backend docker",
+            "  hermes config set OPENROUTER_API_KEY sk-or-...",
+        )
+        print("\n".join(usage))
+        sys.exit(1)
+    set_config_value(key, value)
diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py
new file mode 100644
index 0000000000..dc43ab636f
--- /dev/null
+++ b/hermes_cli/cron.py
@@ -0,0 +1,131 @@
+"""
+Cron subcommand for hermes CLI.
+
+Handles: hermes cron [list|daemon|tick]
+"""
+
+import json
+import sys
+import time
+from pathlib import Path
+from datetime import datetime
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()  # directory two levels above this file
+sys.path.insert(0, str(PROJECT_ROOT))  # presumably so the function-level `cron.*` imports resolve — verify
+
+# ANSI colors
+class Colors:  # ANSI SGR escape sequences, combined by color() below
+    RESET = "\033[0m"  # SGR 0: clear all attributes
+    BOLD = "\033[1m"  # SGR 1
+    DIM = "\033[2m"  # SGR 2
+    RED = "\033[31m"  # SGR 31: red foreground
+    GREEN = "\033[32m"  # SGR 32: green foreground
+    YELLOW = "\033[33m"  # SGR 33: yellow foreground
+    CYAN = "\033[36m"  # SGR 36: cyan foreground
+
+def color(text: str, *codes) -> str:
+    """Wrap *text* in *codes* plus a reset; return it unchanged when stdout is not a TTY."""
+    styled = sys.stdout.isatty()
+    return "".join(codes) + text + Colors.RESET if styled else text
+
+
+def cron_list(show_all: bool = False):
+    """Print every scheduled job; disabled jobs are included only when *show_all* is true."""
+    from cron.jobs import list_jobs
+
+    jobs = list_jobs(include_disabled=show_all)
+    if not jobs:
+        for hint in ("No scheduled jobs.", "Create one with: hermes cron add <schedule> <prompt>"):
+            print(color(hint, Colors.DIM))
+        return
+
+    print()
+    for border in (
+        "┌─────────────────────────────────────────────────────────────────────────┐",
+        "│                         Scheduled Jobs                                  │",
+        "└─────────────────────────────────────────────────────────────────────────┘",
+    ):
+        print(color(border, Colors.CYAN))
+    print()
+
+    for job in jobs:
+        # All fields are gathered before the first line for this job is printed.
+        short_id = job.get("id", "?")[:8]
+        name = job.get("name", "(unnamed)")
+        raw_schedule = job.get("schedule", {}).get("value", "?")
+        schedule = job.get("schedule_display", raw_schedule)
+        next_run = job.get("next_run_at", "?")
+
+        # Repeat info: "completed/times" when a limit is set, otherwise "∞".
+        repeat = job.get("repeat", {})
+        total, done = repeat.get("times"), repeat.get("completed", 0)
+        repeat_str = f"{done}/{total}" if total else "∞"
+
+        # Delivery targets: a lone string is treated as a one-item list.
+        targets = job.get("deliver", ["local"])
+        deliver_str = ", ".join([targets] if isinstance(targets, str) else targets)
+
+        # Status indicator
+        if job.get("enabled", True):
+            status = color("[active]", Colors.GREEN)
+        else:
+            status = color("[disabled]", Colors.RED)
+
+        lines = (
+            f"  {color(short_id, Colors.YELLOW)} {status}",
+            f"    Name:      {name}",
+            f"    Schedule:  {schedule}",
+            f"    Repeat:    {repeat_str}",
+            f"    Next run:  {next_run}",
+            f"    Deliver:   {deliver_str}",
+            "",
+        )
+        for line in lines:
+            print(line)
+
+
+def cron_daemon(interval: int = 60):
+    """Print a banner, then call start_daemon(interval=...); Ctrl+C prints a stop notice."""
+    from cron.scheduler import start_daemon
+    for row in (
+        "┌─────────────────────────────────────────────────────────┐",
+        "│              🦋 Hermes Cron Daemon                      │",
+        "├─────────────────────────────────────────────────────────┤",
+        "│  Press Ctrl+C to stop                                   │",
+        "└─────────────────────────────────────────────────────────┘",
+    ):
+        print(color(row, Colors.CYAN))
+    print()
+    try:
+        start_daemon(interval=interval)
+    except KeyboardInterrupt:
+        print("\n" + color("Cron daemon stopped.", Colors.YELLOW))
+
+
+def cron_tick():
+    """Print a timestamped notice, then call the scheduler's tick() once."""
+    from cron.scheduler import tick
+    stamp = datetime.now().isoformat()
+    print(f"[{stamp}] Running cron tick...")
+    tick()
+
+
+def cron_command(args):
+    """Dispatch `hermes cron` to list (the default), daemon, or tick."""
+    subcmd = getattr(args, 'cron_command', None)
+
+    # No subcommand behaves like `list`.
+    if subcmd in (None, "list"):
+        cron_list(getattr(args, 'all', False))
+        return
+    if subcmd == "daemon":
+        cron_daemon(getattr(args, 'interval', 60))
+        return
+    if subcmd == "tick":
+        cron_tick()
+        return
+
+    # Anything else is a usage error.
+    print(f"Unknown cron command: {subcmd}")
+    print("Usage: hermes cron [list|daemon|tick]")
+    sys.exit(1)
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
new file mode 100644
index 0000000000..2b69317bb4
--- /dev/null
+++ b/hermes_cli/doctor.py
@@ -0,0 +1,278 @@
+"""
+Doctor command for hermes CLI.
+
+Diagnoses issues with Hermes Agent setup.
+"""
+
+import os
+import sys
+import subprocess
+import shutil
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()  # directory two levels above this file; .env and cli-config.yaml are looked up here
+
+# ANSI colors
+class Colors:  # ANSI SGR escape sequences, combined by color() below
+    RESET = "\033[0m"  # SGR 0: clear all attributes
+    BOLD = "\033[1m"  # SGR 1
+    DIM = "\033[2m"  # SGR 2
+    RED = "\033[31m"  # SGR 31: red foreground
+    GREEN = "\033[32m"  # SGR 32: green foreground
+    YELLOW = "\033[33m"  # SGR 33: yellow foreground
+    CYAN = "\033[36m"  # SGR 36: cyan foreground
+
+def color(text: str, *codes) -> str:
+    """Wrap *text* in *codes* plus a reset; return it unchanged when stdout is not a TTY."""
+    styled = sys.stdout.isatty()
+    return "".join(codes) + text + Colors.RESET if styled else text
+
+def check_ok(text: str, detail: str = ""):  # green ✓ line; a non-empty detail is appended dimmed
+    print(" ", color('✓', Colors.GREEN), text, *([color(detail, Colors.DIM)] if detail else []))
+
+def check_warn(text: str, detail: str = ""):  # yellow ⚠ line; a non-empty detail is appended dimmed
+    print(" ", color('⚠', Colors.YELLOW), text, *([color(detail, Colors.DIM)] if detail else []))
+
+def check_fail(text: str, detail: str = ""):  # red ✗ line; a non-empty detail is appended dimmed
+    print(" ", color('✗', Colors.RED), text, *([color(detail, Colors.DIM)] if detail else []))
+
+def check_info(text: str):  # indented cyan → line
+    print("   ", color('→', Colors.CYAN), text)
+
+
+def run_doctor(args):
+    """Run diagnostic checks and print a summary; `args.fix` is read but auto-fix is not implemented."""
+    should_fix = getattr(args, 'fix', False)
+
+    issues = []
+
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
+    print(color("│                 🩺 Hermes Doctor                        │", Colors.CYAN))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
+
+    # =========================================================================
+    # Check: Python version
+    # =========================================================================
+    print()
+    print(color("◆ Python Environment", Colors.CYAN, Colors.BOLD))
+
+    py_version = sys.version_info
+    if py_version >= (3, 10):
+        check_ok(f"Python {py_version.major}.{py_version.minor}.{py_version.micro}")
+    elif py_version >= (3, 8):
+        check_warn(f"Python {py_version.major}.{py_version.minor}.{py_version.micro}", "(3.10+ recommended)")
+    else:
+        check_fail(f"Python {py_version.major}.{py_version.minor}.{py_version.micro}", "(3.10+ required)")
+        issues.append("Upgrade Python to 3.10+")
+
+    # Check if in virtual environment
+    in_venv = sys.prefix != sys.base_prefix
+    if in_venv:
+        check_ok("Virtual environment active")
+    else:
+        check_warn("Not in virtual environment", "(recommended)")
+
+    # =========================================================================
+    # Check: Required packages
+    # =========================================================================
+    print()
+    print(color("◆ Required Packages", Colors.CYAN, Colors.BOLD))
+
+    required_packages = [  # (import name, display name, pip distribution name)
+        ("openai", "OpenAI SDK", "openai"),
+        ("rich", "Rich (terminal UI)", "rich"),
+        ("dotenv", "python-dotenv", "python-dotenv"),
+        ("yaml", "PyYAML", "pyyaml"),
+        ("httpx", "HTTPX", "httpx"),
+    ]
+
+    optional_packages = [
+        ("croniter", "Croniter (cron expressions)"),
+        ("browserbase", "Browserbase SDK"),
+        ("telegram", "python-telegram-bot"),
+        ("discord", "discord.py"),
+    ]
+
+    for module, name, pip_name in required_packages:
+        try:
+            __import__(module)
+            check_ok(name)
+        except ImportError:
+            check_fail(name, "(missing)")
+            issues.append(f"Install {name}: pip install {pip_name}")
+
+    for module, name in optional_packages:
+        try:
+            __import__(module)
+            check_ok(name, "(optional)")
+        except ImportError:
+            check_warn(name, "(optional, not installed)")
+
+    # =========================================================================
+    # Check: Configuration files
+    # =========================================================================
+    print()
+    print(color("◆ Configuration Files", Colors.CYAN, Colors.BOLD))
+
+    env_path = PROJECT_ROOT / '.env'
+    if env_path.exists():
+        check_ok(".env file exists")
+
+        # Substring check only: a commented-out or empty key line also counts as configured
+        content = env_path.read_text()
+        if "OPENROUTER_API_KEY" in content or "ANTHROPIC_API_KEY" in content:
+            check_ok("API key configured")
+        else:
+            check_warn("No API key found in .env")
+            issues.append("Run 'hermes setup' to configure API keys")
+    else:
+        check_fail(".env file missing")
+        check_info("Run 'hermes setup' to create one")
+        issues.append("Run 'hermes setup' to create .env")
+
+    config_path = PROJECT_ROOT / 'cli-config.yaml'
+    if config_path.exists():
+        check_ok("cli-config.yaml exists")
+    else:
+        check_warn("cli-config.yaml not found", "(using defaults)")
+
+    # =========================================================================
+    # Check: Directory structure
+    # =========================================================================
+    print()
+    print(color("◆ Directory Structure", Colors.CYAN, Colors.BOLD))
+
+    hermes_home = Path.home() / ".hermes"
+    if hermes_home.exists():
+        check_ok("~/.hermes directory exists")
+    else:
+        check_warn("~/.hermes not found", "(will be created on first use)")
+
+    logs_dir = PROJECT_ROOT / "logs"
+    if logs_dir.exists():
+        check_ok("logs/ directory exists")
+    else:
+        check_warn("logs/ not found", "(will be created on first use)")
+
+    # =========================================================================
+    # Check: External tools
+    # =========================================================================
+    print()
+    print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
+
+    # Git
+    if shutil.which("git"):
+        check_ok("git")
+    else:
+        check_warn("git not found", "(optional)")
+
+    # Docker (optional)
+    terminal_env = os.getenv("TERMINAL_ENV", "local")
+    if terminal_env == "docker":
+        if shutil.which("docker"):
+            # Check if docker daemon is running
+            result = subprocess.run(["docker", "info"], capture_output=True)
+            if result.returncode == 0:
+                check_ok("docker", "(daemon running)")
+            else:
+                check_fail("docker daemon not running")
+                issues.append("Start Docker daemon")
+        else:
+            check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
+            issues.append("Install Docker or change TERMINAL_ENV")
+    else:
+        if shutil.which("docker"):
+            check_ok("docker", "(optional)")
+        else:
+            check_warn("docker not found", "(optional)")
+
+    # SSH (if using ssh backend)
+    if terminal_env == "ssh":
+        ssh_host = os.getenv("TERMINAL_SSH_HOST")
+        if ssh_host:
+            # Try to connect; BatchMode=yes makes ssh fail instead of prompting for a password
+            result = subprocess.run(
+                ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode == 0:
+                check_ok(f"SSH connection to {ssh_host}")
+            else:
+                check_fail(f"SSH connection to {ssh_host}")
+                issues.append(f"Check SSH configuration for {ssh_host}")
+        else:
+            check_fail("TERMINAL_SSH_HOST not set", "(required for TERMINAL_ENV=ssh)")
+            issues.append("Set TERMINAL_SSH_HOST in .env")
+
+    # =========================================================================
+    # Check: API connectivity
+    # =========================================================================
+    print()
+    print(color("◆ API Connectivity", Colors.CYAN, Colors.BOLD))
+
+    openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    if openrouter_key:
+        try:
+            import httpx
+            response = httpx.get(
+                "https://openrouter.ai/api/v1/models",
+                headers={"Authorization": f"Bearer {openrouter_key}"},
+                timeout=10
+            )
+            if response.status_code == 200:
+                check_ok("OpenRouter API")
+            elif response.status_code == 401:
+                check_fail("OpenRouter API", "(invalid API key)")
+                issues.append("Check OPENROUTER_API_KEY in .env")
+            else:
+                check_fail("OpenRouter API", f"(HTTP {response.status_code})")
+        except Exception as e:
+            check_fail("OpenRouter API", f"({e})")
+            issues.append("Check network connectivity")
+    else:
+        check_warn("OpenRouter API", "(not configured)")
+
+    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
+    if anthropic_key:
+        try:
+            import httpx
+            response = httpx.get(
+                "https://api.anthropic.com/v1/models",
+                headers={
+                    "x-api-key": anthropic_key,
+                    "anthropic-version": "2023-06-01"
+                },
+                timeout=10
+            )
+            if response.status_code == 200:
+                check_ok("Anthropic API")
+            elif response.status_code == 401:
+                check_fail("Anthropic API", "(invalid API key)")
+            else:
+                # Note: Anthropic may not have /models endpoint
+                check_warn("Anthropic API", "(couldn't verify)")
+        except Exception as e:
+            check_warn("Anthropic API", f"({e})")
+
+    # =========================================================================
+    # Summary
+    # =========================================================================
+    print()
+    if issues:
+        print(color("─" * 60, Colors.YELLOW))
+        print(color(f"  Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
+        print()
+        for i, issue in enumerate(issues, 1):
+            print(f"  {i}. {issue}")
+        print()
+
+        if should_fix:
+            print(color("  Attempting auto-fix is not yet implemented.", Colors.DIM))
+            print(color("  Please resolve issues manually.", Colors.DIM))
+    else:
+        print(color("─" * 60, Colors.GREEN))
+        print(color("  All checks passed! 🎉", Colors.GREEN, Colors.BOLD))
+
+    print()
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
new file mode 100644
index 0000000000..678a68927a
--- /dev/null
+++ b/hermes_cli/gateway.py
@@ -0,0 +1,371 @@
+"""
+Gateway subcommand for hermes CLI.
+
+Handles: hermes gateway [run|start|stop|restart|status|install|uninstall]
+"""
+
+import asyncio
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()  # directory two levels above this file; used as the service working directory
+
+
+def is_linux() -> bool:  # prefix match, so any platform string starting with "linux" counts
+    return sys.platform.startswith('linux')
+
+def is_macos() -> bool:  # sys.platform is exactly 'darwin'
+    return sys.platform == 'darwin'
+
+def is_windows() -> bool:  # sys.platform is exactly 'win32'
+    return sys.platform == 'win32'
+
+
+# =============================================================================
+# Service Configuration
+# =============================================================================
+
+SERVICE_NAME = "hermes-gateway"  # systemd unit name; the unit file is "<SERVICE_NAME>.service"
+SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"  # Description= line of the systemd unit
+
+def get_systemd_unit_path() -> Path:  # per-user unit file: ~/.config/systemd/user/<SERVICE_NAME>.service
+    return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service"
+
+def get_launchd_plist_path() -> Path:  # per-user plist: ~/Library/LaunchAgents/ai.hermes.gateway.plist
+    return Path.home() / "Library" / "LaunchAgents" / "ai.hermes.gateway.plist"
+
+def get_python_path() -> str:
+    """Return the project venv's interpreter if it exists, else the running interpreter."""
+    candidate = PROJECT_ROOT / "venv" / "bin" / "python"
+    # Only the POSIX venv layout (venv/bin/python) is probed.
+    return str(candidate) if candidate.exists() else sys.executable
+
+def get_hermes_cli_path() -> str:
+    """Return the `hermes` executable found on PATH, else a `<python> -m` command string."""
+    from shutil import which
+
+    # Prefer an installed entry point (e.g. installed via pip).
+    installed = which("hermes")
+    if not installed:
+        # Fallback to direct module execution
+        return f"{get_python_path()} -m hermes_cli.main"
+    return installed
+
+
+# =============================================================================
+# Systemd (Linux)
+# =============================================================================
+
+def generate_systemd_unit() -> str:
+    """Return unit-file text that runs `<python> -m hermes_cli.main gateway run` from PROJECT_ROOT."""
+    python_path = get_python_path()
+    working_dir = str(PROJECT_ROOT)
+    return f"""[Unit]
+Description={SERVICE_DESCRIPTION}
+After=network.target
+
+[Service]
+Type=simple
+ExecStart={python_path} -m hermes_cli.main gateway run
+WorkingDirectory={working_dir}
+Restart=on-failure
+RestartSec=10
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=default.target
+"""
+
+def systemd_install(force: bool = False):
+    """Write the user unit file, then reload systemd and enable the service.
+
+    An existing unit file is left alone unless *force* is true.
+    """
+    unit_path = get_systemd_unit_path()
+    if unit_path.exists() and not force:
+        print(f"Service already installed at: {unit_path}")
+        print("Use --force to reinstall")
+        return
+
+    unit_path.parent.mkdir(parents=True, exist_ok=True)
+    print(f"Installing systemd service to: {unit_path}")
+    unit_path.write_text(generate_systemd_unit())
+
+    for action in (["daemon-reload"], ["enable", SERVICE_NAME]):
+        subprocess.run(["systemctl", "--user", *action], check=True)
+
+    print("\n".join((
+        "", "✓ Service installed and enabled!", "", "Next steps:",
+        "  hermes gateway start              # Start the service",
+        "  hermes gateway status             # Check status",
+        f"  journalctl --user -u {SERVICE_NAME} -f  # View logs",
+        "", "To enable lingering (keeps running after logout):",
+        "  sudo loginctl enable-linger $USER",
+    )))
+def systemd_uninstall():
+    """Stop and disable the service (failures ignored), remove its unit file, reload systemd."""
+    for action in ("stop", "disable"):
+        subprocess.run(["systemctl", "--user", action, SERVICE_NAME], check=False)
+
+    unit_path = get_systemd_unit_path()
+    if unit_path.exists():
+        unit_path.unlink()
+        print(f"✓ Removed {unit_path}")
+    subprocess.run(["systemctl", "--user", "daemon-reload"], check=True)
+    print("✓ Service uninstalled")
+
+def systemd_start():  # check=True: raises CalledProcessError if systemctl exits non-zero
+    subprocess.run(["systemctl", "--user", "start", SERVICE_NAME], check=True)
+    print("✓ Service started")
+
+def systemd_stop():  # check=True: raises CalledProcessError if systemctl exits non-zero
+    subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=True)
+    print("✓ Service stopped")
+
+def systemd_restart():  # check=True: raises CalledProcessError if systemctl exits non-zero
+    subprocess.run(["systemctl", "--user", "restart", SERVICE_NAME], check=True)
+    print("✓ Service restarted")
+
+def systemd_status(deep: bool = False):
+    """Show `systemctl status` for the gateway unit, then a one-line running/stopped verdict.
+
+    With *deep*, also print the last 20 journal lines for the unit.
+    """
+    # Check if service unit file exists
+    if not get_systemd_unit_path().exists():
+        print("✗ Gateway service is not installed")
+        print("  Run: hermes gateway install")
+        return
+
+    systemctl = ["systemctl", "--user"]
+
+    # Show detailed status first
+    subprocess.run(systemctl + ["status", SERVICE_NAME, "--no-pager"], capture_output=False)
+
+    # Check if service is active
+    probe = subprocess.run(
+        systemctl + ["is-active", SERVICE_NAME],
+        capture_output=True,
+        text=True,
+    )
+
+    if probe.stdout.strip() != "active":
+        print("✗ Gateway service is stopped")
+        print("  Run: hermes gateway start")
+    else:
+        print("✓ Gateway service is running")
+
+    # Optional journal tail
+    if not deep:
+        return
+
+    print()
+    print("Recent logs:")
+    subprocess.run(["journalctl", "--user", "-u", SERVICE_NAME, "-n", "20", "--no-pager"])
+
+
+# =============================================================================
+# Launchd (macOS)
+# =============================================================================
+
+def generate_launchd_plist() -> str:
+    """Return the LaunchAgent plist text; creates ~/.hermes/logs, where the job's output is written."""
+    python_path = get_python_path()
+    working_dir = str(PROJECT_ROOT)
+    log_dir = Path.home() / ".hermes" / "logs"
+    log_dir.mkdir(parents=True, exist_ok=True)
+    return f"""<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>ai.hermes.gateway</string>
+    
+    <key>ProgramArguments</key>
+    <array>
+        <string>{python_path}</string>
+        <string>-m</string>
+        <string>hermes_cli.main</string>
+        <string>gateway</string>
+        <string>run</string>
+    </array>
+    
+    <key>WorkingDirectory</key>
+    <string>{working_dir}</string>
+    
+    <key>RunAtLoad</key>
+    <true/>
+    
+    <key>KeepAlive</key>
+    <dict>
+        <key>SuccessfulExit</key>
+        <false/>
+    </dict>
+    
+    <key>StandardOutPath</key>
+    <string>{log_dir}/gateway.log</string>
+    
+    <key>StandardErrorPath</key>
+    <string>{log_dir}/gateway.error.log</string>
+</dict>
+</plist>
+"""
+
+def launchd_install(force: bool = False):
+    """Write the LaunchAgent plist and load it; an existing plist is kept unless *force*."""
+    plist_path = get_launchd_plist_path()
+
+    if plist_path.exists() and not force:
+        print(f"Service already installed at: {plist_path}")
+        print("Use --force to reinstall")
+        return
+
+    plist_path.parent.mkdir(parents=True, exist_ok=True)
+    print(f"Installing launchd service to: {plist_path}")
+    plist_path.write_text(generate_launchd_plist())
+    subprocess.run(["launchctl", "load", str(plist_path)], check=True)
+
+    for line in (
+        "", "✓ Service installed and loaded!", "", "Next steps:",
+        "  hermes gateway status             # Check status",
+        "  tail -f ~/.hermes/logs/gateway.log  # View logs",
+    ):
+        print(line)
+
+def launchd_uninstall():
+    """Unload the LaunchAgent (failure ignored) and delete its plist if present."""
+    plist_path = get_launchd_plist_path()
+    unload_cmd = ["launchctl", "unload", str(plist_path)]
+    subprocess.run(unload_cmd, check=False)
+    if plist_path.exists():
+        plist_path.unlink()
+        print(f"✓ Removed {plist_path}")
+    print("✓ Service uninstalled")
+
+def launchd_start():  # check=True: raises CalledProcessError if launchctl exits non-zero
+    subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
+    print("✓ Service started")
+
+def launchd_stop():  # check=True: raises CalledProcessError if launchctl exits non-zero
+    subprocess.run(["launchctl", "stop", "ai.hermes.gateway"], check=True)
+    print("✓ Service stopped")
+
+def launchd_restart():  # stop, then start; if the stop raises, the start is never attempted
+    launchd_stop()
+    launchd_start()
+
+def launchd_status(deep: bool = False):
+    """Report whether launchd lists the gateway job; with *deep*, tail its log file."""
+    listing = subprocess.run(["launchctl", "list", "ai.hermes.gateway"], capture_output=True, text=True)
+
+    if listing.returncode != 0:
+        print("✗ Gateway service is not loaded")
+    else:
+        print("✓ Gateway service is loaded")
+        print(listing.stdout)
+
+    if not deep:
+        return
+
+    # Show the last 20 lines of the gateway log, if the file exists.
+    log_file = Path.home() / ".hermes" / "logs" / "gateway.log"
+    if log_file.exists():
+        print()
+        print("Recent logs:")
+        subprocess.run(["tail", "-20", str(log_file)])
+
+
+# =============================================================================
+# Gateway Runner
+# =============================================================================
+
+def run_gateway(verbose: bool = False):
+    """Run the gateway in foreground via asyncio.run(start_gateway())."""
+    # NOTE: *verbose* is accepted but not used in this function.
+    sys.path.insert(0, str(PROJECT_ROOT))
+    from gateway.run import start_gateway
+    print("\n".join((
+        "┌─────────────────────────────────────────────────────────┐",
+        "│           🦋 Hermes Gateway Starting...                 │",
+        "├─────────────────────────────────────────────────────────┤",
+        "│  Press Ctrl+C to stop                                   │",
+        "└─────────────────────────────────────────────────────────┘",
+        "",
+    )))
+    asyncio.run(start_gateway())
+
+
+# =============================================================================
+# Main Command Handler
+# =============================================================================
+
+def gateway_command(args):
+    """Handle gateway subcommands: run (default) in foreground, or manage the systemd/launchd service."""
+    subcmd = getattr(args, 'gateway_command', None)
+
+    # Default to run if no subcommand
+    if subcmd is None or subcmd == "run":
+        verbose = getattr(args, 'verbose', False)
+        run_gateway(verbose)
+        return
+
+    # Service management commands: systemd on Linux, launchd on macOS, exit 1 elsewhere
+    if subcmd == "install":
+        force = getattr(args, 'force', False)
+        if is_linux():
+            systemd_install(force)
+        elif is_macos():
+            launchd_install(force)
+        else:
+            print("Service installation not supported on this platform.")
+            print("Run manually: hermes gateway run")
+            sys.exit(1)
+    elif subcmd == "uninstall":
+        if is_linux():
+            systemd_uninstall()
+        elif is_macos():
+            launchd_uninstall()
+        else:
+            print("Not supported on this platform.")
+            sys.exit(1)
+    elif subcmd == "start":
+        if is_linux():
+            systemd_start()
+        elif is_macos():
+            launchd_start()
+        else:
+            print("Not supported on this platform.")
+            sys.exit(1)
+    elif subcmd == "stop":
+        if is_linux():
+            systemd_stop()
+        elif is_macos():
+            launchd_stop()
+        else:
+            print("Not supported on this platform.")
+            sys.exit(1)
+    elif subcmd == "restart":
+        if is_linux():
+            systemd_restart()
+        elif is_macos():
+            launchd_restart()
+        else:
+            print("Not supported on this platform.")
+            sys.exit(1)
+    elif subcmd == "status":
+        deep = getattr(args, 'deep', False)
+        if is_linux():
+            systemd_status(deep)
+        elif is_macos():
+            launchd_status(deep)
+        else:
+            print("Not supported on this platform.")
+            sys.exit(1)
+    else:
+        # Unknown subcommand: report it rather than silently doing nothing
+        print(f"Unknown gateway command: {subcmd}")
+        print("Usage: hermes gateway [run|start|stop|restart|status|install|uninstall]")
+        sys.exit(1)
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
new file mode 100644
index 0000000000..a16fd7f1c6
--- /dev/null
+++ b/hermes_cli/main.py
@@ -0,0 +1,432 @@
+#!/usr/bin/env python3
+"""
+Hermes CLI - Main entry point.
+
+Usage:
+    hermes                     # Interactive chat (default)
+    hermes chat                # Interactive chat
+    hermes gateway             # Run gateway in foreground
+    hermes gateway start       # Start gateway as service
+    hermes gateway stop        # Stop gateway service
+    hermes gateway status      # Show gateway status
+    hermes gateway install     # Install gateway service
+    hermes gateway uninstall   # Uninstall gateway service
+    hermes setup               # Interactive setup wizard
+    hermes status              # Show status of all components
+    hermes cron                # Manage cron jobs
+    hermes cron list           # List cron jobs
+    hermes cron daemon         # Run cron daemon
+    hermes doctor              # Check configuration and dependencies
+    hermes version             # Show version
+"""
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+# Put the project root (parent of this package directory) first on sys.path
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+sys.path.insert(0, str(PROJECT_ROOT))
+
+# Load environment variables from <PROJECT_ROOT>/.env when that file exists
+from dotenv import load_dotenv
+env_path = PROJECT_ROOT / '.env'
+if env_path.exists():
+    load_dotenv(dotenv_path=env_path)
+
+from hermes_cli import __version__
+
+
+def cmd_chat(args):
+    """Launch the chat CLI, forwarding the chat options that were supplied.
+
+    Reads ``model``, ``toolsets``, ``verbose`` and ``query`` from *args* and
+    passes each one that is not ``None`` to ``cli.main`` as a keyword argument.
+    """
+    from cli import main as cli_main
+
+    option_names = ("model", "toolsets", "verbose", "query")
+    forwarded = {}
+    for name in option_names:
+        value = getattr(args, name)
+        # Unset options are omitted so cli.main never receives an explicit None.
+        if value is not None:
+            forwarded[name] = value
+    cli_main(**forwarded)
+
+
+def cmd_gateway(args):
+    """Hand *args* to ``hermes_cli.gateway.gateway_command`` (imported lazily)."""
+    from hermes_cli.gateway import gateway_command as dispatch
+    dispatch(args)
+
+
+def cmd_setup(args):
+    """Hand *args* to ``hermes_cli.setup.run_setup_wizard`` (imported lazily)."""
+    from hermes_cli.setup import run_setup_wizard as dispatch
+    dispatch(args)
+
+
+def cmd_status(args):
+    """Hand *args* to ``hermes_cli.status.show_status`` (imported lazily)."""
+    from hermes_cli.status import show_status as dispatch
+    dispatch(args)
+
+
+def cmd_cron(args):
+    """Hand *args* to ``hermes_cli.cron.cron_command`` (imported lazily)."""
+    from hermes_cli.cron import cron_command as dispatch
+    dispatch(args)
+
+
+def cmd_doctor(args):
+    """Hand *args* to ``hermes_cli.doctor.run_doctor`` (imported lazily)."""
+    from hermes_cli.doctor import run_doctor as dispatch
+    dispatch(args)
+
+
+def cmd_config(args):
+    """Hand *args* to ``hermes_cli.config.config_command`` (imported lazily)."""
+    from hermes_cli.config import config_command as dispatch
+    dispatch(args)
+
+
+def cmd_version(args):
+    """Print the Hermes version, project path, Python and OpenAI SDK versions.
+
+    *args* is accepted for the common command signature and is not read.
+    """
+    print(f"Hermes Agent v{__version__}")
+    print(f"Project: {PROJECT_ROOT}")
+    print(f"Python: {sys.version.split()[0]}")
+    try:
+        import openai
+    except ImportError:
+        print("OpenAI SDK: Not installed")
+    else:
+        print(f"OpenAI SDK: {openai.__version__}")
+
+
+def cmd_update(args):
+    """Update the git checkout at PROJECT_ROOT and reinstall its dependencies.
+
+    Exits with status 1 if this is not a git checkout or a git/pip step fails.
+    """
+    import subprocess
+
+    print("🦋 Updating Hermes Agent...\n")
+    # Check if we're in a git repo
+    if not (PROJECT_ROOT / '.git').exists():
+        print("✗ Not a git repository. Please reinstall:")
+        print("  curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash")
+        sys.exit(1)
+
+    # Fetch and pull
+    try:
+        print("→ Fetching updates...")
+        subprocess.run(["git", "fetch", "origin"], cwd=PROJECT_ROOT, check=True)
+
+        # Get current branch
+        result = subprocess.run(
+            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+            cwd=PROJECT_ROOT,
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        branch = result.stdout.strip()
+
+        # Count commits on origin/<branch> that the local HEAD does not have
+        result = subprocess.run(
+            ["git", "rev-list", f"HEAD..origin/{branch}", "--count"],
+            cwd=PROJECT_ROOT,
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        commit_count = int(result.stdout.strip())
+
+        if commit_count == 0:
+            print("✓ Already up to date!")
+            return
+
+        print(f"→ Found {commit_count} new commit(s)")
+        print("→ Pulling updates...")
+        subprocess.run(["git", "pull", "origin", branch], cwd=PROJECT_ROOT, check=True)
+
+        # Reinstall Python dependencies
+        print("→ Updating Python dependencies...")
+        venv_pip = PROJECT_ROOT / "venv" / "bin" / "pip"
+        # Without a project venv, use the pip of the interpreter running Hermes;
+        # a bare "pip" on PATH may belong to a different Python installation.
+        pip_cmd = [str(venv_pip)] if venv_pip.exists() else [sys.executable, "-m", "pip"]
+        subprocess.run(pip_cmd + ["install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
+
+        # Node.js deps are best-effort: skipped without npm, failures not fatal
+        if (PROJECT_ROOT / "package.json").exists():
+            import shutil
+            if shutil.which("npm"):
+                print("→ Updating Node.js dependencies...")
+                subprocess.run(["npm", "install", "--silent"], cwd=PROJECT_ROOT, check=False)
+
+        print()
+        print("✓ Update complete!")
+        print()
+        print("Note: If you have the gateway service running, restart it:")
+        print("  hermes gateway restart")
+        # FileNotFoundError is raised when an executable such as git is missing
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        print(f"✗ Update failed: {e}")
+        sys.exit(1)
+
+
+def main():
+    """Parse the hermes command line and dispatch to the matching cmd_* handler."""
+    parser = argparse.ArgumentParser(
+        prog="hermes",
+        description="Hermes Agent - AI assistant with tool-calling capabilities",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    hermes                        Start interactive chat
+    hermes chat -q "Hello"        Single query mode
+    hermes setup                  Run setup wizard
+    hermes config                 View configuration
+    hermes config edit            Edit config in $EDITOR
+    hermes config set model gpt-4 Set a config value
+    hermes gateway                Run messaging gateway
+    hermes gateway install        Install as system service
+    hermes update                 Update to latest version
+
+For more help on a command:
+    hermes <command> --help
+"""
+    )
+
+    parser.add_argument(
+        "--version", "-V",
+        action="store_true",
+        help="Show version and exit"
+    )
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+    # =========================================================================
+    # chat command
+    # =========================================================================
+    chat_parser = subparsers.add_parser(
+        "chat",
+        help="Interactive chat with the agent",
+        description="Start an interactive chat session with Hermes Agent"
+    )
+    chat_parser.add_argument(
+        "-q", "--query",
+        help="Single query (non-interactive mode)"
+    )
+    chat_parser.add_argument(
+        "-m", "--model",
+        help="Model to use (e.g., anthropic/claude-sonnet-4)"
+    )
+    chat_parser.add_argument(
+        "-t", "--toolsets",
+        help="Comma-separated toolsets to enable"
+    )
+    chat_parser.add_argument(
+        "-v", "--verbose",
+        action="store_true",
+        help="Verbose output"
+    )
+    chat_parser.set_defaults(func=cmd_chat)
+
+    # =========================================================================
+    # gateway command
+    # =========================================================================
+    gateway_parser = subparsers.add_parser(
+        "gateway",
+        help="Messaging gateway management",
+        description="Manage the messaging gateway (Telegram, Discord, WhatsApp)"
+    )
+    gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
+
+    # gateway run (default)
+    gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
+    gateway_run.add_argument("-v", "--verbose", action="store_true")
+
+    # gateway start (no options, so the returned parser is not kept)
+    gateway_subparsers.add_parser("start", help="Start gateway service")
+
+    # gateway stop
+    gateway_subparsers.add_parser("stop", help="Stop gateway service")
+
+    # gateway restart
+    gateway_subparsers.add_parser("restart", help="Restart gateway service")
+
+    # gateway status
+    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
+    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
+
+    # gateway install
+    gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as service")
+    gateway_install.add_argument("--force", action="store_true", help="Force reinstall")
+
+    # gateway uninstall
+    gateway_subparsers.add_parser("uninstall", help="Uninstall gateway service")
+
+    gateway_parser.set_defaults(func=cmd_gateway)
+
+    # =========================================================================
+    # setup command
+    # =========================================================================
+    setup_parser = subparsers.add_parser(
+        "setup",
+        help="Interactive setup wizard",
+        description="Configure Hermes Agent with an interactive wizard"
+    )
+    setup_parser.add_argument(
+        "--non-interactive",
+        action="store_true",
+        help="Non-interactive mode (use defaults/env vars)"
+    )
+    setup_parser.add_argument(
+        "--reset",
+        action="store_true",
+        help="Reset configuration to defaults"
+    )
+    setup_parser.set_defaults(func=cmd_setup)
+
+    # =========================================================================
+    # status command
+    # =========================================================================
+    status_parser = subparsers.add_parser(
+        "status",
+        help="Show status of all components",
+        description="Display status of Hermes Agent components"
+    )
+    status_parser.add_argument(
+        "--all",
+        action="store_true",
+        help="Show all details (redacted for sharing)"
+    )
+    status_parser.add_argument(
+        "--deep",
+        action="store_true",
+        help="Run deep checks (may take longer)"
+    )
+    status_parser.set_defaults(func=cmd_status)
+
+    # =========================================================================
+    # cron command
+    # =========================================================================
+    cron_parser = subparsers.add_parser(
+        "cron",
+        help="Cron job management",
+        description="Manage scheduled tasks"
+    )
+    cron_subparsers = cron_parser.add_subparsers(dest="cron_command")
+
+    # cron list
+    cron_list = cron_subparsers.add_parser("list", help="List scheduled jobs")
+    cron_list.add_argument("--all", action="store_true", help="Include disabled jobs")
+
+    # cron daemon
+    cron_daemon = cron_subparsers.add_parser("daemon", help="Run cron daemon")
+    cron_daemon.add_argument("--interval", type=int, default=60, help="Check interval in seconds")
+
+    # cron tick
+    cron_subparsers.add_parser("tick", help="Run due jobs once (for system cron)")
+
+    cron_parser.set_defaults(func=cmd_cron)
+
+    # =========================================================================
+    # doctor command
+    # =========================================================================
+    doctor_parser = subparsers.add_parser(
+        "doctor",
+        help="Check configuration and dependencies",
+        description="Diagnose issues with Hermes Agent setup"
+    )
+    doctor_parser.add_argument(
+        "--fix",
+        action="store_true",
+        help="Attempt to fix issues automatically"
+    )
+    doctor_parser.set_defaults(func=cmd_doctor)
+
+    # =========================================================================
+    # config command
+    # =========================================================================
+    config_parser = subparsers.add_parser(
+        "config",
+        help="View and edit configuration",
+        description="Manage Hermes Agent configuration"
+    )
+    config_subparsers = config_parser.add_subparsers(dest="config_command")
+
+    # config show (default)
+    config_subparsers.add_parser("show", help="Show current configuration")
+
+    # config edit
+    config_subparsers.add_parser("edit", help="Open config file in editor")
+
+    # config set
+    config_set = config_subparsers.add_parser("set", help="Set a configuration value")
+    config_set.add_argument("key", nargs="?", help="Configuration key (e.g., model, terminal.backend)")
+    config_set.add_argument("value", nargs="?", help="Value to set")
+
+    # config path
+    config_subparsers.add_parser("path", help="Print config file path")
+
+    # config env-path
+    config_subparsers.add_parser("env-path", help="Print .env file path")
+
+    config_parser.set_defaults(func=cmd_config)
+
+    # =========================================================================
+    # version command
+    # =========================================================================
+    version_parser = subparsers.add_parser(
+        "version",
+        help="Show version information"
+    )
+    version_parser.set_defaults(func=cmd_version)
+
+    # =========================================================================
+    # update command
+    # =========================================================================
+    update_parser = subparsers.add_parser(
+        "update",
+        help="Update Hermes Agent to the latest version",
+        description="Pull the latest changes from git and reinstall dependencies"
+    )
+    update_parser.set_defaults(func=cmd_update)
+
+    # =========================================================================
+    # Parse and execute
+    # =========================================================================
+    args = parser.parse_args()
+
+    # Handle --version flag
+    if args.version:
+        cmd_version(args)
+        return
+
+    # Default to chat if no command specified
+    if args.command is None:
+        # cmd_chat reads these attributes, which only the chat subparser defines
+        args.query = None
+        args.model = None
+        args.toolsets = None
+        args.verbose = False
+        cmd_chat(args)
+        return
+
+    # Execute the command
+    if hasattr(args, 'func'):
+        args.func(args)
+    else:
+        parser.print_help()
+
+if __name__ == "__main__":
+    main()
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
new file mode 100644
index 0000000000..c85f778c96
--- /dev/null
+++ b/hermes_cli/setup.py
@@ -0,0 +1,448 @@
+"""
+Interactive setup wizard for Hermes Agent.
+
+Guides users through:
+1. Reviewing where configuration files are stored
+2. API key configuration
+3. Model selection
+4. Terminal backend selection
+5. Context compression and messaging platform setup
+6. Optional tool API keys
+
+Config files are stored in ~/.hermes/ for easy access.
+"""
+
+import os
+import sys
+from pathlib import Path
+from typing import Optional, Dict, Any
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+
+# Import config helpers
+from hermes_cli.config import (
+    get_hermes_home, get_config_path, get_env_path,
+    load_config, save_config, save_env_value, get_env_value,
+    ensure_hermes_home, DEFAULT_CONFIG
+)
+
+# ANSI SGR escape sequences for terminal styling (applied by color() below)
+class Colors:
+    RESET = "\033[0m"  # reset all attributes; color() appends this after the text
+    BOLD = "\033[1m"  # bold
+    DIM = "\033[2m"  # dim / faint
+    RED = "\033[31m"  # red foreground
+    GREEN = "\033[32m"  # green foreground
+    YELLOW = "\033[33m"  # yellow foreground
+    BLUE = "\033[34m"  # blue foreground
+    MAGENTA = "\033[35m"  # magenta foreground
+    CYAN = "\033[36m"  # cyan foreground
+
+def color(text: str, *codes) -> str:
+    """Wrap *text* in the given ANSI codes, but only when stdout is a TTY."""
+    if sys.stdout.isatty():
+        return "".join(codes) + text + Colors.RESET
+    return text
+
+def print_header(title: str):
+    """Print a blank line, then "◆ <title>" in bold cyan (styled only on a TTY)."""
+    print()
+    print(color(f"◆ {title}", Colors.CYAN, Colors.BOLD))
+
+def print_info(text: str):
+    """Print *text* indented by two spaces and dimmed (styled only on a TTY)."""
+    print(color(f"  {text}", Colors.DIM))
+
+def print_success(text: str):
+    """Print "✓ <text>" in green (styled only on a TTY)."""
+    print(color(f"✓ {text}", Colors.GREEN))
+
+def print_warning(text: str):
+    """Print "⚠ <text>" in yellow (styled only on a TTY)."""
+    print(color(f"⚠ {text}", Colors.YELLOW))
+
+def print_error(text: str):
+    """Print "✗ <text>" in red to stdout (styled only on a TTY)."""
+    print(color(f"✗ {text}", Colors.RED))
+
+def prompt(question: str, default: Optional[str] = None, password: bool = False) -> str:
+    """Ask *question* and return the stripped answer.
+
+    An empty answer returns *default*, or "" when there is none. With
+    *password* the input is read via getpass. Ctrl+C/EOF exits with status 1.
+    """
+    suffix = f" [{default}]" if default else ""
+    display = color(f"{question}{suffix}: ", Colors.YELLOW)
+    try:
+        if password:
+            import getpass
+            value = getpass.getpass(display)
+        else:
+            value = input(display)
+    except (KeyboardInterrupt, EOFError):
+        print()
+        sys.exit(1)
+    return value.strip() or default or ""
+
+def prompt_choice(question: str, choices: list, default: int = 0) -> int:
+    """Show *choices* as a menu and return the zero-based index selected.
+
+    Empty input returns *default*; invalid input re-prompts; Ctrl+C/EOF exits.
+    """
+    print(color(question, Colors.YELLOW))
+    for position, label in enumerate(choices):
+        if position == default:
+            print(color(f"  ● {label}", Colors.GREEN))
+        else:
+            print(f"  ○ {label}")
+    while True:
+        try:
+            answer = input(color(f"  Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM))
+            if answer == "":
+                return default
+            selected = int(answer) - 1
+            if selected in range(len(choices)):
+                return selected
+            print_error(f"Please enter a number between 1 and {len(choices)}")
+        except ValueError:
+            print_error("Please enter a number")
+        except (KeyboardInterrupt, EOFError):
+            print()
+            sys.exit(1)
+
+def prompt_yes_no(question: str, default: bool = True) -> bool:
+    """Ask a yes/no question; empty input returns *default*, Ctrl+C/EOF exits."""
+    display = color(f"{question} [{'Y/n' if default else 'y/N'}]: ", Colors.YELLOW)
+    while True:
+        try:  # Same interrupt handling as prompt() and prompt_choice()
+            value = input(display).strip().lower()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            sys.exit(1)
+        if not value:
+            return default
+        if value in ('y', 'yes', 'n', 'no'):
+            return value.startswith('y')
+        print_error("Please enter 'y' or 'n'")
+
+
+def run_setup_wizard(args):
+    """Run the interactive setup wizard.
+
+    Secrets are stored immediately via save_env_value(); other settings are
+    collected in ``config`` and written once at the end with save_config().
+    NOTE(review): *args* is not read anywhere in this function.
+    """
+    ensure_hermes_home()
+
+    config = load_config()
+    hermes_home = get_hermes_home()
+
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA))
+    print(color("│             🦋 Hermes Agent Setup Wizard                │", Colors.MAGENTA))
+    print(color("├─────────────────────────────────────────────────────────┤", Colors.MAGENTA))
+    print(color("│  Let's configure your Hermes Agent installation.       │", Colors.MAGENTA))
+    print(color("│  Press Ctrl+C at any time to exit.                     │", Colors.MAGENTA))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.MAGENTA))
+
+    # =========================================================================
+    # Step 0: Show paths
+    # =========================================================================
+    print_header("Configuration Location")
+    print_info(f"Config file:  {get_config_path()}")
+    print_info(f"Secrets file: {get_env_path()}")
+    print_info(f"Data folder:  {hermes_home}")
+    print_info(f"Install dir:  {PROJECT_ROOT}")
+    print()
+    print_info("You can edit these files directly or use 'hermes config edit'")
+
+    # =========================================================================
+    # Step 1: API Keys
+    # =========================================================================
+    print_header("Model/Auth Provider")
+
+    # Check if already configured
+    existing_or = get_env_value("OPENROUTER_API_KEY")
+    existing_ant = get_env_value("ANTHROPIC_API_KEY")
+
+    if existing_or or existing_ant:
+        configured = "OpenRouter" if existing_or else "Anthropic"
+        print_info(f"Currently configured: {configured}")
+        if not prompt_yes_no("Reconfigure API provider?", False):
+            print_info("Keeping existing configuration")
+        else:
+            existing_or = existing_ant = None  # Clear both so the menu below is shown
+
+    if not existing_or and not existing_ant:
+        provider_choices = [
+            "OpenRouter (recommended - access to all models)",
+            "Anthropic API (direct Claude access)",
+            "OpenAI API",
+            "Skip for now"
+        ]
+
+        provider_idx = prompt_choice("Select your primary model provider:", provider_choices, 0)
+
+        if provider_idx == 0:  # OpenRouter
+            print_info("Get your API key at: https://openrouter.ai/keys")
+            api_key = prompt("OpenRouter API key", password=True)
+            if api_key:
+                save_env_value("OPENROUTER_API_KEY", api_key)
+                print_success("OpenRouter API key saved")
+
+        elif provider_idx == 1:  # Anthropic
+            print_info("Get your API key at: https://console.anthropic.com/")
+            api_key = prompt("Anthropic API key", password=True)
+            if api_key:
+                save_env_value("ANTHROPIC_API_KEY", api_key)
+                print_success("Anthropic API key saved")
+
+        elif provider_idx == 2:  # OpenAI
+            print_info("Get your API key at: https://platform.openai.com/api-keys")
+            api_key = prompt("OpenAI API key", password=True)
+            if api_key:
+                save_env_value("OPENAI_API_KEY", api_key)
+                print_success("OpenAI API key saved")
+
+    # =========================================================================
+    # Step 2: Model Selection
+    # =========================================================================
+    print_header("Default Model")
+
+    current_model = config.get('model', 'anthropic/claude-sonnet-4')
+    print_info(f"Current: {current_model}")
+
+    model_choices = [
+        "anthropic/claude-sonnet-4 (recommended)",
+        "anthropic/claude-opus-4",
+        "openai/gpt-4o",
+        "google/gemini-2.0-flash",
+        "Enter custom model",
+        "Keep current"
+    ]
+
+    model_idx = prompt_choice("Select default model:", model_choices, 5)  # Default: keep current
+
+    if model_idx == 0:
+        config['model'] = "anthropic/claude-sonnet-4"
+    elif model_idx == 1:
+        config['model'] = "anthropic/claude-opus-4"
+    elif model_idx == 2:
+        config['model'] = "openai/gpt-4o"
+    elif model_idx == 3:
+        config['model'] = "google/gemini-2.0-flash"
+    elif model_idx == 4:
+        custom = prompt("Enter model name (e.g., anthropic/claude-sonnet-4)")
+        if custom:
+            config['model'] = custom
+
+    # =========================================================================
+    # Step 3: Terminal Backend
+    # =========================================================================
+    print_header("Terminal Backend")
+    print_info("The terminal tool allows the agent to run commands.")
+
+    current_backend = config.get('terminal', {}).get('backend', 'local')
+    print_info(f"Current: {current_backend}")
+
+    terminal_choices = [
+        "Local (run commands on this machine - no isolation)",
+        "Docker (isolated containers - recommended for security)",
+        "SSH (run commands on a remote server)",
+        "Keep current"
+    ]
+
+    # "Keep current" is the default so pressing Enter never changes the backend
+    terminal_idx = prompt_choice("Select terminal backend:", terminal_choices, 3)  # Default: keep
+
+    if terminal_idx == 0:  # Local
+        config.setdefault('terminal', {})['backend'] = 'local'
+        print_success("Terminal set to local")
+
+        if prompt_yes_no("Enable sudo support? (allows agent to run sudo commands)", False):
+            print_warning("SECURITY WARNING: Sudo password will be stored in plaintext")
+            sudo_pass = prompt("Sudo password (leave empty to skip)", password=True)
+            if sudo_pass:
+                save_env_value("SUDO_PASSWORD", sudo_pass)
+                print_success("Sudo password saved")
+
+    elif terminal_idx == 1:  # Docker
+        config.setdefault('terminal', {})['backend'] = 'docker'
+        docker_image = prompt("Docker image", config.get('terminal', {}).get('docker_image', 'python:3.11-slim'))
+        config['terminal']['docker_image'] = docker_image
+        print_success("Terminal set to Docker")
+
+    elif terminal_idx == 2:  # SSH
+        config.setdefault('terminal', {})['backend'] = 'ssh'
+
+        current_host = get_env_value('TERMINAL_SSH_HOST') or ''
+        current_user = get_env_value('TERMINAL_SSH_USER') or os.getenv("USER", "")
+
+        ssh_host = prompt("SSH host", current_host)
+        ssh_user = prompt("SSH user", current_user)
+        ssh_key = prompt("SSH key path", "~/.ssh/id_rsa")
+
+        if ssh_host:
+            save_env_value("TERMINAL_SSH_HOST", ssh_host)
+        if ssh_user:
+            save_env_value("TERMINAL_SSH_USER", ssh_user)
+        if ssh_key:
+            save_env_value("TERMINAL_SSH_KEY", ssh_key)
+
+        print_success("Terminal set to SSH")
+
+    # =========================================================================
+    # Step 4: Context Compression
+    # =========================================================================
+    print_header("Context Compression")
+    print_info("Automatically summarize old messages when context gets too long.")
+
+    compression = config.get('compression', {})
+    current_enabled = compression.get('enabled', True)
+
+    if prompt_yes_no("Enable context compression?", current_enabled):
+        config.setdefault('compression', {})['enabled'] = True
+
+        current_threshold = compression.get('threshold', 0.85)
+        threshold_str = prompt("Compression threshold (0.5-0.95)", str(current_threshold))
+        try:
+            threshold = float(threshold_str)
+            if not 0.5 <= threshold <= 0.95:
+                raise ValueError(threshold_str)  # out of range: report like a parse error
+            config['compression']['threshold'] = threshold
+        except ValueError:
+            print_warning(f"Ignoring invalid threshold {threshold_str!r} (expected 0.5-0.95)")
+
+        print_success("Context compression enabled")
+    else:
+        config.setdefault('compression', {})['enabled'] = False
+
+    # =========================================================================
+    # Step 5: Messaging Platforms (Optional)
+    # =========================================================================
+    print_header("Messaging Platforms (Optional)")
+    print_info("Connect to messaging platforms to chat with Hermes from anywhere.")
+
+    # Telegram
+    existing_telegram = get_env_value('TELEGRAM_BOT_TOKEN')
+    if existing_telegram:
+        print_info("Telegram: already configured")
+        if prompt_yes_no("Reconfigure Telegram?", False):
+            existing_telegram = None
+
+    if not existing_telegram and prompt_yes_no("Set up Telegram bot?", False):
+        print_info("Create a bot via @BotFather on Telegram")
+        token = prompt("Telegram bot token", password=True)
+        if token:
+            save_env_value("TELEGRAM_BOT_TOKEN", token)
+            print_success("Telegram token saved")
+
+            home_channel = prompt("Home channel ID (optional, for cron delivery)")
+            if home_channel:
+                save_env_value("TELEGRAM_HOME_CHANNEL", home_channel)
+
+    # Discord
+    existing_discord = get_env_value('DISCORD_BOT_TOKEN')
+    if existing_discord:
+        print_info("Discord: already configured")
+        if prompt_yes_no("Reconfigure Discord?", False):
+            existing_discord = None
+
+    if not existing_discord and prompt_yes_no("Set up Discord bot?", False):
+        print_info("Create a bot at https://discord.com/developers/applications")
+        token = prompt("Discord bot token", password=True)
+        if token:
+            save_env_value("DISCORD_BOT_TOKEN", token)
+            print_success("Discord token saved")
+
+            home_channel = prompt("Home channel ID (optional, for cron delivery)")
+            if home_channel:
+                save_env_value("DISCORD_HOME_CHANNEL", home_channel)
+
+    # =========================================================================
+    # Step 6: Additional Tools (Optional)
+    # =========================================================================
+    print_header("Additional Tools (Optional)")
+
+    # Firecrawl
+    if not get_env_value('FIRECRAWL_API_KEY'):
+        if prompt_yes_no("Set up web scraping (Firecrawl)?", False):
+            print_info("Get your API key at: https://firecrawl.dev/")
+            api_key = prompt("Firecrawl API key", password=True)
+            if api_key:
+                save_env_value("FIRECRAWL_API_KEY", api_key)
+                print_success("Firecrawl API key saved")
+    else:
+        print_info("Firecrawl: already configured")
+
+    # Browserbase
+    if not get_env_value('BROWSERBASE_API_KEY'):
+        if prompt_yes_no("Set up browser automation (Browserbase)?", False):
+            print_info("Get your API key at: https://browserbase.com/")
+            api_key = prompt("Browserbase API key", password=True)
+            project_id = prompt("Browserbase project ID")
+            if api_key:
+                save_env_value("BROWSERBASE_API_KEY", api_key)
+            if project_id:
+                save_env_value("BROWSERBASE_PROJECT_ID", project_id)
+            print_success("Browserbase configured")
+    else:
+        print_info("Browserbase: already configured")
+
+    # FAL
+    if not get_env_value('FAL_KEY'):
+        if prompt_yes_no("Set up image generation (FAL)?", False):
+            print_info("Get your API key at: https://fal.ai/")
+            api_key = prompt("FAL API key", password=True)
+            if api_key:
+                save_env_value("FAL_KEY", api_key)
+                print_success("FAL API key saved")
+    else:
+        print_info("FAL: already configured")
+
+    # Save config (secrets were already written as they were entered)
+    save_config(config)
+
+    # Done: print a summary of file locations and next steps
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.GREEN))
+    print(color("│              ✓ Setup Complete!                          │", Colors.GREEN))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.GREEN))
+    print()
+
+    # Show file locations prominently
+    print(color("📁 Your configuration files:", Colors.CYAN, Colors.BOLD))
+    print()
+    print(f"   {color('Settings:', Colors.YELLOW)}  {get_config_path()}")
+    print("              Model, terminal backend, compression, etc.")
+    print()
+    print(f"   {color('API Keys:', Colors.YELLOW)}  {get_env_path()}")
+    print("              OpenRouter, Anthropic, Firecrawl, etc.")
+    print()
+    print(f"   {color('Data:', Colors.YELLOW)}      {hermes_home}/")
+    print("              Cron jobs, sessions, logs")
+    print()
+
+    print(color("─" * 60, Colors.DIM))
+    print()
+    print(color("📝 To edit your configuration:", Colors.CYAN, Colors.BOLD))
+    print()
+    print(f"   {color('hermes config', Colors.GREEN)}        View current settings")
+    print(f"   {color('hermes config edit', Colors.GREEN)}   Open config in your editor")
+    print(f"   {color('hermes config set KEY VALUE', Colors.GREEN)}")
+    print("                         Set a specific value")
+    print()
+    print("   Or edit the files directly:")
+    print(f"   {color(f'nano {get_config_path()}', Colors.DIM)}")
+    print(f"   {color(f'nano {get_env_path()}', Colors.DIM)}")
+    print()
+
+    print(color("─" * 60, Colors.DIM))
+    print()
+    print(color("🚀 Ready to go!", Colors.CYAN, Colors.BOLD))
+    print()
+    print(f"   {color('hermes', Colors.GREEN)}              Start chatting")
+    print(f"   {color('hermes gateway', Colors.GREEN)}      Start messaging gateway")
+    print(f"   {color('hermes doctor', Colors.GREEN)}       Check for issues")
+    print()
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
new file mode 100644
index 0000000000..2d24bb50a5
--- /dev/null
+++ b/hermes_cli/status.py
@@ -0,0 +1,239 @@
+"""
+Status command for hermes CLI.
+
+Shows the status of all Hermes Agent components.
+"""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+
+# ANSI escape sequences used by color() to style terminal output
+class Colors:
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+    DIM = "\033[2m"
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    CYAN = "\033[36m"
+
+def color(text: str, *codes) -> str:
+    """Wrap text in the given ANSI codes, but only when stdout is a terminal."""
+    # Piped or redirected output is returned untouched, free of escape sequences.
+    return "".join(codes) + text + Colors.RESET if sys.stdout.isatty() else text
+
+def check_mark(ok: bool) -> str:
+    """Return a green check mark for a truthy status and a red cross otherwise."""
+    symbol, shade = ("✓", Colors.GREEN) if ok else ("✗", Colors.RED)
+    return color(symbol, shade)
+
+def redact_key(key: str) -> str:
+    """Mask an API key for display, keeping only its first and last four characters."""
+    if not key:
+        return "(not set)"
+    # Keys shorter than 12 characters are hidden entirely: head + tail would expose most of them.
+    too_short = len(key) < 12
+    return "***" if too_short else f"{key[:4]}...{key[-4:]}"
+
+
+def show_status(args):
+    """Show status of all Hermes Agent components.
+
+    Reads two optional flags from args: ``all`` (print API keys unredacted) and ``deep`` (run connectivity checks).
+    """
+    show_all = getattr(args, 'all', False)
+    deep = getattr(args, 'deep', False)
+    
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
+    print(color("│                 🦋 Hermes Agent Status                  │", Colors.CYAN))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
+    
+    # =========================================================================
+    # Environment
+    # =========================================================================
+    print()
+    print(color("◆ Environment", Colors.CYAN, Colors.BOLD))
+    print(f"  Project:      {PROJECT_ROOT}")
+    print(f"  Python:       {sys.version.split()[0]}")
+    # NOTE(review): the setup wizard reports the env file via get_env_path(); confirm PROJECT_ROOT/.env is the right place to look
+    env_path = PROJECT_ROOT / '.env'
+    print(f"  .env file:    {check_mark(env_path.exists())} {'exists' if env_path.exists() else 'not found'}")
+    
+    # =========================================================================
+    # API Keys
+    # =========================================================================
+    print()
+    print(color("◆ API Keys", Colors.CYAN, Colors.BOLD))
+    
+    keys = {
+        "OpenRouter": "OPENROUTER_API_KEY",
+        "Anthropic": "ANTHROPIC_API_KEY", 
+        "OpenAI": "OPENAI_API_KEY",
+        "Firecrawl": "FIRECRAWL_API_KEY",
+        "Browserbase": "BROWSERBASE_API_KEY",
+        "FAL": "FAL_KEY",
+    }
+    
+    for name, env_var in keys.items():
+        value = os.getenv(env_var, "")
+        has_key = bool(value)
+        display = redact_key(value) if not show_all else value
+        print(f"  {name:<12}  {check_mark(has_key)} {display}")
+    
+    # =========================================================================
+    # Terminal Configuration
+    # =========================================================================
+    print()
+    print(color("◆ Terminal Backend", Colors.CYAN, Colors.BOLD))
+    
+    terminal_env = os.getenv("TERMINAL_ENV", "local")
+    print(f"  Backend:      {terminal_env}")
+    
+    if terminal_env == "ssh":
+        ssh_host = os.getenv("TERMINAL_SSH_HOST", "")
+        ssh_user = os.getenv("TERMINAL_SSH_USER", "")
+        print(f"  SSH Host:     {ssh_host or '(not set)'}")
+        print(f"  SSH User:     {ssh_user or '(not set)'}")
+    elif terminal_env == "docker":
+        docker_image = os.getenv("TERMINAL_DOCKER_IMAGE", "python:3.11-slim")
+        print(f"  Docker Image: {docker_image}")
+    
+    sudo_password = os.getenv("SUDO_PASSWORD", "")
+    print(f"  Sudo:         {check_mark(bool(sudo_password))} {'enabled' if sudo_password else 'disabled'}")
+    
+    # =========================================================================
+    # Messaging Platforms
+    # =========================================================================
+    print()
+    print(color("◆ Messaging Platforms", Colors.CYAN, Colors.BOLD))
+    
+    platforms = {
+        "Telegram": ("TELEGRAM_BOT_TOKEN", "TELEGRAM_HOME_CHANNEL"),
+        "Discord": ("DISCORD_BOT_TOKEN", "DISCORD_HOME_CHANNEL"),
+        "WhatsApp": ("WHATSAPP_ENABLED", None),
+    }
+    
+    for name, (token_var, home_var) in platforms.items():
+        token = os.getenv(token_var, "")
+        # WHATSAPP_ENABLED is an on/off flag, so "false"/"0"/"no"/"off" must not count as configured
+        has_token = token.strip().lower() not in ("", "0", "false", "no", "off")
+        home_channel = os.getenv(home_var, "") if home_var else ""
+        
+        status = "configured" if has_token else "not configured"
+        if home_channel:
+            status += f" (home: {home_channel})"
+        
+        print(f"  {name:<12}  {check_mark(has_token)} {status}")
+    
+    # =========================================================================
+    # Gateway Status
+    # =========================================================================
+    print()
+    print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD))
+    
+    if sys.platform.startswith('linux'):
+        try:
+            cmd = ["systemctl", "--user", "is-active", "hermes-gateway"]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+            is_active = result.stdout.strip() == "active"
+        except OSError:  # systemctl is not installed: report as stopped instead of crashing
+            is_active = False
+        print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
+        print(f"  Manager:      systemd (user)")
+        
+    elif sys.platform == 'darwin':
+        try:
+            cmd = ["launchctl", "list", "ai.hermes.gateway"]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+            is_loaded = result.returncode == 0
+        except OSError:  # launchctl could not be run: report as not loaded instead of crashing
+            is_loaded = False
+        print(f"  Status:       {check_mark(is_loaded)} {'loaded' if is_loaded else 'not loaded'}")
+        print(f"  Manager:      launchd")
+    else:
+        print(f"  Status:       {color('N/A', Colors.DIM)}")
+        print(f"  Manager:      (not supported on this platform)")
+    
+    # =========================================================================
+    # Cron Jobs
+    # =========================================================================
+    print()
+    print(color("◆ Scheduled Jobs", Colors.CYAN, Colors.BOLD))
+    
+    jobs_file = Path.home() / ".hermes" / "cron" / "jobs.json"
+    if jobs_file.exists():
+        import json
+        try:
+            with open(jobs_file) as f:
+                data = json.load(f)
+                jobs = data.get("jobs", [])
+                enabled_jobs = [j for j in jobs if j.get("enabled", True)]
+                print(f"  Jobs:         {len(enabled_jobs)} active, {len(jobs)} total")
+        except (OSError, ValueError, AttributeError, TypeError):  # unreadable, invalid JSON, or unexpected shape
+            print(f"  Jobs:         (error reading jobs file)")
+    else:
+        print(f"  Jobs:         0")
+    
+    # =========================================================================
+    # Sessions
+    # =========================================================================
+    print()
+    print(color("◆ Sessions", Colors.CYAN, Colors.BOLD))
+    
+    sessions_file = Path.home() / ".hermes" / "sessions" / "sessions.json"
+    if sessions_file.exists():
+        import json
+        try:
+            with open(sessions_file) as f:
+                data = json.load(f)
+                print(f"  Active:       {len(data)} session(s)")
+        except (OSError, ValueError, TypeError):  # unreadable, invalid JSON, or a value without len()
+            print(f"  Active:       (error reading sessions file)")
+    else:
+        print(f"  Active:       0")
+    
+    # =========================================================================
+    # Deep checks
+    # =========================================================================
+    if deep:
+        print()
+        print(color("◆ Deep Checks", Colors.CYAN, Colors.BOLD))
+        
+        # Check OpenRouter connectivity
+        openrouter_key = os.getenv("OPENROUTER_API_KEY", "")
+        if openrouter_key:
+            try:
+                import httpx
+                response = httpx.get(
+                    "https://openrouter.ai/api/v1/models",
+                    headers={"Authorization": f"Bearer {openrouter_key}"},
+                    timeout=10
+                )
+                ok = response.status_code == 200
+                print(f"  OpenRouter:   {check_mark(ok)} {'reachable' if ok else f'error ({response.status_code})'}")
+            except Exception as e:
+                print(f"  OpenRouter:   {check_mark(False)} error: {e}")
+        
+        # Check gateway port
+        try:
+            import socket
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                sock.settimeout(1)
+                result = sock.connect_ex(('127.0.0.1', 18789))
+            # Port in use = gateway likely running
+            port_in_use = result == 0
+            # This is informational, not necessarily bad
+            print(f"  Port 18789:   {'in use' if port_in_use else 'available'}")
+        except OSError:
+            pass  # best-effort probe: a socket error only means this line is omitted
+    
+    print()
+    print(color("─" * 60, Colors.DIM))
+    print(color("  Run 'hermes doctor' for detailed diagnostics", Colors.DIM))
+    print(color("  Run 'hermes setup' to configure", Colors.DIM))
+    print()
diff --git a/pyproject.toml b/pyproject.toml
index 10e257f77a..99c32f3cbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,12 +34,16 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal", "boto3"]
 dev = ["pytest", "pytest-asyncio"]
+messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0"]
+cron = ["croniter"]
+all = ["croniter", "python-telegram-bot>=20.0", "discord.py>=2.0"]
 
 [project.scripts]
+hermes = "hermes_cli.main:main"
 hermes-agent = "run_agent:main"
 
 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli"]
 
 [tool.setuptools.packages.find]
-include = ["tools"]
+include = ["tools", "hermes_cli", "gateway", "cron"]
diff --git a/requirements.txt b/requirements.txt
index 4bc28b6db8..68a31e4479 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,4 +31,14 @@ platformdirs
 # boto3
 
 # Optional: For cron expression parsing (cronjob scheduling)
-croniter
\ No newline at end of file
+croniter
+
+# Optional: For messaging platform integrations (gateway)
+# Telegram: pip install python-telegram-bot
+# python-telegram-bot>=20.0
+
+# Discord: pip install discord.py
+# discord.py>=2.0
+
+# WhatsApp: Requires Node.js bridge (see docs/messaging.md)
+# aiohttp  # For WhatsApp bridge communication
\ No newline at end of file
diff --git a/scripts/hermes-gateway b/scripts/hermes-gateway
new file mode 100755
index 0000000000..59fa1056f9
--- /dev/null
+++ b/scripts/hermes-gateway
@@ -0,0 +1,414 @@
+#!/usr/bin/env python3
+"""
+Hermes Gateway - Standalone messaging platform integration.
+
+This is the proper entry point for running the gateway as a service.
+NOT tied to the CLI - runs independently.
+
+Usage:
+    # Run in foreground (for testing)
+    ./scripts/hermes-gateway
+    
+    # Install as a background service (systemd on Linux, launchd on macOS)
+    ./scripts/hermes-gateway install
+    
+    # Manage the service
+    ./scripts/hermes-gateway start
+    ./scripts/hermes-gateway stop
+    ./scripts/hermes-gateway restart
+    ./scripts/hermes-gateway status
+    
+    # Uninstall
+    ./scripts/hermes-gateway uninstall
+"""
+
+import argparse
+import asyncio
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+# Add parent directory to path
+SCRIPT_DIR = Path(__file__).parent.resolve()
+PROJECT_DIR = SCRIPT_DIR.parent
+sys.path.insert(0, str(PROJECT_DIR))
+
+# Load .env file
+from dotenv import load_dotenv
+env_path = PROJECT_DIR / '.env'
+if env_path.exists():
+    load_dotenv(dotenv_path=env_path)
+
+
+# =============================================================================
+# Service Configuration
+# =============================================================================
+
+SERVICE_NAME = "hermes-gateway"
+SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
+
+def get_systemd_unit_path() -> Path:
+    """Return the location of the gateway's systemd unit file under ~/.config/systemd/user."""
+    return Path.home().joinpath(".config", "systemd", "user", f"{SERVICE_NAME}.service")
+
+def get_launchd_plist_path() -> Path:
+    """Return the location of the gateway's launchd plist under ~/Library/LaunchAgents (macOS)."""
+    return Path.home().joinpath("Library", "LaunchAgents", "ai.hermes.gateway.plist")
+
+def get_python_path() -> str:
+    """Return the interpreter path to embed in the generated service definition."""
+    # A project-local venv takes priority over whichever Python launched this script.
+    candidate = PROJECT_DIR.joinpath("venv", "bin", "python")
+    if not candidate.exists():
+        return sys.executable
+    return str(candidate)
+
+def get_gateway_script_path() -> str:
+    """Return the absolute, symlink-resolved path of this script."""
+    return f"{Path(__file__).resolve()}"
+
+
+# =============================================================================
+# Systemd Service (Linux)
+# =============================================================================
+
+def generate_systemd_unit() -> str:
+    """Generate the systemd unit file content for the gateway user service."""
+    python_path = get_python_path()
+    script_path = get_gateway_script_path()
+    # systemd splits ExecStart on whitespace, so both paths are double-quoted to
+    # keep the unit valid when the install location contains spaces.
+    return f"""[Unit]
+Description={SERVICE_DESCRIPTION}
+After=network.target
+
+[Service]
+Type=simple
+ExecStart="{python_path}" "{script_path}" run
+WorkingDirectory={PROJECT_DIR}
+Restart=on-failure
+RestartSec=10
+StandardOutput=journal
+StandardError=journal
+
+# Environment (optional - can also use .env file)
+# Environment="TELEGRAM_BOT_TOKEN=your_token"
+# Environment="DISCORD_BOT_TOKEN=your_token"
+
+[Install]
+WantedBy=default.target
+"""
+
+def install_systemd():
+    """Write the unit file, reload systemd, and enable the service for the current user."""
+    unit_path = get_systemd_unit_path()
+    unit_path.parent.mkdir(parents=True, exist_ok=True)
+    
+    print(f"Installing systemd service to: {unit_path}")
+    unit_path.write_text(generate_systemd_unit())
+    # Make systemd pick up the new unit, then enable it (start on boot)
+    for action in (["daemon-reload"], ["enable", SERVICE_NAME]):
+        subprocess.run(["systemctl", "--user", *action], check=True)
+    print("✓ Service installed and enabled")
+    follow_up = (
+        "",
+        "To start the service:",
+        f"  systemctl --user start {SERVICE_NAME}",
+        "",
+        "To view logs:",
+        f"  journalctl --user -u {SERVICE_NAME} -f",
+        "",
+        "To enable lingering (keeps service running after logout):",
+        "  sudo loginctl enable-linger $USER",
+    )
+    for line in follow_up:
+        print(line)
+
+def uninstall_systemd():
+    """Stop, disable, and delete the systemd user service, then reload systemd."""
+    unit_path = get_systemd_unit_path()
+    
+    # Failures here are tolerated (check=False): removal proceeds either way
+    for verb in ("stop", "disable"):
+        subprocess.run(["systemctl", "--user", verb, SERVICE_NAME], check=False)
+    
+    # Delete the unit file if it is still on disk
+    if unit_path.exists():
+        unit_path.unlink()
+        print(f"✓ Removed {unit_path}")
+    
+    # Reload so systemd re-reads its unit files after the removal
+    subprocess.run(["systemctl", "--user", "daemon-reload"], check=True)
+    print("✓ Service uninstalled")
+
+def systemd_status():
+    """Run `systemctl --user status` for the gateway service; its exit code is ignored."""
+    subprocess.run(["systemctl", "--user", "status", SERVICE_NAME], check=False)
+
+def systemd_start():
+    """Start the gateway service; raises CalledProcessError if systemctl fails."""
+    subprocess.run(["systemctl", "--user", "start", SERVICE_NAME], check=True)
+    print("✓ Service started")
+
+def systemd_stop():
+    """Stop the gateway service; raises CalledProcessError if systemctl fails."""
+    subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=True)
+    print("✓ Service stopped")
+
+def systemd_restart():
+    """Restart the gateway service; raises CalledProcessError if systemctl fails."""
+    subprocess.run(["systemctl", "--user", "restart", SERVICE_NAME], check=True)
+    print("✓ Service restarted")
+
+
+# =============================================================================
+# Launchd Service (macOS)
+# =============================================================================
+
+def generate_launchd_plist() -> str:
+    """Generate the launchd plist content, XML-escaping every interpolated path."""
+    from xml.sax.saxutils import escape  # paths may legally contain &, < or >
+    python_path = escape(get_python_path())
+    script_path = escape(get_gateway_script_path())
+    working_dir = escape(str(PROJECT_DIR))
+    log_dir = escape(str(Path.home() / ".hermes" / "logs"))
+    return f"""<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>ai.hermes.gateway</string>
+    
+    <key>ProgramArguments</key>
+    <array>
+        <string>{python_path}</string>
+        <string>{script_path}</string>
+        <string>run</string>
+    </array>
+    
+    <key>WorkingDirectory</key>
+    <string>{working_dir}</string>
+    
+    <key>RunAtLoad</key>
+    <true/>
+    
+    <key>KeepAlive</key>
+    <dict>
+        <key>SuccessfulExit</key>
+        <false/>
+    </dict>
+    
+    <key>StandardOutPath</key>
+    <string>{log_dir}/gateway.log</string>
+    
+    <key>StandardErrorPath</key>
+    <string>{log_dir}/gateway.error.log</string>
+    
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
+    </dict>
+</dict>
+</plist>
+"""
+
+def install_launchd():
+    """Write the launchd plist, create the log directory, and load the agent (macOS)."""
+    plist_path = get_launchd_plist_path()
+    plist_path.parent.mkdir(parents=True, exist_ok=True)
+    
+    # The generated plist sends stdout/stderr to this directory, so create it up front
+    (Path.home() / ".hermes" / "logs").mkdir(parents=True, exist_ok=True)
+    print(f"Installing launchd service to: {plist_path}")
+    plist_path.write_text(generate_launchd_plist())
+    # Load the service
+    subprocess.run(["launchctl", "load", str(plist_path)], check=True)
+    
+    for line in (
+        "✓ Service installed and loaded",
+        "",
+        "To view logs:",
+        "  tail -f ~/.hermes/logs/gateway.log",
+        "",
+        "To manage the service:",
+        "  launchctl start ai.hermes.gateway",
+        "  launchctl stop ai.hermes.gateway",
+    ):
+        print(line)
+
+def uninstall_launchd():
+    """Unload the launchd agent and delete its plist (macOS)."""
+    agent_plist = get_launchd_plist_path()
+    
+    # A failed unload is tolerated (check=False): removal proceeds either way
+    subprocess.run(["launchctl", "unload", str(agent_plist)], check=False)
+    
+    # Delete the plist if it is still on disk
+    if agent_plist.exists():
+        agent_plist.unlink()
+        print(f"✓ Removed {agent_plist}")
+    
+    print("✓ Service uninstalled")
+
+def launchd_status():
+    """Run `launchctl list` for the gateway agent; its exit code is ignored."""
+    subprocess.run(["launchctl", "list", "ai.hermes.gateway"], check=False)
+
+def launchd_start():
+    """Start the gateway agent; raises CalledProcessError if launchctl fails."""
+    subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
+    print("✓ Service started")
+
+def launchd_stop():
+    """Stop the gateway agent; raises CalledProcessError if launchctl fails."""
+    subprocess.run(["launchctl", "stop", "ai.hermes.gateway"], check=True)
+    print("✓ Service stopped")
+
+def launchd_restart():
+    """Restart the launchd agent by stopping it and then starting it again."""
+    for step in (launchd_stop, launchd_start):
+        step()
+
+
+# =============================================================================
+# Platform Detection
+# =============================================================================
+
+def is_linux() -> bool:
+    return sys.platform.startswith('linux')
+
+def is_macos() -> bool:
+    return sys.platform == 'darwin'
+
+def is_windows() -> bool:
+    return sys.platform == 'win32'
+
+
+# =============================================================================
+# Gateway Runner
+# =============================================================================
+
+def run_gateway():
+    """Run the gateway in the foreground via asyncio.run()."""
+    # Deferred import: in this script only the foreground `run` path uses the gateway package.
+    from gateway.run import start_gateway
+    print("Starting Hermes Gateway...", "Press Ctrl+C to stop.", "", sep="\n")
+    gateway_coro = start_gateway()
+    asyncio.run(gateway_coro)
+
+
+# =============================================================================
+# Main CLI
+# =============================================================================
+
+def main():
+    """Parse the command line and dispatch to the matching action.
+
+    Commands (positional, default ``run``):
+        run        start the gateway in the foreground
+        install    install the background service
+        uninstall  remove the background service
+        start      start the installed service
+        stop       stop the service
+        restart    restart the service
+        status     show the service status
+
+    Service commands are routed to systemd on Linux and launchd on macOS.
+    On any other platform they print a message and exit with status 1.
+
+    NOTE(review): ``--verbose`` is parsed but never read in this function.
+    """
+    parser = argparse.ArgumentParser(
+        description="Hermes Gateway - Messaging Platform Integration",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Run in foreground (for testing)
+    ./scripts/hermes-gateway run
+    
+    # Install as system service
+    ./scripts/hermes-gateway install
+    
+    # Manage the service
+    ./scripts/hermes-gateway start
+    ./scripts/hermes-gateway stop
+    ./scripts/hermes-gateway restart
+    ./scripts/hermes-gateway status
+    
+    # Uninstall
+    ./scripts/hermes-gateway uninstall
+
+Configuration:
+    Set environment variables in .env file or system environment:
+    - TELEGRAM_BOT_TOKEN
+    - DISCORD_BOT_TOKEN
+    - WHATSAPP_ENABLED
+    
+    Or create ~/.hermes/gateway.json for advanced configuration.
+"""
+    )
+    
+    parser.add_argument(
+        "command",
+        choices=["run", "install", "uninstall", "start", "stop", "restart", "status"],
+        nargs="?",
+        default="run",
+        help="Command to execute (default: run)"
+    )
+    
+    parser.add_argument(
+        "--verbose", "-v",
+        action="store_true",
+        help="Verbose output"
+    )
+    
+    args = parser.parse_args()
+    
+    # Foreground mode needs no service manager
+    if args.command == "run":
+        run_gateway()
+        return
+    
+    # Every service command maps to:
+    #   (Linux handler, macOS handler, lines printed on any other platform)
+    not_supported = ("Not supported on this platform.",)
+    service_actions = {
+        # install/uninstall carry their own wording
+        "install": (
+            install_systemd,
+            install_launchd,
+            (
+                "Service installation not supported on this platform.",
+                "Please run manually: ./scripts/hermes-gateway run",
+            ),
+        ),
+        "uninstall": (
+            uninstall_systemd,
+            uninstall_launchd,
+            ("Service uninstallation not supported on this platform.",),
+        ),
+        # everything else shares the generic message
+        "start": (systemd_start, launchd_start, not_supported),
+        "stop": (systemd_stop, launchd_stop, not_supported),
+        "restart": (systemd_restart, launchd_restart, not_supported),
+        "status": (systemd_status, launchd_status, not_supported),
+    }
+    # argparse `choices` guarantees the command is one of the keys above
+    on_linux, on_macos, fallback_lines = service_actions[args.command]
+    
+    # Detect platform and dispatch command
+    if is_linux():
+        on_linux()
+    elif is_macos():
+        on_macos()
+    else:
+        # Unsupported platform: explain and exit with status 1
+        for line in fallback_lines:
+            print(line)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/install.ps1 b/scripts/install.ps1
new file mode 100644
index 0000000000..a6679c934c
--- /dev/null
+++ b/scripts/install.ps1
@@ -0,0 +1,371 @@
+# ============================================================================
+# Hermes Agent Installer for Windows
+# ============================================================================
+# Installation script for Windows (PowerShell).
+#
+# Usage:
+#   irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex
+#
+# Or download and run with options:
+#   .\install.ps1 -NoVenv -SkipSetup
+#
+# ============================================================================
+
+param(
+    [switch]$NoVenv,
+    [switch]$SkipSetup,
+    [string]$Branch = "main",
+    [string]$InstallDir = "$env:USERPROFILE\.hermes-agent"
+)
+
+$ErrorActionPreference = "Stop"
+
+# ============================================================================
+# Configuration
+# ============================================================================
+
+$RepoUrl = "https://github.com/NousResearch/hermes-agent.git"
+
+# ============================================================================
+# Helper functions
+# ============================================================================
+
+function Write-Banner {
+    Write-Host ""
+    Write-Host "┌─────────────────────────────────────────────────────────┐" -ForegroundColor Magenta
+    Write-Host "│             🦋 Hermes Agent Installer                   │" -ForegroundColor Magenta
+    Write-Host "├─────────────────────────────────────────────────────────┤" -ForegroundColor Magenta
+    Write-Host "│  I'm just a butterfly with a lot of tools.             │" -ForegroundColor Magenta
+    Write-Host "└─────────────────────────────────────────────────────────┘" -ForegroundColor Magenta
+    Write-Host ""
+}
+
+function Write-Info {
+    param([string]$Message)
+    Write-Host "→ $Message" -ForegroundColor Cyan
+}
+
+function Write-Success {
+    param([string]$Message)
+    Write-Host "✓ $Message" -ForegroundColor Green
+}
+
+function Write-Warning {
+    param([string]$Message)
+    Write-Host "⚠ $Message" -ForegroundColor Yellow
+}
+
+function Write-Error {
+    param([string]$Message)
+    Write-Host "✗ $Message" -ForegroundColor Red
+}
+
+# ============================================================================
+# Dependency checks
+# ============================================================================
+
+function Test-Python {
+    Write-Info "Checking Python..."
+    
+    # Candidate launchers, tried in order; the first one reporting Python 3.10+ wins
+    $pythonCmds = @("python3", "python", "py -3")
+    
+    foreach ($cmd in $pythonCmds) {
+        try {
+            $version = & $cmd.Split()[0] $cmd.Split()[1..99] -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>$null
+            if ($version) {
+                # Compare as a whole version: a per-component test would reject any X.Y with Y < 10 (e.g. 4.0)
+                if ([version]$version -ge [version]"3.10") {
+                    $script:PythonCmd = $cmd
+                    Write-Success "Python $version found"
+                    return $true
+                }
+            }
+        } catch {
+            # Command missing or output unparsable: try the next candidate
+        }
+    }
+    
+    Write-Error "Python 3.10+ not found"
+    Write-Info "Please install Python 3.10 or newer from:"
+    Write-Info "  https://www.python.org/downloads/"
+    Write-Info ""
+    Write-Info "Make sure to check 'Add Python to PATH' during installation"
+    return $false
+}
+
+function Test-Git {
+    Write-Info "Checking Git..."
+    
+    if (Get-Command git -ErrorAction SilentlyContinue) {
+        $version = git --version
+        Write-Success "Git found ($version)"
+        return $true
+    }
+    
+    Write-Error "Git not found"
+    Write-Info "Please install Git from:"
+    Write-Info "  https://git-scm.com/download/win"
+    return $false
+}
+
+function Test-Node {
+    Write-Info "Checking Node.js (optional, for browser tools)..."
+    
+    if (Get-Command node -ErrorAction SilentlyContinue) {
+        $version = node --version
+        Write-Success "Node.js $version found"
+        $script:HasNode = $true
+        return $true
+    }
+    
+    Write-Warning "Node.js not found (browser tools will be limited)"
+    Write-Info "To install Node.js (optional):"
+    Write-Info "  https://nodejs.org/en/download/"
+    $script:HasNode = $false
+    return $true  # Don't fail - Node is optional
+}
+
+# ============================================================================
+# Installation
+# ============================================================================
+
+function Install-Repository {
+    Write-Info "Installing to $InstallDir..."
+    # Update an existing checkout in place, otherwise clone a fresh copy
+    if (Test-Path $InstallDir) {
+        if (Test-Path "$InstallDir\.git") {
+            Write-Info "Existing installation found, updating..."
+            Push-Location $InstallDir
+            git fetch origin
+            git checkout $Branch
+            git pull origin $Branch
+            Pop-Location
+        } else {
+            Write-Error "Directory exists but is not a git repository: $InstallDir"
+            Write-Info "Remove it or choose a different directory with -InstallDir"
+            exit 1
+        }
+    } else {
+        git clone --branch $Branch $RepoUrl $InstallDir
+    }
+    # git reports failure only through its exit code; $LASTEXITCODE holds that of the last git call (pull or clone)
+    if ($LASTEXITCODE -ne 0) { Write-Error "git failed (exit code $LASTEXITCODE)"; exit 1 }
+    Write-Success "Repository ready"
+}
+
+function Install-Venv {
+    if ($NoVenv) {
+        Write-Info "Skipping virtual environment (-NoVenv)"
+        return
+    }
+    
+    Write-Info "Creating virtual environment..."
+    
+    Push-Location $InstallDir
+    # $PythonCmd may include an argument ("py -3"), so split it before invoking, as Test-Python does
+    if (-not (Test-Path "venv")) {
+        & $PythonCmd.Split()[0] $PythonCmd.Split()[1..99] -m venv venv
+    }
+    
+    # Activate
+    & .\venv\Scripts\Activate.ps1
+    
+    # Upgrade pip
+    pip install --upgrade pip wheel setuptools | Out-Null
+    
+    Pop-Location
+    
+    Write-Success "Virtual environment ready"
+}
+
+function Install-Dependencies {
+    Write-Info "Installing dependencies..."
+    
+    Push-Location $InstallDir
+    
+    if (-not $NoVenv) {
+        & .\venv\Scripts\Activate.ps1
+    }
+    
+    try {
+        pip install -e ".[all]" 2>&1 | Out-Null
+        # A non-zero pip exit code does not raise by itself; throw so the fallback below actually runs
+        if ($LASTEXITCODE -ne 0) { throw "pip install .[all] exited with code $LASTEXITCODE" }
+    } catch {
+        pip install -e "." | Out-Null
+    }
+    Pop-Location
+    Write-Success "Dependencies installed"
+}
+
+function Set-PathVariable {
+    Write-Info "Setting up PATH..."
+    
+    if ($NoVenv) {
+        $binDir = "$InstallDir"
+    } else {
+        $binDir = "$InstallDir\venv\Scripts"
+    }
+    
+    # Add to user PATH
+    $currentPath = [Environment]::GetEnvironmentVariable("Path", "User")
+    
+    if ($currentPath -notlike "*$binDir*") {
+        [Environment]::SetEnvironmentVariable(
+            "Path",
+            "$binDir;$currentPath",
+            "User"
+        )
+        Write-Success "Added to user PATH"
+    } else {
+        Write-Info "PATH already configured"
+    }
+    
+    # Update current session
+    $env:Path = "$binDir;$env:Path"
+}
+
+function Copy-ConfigTemplates {
+    Write-Info "Setting up configuration files..."
+    
+    Push-Location $InstallDir
+    
+    # Create .env from example
+    if (-not (Test-Path ".env")) {
+        if (Test-Path ".env.example") {
+            Copy-Item ".env.example" ".env"
+            Write-Success "Created .env from template"
+        }
+    } else {
+        Write-Info ".env already exists, keeping it"
+    }
+    
+    # Create cli-config.yaml from example
+    if (-not (Test-Path "cli-config.yaml")) {
+        if (Test-Path "cli-config.yaml.example") {
+            Copy-Item "cli-config.yaml.example" "cli-config.yaml"
+            Write-Success "Created cli-config.yaml from template"
+        }
+    } else {
+        Write-Info "cli-config.yaml already exists, keeping it"
+    }
+    
+    Pop-Location
+    
+    # Create user data directory
+    $hermesDir = "$env:USERPROFILE\.hermes"
+    New-Item -ItemType Directory -Force -Path "$hermesDir\cron" | Out-Null
+    New-Item -ItemType Directory -Force -Path "$hermesDir\sessions" | Out-Null
+    New-Item -ItemType Directory -Force -Path "$hermesDir\logs" | Out-Null
+    Write-Success "Created ~/.hermes data directory"
+}
+
+function Install-NodeDeps {
+    if (-not $HasNode) {
+        Write-Info "Skipping Node.js dependencies (Node not installed)"
+        return
+    }
+    
+    Push-Location $InstallDir
+    if (Test-Path "package.json") {
+        Write-Info "Installing Node.js dependencies..."
+        try {
+            npm install --silent 2>&1 | Out-Null
+            if ($LASTEXITCODE -ne 0) { throw "npm install exited with code $LASTEXITCODE" }  # exit codes don't throw on their own
+            Write-Success "Node.js dependencies installed"
+        } catch {
+            Write-Warning "npm install failed (browser tools may not work)"
+        }
+    }
+    
+    Pop-Location
+}
+
+function Invoke-SetupWizard {
+    if ($SkipSetup) {
+        Write-Info "Skipping setup wizard (-SkipSetup)"
+        return
+    }
+    
+    Write-Host ""
+    Write-Info "Starting setup wizard..."
+    Write-Host ""
+    
+    Push-Location $InstallDir
+    
+    if (-not $NoVenv) {
+        & .\venv\Scripts\Activate.ps1
+    }
+    
+    python -m hermes_cli.main setup
+    
+    Pop-Location
+}
+
+function Write-Completion {
+    Write-Host ""
+    Write-Host "┌─────────────────────────────────────────────────────────┐" -ForegroundColor Green
+    Write-Host "│              ✓ Installation Complete!                   │" -ForegroundColor Green
+    Write-Host "└─────────────────────────────────────────────────────────┘" -ForegroundColor Green
+    Write-Host ""
+    
+    # Show file locations
+    Write-Host "📁 Your files:" -ForegroundColor Cyan
+    Write-Host ""
+    Write-Host "   Install:   " -NoNewline -ForegroundColor Yellow
+    Write-Host "$InstallDir"
+    Write-Host "   Config:    " -NoNewline -ForegroundColor Yellow
+    Write-Host "$env:USERPROFILE\.hermes\config.yaml"
+    Write-Host "   API Keys:  " -NoNewline -ForegroundColor Yellow
+    Write-Host "$env:USERPROFILE\.hermes\.env"
+    Write-Host "   Data:      " -NoNewline -ForegroundColor Yellow
+    Write-Host "$env:USERPROFILE\.hermes\ (cron, sessions, logs)"
+    Write-Host ""
+    
+    Write-Host "─────────────────────────────────────────────────────────" -ForegroundColor Cyan
+    Write-Host ""
+    Write-Host "🚀 Commands:" -ForegroundColor Cyan
+    Write-Host ""
+    Write-Host "   hermes              " -NoNewline -ForegroundColor Green
+    Write-Host "Start chatting"
+    Write-Host "   hermes setup        " -NoNewline -ForegroundColor Green
+    Write-Host "Configure API keys & settings"
+    Write-Host "   hermes config       " -NoNewline -ForegroundColor Green
+    Write-Host "View/edit configuration"
+    Write-Host "   hermes config edit  " -NoNewline -ForegroundColor Green
+    Write-Host "Open config in editor"
+    Write-Host "   hermes gateway      " -NoNewline -ForegroundColor Green
+    Write-Host "Run messaging gateway"
+    Write-Host "   hermes update       " -NoNewline -ForegroundColor Green
+    Write-Host "Update to latest version"
+    Write-Host ""
+    
+    Write-Host "─────────────────────────────────────────────────────────" -ForegroundColor Cyan
+    Write-Host ""
+    Write-Host "⚡ Restart your terminal for PATH changes to take effect" -ForegroundColor Yellow
+    Write-Host ""
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+function Main {
+    Write-Banner
+    
+    if (-not (Test-Python)) { exit 1 }
+    if (-not (Test-Git)) { exit 1 }
+    Test-Node | Out-Null  # Optional, doesn't fail; discard its $true so "True" is not echoed
+    
+    Install-Repository
+    Install-Venv
+    Install-Dependencies
+    Install-NodeDeps
+    Set-PathVariable
+    Copy-ConfigTemplates
+    Invoke-SetupWizard
+    
+    Write-Completion
+}
+
+Main
diff --git a/scripts/install.sh b/scripts/install.sh
new file mode 100755
index 0000000000..f038bf831d
--- /dev/null
+++ b/scripts/install.sh
@@ -0,0 +1,520 @@
+#!/bin/bash
+# ============================================================================
+# Hermes Agent Installer
+# ============================================================================
+# Installation script for Linux and macOS.
+#
+# Usage:
+#   curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
+#
+# Or with options:
+#   curl -fsSL ... | bash -s -- --no-venv --skip-setup
+#
+# ============================================================================
+
+set -e  # Exit as soon as a command fails (outside of conditionals)
+
+# Colors (stored as literal backslash sequences; `echo -e` interprets them when printing)
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+BLUE='\033[0;34m'
+MAGENTA='\033[0;35m'
+CYAN='\033[0;36m'
+NC='\033[0m' # No Color
+BOLD='\033[1m'
+
+# Configuration
+REPO_URL="https://github.com/NousResearch/hermes-agent.git"
+INSTALL_DIR="${HERMES_INSTALL_DIR:-$HOME/.hermes-agent}"  # HERMES_INSTALL_DIR overrides the default; --dir overrides both
+PYTHON_MIN_VERSION="3.10"  # NOTE(review): never referenced in this script; check_python hard-codes (3, 10)
+
+# Options (defaults; changed by the command-line flags parsed below)
+USE_VENV=true
+RUN_SETUP=true
+BRANCH="main"
+
+# Parse arguments. ${2:?...} aborts with a message when a flag's value is missing (a bare `shift 2` would exit silently under set -e)
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --no-venv)
+            USE_VENV=false
+            shift
+            ;;
+        --skip-setup)
+            RUN_SETUP=false
+            shift
+            ;;
+        --branch)
+            BRANCH="${2:?--branch requires a value}"
+            shift 2
+            ;;
+        --dir)
+            INSTALL_DIR="${2:?--dir requires a value}"
+            shift 2
+            ;;
+        -h|--help)
+            echo "Hermes Agent Installer"
+            echo ""
+            echo "Usage: install.sh [OPTIONS]"
+            echo ""
+            echo "Options:"
+            echo "  --no-venv      Don't create virtual environment"
+            echo "  --skip-setup   Skip interactive setup wizard"
+            echo "  --branch NAME  Git branch to install (default: main)"
+            echo "  --dir PATH     Installation directory (default: ~/.hermes-agent)"
+            echo "  -h, --help     Show this help"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            exit 1
+            ;;
+    esac
+done
+
+# ============================================================================
+# Helper functions
+# ============================================================================
+
+print_banner() {  # Print the boxed installer title in bold magenta, then reset the terminal colors.
+    echo ""
+    echo -e "${MAGENTA}${BOLD}"
+    echo "┌─────────────────────────────────────────────────────────┐"
+    echo "│             🦋 Hermes Agent Installer                   │"
+    echo "├─────────────────────────────────────────────────────────┤"
+    echo "│  I'm just a butterfly with a lot of tools.             │"
+    echo "└─────────────────────────────────────────────────────────┘"
+    echo -e "${NC}"
+}
+
+log_info() {  # Print message $1 to stdout behind a cyan arrow.
+    echo -e "${CYAN}→${NC} $1"  # -e also interprets backslash escapes contained in $1
+}
+
+log_success() {  # Print message $1 to stdout behind a green check mark.
+    echo -e "${GREEN}✓${NC} $1"  # -e also interprets backslash escapes contained in $1
+}
+
+log_warn() {  # Print message $1 to stdout behind a yellow warning sign.
+    echo -e "${YELLOW}⚠${NC} $1"  # -e also interprets backslash escapes contained in $1
+}
+
+log_error() {  # Print message $1 behind a red cross. Writes to stdout (not stderr) and does not exit.
+    echo -e "${RED}✗${NC} $1"  # -e also interprets backslash escapes contained in $1
+}
+
+# ============================================================================
+# System detection
+# ============================================================================
+
+detect_os() {
+    # Set the globals OS and DISTRO from `uname -s`; exits 1 on Cygwin/MinGW/MSYS, only warns on an unrecognized system.
+    case "$(uname -s)" in
+        Linux*)
+            OS="linux"
+            if [ -f /etc/os-release ]; then
+                # Source in a subshell so os-release variables (NAME, VERSION, ...) stay out of this script; empty ID -> unknown
+                DISTRO="$(. /etc/os-release && echo "${ID:-unknown}")" || DISTRO="unknown"
+            else
+                DISTRO="unknown"
+            fi
+            ;;
+        Darwin*)
+            OS="macos"
+            DISTRO="macos"
+            ;;
+        CYGWIN*|MINGW*|MSYS*)
+            OS="windows"
+            DISTRO="windows"
+            log_error "Windows detected. Please use the PowerShell installer:"
+            log_info "  irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex"
+            exit 1
+            ;;
+        *)
+            OS="unknown"
+            DISTRO="unknown"
+            log_warn "Unknown operating system"
+            ;;
+    esac
+    log_success "Detected: $OS ($DISTRO)"
+}
+
+# ============================================================================
+# Dependency checks
+# ============================================================================
+
+check_python() {
+    log_info "Checking Python..."
+    # Sets PYTHON_CMD / PYTHON_VERSION to the first candidate that is >= 3.10 and returns 0; otherwise prints hints and exits 1.
+    # Try different python commands
+    for cmd in python3.12 python3.11 python3.10 python3 python; do
+        if command -v "$cmd" &> /dev/null; then
+            # Version-check the candidate itself, not whatever `python3` resolves to
+            if "$cmd" -c "import sys; exit(0 if sys.version_info >= (3, 10) else 1)" 2>/dev/null; then
+                PYTHON_CMD=$cmd
+                # Query the version only after the gate passed: the f-string is a syntax error on Python 2 and would abort under set -e
+                PYTHON_VERSION=$("$cmd" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
+                log_success "Python $PYTHON_VERSION found"
+                return 0
+            fi
+        fi
+    done
+    
+    log_error "Python 3.10+ not found"
+    log_info "Please install Python 3.10 or newer:"
+    
+    case "$OS" in
+        linux)
+            case "$DISTRO" in
+                ubuntu|debian)
+                    log_info "  sudo apt update && sudo apt install python3.11 python3.11-venv"
+                    ;;
+                fedora)
+                    log_info "  sudo dnf install python3.11"
+                    ;;
+                arch)
+                    log_info "  sudo pacman -S python"
+                    ;;
+                *)
+                    log_info "  Use your package manager to install Python 3.10+"
+                    ;;
+            esac
+            ;;
+        macos)
+            log_info "  brew install python@3.11"
+            log_info "  Or download from https://www.python.org/downloads/"
+            ;;
+    esac
+    
+    exit 1
+}
+
+check_git() {
+    log_info "Checking Git..."
+    # Returns 0 when `git` is on PATH; otherwise prints install hints for $OS/$DISTRO and exits 1.
+    if command -v git &> /dev/null; then
+        GIT_VERSION=$(git --version | awk '{print $3}')  # third whitespace-separated field of the `git --version` output
+        log_success "Git $GIT_VERSION found"
+        return 0
+    fi
+    
+    log_error "Git not found"
+    log_info "Please install Git:"
+    
+    case "$OS" in
+        linux)
+            case "$DISTRO" in
+                ubuntu|debian)
+                    log_info "  sudo apt update && sudo apt install git"
+                    ;;
+                fedora)
+                    log_info "  sudo dnf install git"
+                    ;;
+                arch)
+                    log_info "  sudo pacman -S git"
+                    ;;
+                *)
+                    log_info "  Use your package manager to install git"
+                    ;;
+            esac
+            ;;
+        macos)
+            log_info "  xcode-select --install"
+            log_info "  Or: brew install git"
+            ;;
+    esac
+    
+    exit 1
+}
+
+check_node() {
+    log_info "Checking Node.js (optional, for browser tools)..."
+    # Records the result in the global HAS_NODE (true/false); never exits, because Node.js is optional.
+    if command -v node &> /dev/null; then
+        NODE_VERSION=$(node --version)
+        log_success "Node.js $NODE_VERSION found"
+        HAS_NODE=true
+        return 0
+    fi
+    
+    log_warn "Node.js not found (browser tools will be limited)"
+    log_info "To install Node.js (optional):"
+    
+    case "$OS" in
+        linux)
+            case "$DISTRO" in
+                ubuntu|debian)
+                    log_info "  curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -"
+                    log_info "  sudo apt install -y nodejs"
+                    ;;
+                fedora)
+                    log_info "  sudo dnf install nodejs"
+                    ;;
+                arch)
+                    log_info "  sudo pacman -S nodejs npm"
+                    ;;
+                *)
+                    log_info "  https://nodejs.org/en/download/"
+                    ;;
+            esac
+            ;;
+        macos)
+            log_info "  brew install node"
+            log_info "  Or: https://nodejs.org/en/download/"
+            ;;
+    esac
+    
+    HAS_NODE=false
+    # Don't exit - Node is optional
+}
+
+# ============================================================================
+# Installation
+# ============================================================================
+
+clone_repo() {
+    log_info "Installing to $INSTALL_DIR..."
+    # Clone $REPO_URL at $BRANCH, or fetch/checkout/pull when $INSTALL_DIR is already a git checkout; any other existing directory exits 1.
+    if [ -d "$INSTALL_DIR" ]; then
+        if [ -d "$INSTALL_DIR/.git" ]; then
+            log_info "Existing installation found, updating..."
+            cd "$INSTALL_DIR"
+            git fetch origin
+            git checkout "$BRANCH"
+            git pull origin "$BRANCH"
+        else
+            log_error "Directory exists but is not a git repository: $INSTALL_DIR"
+            log_info "Remove it or choose a different directory with --dir"
+            exit 1
+        fi
+    else
+        git clone --branch "$BRANCH" "$REPO_URL" "$INSTALL_DIR"
+    fi
+    # Stay inside the checkout: setup_venv and install_deps use paths relative to the current directory.
+    cd "$INSTALL_DIR"
+    log_success "Repository ready"
+}
+
+setup_venv() {
+    if [ "$USE_VENV" = false ]; then
+        log_info "Skipping virtual environment (--no-venv)"
+        return 0
+    fi
+    # Create ./venv (relative to the current directory) with $PYTHON_CMD unless it exists, then activate it and upgrade the packaging tools.
+    log_info "Creating virtual environment..."
+    
+    if [ -d "venv" ]; then
+        log_info "Virtual environment already exists"
+    else
+        $PYTHON_CMD -m venv venv
+    fi
+    
+    # Activate
+    source venv/bin/activate
+    
+    # Upgrade pip
+    pip install --upgrade pip wheel setuptools > /dev/null
+    
+    log_success "Virtual environment ready"
+}
+
+install_deps() {
+    log_info "Installing dependencies..."
+    # Run pip through a known interpreter: the venv's python, or (with --no-venv) the one validated by check_python
+    local py="$PYTHON_CMD"
+    if [ "$USE_VENV" = true ]; then
+        source venv/bin/activate
+        py="python"
+    fi
+    # Install the package in editable mode with all extras; fall back to the base package if that fails
+    "$py" -m pip install -e ".[all]" > /dev/null 2>&1 || "$py" -m pip install -e "." > /dev/null
+    log_success "Dependencies installed"
+}
+
+setup_path() {
+    log_info "Setting up PATH..."
+    # Make `hermes` reachable: choose BIN_DIR, add it to the user's shell rc file once, and export it for this session.
+    # Determine the bin directory
+    if [ "$USE_VENV" = true ]; then
+        BIN_DIR="$INSTALL_DIR/venv/bin"
+    else
+        BIN_DIR="$HOME/.local/bin"
+        mkdir -p "$BIN_DIR"
+        
+        # Create a wrapper script that runs the interpreter validated by check_python (a bare `python` may not exist)
+        cat > "$BIN_DIR/hermes" << EOF
+#!/bin/bash
+cd "$INSTALL_DIR"
+exec $PYTHON_CMD -m hermes_cli.main "\$@"
+EOF
+        chmod +x "$BIN_DIR/hermes"
+    fi
+    
+    # Pick the rc file from the login shell: $BASH_VERSION is always set here because this script itself runs under bash
+    SHELL_CONFIG=""
+    if [ "${SHELL##*/}" = "zsh" ]; then
+        SHELL_CONFIG="$HOME/.zshrc"
+    elif [ -f "$HOME/.bashrc" ]; then
+        SHELL_CONFIG="$HOME/.bashrc"
+    elif [ -f "$HOME/.bash_profile" ]; then
+        SHELL_CONFIG="$HOME/.bash_profile"
+    elif [ -f "$HOME/.zshrc" ]; then
+        SHELL_CONFIG="$HOME/.zshrc"
+    fi
+    
+    PATH_LINE="export PATH=\"$BIN_DIR:\$PATH\""
+    # Idempotency: look for the directory itself, so --no-venv and custom --dir installs are not appended on every run
+    if [ -n "$SHELL_CONFIG" ]; then
+        if ! grep -qF -- "$BIN_DIR" "$SHELL_CONFIG" 2>/dev/null; then
+            echo "" >> "$SHELL_CONFIG"
+            echo "# Hermes Agent" >> "$SHELL_CONFIG"
+            echo "$PATH_LINE" >> "$SHELL_CONFIG"
+            log_success "Added to $SHELL_CONFIG"
+        else
+            log_info "PATH already configured in $SHELL_CONFIG"
+        fi
+    fi
+    
+    # Also export for current session
+    export PATH="$BIN_DIR:$PATH"
+    
+    log_success "PATH configured"
+}
+
+copy_config_templates() {
+    log_info "Setting up configuration files..."
+    # Copies each *.example template in $INSTALL_DIR to its real name unless the target exists, then creates the ~/.hermes data directories.
+    # Create .env from example
+    if [ ! -f "$INSTALL_DIR/.env" ]; then
+        if [ -f "$INSTALL_DIR/.env.example" ]; then
+            cp "$INSTALL_DIR/.env.example" "$INSTALL_DIR/.env"
+            log_success "Created .env from template"
+        fi
+    else
+        log_info ".env already exists, keeping it"
+    fi
+    
+    # Create cli-config.yaml from example
+    if [ ! -f "$INSTALL_DIR/cli-config.yaml" ]; then
+        if [ -f "$INSTALL_DIR/cli-config.yaml.example" ]; then
+            cp "$INSTALL_DIR/cli-config.yaml.example" "$INSTALL_DIR/cli-config.yaml"
+            log_success "Created cli-config.yaml from template"
+        fi
+    else
+        log_info "cli-config.yaml already exists, keeping it"
+    fi
+    
+    # Create ~/.hermes directory for user data
+    mkdir -p "$HOME/.hermes/cron"
+    mkdir -p "$HOME/.hermes/sessions"
+    mkdir -p "$HOME/.hermes/logs"
+    log_success "Created ~/.hermes data directory"
+}
+
+install_node_deps() {
+    if [ "$HAS_NODE" = false ]; then
+        log_info "Skipping Node.js dependencies (Node not installed)"
+        return 0
+    fi
+    # Best effort: run `npm install` in $INSTALL_DIR when package.json exists; a failure only logs a warning.
+    if [ -f "$INSTALL_DIR/package.json" ]; then
+        log_info "Installing Node.js dependencies..."
+        cd "$INSTALL_DIR"
+        npm install --silent 2>/dev/null || {
+            log_warn "npm install failed (browser tools may not work)"
+            return 0
+        }
+        log_success "Node.js dependencies installed"
+    fi
+}
+
+run_setup_wizard() {
+    if [ "$RUN_SETUP" = false ]; then
+        log_info "Skipping setup wizard (--skip-setup)"
+        return 0
+    fi
+    echo ""
+    log_info "Starting setup wizard..."
+    echo ""
+    local py="$PYTHON_CMD"  # with --no-venv, run the interpreter validated by check_python (a bare `python` may not exist)
+    if [ "$USE_VENV" = true ]; then
+        source "$INSTALL_DIR/venv/bin/activate"
+        py="python"
+    fi
+    cd "$INSTALL_DIR"
+    # Under `curl ... | bash` stdin is the script pipe, so feed the interactive wizard from the terminal when one can be opened
+    if [ ! -t 0 ] && (: < /dev/tty) 2>/dev/null; then "$py" -m hermes_cli.main setup < /dev/tty; else "$py" -m hermes_cli.main setup; fi
+}
+
+print_success() {
+    echo ""
+    echo -e "${GREEN}${BOLD}"
+    echo "┌─────────────────────────────────────────────────────────┐"
+    echo "│              ✓ Installation Complete!                   │"
+    echo "└─────────────────────────────────────────────────────────┘"
+    echo -e "${NC}"
+    echo ""
+    # Final summary printed after a successful install: file locations, command list, reload hint, optional Node.js note.
+    # Show file locations
+    echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
+    echo ""
+    echo -e "   ${YELLOW}Install:${NC}   $INSTALL_DIR"
+    echo -e "   ${YELLOW}Config:${NC}    ~/.hermes/config.yaml"
+    echo -e "   ${YELLOW}API Keys:${NC}  ~/.hermes/.env"
+    echo -e "   ${YELLOW}Data:${NC}      ~/.hermes/ (cron, sessions, logs)"
+    echo ""
+    # Command overview
+    echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
+    echo ""
+    echo -e "${CYAN}${BOLD}🚀 Commands:${NC}"
+    echo ""
+    echo -e "   ${GREEN}hermes${NC}              Start chatting"
+    echo -e "   ${GREEN}hermes setup${NC}        Configure API keys & settings"
+    echo -e "   ${GREEN}hermes config${NC}       View/edit configuration"
+    echo -e "   ${GREEN}hermes config edit${NC}  Open config in editor"
+    echo -e "   ${GREEN}hermes gateway${NC}      Run messaging gateway"
+    echo -e "   ${GREEN}hermes update${NC}       Update to latest version"
+    echo ""
+    # Reload hint: the rc-file change only takes effect in a new or re-sourced shell
+    echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
+    echo ""
+    echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
+    echo ""
+    echo "   source ~/.bashrc   # or ~/.zshrc"
+    echo ""
+    
+    # Show Node.js warning if not installed
+    if [ "$HAS_NODE" = false ]; then
+        echo -e "${YELLOW}"
+        echo "Note: Node.js was not found. Browser automation tools"
+        echo "will have limited functionality. Install Node.js later"
+        echo "if you need full browser support."
+        echo -e "${NC}"
+    fi
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+main() {
+    print_banner
+    # Preflight: detect the platform, then check tools. check_python and check_git exit 1 when missing; check_node only records HAS_NODE.
+    detect_os
+    check_python
+    check_git
+    check_node
+    # Install steps. clone_repo runs first because it changes into $INSTALL_DIR, which setup_venv and install_deps rely on.
+    clone_repo
+    setup_venv
+    install_deps
+    install_node_deps
+    setup_path
+    copy_config_templates
+    run_setup_wizard
+    # Closing summary
+    print_success
+}
+
+main
diff --git a/setup-hermes.sh b/setup-hermes.sh
index f5b78f267e..4cffdc7313 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -1,149 +1,156 @@
 #!/bin/bash
-
+# ============================================================================
 # Hermes Agent Setup Script
-# Automated setup for all dependencies and configuration
+# ============================================================================
+# Quick setup for developers who cloned the repo manually.
+#
+# Usage:
+#   ./setup-hermes.sh
+#
+# This script:
+# 1. Creates a virtual environment (if not exists)
+# 2. Installs dependencies
+# 3. Creates .env from template (if not exists)
+# 4. Installs the 'hermes' CLI command
+# 5. Runs the setup wizard (optional)
+# ============================================================================
 
 set -e
 
-echo "========================================="
-echo "Hermes Agent Setup"
-echo "========================================="
+# Colors
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+echo ""
+echo -e "${CYAN}🦋 Hermes Agent Setup${NC}"
 echo ""
 
-# Change to hermes-agent directory
-cd /home/teknium/hermes-agent
+# ============================================================================
+# Python check
+# ============================================================================
 
-# Check Python version
-echo "[1/10] Checking Python version..."
-python_version=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)
-echo "✓ Python $python_version detected"
-echo ""
+echo -e "${CYAN}→${NC} Checking Python..."
 
-# Install uv
-echo "[2/10] Installing uv (fast Python package installer)..."
-if ! command -v uv &> /dev/null; then
-    echo "Installing uv..."
-    curl -LsSf https://astral.sh/uv/install.sh | sh
-    export PATH="$HOME/.cargo/bin:$PATH"
-    echo "✓ uv installed"
-else
-    echo "✓ uv already installed: $(uv --version)"
+PYTHON_CMD=""
+for cmd in python3.12 python3.11 python3.10 python3 python; do
+    if command -v $cmd &> /dev/null; then
+        if $cmd -c "import sys; exit(0 if sys.version_info >= (3, 10) else 1)" 2>/dev/null; then
+            PYTHON_CMD=$cmd
+            break
+        fi
+    fi
+done
+
+if [ -z "$PYTHON_CMD" ]; then
+    echo -e "${YELLOW}✗${NC} Python 3.10+ required"
+    exit 1
 fi
-echo ""
 
-# Install Node.js 20 using NodeSource
-echo "[3/10] Installing Node.js 20..."
-if ! command -v node &> /dev/null || [[ $(node --version | cut -d'v' -f2 | cut -d'.' -f1) -lt 20 ]]; then
-    echo "Installing Node.js 20 LTS..."
-    curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
-    sudo apt-get install -y nodejs
-    echo "✓ Node.js installed"
+PYTHON_VERSION=$($PYTHON_CMD -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
+echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION found"
+
+# ============================================================================
+# Virtual environment
+# ============================================================================
+
+echo -e "${CYAN}→${NC} Setting up virtual environment..."
+
+if [ ! -d "venv" ]; then
+    $PYTHON_CMD -m venv venv
+    echo -e "${GREEN}✓${NC} Created venv"
 else
-    echo "✓ Node.js 20+ already installed: $(node --version)"
+    echo -e "${GREEN}✓${NC} venv exists"
 fi
-echo ""
 
-# Initialize git submodules
-echo "[4/10] Initializing git submodules..."
-git submodule update --init --recursive
-echo "✓ Submodules initialized"
-echo ""
-
-# Create Python virtual environment with uv
-echo "[5/10] Creating Python virtual environment with uv..."
-if [ -d "venv" ]; then
-    echo "Virtual environment already exists, skipping..."
-else
-    uv venv venv
-    echo "✓ Virtual environment created with uv"
-fi
-echo ""
-
-# Activate virtual environment and install Python packages with uv
-echo "[6/10] Installing Python dependencies with uv..."
 source venv/bin/activate
-uv pip install -r requirements.txt
-echo "✓ Python packages installed"
-echo ""
+pip install --upgrade pip wheel setuptools > /dev/null
 
-# Install mini-swe-agent with uv
-echo "[7/10] Installing mini-swe-agent..."
-uv pip install -e ./mini-swe-agent
-echo "✓ mini-swe-agent installed"
-echo ""
+# ============================================================================
+# Dependencies
+# ============================================================================
 
-# Install Node.js dependencies
-echo "[8/10] Installing Node.js dependencies..."
-npm install
-echo "✓ Node.js packages installed"
-echo ""
+echo -e "${CYAN}→${NC} Installing dependencies..."
 
-# Set up environment file
-echo "[9/10] Setting up environment configuration..."
-if [ -f ".env" ]; then
-    echo ".env file already exists, creating backup..."
-    cp .env .env.backup.$(date +%Y%m%d_%H%M%S)
-fi
-cp .env.example .env
-echo "✓ .env file created from .env.example"
-echo ""
+pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
 
-# Set up CLI config
-echo "[10/10] Setting up CLI configuration..."
-if [ ! -f "cli-config.yaml" ]; then
-    cp cli-config.yaml.example cli-config.yaml
-    echo "✓ cli-config.yaml created from example"
+echo -e "${GREEN}✓${NC} Dependencies installed"
+
+# ============================================================================
+# Environment file
+# ============================================================================
+
+if [ ! -f ".env" ]; then
+    if [ -f ".env.example" ]; then
+        cp .env.example .env
+        echo -e "${GREEN}✓${NC} Created .env from template"
+    fi
 else
-    echo "cli-config.yaml already exists, skipping..."
+    echo -e "${GREEN}✓${NC} .env exists"
 fi
+
+# ============================================================================
+# PATH setup
+# ============================================================================
+
+echo -e "${CYAN}→${NC} Setting up hermes command..."
+
+BIN_DIR="$SCRIPT_DIR/venv/bin"
+
+# Add to shell config if not already there (first existing rc file wins: zsh, then bash)
+SHELL_CONFIG=""
+if [ -f "$HOME/.zshrc" ]; then
+    SHELL_CONFIG="$HOME/.zshrc"
+elif [ -f "$HOME/.bashrc" ]; then
+    SHELL_CONFIG="$HOME/.bashrc"
+elif [ -f "$HOME/.bash_profile" ]; then
+    SHELL_CONFIG="$HOME/.bash_profile"
+fi
+# Idempotency: search for this checkout's bin directory, since the clone may live in a directory not named "hermes-agent"
+if [ -n "$SHELL_CONFIG" ]; then
+    if ! grep -qF -- "$BIN_DIR" "$SHELL_CONFIG" 2>/dev/null; then
+        echo "" >> "$SHELL_CONFIG"
+        echo "# Hermes Agent" >> "$SHELL_CONFIG"
+        echo "export PATH=\"$BIN_DIR:\$PATH\"" >> "$SHELL_CONFIG"
+        echo -e "${GREEN}✓${NC} Added to $SHELL_CONFIG"
+    else
+        echo -e "${GREEN}✓${NC} PATH already in $SHELL_CONFIG"
+    fi
+fi
+
+# ============================================================================
+# Done
+# ============================================================================
+
+echo ""
+echo -e "${GREEN}✓ Setup complete!${NC}"
+echo ""
+echo "Next steps:"
+echo ""
+echo "  1. Reload your shell:"
+echo "     source $SHELL_CONFIG"
+echo ""
+echo "  2. Run the setup wizard to configure API keys:"
+echo "     hermes setup"
+echo ""
+echo "  3. Start chatting:"
+echo "     hermes"
+echo ""
+echo "Other commands:"
+echo "  hermes status        # Check configuration"
+echo "  hermes gateway       # Start messaging gateway"
+echo "  hermes cron daemon   # Run cron daemon"
+echo "  hermes doctor        # Diagnose issues"
 echo ""
 
-# Show Node.js and Python versions
-echo "========================================="
-echo "Setup Complete!"
-echo "========================================="
-echo ""
-echo "Installed versions:"
-echo "  Node.js: $(node --version)"
-echo "  npm: $(npm --version)"
-echo "  Python: $(python3 --version)"
-echo "  uv: $(uv --version)"
-echo ""
-
-echo "========================================="
-echo "Next Steps:"
-echo "========================================="
-echo ""
-echo "1. Configure API Keys in .env file:"
-echo "   nano .env"
-echo ""
-echo "   Required API keys:"
-echo "   - OPENROUTER_API_KEY (https://openrouter.ai/keys)"
-echo "   - FIRECRAWL_API_KEY (https://firecrawl.dev/)"
-echo "   - NOUS_API_KEY (https://inference-api.nousresearch.com/)"
-echo "   - FAL_KEY (https://fal.ai/)"
-echo ""
-echo "   Optional API keys:"
-echo "   - BROWSERBASE_API_KEY (https://browserbase.com/)"
-echo "   - BROWSERBASE_PROJECT_ID"
-echo ""
-echo "2. Activate the virtual environment:"
-echo "   source venv/bin/activate"
-echo ""
-echo "3. Run the CLI:"
-echo "   ./hermes"
-echo ""
-echo "4. Or run a single query:"
-echo "   python run_agent.py --query \"your question here\""
-echo ""
-echo "5. List available tools:"
-echo "   python run_agent.py --list_tools"
-echo ""
-echo "========================================="
-echo "Configuration Files:"
-echo "========================================="
-echo "  .env - API keys and environment variables"
-echo "  cli-config.yaml - CLI settings and preferences"
-echo ""
-echo "For more information, see README.md"
-echo ""
+# Ask if they want to run setup wizard now
+read -p "Would you like to run the setup wizard now? [Y/n] " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+    echo ""
+    python -m hermes_cli.main setup
+fi
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index f5573082d6..5db2e1c244 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -29,6 +29,7 @@ def schedule_cronjob(
     schedule: str,
     name: Optional[str] = None,
     repeat: Optional[int] = None,
+    deliver: Optional[str] = None,
     task_id: str = None
 ) -> str:
     """
@@ -60,16 +61,35 @@ def schedule_cronjob(
                 - One-shot schedules default to repeat=1 (run once)
                 - Intervals/cron default to forever
                 - Set repeat=5 to run 5 times then auto-delete
+        deliver: Where to send the output. Options:
+                 - "origin": Back to where this job was created (default)
+                 - "local": Save to local files only (~/.hermes/cron/output/)
+                 - "telegram": Send to Telegram home channel
+                 - "discord": Send to Discord home channel
+                 - "telegram:123456": Send to specific chat ID
     
     Returns:
         JSON with job_id, next_run time, and confirmation
     """
+    # Get origin info from environment if available
+    origin = None
+    origin_platform = os.getenv("HERMES_SESSION_PLATFORM")
+    origin_chat_id = os.getenv("HERMES_SESSION_CHAT_ID")
+    if origin_platform and origin_chat_id:
+        origin = {
+            "platform": origin_platform,
+            "chat_id": origin_chat_id,
+            "chat_name": os.getenv("HERMES_SESSION_CHAT_NAME"),
+        }
+    
     try:
         job = create_job(
             prompt=prompt,
             schedule=schedule,
             name=name,
-            repeat=repeat
+            repeat=repeat,
+            deliver=deliver,
+            origin=origin
         )
         
         # Format repeat info for display
@@ -87,8 +107,9 @@ def schedule_cronjob(
             "name": job["name"],
             "schedule": job["schedule_display"],
             "repeat": repeat_display,
+            "deliver": job.get("deliver", "local"),
             "next_run_at": job["next_run_at"],
-            "message": f"Cronjob '{job['name']}' created. It will run {repeat_display}, next at {job['next_run_at']}."
+            "message": f"Cronjob '{job['name']}' created. It will run {repeat_display}, deliver to {job.get('deliver', 'local')}, next at {job['next_run_at']}."
         }, indent=2)
         
     except Exception as e:
@@ -122,6 +143,13 @@ REPEAT BEHAVIOR:
 - Intervals/cron: run forever by default
 - Set repeat=N to run exactly N times then auto-delete
 
+DELIVERY OPTIONS (where output goes):
+- "origin": Back to current chat (default if in messaging platform)
+- "local": Save to local files only (default if in CLI)
+- "telegram": Send to Telegram home channel
+- "discord": Send to Discord home channel
+- "telegram:123456": Send to specific chat (if user provides ID)
+
 Use for: reminders, periodic checks, scheduled reports, automated maintenance.""",
     "parameters": {
         "type": "object",
@@ -141,6 +169,10 @@ Use for: reminders, periodic checks, scheduled reports, automated maintenance.""
             "repeat": {
                 "type": "integer",
                 "description": "How many times to run. Omit for default (once for one-shot, forever for recurring). Set to N for exactly N runs."
+            },
+            "deliver": {
+                "type": "string",
+                "description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', or 'platform:chat_id'"
             }
         },
         "required": ["prompt", "schedule"]
@@ -189,6 +221,7 @@ def list_cronjobs(include_disabled: bool = False, task_id: str = None) -> str:
                 "prompt_preview": job["prompt"][:100] + "..." if len(job["prompt"]) > 100 else job["prompt"],
                 "schedule": job["schedule_display"],
                 "repeat": repeat_status,
+                "deliver": job.get("deliver", "local"),
                 "next_run_at": job.get("next_run_at"),
                 "last_run_at": job.get("last_run_at"),
                 "last_status": job.get("last_status"),
diff --git a/toolsets.py b/toolsets.py
index b74b2fd386..bd6b22dcee 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -132,6 +132,63 @@ TOOLSETS = {
             "schedule_cronjob", "list_cronjobs", "remove_cronjob"
         ],
         "includes": []
+    },
+    
+    # ==========================================================================
+    # Messaging Platform-Specific Toolsets
+    # ==========================================================================
+    
+    "hermes-telegram": {
+        "description": "Telegram bot toolset - web research, skills, cronjobs (no terminal/browser for security)",
+        "tools": [
+            # Web tools - safe for messaging
+            "web_search", "web_extract",
+            # Vision - analyze images sent by users
+            "vision_analyze",
+            # Skills - access knowledge base
+            "skills_categories", "skills_list", "skill_view",
+            # Cronjob management - let users schedule tasks
+            "schedule_cronjob", "list_cronjobs", "remove_cronjob"
+        ],
+        "includes": []
+    },
+    
+    "hermes-discord": {
+        "description": "Discord bot toolset - limited for public server safety (no terminal, no file access)",
+        "tools": [
+            # Web tools - safe for messaging
+            "web_search",
+            # Vision - analyze images
+            "vision_analyze",
+            # Skills - access knowledge base
+            "skills_categories", "skills_list", "skill_view",
+            # Cronjob - let users schedule reminders
+            "schedule_cronjob", "list_cronjobs", "remove_cronjob"
+        ],
+        "includes": []
+    },
+    
+    "hermes-whatsapp": {
+        "description": "WhatsApp bot toolset - similar to Telegram (personal messaging, more trusted)",
+        "tools": [
+            # Web tools
+            "web_search", "web_extract",
+            # Terminal - only for trusted personal accounts
+            "terminal",
+            # Vision
+            "vision_analyze",
+            # Skills
+            "skills_categories", "skills_list", "skill_view",
+            # Cronjob management
+            "schedule_cronjob", "list_cronjobs", "remove_cronjob"
+        ],
+        "includes": []
+    },
+    
+    "hermes-gateway": {
+        "description": "Gateway toolset - union of all messaging platform tools",
+        "tools": [],
+        "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp"]
     }
 }
 

From 3488576bd873795c3648551257514e8899cd00a6 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:13:41 -0800
Subject: [PATCH 06/48] Update terminal configuration and enhance CLI model
 management

- Changed default Docker, Singularity, and Modal images in configuration files to use "nikolaik/python-nodejs:python3.11-nodejs20" for improved compatibility.
- Updated the default model in the configuration to "anthropic/claude-sonnet-4.5" and adjusted related setup prompts for API provider configuration.
- Introduced a new CLI option for selecting a custom OpenAI-compatible endpoint, enhancing flexibility in model provider setup.
- Enhanced the prompt choice functionality to support arrow key navigation for better user experience in CLI interactions.
- Updated documentation in relevant files to reflect these changes and improve user guidance.
---
 cli-config.yaml.example |   6 +-
 hermes_cli/config.py    |  12 +-
 hermes_cli/setup.py     | 244 ++++++++++++++++++++++++++++------------
 pyproject.toml          |   3 +-
 tools/terminal_tool.py  |  16 ++-
 5 files changed, 198 insertions(+), 83 deletions(-)

diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 947fa11af9..81be7a4d71 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -55,7 +55,7 @@ terminal:
 #   cwd: "/workspace"
 #   timeout: 180
 #   lifetime_seconds: 300
-#   docker_image: "python:3.11"
+#   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 
 # -----------------------------------------------------------------------------
 # OPTION 4: Singularity/Apptainer container
@@ -67,7 +67,7 @@ terminal:
 #   cwd: "/workspace"
 #   timeout: 180
 #   lifetime_seconds: 300
-#   singularity_image: "docker://python:3.11"
+#   singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"
 
 # -----------------------------------------------------------------------------
 # OPTION 5: Modal cloud execution
@@ -79,7 +79,7 @@ terminal:
 #   cwd: "/workspace"
 #   timeout: 180
 #   lifetime_seconds: 300
-#   modal_image: "python:3.11"
+#   modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 
 # -----------------------------------------------------------------------------
 # SUDO SUPPORT (works with ALL backends above)
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 210473dbba..ad6423581a 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -71,7 +71,7 @@ def ensure_hermes_home():
 # =============================================================================
 
 DEFAULT_CONFIG = {
-    "model": "anthropic/claude-sonnet-4",
+    "model": "anthropic/claude-sonnet-4.5",
     "toolsets": ["hermes-cli"],
     "max_turns": 100,
     
@@ -79,7 +79,9 @@ DEFAULT_CONFIG = {
         "backend": "local",
         "cwd": ".",  # Use current directory
         "timeout": 180,
-        "docker_image": "python:3.11-slim",
+        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
+        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
+        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
     },
     
     "browser": {
@@ -248,6 +250,12 @@ def show_config():
     
     if terminal.get('backend') == 'docker':
         print(f"  Docker image: {terminal.get('docker_image', 'python:3.11-slim')}")
+    elif terminal.get('backend') == 'singularity':
+        print(f"  Image:        {terminal.get('singularity_image', 'docker://python:3.11')}")
+    elif terminal.get('backend') == 'modal':
+        print(f"  Modal image:  {terminal.get('modal_image', 'python:3.11')}")
+        modal_token = get_env_value('MODAL_TOKEN_ID')
+        print(f"  Modal token:  {'configured' if modal_token else '(not set)'}")
     elif terminal.get('backend') == 'ssh':
         ssh_host = get_env_value('TERMINAL_SSH_HOST')
         ssh_user = get_env_value('TERMINAL_SSH_USER')
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index c85f778c96..e892b8011e 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -85,30 +85,56 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
         sys.exit(1)
 
 def prompt_choice(question: str, choices: list, default: int = 0) -> int:
-    """Prompt for a choice from a list."""
+    """Prompt for a choice from a list with arrow key navigation."""
     print(color(question, Colors.YELLOW))
     
-    for i, choice in enumerate(choices):
-        marker = "●" if i == default else "○"
-        if i == default:
-            print(color(f"  {marker} {choice}", Colors.GREEN))
-        else:
-            print(f"  {marker} {choice}")
-    
-    while True:
-        try:
-            value = input(color(f"  Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM))
-            if not value:
-                return default
-            idx = int(value) - 1
-            if 0 <= idx < len(choices):
-                return idx
-            print_error(f"Please enter a number between 1 and {len(choices)}")
-        except ValueError:
-            print_error("Please enter a number")
-        except (KeyboardInterrupt, EOFError):
+    # Try to use interactive menu if available
+    try:
+        from simple_term_menu import TerminalMenu
+        
+        # Add visual indicators
+        menu_choices = [f"  {choice}" for choice in choices]
+        
+        terminal_menu = TerminalMenu(
+            menu_choices,
+            cursor_index=default,
+            menu_cursor="→ ",
+            menu_cursor_style=("fg_green", "bold"),
+            menu_highlight_style=("fg_green",),
+            cycle_cursor=True,
+            clear_screen=False,
+        )
+        
+        idx = terminal_menu.show()
+        if idx is None:  # User pressed Escape or Ctrl+C
             print()
             sys.exit(1)
+        print()  # Add newline after selection
+        return idx
+        
+    except ImportError:
+        # Fallback to number-based selection
+        for i, choice in enumerate(choices):
+            marker = "●" if i == default else "○"
+            if i == default:
+                print(color(f"  {marker} {choice}", Colors.GREEN))
+            else:
+                print(f"  {marker} {choice}")
+        
+        while True:
+            try:
+                value = input(color(f"  Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM))
+                if not value:
+                    return default
+                idx = int(value) - 1
+                if 0 <= idx < len(choices):
+                    return idx
+                print_error(f"Please enter a number between 1 and {len(choices)}")
+            except ValueError:
+                print_error("Please enter a number")
+            except (KeyboardInterrupt, EOFError):
+                print()
+                sys.exit(1)
 
 def prompt_yes_no(question: str, default: bool = True) -> bool:
     """Prompt for yes/no."""
@@ -159,25 +185,27 @@ def run_setup_wizard(args):
     
     # Check if already configured
     existing_or = get_env_value("OPENROUTER_API_KEY")
-    existing_ant = get_env_value("ANTHROPIC_API_KEY")
+    existing_custom = get_env_value("OPENAI_BASE_URL")
     
-    if existing_or or existing_ant:
-        configured = "OpenRouter" if existing_or else "Anthropic"
-        print_info(f"Currently configured: {configured}")
+    skip_provider_setup = False
+    if existing_or or existing_custom:
+        if existing_or:
+            print_info("Currently configured: OpenRouter")
+        else:
+            print_info(f"Currently configured: Custom endpoint ({existing_custom})")
+        
         if not prompt_yes_no("Reconfigure API provider?", False):
             print_info("Keeping existing configuration")
-        else:
-            existing_or = None  # Force reconfigure
+            skip_provider_setup = True
     
-    if not existing_or and not existing_ant:
+    if not skip_provider_setup:
         provider_choices = [
             "OpenRouter (recommended - access to all models)",
-            "Anthropic API (direct Claude access)",
-            "OpenAI API",
+            "Custom OpenAI-compatible endpoint",
             "Skip for now"
         ]
         
-        provider_idx = prompt_choice("Select your primary model provider:", provider_choices, 0)
+        provider_idx = prompt_choice("Select your API provider:", provider_choices, 0)
         
         if provider_idx == 0:  # OpenRouter
             print_info("Get your API key at: https://openrouter.ai/keys")
@@ -186,19 +214,31 @@ def run_setup_wizard(args):
                 save_env_value("OPENROUTER_API_KEY", api_key)
                 print_success("OpenRouter API key saved")
         
-        elif provider_idx == 1:  # Anthropic
-            print_info("Get your API key at: https://console.anthropic.com/")
-            api_key = prompt("Anthropic API key", password=True)
-            if api_key:
-                save_env_value("ANTHROPIC_API_KEY", api_key)
-                print_success("Anthropic API key saved")
-        
-        elif provider_idx == 2:  # OpenAI
-            print_info("Get your API key at: https://platform.openai.com/api-keys")
-            api_key = prompt("OpenAI API key", password=True)
+        elif provider_idx == 1:  # Custom endpoint
+            print_info("Custom OpenAI-Compatible Endpoint Configuration:")
+            print_info("Works with any API that follows OpenAI's chat completions spec")
+            
+            # Show current values if set
+            current_url = get_env_value("OPENAI_BASE_URL") or ""
+            current_key = get_env_value("OPENAI_API_KEY")
+            current_model = config.get('model', '')
+            
+            if current_url:
+                print_info(f"  Current URL: {current_url}")
+            if current_key:
+                print_info(f"  Current key: {current_key[:8]}... (configured)")
+            
+            base_url = prompt("  API base URL (e.g., https://api.example.com/v1)", current_url)
+            api_key = prompt("  API key", password=True)
+            model_name = prompt("  Model name (e.g., gpt-4, claude-3-opus)", current_model)
+            
+            if base_url:
+                save_env_value("OPENAI_BASE_URL", base_url)
             if api_key:
                 save_env_value("OPENAI_API_KEY", api_key)
-                print_success("OpenAI API key saved")
+            if model_name:
+                config['model'] = model_name
+            print_success("Custom endpoint configured")
     
     # =========================================================================
     # Step 2: Model Selection
@@ -209,28 +249,40 @@ def run_setup_wizard(args):
     print_info(f"Current: {current_model}")
     
     model_choices = [
-        "anthropic/claude-sonnet-4 (recommended)",
-        "anthropic/claude-opus-4",
-        "openai/gpt-4o",
-        "google/gemini-2.0-flash",
-        "Enter custom model",
-        "Keep current"
+        "anthropic/claude-sonnet-4.5 (recommended)",
+        "anthropic/claude-opus-4.5",
+        "openai/gpt-5.2",
+        "openai/gpt-5.2-codex",
+        "google/gemini-3-pro-preview",
+        "google/gemini-3-flash-preview",
+        "z-ai/glm-4.7",
+        "moonshotai/kimi-k2.5",
+        "minimax/minimax-m2.1",
+        "Custom model",
+        f"Keep current ({current_model})"
     ]
     
-    model_idx = prompt_choice("Select default model:", model_choices, 5)  # Default: keep current
+    model_idx = prompt_choice("Select default model:", model_choices, 10)  # Default: keep current
     
-    if model_idx == 0:
-        config['model'] = "anthropic/claude-sonnet-4"
-    elif model_idx == 1:
-        config['model'] = "anthropic/claude-opus-4"
-    elif model_idx == 2:
-        config['model'] = "openai/gpt-4o"
-    elif model_idx == 3:
-        config['model'] = "google/gemini-2.0-flash"
-    elif model_idx == 4:
-        custom = prompt("Enter model name (e.g., anthropic/claude-sonnet-4)")
+    model_map = {
+        0: "anthropic/claude-sonnet-4.5",
+        1: "anthropic/claude-opus-4.5",
+        2: "openai/gpt-5.2",
+        3: "openai/gpt-5.2-codex",
+        4: "google/gemini-3-pro-preview",
+        5: "google/gemini-3-flash-preview",
+        6: "z-ai/glm-4.7",
+        7: "moonshotai/kimi-k2.5",
+        8: "minimax/minimax-m2.1",
+    }
+    
+    if model_idx in model_map:
+        config['model'] = model_map[model_idx]
+    elif model_idx == 9:  # Custom
+        custom = prompt("Enter model name (e.g., anthropic/claude-sonnet-4.5)")
         if custom:
             config['model'] = custom
+    # else: Keep current (model_idx == 10)
     
     # =========================================================================
     # Step 3: Terminal Backend
@@ -244,46 +296,96 @@ def run_setup_wizard(args):
     terminal_choices = [
         "Local (run commands on this machine - no isolation)",
         "Docker (isolated containers - recommended for security)",
+        "Singularity/Apptainer (HPC clusters, shared compute)",
+        "Modal (cloud execution, GPU access, serverless)",
         "SSH (run commands on a remote server)",
-        "Keep current"
+        f"Keep current ({current_backend})"
     ]
     
     # Default based on current
-    default_terminal = {'local': 0, 'docker': 1, 'ssh': 2}.get(current_backend, 0)
+    default_terminal = {'local': 0, 'docker': 1, 'singularity': 2, 'modal': 3, 'ssh': 4}.get(current_backend, 0)
     
-    terminal_idx = prompt_choice("Select terminal backend:", terminal_choices, 3)  # Default: keep
+    terminal_idx = prompt_choice("Select terminal backend:", terminal_choices, 5)  # Default: keep
     
     if terminal_idx == 0:  # Local
         config.setdefault('terminal', {})['backend'] = 'local'
-        print_success("Terminal set to local")
+        print_info("Local Execution Configuration:")
+        print_info("Commands run directly on this machine (no isolation)")
         
-        if prompt_yes_no("Enable sudo support? (allows agent to run sudo commands)", False):
-            print_warning("SECURITY WARNING: Sudo password will be stored in plaintext")
-            sudo_pass = prompt("Sudo password (leave empty to skip)", password=True)
+        if prompt_yes_no("  Enable sudo support? (allows agent to run sudo commands)", False):
+            print_warning("  SECURITY WARNING: Sudo password will be stored in plaintext")
+            sudo_pass = prompt("  Sudo password (leave empty to skip)", password=True)
             if sudo_pass:
                 save_env_value("SUDO_PASSWORD", sudo_pass)
-                print_success("Sudo password saved")
+                print_success("  Sudo password saved")
+        
+        print_success("Terminal set to local")
     
     elif terminal_idx == 1:  # Docker
         config.setdefault('terminal', {})['backend'] = 'docker'
-        docker_image = prompt("Docker image", config.get('terminal', {}).get('docker_image', 'python:3.11-slim'))
+        default_docker = config.get('terminal', {}).get('docker_image', 'nikolaik/python-nodejs:python3.11-nodejs20')
+        print_info("Docker Configuration:")
+        docker_image = prompt("  Docker image", default_docker)
         config['terminal']['docker_image'] = docker_image
         print_success("Terminal set to Docker")
     
-    elif terminal_idx == 2:  # SSH
+    elif terminal_idx == 2:  # Singularity
+        config.setdefault('terminal', {})['backend'] = 'singularity'
+        default_singularity = config.get('terminal', {}).get('singularity_image', 'docker://nikolaik/python-nodejs:python3.11-nodejs20')
+        print_info("Singularity/Apptainer Configuration:")
+        print_info("Requires apptainer or singularity to be installed")
+        singularity_image = prompt("  Image (docker:// prefix for Docker Hub)", default_singularity)
+        config['terminal']['singularity_image'] = singularity_image
+        print_success("Terminal set to Singularity/Apptainer")
+    
+    elif terminal_idx == 3:  # Modal
+        config.setdefault('terminal', {})['backend'] = 'modal'
+        default_modal = config.get('terminal', {}).get('modal_image', 'nikolaik/python-nodejs:python3.11-nodejs20')
+        print_info("Modal Cloud Configuration:")
+        print_info("Get credentials at: https://modal.com/settings")
+        
+        # Always show current status and allow reconfiguration
+        current_token = get_env_value('MODAL_TOKEN_ID')
+        if current_token:
+            print_info(f"  Token ID: {current_token[:8]}... (configured)")
+        
+        modal_image = prompt("  Container image", default_modal)
+        config['terminal']['modal_image'] = modal_image
+        
+        token_id = prompt("  Modal token ID", current_token or "")
+        token_secret = prompt("  Modal token secret", password=True)
+        
+        if token_id:
+            save_env_value("MODAL_TOKEN_ID", token_id)
+        if token_secret:
+            save_env_value("MODAL_TOKEN_SECRET", token_secret)
+        
+        print_success("Terminal set to Modal")
+    
+    elif terminal_idx == 4:  # SSH
         config.setdefault('terminal', {})['backend'] = 'ssh'
+        print_info("SSH Remote Execution Configuration:")
+        print_info("Commands will run on a remote server over SSH")
         
         current_host = get_env_value('TERMINAL_SSH_HOST') or ''
         current_user = get_env_value('TERMINAL_SSH_USER') or os.getenv("USER", "")
+        current_port = get_env_value('TERMINAL_SSH_PORT') or '22'
+        current_key = get_env_value('TERMINAL_SSH_KEY') or '~/.ssh/id_rsa'
         
-        ssh_host = prompt("SSH host", current_host)
-        ssh_user = prompt("SSH user", current_user)
-        ssh_key = prompt("SSH key path", "~/.ssh/id_rsa")
+        if current_host:
+            print_info(f"  Current host: {current_user}@{current_host}:{current_port}")
+        
+        ssh_host = prompt("  SSH host", current_host)
+        ssh_user = prompt("  SSH user", current_user)
+        ssh_port = prompt("  SSH port", current_port)
+        ssh_key = prompt("  SSH key path (or leave empty for ssh-agent)", current_key)
         
         if ssh_host:
             save_env_value("TERMINAL_SSH_HOST", ssh_host)
         if ssh_user:
             save_env_value("TERMINAL_SSH_USER", ssh_user)
+        if ssh_port and ssh_port != '22':
+            save_env_value("TERMINAL_SSH_PORT", ssh_port)
         if ssh_key:
             save_env_value("TERMINAL_SSH_KEY", ssh_key)
         
diff --git a/pyproject.toml b/pyproject.toml
index 99c32f3cbb..0924ceaf68 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,8 @@ modal = ["modal", "boto3"]
 dev = ["pytest", "pytest-asyncio"]
 messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0"]
 cron = ["croniter"]
-all = ["croniter", "python-telegram-bot>=20.0", "discord.py>=2.0"]
+cli = ["simple-term-menu"]
+all = ["croniter", "python-telegram-bot>=20.0", "discord.py>=2.0", "simple-term-menu"]
 
 [project.scripts]
 hermes = "hermes_cli.main:main"
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index a5058f44cd..da2b762b15 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -804,11 +804,13 @@ _cleanup_running = False
 # Configuration from environment variables
 def _get_env_config() -> Dict[str, Any]:
     """Get terminal environment configuration from environment variables."""
+    # Default image with Python and Node.js for maximum compatibility
+    default_image = "nikolaik/python-nodejs:python3.11-nodejs20"
     return {
         "env_type": os.getenv("TERMINAL_ENV", "local"),  # local, docker, singularity, modal, or ssh
-        "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", "python:3.11"),
-        "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", "docker://python:3.11"),
-        "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", "python:3.11"),
+        "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
+        "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
+        "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
         "cwd": os.getenv("TERMINAL_CWD", "/tmp"),
         "timeout": int(os.getenv("TERMINAL_TIMEOUT", "60")),
         "lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
@@ -1290,9 +1292,11 @@ if __name__ == "__main__":
     print("  result = terminal_tool(command='python server.py', background=True)")
 
     print("\nEnvironment Variables:")
-    print(f"  TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/modal)")
-    print(f"  TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', 'python:3.11-slim')}")
-    print(f"  TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', 'python:3.11-slim')}")
+    default_img = "nikolaik/python-nodejs:python3.11-nodejs20"
+    print(f"  TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/ssh)")
+    print(f"  TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
+    print(f"  TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
+    print(f"  TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
     print(f"  TERMINAL_CWD: {os.getenv('TERMINAL_CWD', '/tmp')}")
     print(f"  TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
     print(f"  TERMINAL_LIFETIME_SECONDS: {os.getenv('TERMINAL_LIFETIME_SECONDS', '300')}")

From da4167560f5774a16b0dc61df9deaa12ada49dde Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:15:30 -0800
Subject: [PATCH 07/48] Enhance terminal backend selection in setup wizard

- Added platform detection to customize available terminal backend options based on the operating system (Linux, macOS, Windows).
- Updated terminal choices to include Singularity/Apptainer only for Linux users, with appropriate warnings for unsupported selections.
- Improved user prompts for Docker and local configurations to provide platform-specific guidance.
- Refactored backend selection logic to streamline the process and ensure accurate mapping of user choices to backend configurations.
---
 hermes_cli/setup.py | 54 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index e892b8011e..3b0f049123 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -293,25 +293,56 @@ def run_setup_wizard(args):
     current_backend = config.get('terminal', {}).get('backend', 'local')
     print_info(f"Current: {current_backend}")
     
+    # Detect platform for backend availability
+    import platform
+    is_linux = platform.system() == "Linux"
+    is_macos = platform.system() == "Darwin"
+    is_windows = platform.system() == "Windows"
+    
+    # Build choices based on platform
     terminal_choices = [
         "Local (run commands on this machine - no isolation)",
         "Docker (isolated containers - recommended for security)",
-        "Singularity/Apptainer (HPC clusters, shared compute)",
+    ]
+    
+    # Singularity/Apptainer is Linux-only (HPC)
+    if is_linux:
+        terminal_choices.append("Singularity/Apptainer (HPC clusters, shared compute)")
+    
+    terminal_choices.extend([
         "Modal (cloud execution, GPU access, serverless)",
         "SSH (run commands on a remote server)",
         f"Keep current ({current_backend})"
-    ]
+    ])
+    
+    # Build index map based on available choices
+    if is_linux:
+        backend_to_idx = {'local': 0, 'docker': 1, 'singularity': 2, 'modal': 3, 'ssh': 4}
+        idx_to_backend = {0: 'local', 1: 'docker', 2: 'singularity', 3: 'modal', 4: 'ssh'}
+        keep_current_idx = 5
+    else:
+        backend_to_idx = {'local': 0, 'docker': 1, 'modal': 2, 'ssh': 3}
+        idx_to_backend = {0: 'local', 1: 'docker', 2: 'modal', 3: 'ssh'}
+        keep_current_idx = 4
+        if current_backend == 'singularity':
+            print_warning("Singularity is only available on Linux - please select a different backend")
     
     # Default based on current
-    default_terminal = {'local': 0, 'docker': 1, 'singularity': 2, 'modal': 3, 'ssh': 4}.get(current_backend, 0)
+    default_terminal = backend_to_idx.get(current_backend, 0)
     
-    terminal_idx = prompt_choice("Select terminal backend:", terminal_choices, 5)  # Default: keep
+    terminal_idx = prompt_choice("Select terminal backend:", terminal_choices, keep_current_idx)
     
-    if terminal_idx == 0:  # Local
+    # Map index to backend name (handles platform differences)
+    selected_backend = idx_to_backend.get(terminal_idx)
+    
+    if selected_backend == 'local':
         config.setdefault('terminal', {})['backend'] = 'local'
         print_info("Local Execution Configuration:")
         print_info("Commands run directly on this machine (no isolation)")
         
+        if is_windows:
+            print_info("Note: On Windows, commands run via cmd.exe or PowerShell")
+        
         if prompt_yes_no("  Enable sudo support? (allows agent to run sudo commands)", False):
             print_warning("  SECURITY WARNING: Sudo password will be stored in plaintext")
             sudo_pass = prompt("  Sudo password (leave empty to skip)", password=True)
@@ -321,15 +352,19 @@ def run_setup_wizard(args):
         
         print_success("Terminal set to local")
     
-    elif terminal_idx == 1:  # Docker
+    elif selected_backend == 'docker':
         config.setdefault('terminal', {})['backend'] = 'docker'
         default_docker = config.get('terminal', {}).get('docker_image', 'nikolaik/python-nodejs:python3.11-nodejs20')
         print_info("Docker Configuration:")
+        if is_macos:
+            print_info("Requires Docker Desktop for Mac")
+        elif is_windows:
+            print_info("Requires Docker Desktop for Windows")
         docker_image = prompt("  Docker image", default_docker)
         config['terminal']['docker_image'] = docker_image
         print_success("Terminal set to Docker")
     
-    elif terminal_idx == 2:  # Singularity
+    elif selected_backend == 'singularity':
         config.setdefault('terminal', {})['backend'] = 'singularity'
         default_singularity = config.get('terminal', {}).get('singularity_image', 'docker://nikolaik/python-nodejs:python3.11-nodejs20')
         print_info("Singularity/Apptainer Configuration:")
@@ -338,7 +373,7 @@ def run_setup_wizard(args):
         config['terminal']['singularity_image'] = singularity_image
         print_success("Terminal set to Singularity/Apptainer")
     
-    elif terminal_idx == 3:  # Modal
+    elif selected_backend == 'modal':
         config.setdefault('terminal', {})['backend'] = 'modal'
         default_modal = config.get('terminal', {}).get('modal_image', 'nikolaik/python-nodejs:python3.11-nodejs20')
         print_info("Modal Cloud Configuration:")
@@ -362,7 +397,7 @@ def run_setup_wizard(args):
         
         print_success("Terminal set to Modal")
     
-    elif terminal_idx == 4:  # SSH
+    elif selected_backend == 'ssh':
         config.setdefault('terminal', {})['backend'] = 'ssh'
         print_info("SSH Remote Execution Configuration:")
         print_info("Commands will run on a remote server over SSH")
@@ -390,6 +425,7 @@ def run_setup_wizard(args):
             save_env_value("TERMINAL_SSH_KEY", ssh_key)
         
         print_success("Terminal set to SSH")
+    # else: Keep current (selected_backend is None)
     
     # =========================================================================
     # Step 4: Context Compression

From ef409c6a24f477ed76e46d751ecfc9cd4f4b90ab Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:19:07 -0800
Subject: [PATCH 08/48] Enhance repository cloning in install script

- Updated the install script to support both SSH and HTTPS cloning methods for the repository, improving flexibility for users with different access configurations.
- Added error handling and informative logging to guide users in case of cloning failures, particularly for private repositories requiring SSH key setup.
- Refactored the cloning logic to attempt SSH first, falling back to HTTPS if necessary, ensuring a smoother installation experience.
---
 scripts/install.sh | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/scripts/install.sh b/scripts/install.sh
index f038bf831d..69e283e56c 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -25,7 +25,8 @@ NC='\033[0m' # No Color
 BOLD='\033[1m'
 
 # Configuration
-REPO_URL="https://github.com/NousResearch/hermes-agent.git"
+REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
+REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
 INSTALL_DIR="${HERMES_INSTALL_DIR:-$HOME/.hermes-agent}"
 PYTHON_MIN_VERSION="3.10"
 
@@ -289,7 +290,22 @@ clone_repo() {
             exit 1
         fi
     else
-        git clone --branch "$BRANCH" "$REPO_URL" "$INSTALL_DIR"
+        # Try SSH first (for private repo access), fall back to HTTPS
+        log_info "Trying SSH clone..."
+        if git clone --branch "$BRANCH" "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
+            log_success "Cloned via SSH"
+        else
+            log_info "SSH failed, trying HTTPS..."
+            if git clone --branch "$BRANCH" "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
+                log_success "Cloned via HTTPS"
+            else
+                log_error "Failed to clone repository"
+                log_info "For private repo access, ensure your SSH key is added to GitHub:"
+                log_info "  ssh-add ~/.ssh/id_rsa"
+                log_info "  ssh -T git@github.com  # Test connection"
+                exit 1
+            fi
+        fi
     fi
     
     cd "$INSTALL_DIR"

From aa6394e94fdfeca5b345ba05dc77ba794149f4e7 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:19:12 -0800
Subject: [PATCH 09/48] Update install script to support SSH and HTTPS
 repository URLs

- Modified the PowerShell install script (`scripts/install.ps1`) to define separate variables for the SSH and HTTPS repository URLs in place of the single `$RepoUrl`.
- This change only introduces the two URL variables; the clone command itself is not updated in this commit, so it prepares for (but does not yet implement) cloning via SSH with an HTTPS fallback.
---
 scripts/install.ps1 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index a6679c934c..0f97d497ec 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -24,7 +24,8 @@ $ErrorActionPreference = "Stop"
 # Configuration
 # ============================================================================
 
-$RepoUrl = "https://github.com/NousResearch/hermes-agent.git"
+$RepoUrlSsh = "git@github.com:NousResearch/hermes-agent.git"
+$RepoUrlHttps = "https://github.com/NousResearch/hermes-agent.git"
 
 # ============================================================================
 # Helper functions

From 69a338610a7a1ba8cc5d57e6692b9c6380db64d7 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:19:26 -0800
Subject: [PATCH 10/48] Enhance repository cloning logic in install script

- Updated the PowerShell install script (`scripts/install.ps1`) to attempt cloning via SSH first for private repositories, falling back to HTTPS if the SSH method fails.
- Added detailed error handling and user guidance for SSH key setup, improving the installation experience for users with private repositories.
---
 scripts/install.ps1 | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 0f97d497ec..9cecb4582b 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -148,7 +148,26 @@ function Install-Repository {
             exit 1
         }
     } else {
-        git clone --branch $Branch $RepoUrl $InstallDir
+        # Try SSH first (for private repo access), fall back to HTTPS
+        Write-Info "Trying SSH clone..."
+        $sshResult = git clone --branch $Branch $RepoUrlSsh $InstallDir 2>&1
+        
+        if ($LASTEXITCODE -eq 0) {
+            Write-Success "Cloned via SSH"
+        } else {
+            Write-Info "SSH failed, trying HTTPS..."
+            $httpsResult = git clone --branch $Branch $RepoUrlHttps $InstallDir 2>&1
+            
+            if ($LASTEXITCODE -eq 0) {
+                Write-Success "Cloned via HTTPS"
+            } else {
+                Write-Error "Failed to clone repository"
+                Write-Info "For private repo access, ensure your SSH key is added to GitHub:"
+                Write-Info "  ssh-add ~/.ssh/id_rsa"
+                Write-Info "  ssh -T git@github.com  # Test connection"
+                exit 1
+            }
+        }
     }
     
     Write-Success "Repository ready"

From e87bee9ccd420e1fb802165420c5b2c03baccb0f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:23:20 -0800
Subject: [PATCH 11/48] Refactor setup wizard for improved API key and provider
 configuration

- Updated the setup wizard to clarify the OpenRouter API key requirement and enhance user prompts for API key input.
- Streamlined the main agent provider selection process, allowing users to choose between OpenRouter and custom endpoints with improved guidance.
- Renumbered setup steps for better organization and clarity, ensuring a smoother user experience during configuration.
- Enhanced error handling and user feedback for API configuration, emphasizing the importance of the OpenRouter key for certain tools.
---
 hermes_cli/setup.py | 132 ++++++++++++++++++++++++--------------------
 1 file changed, 72 insertions(+), 60 deletions(-)

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 3b0f049123..8afaac4f86 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -179,69 +179,81 @@ def run_setup_wizard(args):
     print_info("You can edit these files directly or use 'hermes config edit'")
     
     # =========================================================================
-    # Step 1: API Keys
+    # Step 1: OpenRouter API Key (Required for tools)
     # =========================================================================
-    print_header("Model/Auth Provider")
+    print_header("OpenRouter API Key (Required)")
+    print_info("OpenRouter is used for vision, web scraping, and tool operations")
+    print_info("even if you use a custom endpoint for your main agent.")
+    print_info("Get your API key at: https://openrouter.ai/keys")
     
-    # Check if already configured
     existing_or = get_env_value("OPENROUTER_API_KEY")
-    existing_custom = get_env_value("OPENAI_BASE_URL")
-    
-    skip_provider_setup = False
-    if existing_or or existing_custom:
-        if existing_or:
-            print_info("Currently configured: OpenRouter")
-        else:
-            print_info(f"Currently configured: Custom endpoint ({existing_custom})")
-        
-        if not prompt_yes_no("Reconfigure API provider?", False):
-            print_info("Keeping existing configuration")
-            skip_provider_setup = True
-    
-    if not skip_provider_setup:
-        provider_choices = [
-            "OpenRouter (recommended - access to all models)",
-            "Custom OpenAI-compatible endpoint",
-            "Skip for now"
-        ]
-        
-        provider_idx = prompt_choice("Select your API provider:", provider_choices, 0)
-        
-        if provider_idx == 0:  # OpenRouter
-            print_info("Get your API key at: https://openrouter.ai/keys")
-            api_key = prompt("OpenRouter API key", password=True)
+    if existing_or:
+        print_info(f"Current: {existing_or[:8]}... (configured)")
+        if prompt_yes_no("Update OpenRouter API key?", False):
+            api_key = prompt("  OpenRouter API key", password=True)
             if api_key:
                 save_env_value("OPENROUTER_API_KEY", api_key)
-                print_success("OpenRouter API key saved")
-        
-        elif provider_idx == 1:  # Custom endpoint
-            print_info("Custom OpenAI-Compatible Endpoint Configuration:")
-            print_info("Works with any API that follows OpenAI's chat completions spec")
-            
-            # Show current values if set
-            current_url = get_env_value("OPENAI_BASE_URL") or ""
-            current_key = get_env_value("OPENAI_API_KEY")
-            current_model = config.get('model', '')
-            
-            if current_url:
-                print_info(f"  Current URL: {current_url}")
-            if current_key:
-                print_info(f"  Current key: {current_key[:8]}... (configured)")
-            
-            base_url = prompt("  API base URL (e.g., https://api.example.com/v1)", current_url)
-            api_key = prompt("  API key", password=True)
-            model_name = prompt("  Model name (e.g., gpt-4, claude-3-opus)", current_model)
-            
-            if base_url:
-                save_env_value("OPENAI_BASE_URL", base_url)
-            if api_key:
-                save_env_value("OPENAI_API_KEY", api_key)
-            if model_name:
-                config['model'] = model_name
-            print_success("Custom endpoint configured")
+                print_success("OpenRouter API key updated")
+    else:
+        api_key = prompt("  OpenRouter API key", password=True)
+        if api_key:
+            save_env_value("OPENROUTER_API_KEY", api_key)
+            print_success("OpenRouter API key saved")
+        else:
+            print_warning("Skipped - some tools (vision, web scraping) won't work without this")
     
     # =========================================================================
-    # Step 2: Model Selection
+    # Step 2: Main Agent Provider
+    # =========================================================================
+    print_header("Main Agent Provider")
+    print_info("Choose how to connect to your main chat model.")
+    
+    existing_custom = get_env_value("OPENAI_BASE_URL")
+    
+    provider_choices = [
+        "OpenRouter (use same key for agent - recommended)",
+        "Custom OpenAI-compatible endpoint (separate from OpenRouter)",
+        f"Keep current" + (f" ({existing_custom})" if existing_custom else " (OpenRouter)")
+    ]
+    
+    provider_idx = prompt_choice("Select your main agent provider:", provider_choices, 2)
+    
+    if provider_idx == 0:  # OpenRouter for agent too
+        # Clear any custom endpoint - will use OpenRouter
+        if existing_custom:
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+        print_success("Agent will use OpenRouter")
+    
+    elif provider_idx == 1:  # Custom endpoint
+        print_info("Custom OpenAI-Compatible Endpoint Configuration:")
+        print_info("Works with any API that follows OpenAI's chat completions spec")
+        
+        # Show current values if set
+        current_url = get_env_value("OPENAI_BASE_URL") or ""
+        current_key = get_env_value("OPENAI_API_KEY")
+        current_model = config.get('model', '')
+        
+        if current_url:
+            print_info(f"  Current URL: {current_url}")
+        if current_key:
+            print_info(f"  Current key: {current_key[:8]}... (configured)")
+        
+        base_url = prompt("  API base URL (e.g., https://api.example.com/v1)", current_url)
+        api_key = prompt("  API key", password=True)
+        model_name = prompt("  Model name (e.g., gpt-4, claude-3-opus)", current_model)
+        
+        if base_url:
+            save_env_value("OPENAI_BASE_URL", base_url)
+        if api_key:
+            save_env_value("OPENAI_API_KEY", api_key)
+        if model_name:
+            config['model'] = model_name
+        print_success("Custom endpoint configured")
+    # else: Keep current (provider_idx == 2)
+    
+    # =========================================================================
+    # Step 3: Model Selection
     # =========================================================================
     print_header("Default Model")
     
@@ -285,7 +297,7 @@ def run_setup_wizard(args):
     # else: Keep current (model_idx == 10)
     
     # =========================================================================
-    # Step 3: Terminal Backend
+    # Step 4: Terminal Backend
     # =========================================================================
     print_header("Terminal Backend")
     print_info("The terminal tool allows the agent to run commands.")
@@ -428,7 +440,7 @@ def run_setup_wizard(args):
     # else: Keep current (selected_backend is None)
     
     # =========================================================================
-    # Step 4: Context Compression
+    # Step 5: Context Compression
     # =========================================================================
     print_header("Context Compression")
     print_info("Automatically summarize old messages when context gets too long.")
@@ -453,7 +465,7 @@ def run_setup_wizard(args):
         config.setdefault('compression', {})['enabled'] = False
     
     # =========================================================================
-    # Step 5: Messaging Platforms (Optional)
+    # Step 6: Messaging Platforms (Optional)
     # =========================================================================
     print_header("Messaging Platforms (Optional)")
     print_info("Connect to messaging platforms to chat with Hermes from anywhere.")
@@ -495,7 +507,7 @@ def run_setup_wizard(args):
                 save_env_value("DISCORD_HOME_CHANNEL", home_channel)
     
     # =========================================================================
-    # Step 6: Additional Tools (Optional)
+    # Step 7: Additional Tools (Optional)
     # =========================================================================
     print_header("Additional Tools (Optional)")
     

From bbb5776763e4cc05a3395a94042bcd01ca3bb86f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:28:27 -0800
Subject: [PATCH 12/48] Enhance tool availability checks and user feedback in
 CLI

- Updated the CLI to include a new method for displaying warnings about disabled tools due to missing API keys.
- Integrated tool availability checks into the setup wizard and doctor commands, providing users with clear information on which tools are available and what is required for full functionality.
- Improved user prompts and feedback regarding API key configuration, emphasizing the importance of setting up keys for certain tools.
- Added detailed summaries of tool availability during setup and diagnostics, enhancing the overall user experience.
---
 cli.py               |  27 +++++++-
 hermes_cli/doctor.py |  31 +++++++++
 hermes_cli/setup.py  | 146 ++++++++++++++++++++++++++++++++++++-------
 model_tools.py       | 128 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 307 insertions(+), 25 deletions(-)

diff --git a/cli.py b/cli.py
index bd06439fa9..301c45be75 100755
--- a/cli.py
+++ b/cli.py
@@ -596,7 +596,7 @@ class HermesCLI:
             self._show_status()
         else:
             # Get tools for display
-            tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets)
+            tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
             
             # Get terminal working directory (where commands will execute)
             cwd = os.getenv("TERMINAL_CWD", os.getcwd())
@@ -611,8 +611,33 @@ class HermesCLI:
                 session_id=self.session_id,
             )
         
+        # Show tool availability warnings if any tools are disabled
+        self._show_tool_availability_warnings()
+        
         self.console.print()
     
+    def _show_tool_availability_warnings(self):
+        """Show warnings about disabled tools due to missing API keys."""
+        try:
+            from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
+            
+            available, unavailable = check_tool_availability()
+            
+            # Filter to only those missing API keys (not system deps)
+            api_key_missing = [u for u in unavailable if u["missing_vars"]]
+            
+            if api_key_missing:
+                self.console.print()
+                self.console.print("[yellow]⚠️  Some tools disabled (missing API keys):[/]")
+                for item in api_key_missing:
+                    tools_str = ", ".join(item["tools"][:2])  # Show first 2 tools
+                    if len(item["tools"]) > 2:
+                        tools_str += f", +{len(item['tools'])-2} more"
+                    self.console.print(f"   [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
+                self.console.print("[dim]   Run 'hermes setup' to configure[/]")
+        except Exception:
+            pass  # Don't crash on import errors
+    
     def _show_status(self):
         """Show current status bar."""
         # Get tool count
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 2b69317bb4..82b7e5414b 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -256,6 +256,37 @@ def run_doctor(args):
         except Exception as e:
             check_warn("Anthropic API", f"({e})")
     
+    # =========================================================================
+    # Check: Tool Availability
+    # =========================================================================
+    print()
+    print(color("◆ Tool Availability", Colors.CYAN, Colors.BOLD))
+    
+    try:
+        # Add project root to path for imports
+        sys.path.insert(0, str(PROJECT_ROOT))
+        from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
+        
+        available, unavailable = check_tool_availability()
+        
+        for tid in available:
+            info = TOOLSET_REQUIREMENTS.get(tid, {})
+            check_ok(info.get("name", tid))
+        
+        for item in unavailable:
+            if item["missing_vars"]:
+                vars_str = ", ".join(item["missing_vars"])
+                check_warn(item["name"], f"(missing {vars_str})")
+            else:
+                check_warn(item["name"], "(system dependency not met)")
+        
+        # Count disabled tools with API key requirements
+        api_disabled = [u for u in unavailable if u["missing_vars"]]
+        if api_disabled:
+            issues.append("Run 'hermes setup' to configure missing API keys for full tool access")
+    except Exception as e:
+        check_warn("Could not check tool availability", f"({e})")
+    
     # =========================================================================
     # Summary
     # =========================================================================
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 8afaac4f86..a766e3afea 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -510,48 +510,148 @@ def run_setup_wizard(args):
     # Step 7: Additional Tools (Optional)
     # =========================================================================
     print_header("Additional Tools (Optional)")
+    print_info("These tools extend the agent's capabilities.")
+    print_info("Without their API keys, the corresponding features will be disabled.")
+    print()
     
-    # Firecrawl
-    if not get_env_value('FIRECRAWL_API_KEY'):
-        if prompt_yes_no("Set up web scraping (Firecrawl)?", False):
-            print_info("Get your API key at: https://firecrawl.dev/")
-            api_key = prompt("Firecrawl API key", password=True)
+    # Firecrawl - Web scraping
+    print_info("─" * 50)
+    print(color("  Web Search & Scraping (Firecrawl)", Colors.CYAN))
+    print_info("  Enables: web_search, web_extract tools")
+    print_info("  Use case: Search the web, read webpage content")
+    if get_env_value('FIRECRAWL_API_KEY'):
+        print_success("  Status: Configured ✓")
+        if prompt_yes_no("  Update Firecrawl API key?", False):
+            api_key = prompt("    API key", password=True)
             if api_key:
                 save_env_value("FIRECRAWL_API_KEY", api_key)
-                print_success("Firecrawl API key saved")
+                print_success("    Updated")
     else:
-        print_info("Firecrawl: already configured")
+        print_warning("  Status: Not configured (tools will be disabled)")
+        if prompt_yes_no("  Set up Firecrawl?", False):
+            print_info("    Get your API key at: https://firecrawl.dev/")
+            api_key = prompt("    API key", password=True)
+            if api_key:
+                save_env_value("FIRECRAWL_API_KEY", api_key)
+                print_success("    Configured ✓")
+    print()
     
-    # Browserbase
-    if not get_env_value('BROWSERBASE_API_KEY'):
-        if prompt_yes_no("Set up browser automation (Browserbase)?", False):
-            print_info("Get your API key at: https://browserbase.com/")
-            api_key = prompt("Browserbase API key", password=True)
-            project_id = prompt("Browserbase project ID")
+    # Browserbase - Browser automation
+    print_info("─" * 50)
+    print(color("  Browser Automation (Browserbase)", Colors.CYAN))
+    print_info("  Enables: browser_navigate, browser_click, etc.")
+    print_info("  Use case: Interact with web pages, fill forms, screenshots")
+    if get_env_value('BROWSERBASE_API_KEY'):
+        print_success("  Status: Configured ✓")
+        if prompt_yes_no("  Update Browserbase credentials?", False):
+            api_key = prompt("    API key", password=True)
+            project_id = prompt("    Project ID")
             if api_key:
                 save_env_value("BROWSERBASE_API_KEY", api_key)
             if project_id:
                 save_env_value("BROWSERBASE_PROJECT_ID", project_id)
-            print_success("Browserbase configured")
+            print_success("    Updated")
     else:
-        print_info("Browserbase: already configured")
+        print_warning("  Status: Not configured (tools will be disabled)")
+        if prompt_yes_no("  Set up Browserbase?", False):
+            print_info("    Get credentials at: https://browserbase.com/")
+            api_key = prompt("    API key", password=True)
+            project_id = prompt("    Project ID")
+            if api_key:
+                save_env_value("BROWSERBASE_API_KEY", api_key)
+            if project_id:
+                save_env_value("BROWSERBASE_PROJECT_ID", project_id)
+            print_success("    Configured ✓")
+    print()
     
-    # FAL
-    if not get_env_value('FAL_KEY'):
-        if prompt_yes_no("Set up image generation (FAL)?", False):
-            print_info("Get your API key at: https://fal.ai/")
-            api_key = prompt("FAL API key", password=True)
+    # FAL - Image generation
+    print_info("─" * 50)
+    print(color("  Image Generation (FAL)", Colors.CYAN))
+    print_info("  Enables: image_generate tool")
+    print_info("  Use case: Generate images from text prompts (FLUX)")
+    if get_env_value('FAL_KEY'):
+        print_success("  Status: Configured ✓")
+        if prompt_yes_no("  Update FAL API key?", False):
+            api_key = prompt("    API key", password=True)
             if api_key:
                 save_env_value("FAL_KEY", api_key)
-                print_success("FAL API key saved")
+                print_success("    Updated")
     else:
-        print_info("FAL: already configured")
+        print_warning("  Status: Not configured (tool will be disabled)")
+        if prompt_yes_no("  Set up FAL?", False):
+            print_info("    Get your API key at: https://fal.ai/")
+            api_key = prompt("    API key", password=True)
+            if api_key:
+                save_env_value("FAL_KEY", api_key)
+                print_success("    Configured ✓")
     
     # =========================================================================
     # Save config
     # =========================================================================
     save_config(config)
     
+    # =========================================================================
+    # Tool Availability Summary
+    # =========================================================================
+    print()
+    print_header("Tool Availability Summary")
+    
+    # Check which tools are available
+    tool_status = []
+    
+    # OpenRouter (required for vision, moa)
+    if get_env_value('OPENROUTER_API_KEY'):
+        tool_status.append(("Vision (image analysis)", True, None))
+        tool_status.append(("Mixture of Agents", True, None))
+    else:
+        tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY"))
+        tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
+    
+    # Firecrawl (web tools)
+    if get_env_value('FIRECRAWL_API_KEY'):
+        tool_status.append(("Web Search & Extract", True, None))
+    else:
+        tool_status.append(("Web Search & Extract", False, "FIRECRAWL_API_KEY"))
+    
+    # Browserbase (browser tools)
+    if get_env_value('BROWSERBASE_API_KEY'):
+        tool_status.append(("Browser Automation", True, None))
+    else:
+        tool_status.append(("Browser Automation", False, "BROWSERBASE_API_KEY"))
+    
+    # FAL (image generation)
+    if get_env_value('FAL_KEY'):
+        tool_status.append(("Image Generation", True, None))
+    else:
+        tool_status.append(("Image Generation", False, "FAL_KEY"))
+    
+    # Terminal (always available if system deps met)
+    tool_status.append(("Terminal/Commands", True, None))
+    
+    # Skills (always available if skills dir exists)
+    tool_status.append(("Skills Knowledge Base", True, None))
+    
+    # Print status
+    available_count = sum(1 for _, avail, _ in tool_status if avail)
+    total_count = len(tool_status)
+    
+    print_info(f"{available_count}/{total_count} tool categories available:")
+    print()
+    
+    for name, available, missing_var in tool_status:
+        if available:
+            print(f"   {color('✓', Colors.GREEN)} {name}")
+        else:
+            print(f"   {color('✗', Colors.RED)} {name} {color(f'(missing {missing_var})', Colors.DIM)}")
+    
+    print()
+    
+    disabled_tools = [(name, var) for name, avail, var in tool_status if not avail]
+    if disabled_tools:
+        print_warning("Some tools are disabled. Run 'hermes setup' again to configure them,")
+        print_warning("or edit ~/.hermes/.env directly to add the missing API keys.")
+        print()
+    
     # =========================================================================
     # Done!
     # =========================================================================
@@ -568,7 +668,7 @@ def run_setup_wizard(args):
     print(f"              Model, terminal backend, compression, etc.")
     print()
     print(f"   {color('API Keys:', Colors.YELLOW)}  {get_env_path()}")
-    print(f"              OpenRouter, Anthropic, Firecrawl, etc.")
+    print(f"              OpenRouter, Custom Endpoint, tool API keys")
     print()
     print(f"   {color('Data:', Colors.YELLOW)}      {hermes_home}/")
     print(f"              Cron jobs, sessions, logs")
diff --git a/model_tools.py b/model_tools.py
index 9878951d32..138860195f 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -28,7 +28,8 @@ Usage:
 
 import json
 import asyncio
-from typing import Dict, Any, List, Optional
+import os
+from typing import Dict, Any, List, Optional, Tuple
 
 from tools.web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_firecrawl_api_key
 from tools.terminal_tool import terminal_tool, check_terminal_requirements, TERMINAL_TOOL_DESCRIPTION, cleanup_vm
@@ -71,6 +72,131 @@ from toolsets import (
     get_toolset_info, print_toolset_tree
 )
 
+
+# =============================================================================
+# Tool Availability Checking
+# =============================================================================
+
+# Maps toolsets to their required API keys/environment variables
+TOOLSET_REQUIREMENTS = {
+    "web": {
+        "name": "Web Search & Extract",
+        "env_vars": ["FIRECRAWL_API_KEY"],
+        "check_fn": check_firecrawl_api_key,
+        "setup_url": "https://firecrawl.dev/",
+        "tools": ["web_search", "web_extract"],
+    },
+    "vision": {
+        "name": "Vision (Image Analysis)",
+        "env_vars": ["OPENROUTER_API_KEY"],
+        "check_fn": check_vision_requirements,
+        "setup_url": "https://openrouter.ai/keys",
+        "tools": ["vision_analyze"],
+    },
+    "moa": {
+        "name": "Mixture of Agents",
+        "env_vars": ["OPENROUTER_API_KEY"],
+        "check_fn": check_moa_requirements,
+        "setup_url": "https://openrouter.ai/keys",
+        "tools": ["mixture_of_agents"],
+    },
+    "image_gen": {
+        "name": "Image Generation",
+        "env_vars": ["FAL_KEY"],
+        "check_fn": check_image_generation_requirements,
+        "setup_url": "https://fal.ai/",
+        "tools": ["image_generate"],
+    },
+    "browser": {
+        "name": "Browser Automation",
+        "env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"],
+        "check_fn": check_browser_requirements,
+        "setup_url": "https://browserbase.com/",
+        "tools": ["browser_navigate", "browser_snapshot", "browser_click", "browser_type"],
+    },
+    "terminal": {
+        "name": "Terminal/Command Execution",
+        "env_vars": [],  # No API key required, just system dependencies
+        "check_fn": check_terminal_requirements,
+        "setup_url": None,
+        "tools": ["terminal"],
+    },
+    "skills": {
+        "name": "Skills Knowledge Base",
+        "env_vars": [],  # Just needs skills directory
+        "check_fn": check_skills_requirements,
+        "setup_url": None,
+        "tools": ["skills_categories", "skills_list", "skill_view"],
+    },
+}
+
+
+def check_tool_availability(quiet: bool = False) -> Tuple[List[str], List[Dict[str, Any]]]:
+    """
+    Check which tool categories are available based on API keys and requirements.
+    
+    Returns:
+        Tuple containing:
+        - List of available toolset names
+        - List of dicts with info about unavailable toolsets and what's missing
+    """
+    available = []
+    unavailable = []
+    
+    for toolset_id, info in TOOLSET_REQUIREMENTS.items():
+        if info["check_fn"]():
+            available.append(toolset_id)
+        else:
+            # Figure out what's missing
+            missing_vars = [var for var in info["env_vars"] if not os.getenv(var)]
+            unavailable.append({
+                "id": toolset_id,
+                "name": info["name"],
+                "missing_vars": missing_vars,
+                "setup_url": info["setup_url"],
+                "tools": info["tools"],
+            })
+    
+    return available, unavailable
+
+
+def print_tool_availability_warnings(unavailable: List[Dict[str, Any]], prefix: str = ""):
+    """Print warnings about unavailable tools."""
+    if not unavailable:
+        return
+    
+    # Filter to only those missing API keys (not system dependencies)
+    api_key_missing = [u for u in unavailable if u["missing_vars"]]
+    
+    if api_key_missing:
+        print(f"{prefix}⚠️  Some tools are disabled due to missing API keys:")
+        for item in api_key_missing:
+            vars_str = ", ".join(item["missing_vars"])
+            print(f"{prefix}   • {item['name']}: missing {vars_str}")
+            if item["setup_url"]:
+                print(f"{prefix}     Get key at: {item['setup_url']}")
+        print(f"{prefix}   Run 'hermes setup' to configure API keys")
+        print()
+
+
+def get_tool_availability_summary() -> Dict[str, Any]:
+    """
+    Get a summary of tool availability for display in status/doctor commands.
+    
+    Returns:
+        Dict with 'available' and 'unavailable' lists of tool info
+    """
+    available, unavailable = check_tool_availability()
+    
+    return {
+        "available": [
+            {"id": tid, "name": TOOLSET_REQUIREMENTS[tid]["name"], "tools": TOOLSET_REQUIREMENTS[tid]["tools"]}
+            for tid in available
+        ],
+        "unavailable": unavailable,
+    }
+
+
 def get_web_tool_definitions() -> List[Dict[str, Any]]:
     """
     Get tool definitions for web tools in OpenAI's expected format.

From fef504f0386908aaf0576d3ffe2c7125166e7b23 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:34:56 -0800
Subject: [PATCH 13/48] Refactor configuration file management and improve user
 feedback

- Updated the setup wizard and installation scripts to standardize the configuration file paths under ~/.hermes, enhancing clarity for users.
- Improved messaging in the CLI to clearly indicate where configuration files and data directories are located.
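- Streamlined the creation of configuration files, ensuring they are easily accessible and organized within the new directory structure: `.env`, `config.yaml`, and the `cron/`, `sessions/`, and `logs/` directories live at the top level of ~/.hermes, and the default code install directory moves from ~/.hermes-agent to ~/.hermes/hermes-agent.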
---
 hermes_cli/setup.py |  9 ++-----
 scripts/install.ps1 | 61 ++++++++++++++++++++++++---------------------
 scripts/install.sh  | 44 ++++++++++++++++++--------------
 3 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index a766e3afea..946021a2aa 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -662,16 +662,11 @@ def run_setup_wizard(args):
     print()
     
     # Show file locations prominently
-    print(color("📁 Your configuration files:", Colors.CYAN, Colors.BOLD))
+    print(color("📁 All your files are in ~/.hermes/:", Colors.CYAN, Colors.BOLD))
     print()
     print(f"   {color('Settings:', Colors.YELLOW)}  {get_config_path()}")
-    print(f"              Model, terminal backend, compression, etc.")
-    print()
     print(f"   {color('API Keys:', Colors.YELLOW)}  {get_env_path()}")
-    print(f"              OpenRouter, Custom Endpoint, tool API keys")
-    print()
-    print(f"   {color('Data:', Colors.YELLOW)}      {hermes_home}/")
-    print(f"              Cron jobs, sessions, logs")
+    print(f"   {color('Data:', Colors.YELLOW)}      {hermes_home}/cron/, sessions/, logs/")
     print()
     
     print(color("─" * 60, Colors.DIM))
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 9cecb4582b..caf80288d4 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -15,7 +15,8 @@ param(
     [switch]$NoVenv,
     [switch]$SkipSetup,
     [string]$Branch = "main",
-    [string]$InstallDir = "$env:USERPROFILE\.hermes-agent"
+    [string]$HermesHome = "$env:USERPROFILE\.hermes",
+    [string]$InstallDir = "$env:USERPROFILE\.hermes\hermes-agent"
 )
 
 $ErrorActionPreference = "Stop"
@@ -248,36 +249,40 @@ function Set-PathVariable {
 function Copy-ConfigTemplates {
     Write-Info "Setting up configuration files..."
     
-    Push-Location $InstallDir
+    # Create ~/.hermes directory structure (config at top level, code in subdir)
+    New-Item -ItemType Directory -Force -Path "$HermesHome\cron" | Out-Null
+    New-Item -ItemType Directory -Force -Path "$HermesHome\sessions" | Out-Null
+    New-Item -ItemType Directory -Force -Path "$HermesHome\logs" | Out-Null
     
-    # Create .env from example
-    if (-not (Test-Path ".env")) {
-        if (Test-Path ".env.example") {
-            Copy-Item ".env.example" ".env"
-            Write-Success "Created .env from template"
+    # Create .env at ~/.hermes/.env (top level, easy to find)
+    $envPath = "$HermesHome\.env"
+    if (-not (Test-Path $envPath)) {
+        $examplePath = "$InstallDir\.env.example"
+        if (Test-Path $examplePath) {
+            Copy-Item $examplePath $envPath
+            Write-Success "Created ~/.hermes/.env from template"
+        } else {
+            # Create empty .env if no example exists
+            New-Item -ItemType File -Force -Path $envPath | Out-Null
+            Write-Success "Created ~/.hermes/.env"
         }
     } else {
-        Write-Info ".env already exists, keeping it"
+        Write-Info "~/.hermes/.env already exists, keeping it"
     }
     
-    # Create cli-config.yaml from example
-    if (-not (Test-Path "cli-config.yaml")) {
-        if (Test-Path "cli-config.yaml.example") {
-            Copy-Item "cli-config.yaml.example" "cli-config.yaml"
-            Write-Success "Created cli-config.yaml from template"
+    # Create config.yaml at ~/.hermes/config.yaml (top level, easy to find)
+    $configPath = "$HermesHome\config.yaml"
+    if (-not (Test-Path $configPath)) {
+        $examplePath = "$InstallDir\cli-config.yaml.example"
+        if (Test-Path $examplePath) {
+            Copy-Item $examplePath $configPath
+            Write-Success "Created ~/.hermes/config.yaml from template"
         }
     } else {
-        Write-Info "cli-config.yaml already exists, keeping it"
+        Write-Info "~/.hermes/config.yaml already exists, keeping it"
     }
     
-    Pop-Location
-    
-    # Create user data directory
-    $hermesDir = "$env:USERPROFILE\.hermes"
-    New-Item -ItemType Directory -Force -Path "$hermesDir\cron" | Out-Null
-    New-Item -ItemType Directory -Force -Path "$hermesDir\sessions" | Out-Null
-    New-Item -ItemType Directory -Force -Path "$hermesDir\logs" | Out-Null
-    Write-Success "Created ~/.hermes data directory"
+    Write-Success "Configuration directory ready: ~/.hermes/"
 }
 
 function Install-NodeDeps {
@@ -330,16 +335,16 @@ function Write-Completion {
     Write-Host ""
     
     # Show file locations
-    Write-Host "📁 Your files:" -ForegroundColor Cyan
+    Write-Host "📁 Your files (all in ~/.hermes/):" -ForegroundColor Cyan
     Write-Host ""
-    Write-Host "   Install:   " -NoNewline -ForegroundColor Yellow
-    Write-Host "$InstallDir"
     Write-Host "   Config:    " -NoNewline -ForegroundColor Yellow
-    Write-Host "$env:USERPROFILE\.hermes\config.yaml"
+    Write-Host "$HermesHome\config.yaml"
     Write-Host "   API Keys:  " -NoNewline -ForegroundColor Yellow
-    Write-Host "$env:USERPROFILE\.hermes\.env"
+    Write-Host "$HermesHome\.env"
     Write-Host "   Data:      " -NoNewline -ForegroundColor Yellow
-    Write-Host "$env:USERPROFILE\.hermes\ (cron, sessions, logs)"
+    Write-Host "$HermesHome\cron\, sessions\, logs\"
+    Write-Host "   Code:      " -NoNewline -ForegroundColor Yellow
+    Write-Host "$HermesHome\hermes-agent\"
     Write-Host ""
     
     Write-Host "─────────────────────────────────────────────────────────" -ForegroundColor Cyan
diff --git a/scripts/install.sh b/scripts/install.sh
index 69e283e56c..463a0d5bea 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -27,7 +27,8 @@ BOLD='\033[1m'
 # Configuration
 REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
 REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
-INSTALL_DIR="${HERMES_INSTALL_DIR:-$HOME/.hermes-agent}"
+HERMES_HOME="$HOME/.hermes"
+INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
 PYTHON_MIN_VERSION="3.10"
 
 # Options
@@ -401,31 +402,36 @@ EOF
 copy_config_templates() {
     log_info "Setting up configuration files..."
     
-    # Create .env from example
-    if [ ! -f "$INSTALL_DIR/.env" ]; then
+    # Create ~/.hermes directory structure (config at top level, code in subdir)
+    mkdir -p "$HERMES_HOME/cron"
+    mkdir -p "$HERMES_HOME/sessions"
+    mkdir -p "$HERMES_HOME/logs"
+    
+    # Create .env at ~/.hermes/.env (top level, easy to find)
+    if [ ! -f "$HERMES_HOME/.env" ]; then
         if [ -f "$INSTALL_DIR/.env.example" ]; then
-            cp "$INSTALL_DIR/.env.example" "$INSTALL_DIR/.env"
-            log_success "Created .env from template"
+            cp "$INSTALL_DIR/.env.example" "$HERMES_HOME/.env"
+            log_success "Created ~/.hermes/.env from template"
+        else
+            # Create empty .env if no example exists
+            touch "$HERMES_HOME/.env"
+            log_success "Created ~/.hermes/.env"
         fi
     else
-        log_info ".env already exists, keeping it"
+        log_info "~/.hermes/.env already exists, keeping it"
     fi
     
-    # Create cli-config.yaml from example
-    if [ ! -f "$INSTALL_DIR/cli-config.yaml" ]; then
+    # Create config.yaml at ~/.hermes/config.yaml (top level, easy to find)
+    if [ ! -f "$HERMES_HOME/config.yaml" ]; then
         if [ -f "$INSTALL_DIR/cli-config.yaml.example" ]; then
-            cp "$INSTALL_DIR/cli-config.yaml.example" "$INSTALL_DIR/cli-config.yaml"
-            log_success "Created cli-config.yaml from template"
+            cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
+            log_success "Created ~/.hermes/config.yaml from template"
         fi
     else
-        log_info "cli-config.yaml already exists, keeping it"
+        log_info "~/.hermes/config.yaml already exists, keeping it"
     fi
     
-    # Create ~/.hermes directory for user data
-    mkdir -p "$HOME/.hermes/cron"
-    mkdir -p "$HOME/.hermes/sessions"
-    mkdir -p "$HOME/.hermes/logs"
-    log_success "Created ~/.hermes data directory"
+    log_success "Configuration directory ready: ~/.hermes/"
 }
 
 install_node_deps() {
@@ -473,12 +479,12 @@ print_success() {
     echo ""
     
     # Show file locations
-    echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
+    echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
     echo ""
-    echo -e "   ${YELLOW}Install:${NC}   $INSTALL_DIR"
     echo -e "   ${YELLOW}Config:${NC}    ~/.hermes/config.yaml"
     echo -e "   ${YELLOW}API Keys:${NC}  ~/.hermes/.env"
-    echo -e "   ${YELLOW}Data:${NC}      ~/.hermes/ (cron, sessions, logs)"
+    echo -e "   ${YELLOW}Data:${NC}      ~/.hermes/cron/, sessions/, logs/"
+    echo -e "   ${YELLOW}Code:${NC}      ~/.hermes/hermes-agent/"
     echo ""
     
     echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"

From 3ee788dacc79b5938b8558f1f0b459ca3ded5b48 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:39:23 -0800
Subject: [PATCH 14/48] Implement configuration migration system and enhance
 CLI setup

- Introduced a configuration migration system to check for missing required environment variables and outdated config fields, prompting users for necessary inputs during updates.
- Enhanced the CLI with new commands for checking and migrating configuration, improving user experience by providing clear guidance on required settings.
- Updated the setup wizard to detect existing installations and offer quick setup options for missing configurations, streamlining the user onboarding process.
- Improved messaging throughout the CLI to inform users about the status of their configuration and any required actions.
---
 hermes_cli/config.py | 313 ++++++++++++++++++++++++++++++++++++++-
 hermes_cli/main.py   |  47 ++++++
 hermes_cli/setup.py  | 343 ++++++++++++++++++++++++++++++-------------
 3 files changed, 598 insertions(+), 105 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index ad6423581a..6efcaa7f8b 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -16,7 +16,7 @@ import os
 import sys
 import subprocess
 from pathlib import Path
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, List, Tuple
 
 import yaml
 
@@ -98,8 +98,219 @@ DEFAULT_CONFIG = {
         "compact": False,
         "personality": "kawaii",
     },
+    
+    # Config schema version - bump this when adding new required fields
+    "_config_version": 1,
 }
 
+# =============================================================================
+# Config Migration System
+# =============================================================================
+
+# Env vars Hermes requires. "prompt"/"url" drive the input prompt; "password" hides typed input.
+REQUIRED_ENV_VARS = {
+    "OPENROUTER_API_KEY": {
+        "description": "OpenRouter API key (required for vision, web scraping, and tools)",
+        "prompt": "OpenRouter API key",
+        "url": "https://openrouter.ai/keys",
+        "required": True,
+        "password": True,
+    },
+}
+
+# Optional env vars. "tools" names what each key enables (shown in status output); it may be absent.
+OPTIONAL_ENV_VARS = {
+    "FIRECRAWL_API_KEY": {
+        "description": "Firecrawl API key for web search and scraping",
+        "prompt": "Firecrawl API key",
+        "url": "https://firecrawl.dev/",
+        "tools": ["web_search", "web_extract"],
+        "password": True,
+    },
+    "BROWSERBASE_API_KEY": {
+        "description": "Browserbase API key for browser automation",
+        "prompt": "Browserbase API key", 
+        "url": "https://browserbase.com/",
+        "tools": ["browser_navigate", "browser_click", "etc."],
+        "password": True,
+    },
+    "BROWSERBASE_PROJECT_ID": {
+        "description": "Browserbase project ID",
+        "prompt": "Browserbase project ID",
+        "url": "https://browserbase.com/",
+        "tools": ["browser_navigate", "browser_click", "etc."],
+        "password": False,
+    },
+    "FAL_KEY": {
+        "description": "FAL API key for image generation",
+        "prompt": "FAL API key",
+        "url": "https://fal.ai/",
+        "tools": ["image_generate"],
+        "password": True,
+    },
+    "OPENAI_BASE_URL": {
+        "description": "Custom OpenAI-compatible API endpoint URL",
+        "prompt": "API base URL (e.g., https://api.example.com/v1)",
+        "url": None,
+        "password": False,
+    },
+    "OPENAI_API_KEY": {
+        "description": "API key for custom OpenAI-compatible endpoint",
+        "prompt": "API key for custom endpoint",
+        "url": None,
+        "password": True,
+    },
+}
+
+
+def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
+    """
+    Report known environment variables for which get_env_value() is falsy.
+
+    Required variables are listed first; optional ones are appended unless
+    required_only is True. Each entry is the variable's metadata dict plus
+    "name" and "is_required" keys.
+    """
+    groups = [(REQUIRED_ENV_VARS, True)]
+    if not required_only:
+        groups.append((OPTIONAL_ENV_VARS, False))
+
+    absent = []
+    for table, required in groups:
+        absent.extend(
+            {"name": name, **meta, "is_required": required}
+            for name, meta in table.items()
+            if not get_env_value(name)
+        )
+    return absent
+
+
+def get_missing_config_fields() -> List[Dict[str, Any]]:
+    """
+    List DEFAULT_CONFIG entries that the loaded config does not define.
+
+    Keys starting with "_" are skipped. A missing section, or one left empty
+    (None) in the file, is reported whole; dict sections are compared one
+    level deep as "section.option". Other non-dict user values are left alone.
+    """
+    config = load_config()
+    missing = []
+
+    for key, default_value in DEFAULT_CONFIG.items():
+        if key.startswith('_'):
+            continue  # Skip internal keys
+        current = config.get(key)
+        is_section = isinstance(default_value, dict)
+        if key not in config or (is_section and current is None):
+            # An empty "section:" line loads as None; treat it like a missing one
+            missing.append({"key": key, "default": default_value,
+                            "description": f"New config section: {key}"})
+        elif is_section and isinstance(current, dict):
+            # Only descend into real mappings: "in" on a str is a substring test
+            missing.extend(
+                {"key": f"{key}.{subkey}", "default": subvalue,
+                 "description": f"New config option: {key}.{subkey}"}
+                for subkey, subvalue in default_value.items()
+                if subkey not in current
+            )
+
+    return missing
+
+
+def check_config_version() -> Tuple[int, int]:
+    """
+    Read the schema version from load_config() and from DEFAULT_CONFIG.
+
+    Returns (current_version, latest_version); a config with no stored
+    version counts as 0, and DEFAULT_CONFIG without one counts as 1.
+    """
+    on_disk = load_config().get("_config_version", 0)
+    shipped = DEFAULT_CONFIG.get("_config_version", 1)
+    return on_disk, shipped
+
+
+def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]:
+    """
+    Migrate config to latest version, prompting for new required fields.
+
+    Args:
+        interactive: If True, prompt for each missing required env var and save it
+        quiet: If True, suppress status output (interactive prompts still print)
+
+    Returns:
+        Dict with migration results: {"env_added": [...], "config_added": [...], "warnings": [...]}
+    """
+    results = {"env_added": [], "config_added": [], "warnings": []}
+
+    # Compare the stored schema version with the one shipped in DEFAULT_CONFIG
+    current_ver, latest_ver = check_config_version()
+
+    if current_ver < latest_ver and not quiet:
+        print(f"Config version: {current_ver} → {latest_ver}")
+
+    # Only required env vars are prompted for here; optional ones are ignored
+    missing_env = get_missing_env_vars(required_only=True)
+
+    if missing_env and not quiet:
+        print("\n⚠️  Missing required environment variables:")
+        for var in missing_env:
+            print(f"   • {var['name']}: {var['description']}")
+
+    if interactive and missing_env:
+        print("\nLet's configure them now:\n")
+        for var in missing_env:
+            if var.get("url"):
+                print(f"  Get your key at: {var['url']}")
+
+            if var.get("password"):
+                import getpass
+                value = getpass.getpass(f"  {var['prompt']}: ").strip()  # pasted keys often carry whitespace
+            else:
+                value = input(f"  {var['prompt']}: ").strip()
+
+            if value:
+                save_env_value(var["name"], value)
+                results["env_added"].append(var["name"])
+                print(f"  ✓ Saved {var['name']}")
+            else:
+                results["warnings"].append(f"Skipped {var['name']} - some features may not work")
+            print()
+
+    # Fill in config fields that exist in DEFAULT_CONFIG but not in the user's file
+    missing_config = get_missing_config_fields()
+
+    if missing_config:
+        config = load_config()
+
+        for field in missing_config:
+            key = field["key"]
+            default = field["default"]
+
+            # Add with default value
+            if "." in key:
+                # Nested key ("section.option"): create the section if needed
+                parent, child = key.split(".", 1)
+                if parent not in config:
+                    config[parent] = {}
+                config[parent][child] = default
+            else:
+                config[key] = default
+
+            results["config_added"].append(key)
+            if not quiet:
+                print(f"  ✓ Added {key} = {default}")
+
+        # Update version and save
+        config["_config_version"] = latest_ver
+        save_config(config)
+    elif current_ver < latest_ver:
+        # Nothing to add: just record the new schema version
+        config = load_config()
+        config["_config_version"] = latest_ver
+        save_config(config)
+
+    return results
+
 
 def load_config() -> Dict[str, Any]:
     """Load configuration from ~/.hermes/config.yaml."""
@@ -395,6 +606,106 @@ def config_command(args):
     elif subcmd == "env-path":
         print(get_env_path())
     
+    elif subcmd == "migrate":
+        print()
+        print(color("🔄 Checking configuration for updates...", Colors.CYAN, Colors.BOLD))
+        print()
+        
+        # Check what's missing
+        missing_env = get_missing_env_vars(required_only=False)
+        missing_config = get_missing_config_fields()
+        current_ver, latest_ver = check_config_version()
+        
+        if not missing_env and not missing_config and current_ver >= latest_ver:
+            print(color("✓ Configuration is up to date!", Colors.GREEN))
+            print()
+            return
+        
+        # Show what needs to be updated
+        if current_ver < latest_ver:
+            print(f"  Config version: {current_ver} → {latest_ver}")
+        
+        if missing_config:
+            print(f"\n  {len(missing_config)} new config option(s) will be added with defaults")
+        
+        required_missing = [v for v in missing_env if v.get("is_required")]
+        optional_missing = [v for v in missing_env if not v.get("is_required")]
+        
+        if required_missing:
+            print(f"\n  ⚠️  {len(required_missing)} required API key(s) missing:")
+            for var in required_missing:
+                print(f"     • {var['name']}")
+        
+        if optional_missing:
+            print(f"\n  ℹ️  {len(optional_missing)} optional API key(s) not configured:")
+            for var in optional_missing:
+                tools = var.get("tools", [])
+                tools_str = f" (enables: {', '.join(tools[:2])})" if tools else ""
+                print(f"     • {var['name']}{tools_str}")
+        
+        print()
+        
+        # Run migration
+        results = migrate_config(interactive=True, quiet=False)
+        
+        print()
+        if results["env_added"] or results["config_added"]:
+            print(color("✓ Configuration updated!", Colors.GREEN))
+        
+        if results["warnings"]:
+            print()
+            for warning in results["warnings"]:
+                print(color(f"  ⚠️  {warning}", Colors.YELLOW))
+        
+        print()
+    
+    elif subcmd == "check":
+        # Non-interactive check for what's missing
+        print()
+        print(color("📋 Configuration Status", Colors.CYAN, Colors.BOLD))
+        print()
+        
+        current_ver, latest_ver = check_config_version()
+        if current_ver >= latest_ver:
+            print(f"  Config version: {current_ver} ✓")
+        else:
+            print(color(f"  Config version: {current_ver} → {latest_ver} (update available)", Colors.YELLOW))
+        
+        print()
+        print(color("  Required:", Colors.BOLD))
+        for var_name in REQUIRED_ENV_VARS:
+            if get_env_value(var_name):
+                print(f"    ✓ {var_name}")
+            else:
+                print(color(f"    ✗ {var_name} (missing)", Colors.RED))
+        
+        print()
+        print(color("  Optional:", Colors.BOLD))
+        for var_name, info in OPTIONAL_ENV_VARS.items():
+            if get_env_value(var_name):
+                print(f"    ✓ {var_name}")
+            else:
+                tools = info.get("tools", [])
+                tools_str = f" → {', '.join(tools[:2])}" if tools else ""
+                print(color(f"    ○ {var_name}{tools_str}", Colors.DIM))
+        
+        missing_config = get_missing_config_fields()
+        if missing_config:
+            print()
+            print(color(f"  {len(missing_config)} new config option(s) available", Colors.YELLOW))
+            print(f"    Run 'hermes config migrate' to add them")
+        
+        print()
+    
     else:
         print(f"Unknown config command: {subcmd}")
+        print()
+        print("Available commands:")
+        print("  hermes config           Show current configuration")
+        print("  hermes config edit      Open config in editor")
+        print("  hermes config set K V   Set a config value")
+        print("  hermes config check     Check for missing/outdated config")
+        print("  hermes config migrate   Update config with new options")
+        print("  hermes config path      Show config file path")
+        print("  hermes config env-path  Show .env file path")
         sys.exit(1)
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index a16fd7f1c6..c51ab8d613 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -170,6 +170,47 @@ def cmd_update(args):
                 print("→ Updating Node.js dependencies...")
                 subprocess.run(["npm", "install", "--silent"], cwd=PROJECT_ROOT, check=False)
         
+        print()
+        print("✓ Code updated!")
+        
+        # Check for config migrations
+        print()
+        print("→ Checking configuration for new options...")
+        
+        from hermes_cli.config import (
+            get_missing_env_vars, get_missing_config_fields, 
+            check_config_version, migrate_config
+        )
+        
+        missing_env = get_missing_env_vars(required_only=True)
+        missing_config = get_missing_config_fields()
+        current_ver, latest_ver = check_config_version()
+        
+        needs_migration = missing_env or missing_config or current_ver < latest_ver
+        
+        if needs_migration:
+            print()
+            if missing_env:
+                print(f"  ⚠️  {len(missing_env)} new required setting(s) need configuration")
+            if missing_config:
+                print(f"  ℹ️  {len(missing_config)} new config option(s) available")
+            
+            print()
+            response = input("Would you like to configure them now? [Y/n]: ").strip().lower()
+            
+            if response in ('', 'y', 'yes'):
+                print()
+                results = migrate_config(interactive=True, quiet=False)
+                
+                if results["env_added"] or results["config_added"]:
+                    print()
+                    print("✓ Configuration updated!")
+            else:
+                print()
+                print("Skipped. Run 'hermes config migrate' later to configure.")
+        else:
+            print("  ✓ Configuration is up to date")
+        
         print()
         print("✓ Update complete!")
         print()
@@ -380,6 +421,12 @@ For more help on a command:
     # config env-path
     config_env = config_subparsers.add_parser("env-path", help="Print .env file path")
     
+    # config check
+    config_check = config_subparsers.add_parser("check", help="Check for missing/outdated config")
+    
+    # config migrate
+    config_migrate = config_subparsers.add_parser("migrate", help="Update config with new options")
+    
     config_parser.set_defaults(func=cmd_config)
     
     # =========================================================================
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 946021a2aa..4b4e5f3b0e 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -152,6 +152,106 @@ def prompt_yes_no(question: str, default: bool = True) -> bool:
         print_error("Please enter 'y' or 'n'")
 
 
+def _print_setup_summary(config: dict, hermes_home):
+    """
+    Print the end-of-setup report: tool availability by API key, the
+    "Setup Complete" banner, file locations and next-step commands.
+    """
+    def green(text):
+        return color(text, Colors.GREEN)
+
+    def yellow(text):
+        return color(text, Colors.YELLOW)
+
+    def dim(text):
+        return color(text, Colors.DIM)
+
+    def heading(text):
+        print(color(text, Colors.CYAN, Colors.BOLD))
+
+    print()
+    print_header("Tool Availability Summary")
+
+    # (category label, env var that unlocks it); None = always shown as available
+    categories = (
+        ("Vision (image analysis)", "OPENROUTER_API_KEY"),
+        ("Mixture of Agents", "OPENROUTER_API_KEY"),
+        ("Web Search & Extract", "FIRECRAWL_API_KEY"),
+        ("Browser Automation", "BROWSERBASE_API_KEY"),
+        ("Image Generation", "FAL_KEY"),
+        ("Terminal/Commands", None),
+        ("Skills Knowledge Base", None),
+    )
+
+    # Look every key up once, in first-use order
+    key_set = {}
+    for _, env_var in categories:
+        if env_var is not None and env_var not in key_set:
+            key_set[env_var] = bool(get_env_value(env_var))
+    # Map each disabled category to the variable it is waiting for
+    blocked = {
+        label: env_var
+        for label, env_var in categories
+        if env_var is not None and not key_set[env_var]
+    }
+
+    print_info(f"{len(categories) - len(blocked)}/{len(categories)} tool categories available:")
+    print()
+
+    for label, _ in categories:
+        if label not in blocked:
+            print(f"   {green('✓')} {label}")
+            continue
+        cross = color('✗', Colors.RED)
+        print(f"   {cross} {label} {dim(f'(missing {blocked[label]})')}")
+
+    print()
+
+    if blocked:
+        print_warning("Some tools are disabled. Run 'hermes setup' again to configure them,")
+        print_warning("or edit ~/.hermes/.env directly to add the missing API keys.")
+        print()
+
+    # Done banner
+    print()
+    for banner_row in (
+        "┌─────────────────────────────────────────────────────────┐",
+        "│              ✓ Setup Complete!                          │",
+        "└─────────────────────────────────────────────────────────┘",
+    ):
+        print(green(banner_row))
+    print()
+
+    # Show file locations prominently
+    heading("📁 All your files are in ~/.hermes/:")
+    print()
+    print(f"   {yellow('Settings:')}  {get_config_path()}")
+    print(f"   {yellow('API Keys:')}  {get_env_path()}")
+    print(f"   {yellow('Data:')}      {hermes_home}/cron/, sessions/, logs/")
+    print()
+    print(dim("─" * 60))
+    print()
+    heading("📝 To edit your configuration:")
+    print()
+    print(f"   {green('hermes config')}        View current settings")
+    print(f"   {green('hermes config edit')}   Open config in your editor")
+    print(f"   {green('hermes config set KEY VALUE')}")
+    print("                         Set a specific value")
+    print()
+    print("   Or edit the files directly:")
+    print(f"   {dim(f'nano {get_config_path()}')}")
+    print(f"   {dim(f'nano {get_env_path()}')}")
+    print()
+    print(dim("─" * 60))
+    print()
+    heading("🚀 Ready to go!")
+    print()
+    print(f"   {green('hermes')}              Start chatting")
+    print(f"   {green('hermes gateway')}      Start messaging gateway")
+    print(f"   {green('hermes doctor')}       Check for issues")
+    print()
+
+
 def run_setup_wizard(args):
     """Run the interactive setup wizard."""
     ensure_hermes_home()
@@ -159,6 +259,24 @@ def run_setup_wizard(args):
     config = load_config()
     hermes_home = get_hermes_home()
     
+    # Check if this is an existing installation with config
+    is_existing = get_env_value("OPENROUTER_API_KEY") is not None or get_config_path().exists()
+    
+    # Import migration helpers
+    from hermes_cli.config import (
+        get_missing_env_vars, get_missing_config_fields,
+        check_config_version, migrate_config,
+        REQUIRED_ENV_VARS, OPTIONAL_ENV_VARS
+    )
+    
+    # Check what's missing
+    missing_required = [v for v in get_missing_env_vars(required_only=False) if v.get("is_required")]
+    missing_optional = [v for v in get_missing_env_vars(required_only=False) if not v.get("is_required")]
+    missing_config = get_missing_config_fields()
+    current_ver, latest_ver = check_config_version()
+    
+    has_missing = missing_required or missing_optional or missing_config or current_ver < latest_ver
+    
     print()
     print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA))
     print(color("│             🦋 Hermes Agent Setup Wizard                │", Colors.MAGENTA))
@@ -167,8 +285,126 @@ def run_setup_wizard(args):
     print(color("│  Press Ctrl+C at any time to exit.                     │", Colors.MAGENTA))
     print(color("└─────────────────────────────────────────────────────────┘", Colors.MAGENTA))
     
+    # If existing installation, show what's missing and offer quick mode
+    quick_mode = False
+    if is_existing and has_missing:
+        print()
+        print_header("Existing Installation Detected")
+        print_success("You already have Hermes configured!")
+        print()
+        
+        if missing_required:
+            print_warning(f"  {len(missing_required)} required setting(s) missing:")
+            for var in missing_required:
+                print(f"     • {var['name']}")
+        
+        if missing_optional:
+            print_info(f"  {len(missing_optional)} optional tool(s) not configured:")
+            for var in missing_optional[:3]:  # Show first 3
+                tools = var.get("tools", [])
+                tools_str = f" → {', '.join(tools[:2])}" if tools else ""
+                print(f"     • {var['name']}{tools_str}")
+            if len(missing_optional) > 3:
+                print(f"     • ...and {len(missing_optional) - 3} more")
+        
+        if missing_config:
+            print_info(f"  {len(missing_config)} new config option(s) available")
+        
+        print()
+        
+        setup_choices = [
+            "Quick setup - just configure missing items",
+            "Full setup - reconfigure everything",
+            "Skip - exit setup"
+        ]
+        
+        choice = prompt_choice("What would you like to do?", setup_choices, 0)
+        
+        if choice == 0:
+            quick_mode = True
+        elif choice == 2:
+            print()
+            print_info("Exiting. Run 'hermes setup' again when ready.")
+            return
+        # choice == 1 continues with full setup
+        
+    elif is_existing and not has_missing:
+        print()
+        print_header("Configuration Status")
+        print_success("Your configuration is complete!")
+        print()
+        
+        if not prompt_yes_no("Would you like to reconfigure anyway?", False):
+            print()
+            print_info("Exiting. Your configuration is already set up.")
+            print_info(f"Config: {get_config_path()}")
+            print_info(f"Secrets: {get_env_path()}")
+            return
+    
+    # Quick mode: only configure missing items
+    if quick_mode:
+        print()
+        print_header("Quick Setup - Missing Items Only")
+        
+        # Handle missing required env vars
+        if missing_required:
+            for var in missing_required:
+                print()
+                print(color(f"  {var['name']}", Colors.CYAN))
+                print_info(f"  {var.get('description', '')}")
+                if var.get("url"):
+                    print_info(f"  Get key at: {var['url']}")
+                
+                if var.get("password"):
+                    value = prompt(f"  {var.get('prompt', var['name'])}", password=True)
+                else:
+                    value = prompt(f"  {var.get('prompt', var['name'])}")
+                
+                if value:
+                    save_env_value(var["name"], value)
+                    print_success(f"  Saved {var['name']}")
+                else:
+                    print_warning(f"  Skipped {var['name']}")
+        
+        # Handle missing optional env vars
+        if missing_optional:
+            print()
+            print_header("Optional Tools (Quick Setup)")
+            
+            for var in missing_optional:
+                tools = var.get("tools", [])
+                tools_str = f" (enables: {', '.join(tools[:2])})" if tools else ""
+                
+                if prompt_yes_no(f"Configure {var['name']}{tools_str}?", False):
+                    if var.get("url"):
+                        print_info(f"  Get key at: {var['url']}")
+                    
+                    if var.get("password"):
+                        value = prompt(f"  {var.get('prompt', var['name'])}", password=True)
+                    else:
+                        value = prompt(f"  {var.get('prompt', var['name'])}")
+                    
+                    if value:
+                        save_env_value(var["name"], value)
+                        print_success(f"  Saved")
+        
+        # Handle missing config fields
+        if missing_config:
+            print()
+            print_info(f"Adding {len(missing_config)} new config option(s) with defaults...")
+            for field in missing_config:
+                print_success(f"  Added {field['key']} = {field['default']}")
+            
+            # Update config version
+            config["_config_version"] = latest_ver
+            save_config(config)
+        
+        # Jump to summary
+        _print_setup_summary(config, hermes_home)
+        return
+    
     # =========================================================================
-    # Step 0: Show paths
+    # Step 0: Show paths (full setup)
     # =========================================================================
     print_header("Configuration Location")
     print_info(f"Config file:  {get_config_path()}")
@@ -586,108 +822,7 @@ def run_setup_wizard(args):
                 print_success("    Configured ✓")
     
     # =========================================================================
-    # Save config
+    # Save config and show summary
     # =========================================================================
     save_config(config)
-    
-    # =========================================================================
-    # Tool Availability Summary
-    # =========================================================================
-    print()
-    print_header("Tool Availability Summary")
-    
-    # Check which tools are available
-    tool_status = []
-    
-    # OpenRouter (required for vision, moa)
-    if get_env_value('OPENROUTER_API_KEY'):
-        tool_status.append(("Vision (image analysis)", True, None))
-        tool_status.append(("Mixture of Agents", True, None))
-    else:
-        tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY"))
-        tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
-    
-    # Firecrawl (web tools)
-    if get_env_value('FIRECRAWL_API_KEY'):
-        tool_status.append(("Web Search & Extract", True, None))
-    else:
-        tool_status.append(("Web Search & Extract", False, "FIRECRAWL_API_KEY"))
-    
-    # Browserbase (browser tools)
-    if get_env_value('BROWSERBASE_API_KEY'):
-        tool_status.append(("Browser Automation", True, None))
-    else:
-        tool_status.append(("Browser Automation", False, "BROWSERBASE_API_KEY"))
-    
-    # FAL (image generation)
-    if get_env_value('FAL_KEY'):
-        tool_status.append(("Image Generation", True, None))
-    else:
-        tool_status.append(("Image Generation", False, "FAL_KEY"))
-    
-    # Terminal (always available if system deps met)
-    tool_status.append(("Terminal/Commands", True, None))
-    
-    # Skills (always available if skills dir exists)
-    tool_status.append(("Skills Knowledge Base", True, None))
-    
-    # Print status
-    available_count = sum(1 for _, avail, _ in tool_status if avail)
-    total_count = len(tool_status)
-    
-    print_info(f"{available_count}/{total_count} tool categories available:")
-    print()
-    
-    for name, available, missing_var in tool_status:
-        if available:
-            print(f"   {color('✓', Colors.GREEN)} {name}")
-        else:
-            print(f"   {color('✗', Colors.RED)} {name} {color(f'(missing {missing_var})', Colors.DIM)}")
-    
-    print()
-    
-    disabled_tools = [(name, var) for name, avail, var in tool_status if not avail]
-    if disabled_tools:
-        print_warning("Some tools are disabled. Run 'hermes setup' again to configure them,")
-        print_warning("or edit ~/.hermes/.env directly to add the missing API keys.")
-        print()
-    
-    # =========================================================================
-    # Done!
-    # =========================================================================
-    print()
-    print(color("┌─────────────────────────────────────────────────────────┐", Colors.GREEN))
-    print(color("│              ✓ Setup Complete!                          │", Colors.GREEN))
-    print(color("└─────────────────────────────────────────────────────────┘", Colors.GREEN))
-    print()
-    
-    # Show file locations prominently
-    print(color("📁 All your files are in ~/.hermes/:", Colors.CYAN, Colors.BOLD))
-    print()
-    print(f"   {color('Settings:', Colors.YELLOW)}  {get_config_path()}")
-    print(f"   {color('API Keys:', Colors.YELLOW)}  {get_env_path()}")
-    print(f"   {color('Data:', Colors.YELLOW)}      {hermes_home}/cron/, sessions/, logs/")
-    print()
-    
-    print(color("─" * 60, Colors.DIM))
-    print()
-    print(color("📝 To edit your configuration:", Colors.CYAN, Colors.BOLD))
-    print()
-    print(f"   {color('hermes config', Colors.GREEN)}        View current settings")
-    print(f"   {color('hermes config edit', Colors.GREEN)}   Open config in your editor")
-    print(f"   {color('hermes config set KEY VALUE', Colors.GREEN)}")
-    print(f"                         Set a specific value")
-    print()
-    print(f"   Or edit the files directly:")
-    print(f"   {color(f'nano {get_config_path()}', Colors.DIM)}")
-    print(f"   {color(f'nano {get_env_path()}', Colors.DIM)}")
-    print()
-    
-    print(color("─" * 60, Colors.DIM))
-    print()
-    print(color("🚀 Ready to go!", Colors.CYAN, Colors.BOLD))
-    print()
-    print(f"   {color('hermes', Colors.GREEN)}              Start chatting")
-    print(f"   {color('hermes gateway', Colors.GREEN)}      Start messaging gateway")
-    print(f"   {color('hermes doctor', Colors.GREEN)}       Check for issues")
-    print()
+    _print_setup_summary(config, hermes_home)

From ff776b57bf4f0c1b410080cc54b34ee2ba04dc2a Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 19:45:42 -0800
Subject: [PATCH 15/48] Remove outdated .cursorrules file and add comprehensive
 AGENTS.md documentation

- Deleted the .cursorrules file, which contained legacy information about the Hermes-Agent project structure and development environment.
- Introduced AGENTS.md, a detailed development guide for the Hermes Agent, outlining project structure, configuration management, CLI architecture, and agent functionality.
- Enhanced user guidance for setting up the development environment and utilizing the CLI effectively, including new commands for configuration management.
---
 .cursorrules | 201 ------------------------
 AGENTS.md    | 427 +++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md    |   7 +-
 3 files changed, 433 insertions(+), 202 deletions(-)
 delete mode 100644 .cursorrules
 create mode 100644 AGENTS.md

diff --git a/.cursorrules b/.cursorrules
deleted file mode 100644
index 4a4641c4f4..0000000000
--- a/.cursorrules
+++ /dev/null
@@ -1,201 +0,0 @@
-Hermes-Agent is an agent harness for LLMs with an interactive CLI.
-
-## Development Environment
-
-**IMPORTANT**: Always use the virtual environment if it exists:
-```bash
-source venv/bin/activate  # Before running any Python commands
-```
-
-## Project Structure
-
-- `hermes` - CLI launcher script (run with `./hermes`)
-- `cli.py` - Interactive CLI with Rich UI, prompt_toolkit, animated spinners
-- `cli-config.yaml` - CLI configuration (model, terminal, toolsets, personalities)
-- `tools/` - Individual tool implementations (web, terminal, browser, vision, etc.)
-- `tools/__init__.py` - Exports all tools for importing
-- `model_tools.py` - Consolidates tool schemas and handlers for the agent
-- `toolsets.py` - Groups tools into logical toolsets (web, terminal, browser, etc.)
-- `toolset_distributions.py` - Probability-based tool selection for data generation
-- `run_agent.py` - Primary agent runner with AIAgent class and KawaiiSpinner
-- `batch_runner.py` - Parallel batch processing with checkpointing
-- `tests/` - Test scripts
-
-## File Dependency Chain
-
-```
-tools/*.py → tools/__init__.py → model_tools.py → toolsets.py → toolset_distributions.py
-                                       ↑
-run_agent.py ──────────────────────────┘
-cli.py → run_agent.py (uses AIAgent with quiet_mode=True)
-batch_runner.py → run_agent.py + toolset_distributions.py
-```
-
-Always ensure consistency between tools, model_tools.py, and toolsets.py when changing any of them.
-
-## CLI Architecture (cli.py)
-
-The interactive CLI uses:
-- **Rich** - For the welcome banner and styled panels
-- **prompt_toolkit** - For fixed input area with history and `patch_stdout`
-- **KawaiiSpinner** (in run_agent.py) - Animated feedback during API calls and tool execution
-
-Key components:
-- `HermesCLI` class - Main CLI controller with commands and conversation loop
-- `load_cli_config()` - Loads `cli-config.yaml`, sets environment variables for terminal
-- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
-- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
-
-CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging and enable kawaii-style feedback instead.
-
-### Adding CLI Commands
-
-1. Add to `COMMANDS` dict with description
-2. Add handler in `process_command()` method
-3. For persistent settings, use `save_config_value()` to update `cli-config.yaml`
-
-## Adding a New Tool
-
-Follow this strict order to maintain consistency:
-
-1. Create `tools/your_tool.py` with:
-   - Handler function (sync or async) returning a JSON string via `json.dumps()`
-   - `check_*_requirements()` function to verify dependencies (e.g., API keys)
-   - Schema definition following OpenAI function-calling format
-
-2. Export in `tools/__init__.py`:
-   - Import the handler and check function
-   - Add to `__all__` list
-
-3. Register in `model_tools.py`:
-   - Create `get_*_tool_definitions()` function or add to existing
-   - Add routing in `handle_function_call()` dispatcher
-   - Update `get_all_tool_names()` with the tool name
-   - Update `get_toolset_for_tool()` mapping
-   - Update `get_available_toolsets()` and `check_toolset_requirements()`
-
-4. Add to toolset in `toolsets.py`:
-   - Add to existing toolset or create new one in TOOLSETS dict
-
-5. Optionally add to `toolset_distributions.py` for batch processing
-
-## Tool Implementation Pattern
-
-```python
-# tools/example_tool.py
-import json
-import os
-
-def check_example_requirements() -> bool:
-    """Check if required API keys/dependencies are available."""
-    return bool(os.getenv("EXAMPLE_API_KEY"))
-
-def example_tool(param: str, task_id: str = None) -> str:
-    """Execute the tool and return JSON string result."""
-    try:
-        result = {"success": True, "data": "..."}
-        return json.dumps(result, ensure_ascii=False)
-    except Exception as e:
-        return json.dumps({"error": str(e)}, ensure_ascii=False)
-```
-
-All tool handlers MUST return a JSON string. Never return raw dicts.
-
-## Stateful Tools
-
-Tools that maintain state (terminal, browser) require:
-- `task_id` parameter for session isolation between concurrent tasks
-- `cleanup_*()` function to release resources
-- Cleanup is called automatically in run_agent.py after conversation completes
-
-## Environment Variables
-
-API keys are loaded from `.env` file in repo root:
-- `OPENROUTER_API_KEY` - Main LLM API access (primary provider)
-- `FIRECRAWL_API_KEY` - Web search/extract tools
-- `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation
-- `FAL_KEY` - Image generation (FLUX model)
-- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools
-
-Terminal tool configuration (can also be set in `cli-config.yaml`):
-- `TERMINAL_ENV` - Backend: local, docker, singularity, modal, or ssh
-- `TERMINAL_CWD` - Working directory
-- `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` - For SSH backend
-
-## Agent Loop (run_agent.py)
-
-The AIAgent class handles:
-- Processing enabled toolsets to provide to the model
-- Piping prompts to the agent
-- Looping LLM calls when tools are invoked, until natural language response
-- Returning the final response
-
-Uses OpenAI-compatible API (primarily OpenRouter) with the OpenAI Python SDK.
-
-## Reasoning Model Support
-
-For models that support chain-of-thought reasoning:
-- Extract `reasoning_content` from API responses
-- Store in `assistant_msg["reasoning"]` for trajectory export
-- Pass back via `reasoning_content` field on subsequent turns
-
-## Trajectory Format
-
-Conversations are saved in ShareGPT format for training:
-```json
-{"from": "system", "value": "System prompt with <tools>...</tools>"}
-{"from": "human", "value": "User message"}
-{"from": "gpt", "value": "<think>reasoning</think>\n<tool_call>{...}</tool_call>"}
-{"from": "tool", "value": "<tool_response>{...}</tool_response>"}
-{"from": "gpt", "value": "Final response"}
-```
-
-Tool calls use `<tool_call>` XML tags, responses use `<tool_response>` tags, reasoning uses `<think>` tags.
-
-## Batch Processing (batch_runner.py)
-
-For processing multiple prompts:
-- Parallel execution with multiprocessing
-- Content-based resume for fault tolerance (matches on prompt text, not indices)
-- Toolset distributions control probabilistic tool availability per prompt
-- Output: `data/<run_name>/trajectories.jsonl` (combined) + individual batch files
-
-## Logging
-
-Trajectories restructure tools as a system prompt for storage in a format suitable for later training use.
-
-## Skills System
-
-Skills are on-demand knowledge documents the agent can load. Located in `skills/` directory:
-
-```
-skills/
-├── mlops/                    # Category folder
-│   ├── axolotl/             # Skill folder
-│   │   ├── SKILL.md         # Main instructions (required)
-│   │   ├── references/      # Additional docs, API specs
-│   │   └── templates/       # Output formats, configs
-│   └── vllm/
-│       └── SKILL.md
-└── example-skill/
-    └── SKILL.md
-```
-
-**Progressive disclosure** (token-efficient):
-1. `skills_categories()` - List category names (~50 tokens)
-2. `skills_list(category)` - Name + description per skill (~3k tokens)
-3. `skill_view(name)` - Full content + tags + linked files
-
-SKILL.md files use YAML frontmatter:
-```yaml
----
-name: skill-name
-description: Brief description for listing
-tags: [tag1, tag2]
-related_skills: [other-skill]
-version: 1.0.0
----
-# Skill Content...
-```
-
-Tool files: `tools/skills_tool.py` → `model_tools.py` → `toolsets.py`
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000000..5385219d8b
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,427 @@
+# Hermes Agent - Development Guide
+
+Instructions for AI coding assistants (GitHub Copilot, Cursor, etc.) and human developers.
+
+Hermes-Agent is an AI agent harness with tool-calling capabilities, interactive CLI, messaging integrations, and scheduled tasks.
+
+## Development Environment
+
+**IMPORTANT**: Always use the virtual environment if it exists:
+```bash
+source venv/bin/activate  # Before running any Python commands
+```
+
+## Project Structure
+
+```
+hermes-agent/
+├── hermes_cli/           # Unified CLI commands
+│   ├── main.py           # Entry point, command dispatcher
+│   ├── setup.py          # Interactive setup wizard
+│   ├── config.py         # Config management & migration
+│   ├── status.py         # Status display
+│   ├── doctor.py         # Diagnostics
+│   ├── gateway.py        # Gateway management
+│   └── cron.py           # Cron job management
+├── tools/                # Tool implementations
+├── gateway/              # Messaging platform adapters
+├── cron/                 # Scheduler implementation
+├── skills/               # Knowledge documents
+├── cli.py                # Interactive CLI (Rich UI)
+├── run_agent.py          # Agent runner with AIAgent class
+├── model_tools.py        # Tool schemas and handlers
+├── toolsets.py           # Tool groupings
+├── toolset_distributions.py  # Probability-based tool selection
+└── batch_runner.py       # Parallel batch processing
+```
+
+**User Configuration** (stored in `~/.hermes/`):
+- `~/.hermes/config.yaml` - Settings (model, terminal, toolsets, etc.)
+- `~/.hermes/.env` - API keys and secrets
+
+## File Dependency Chain
+
+```
+tools/*.py → tools/__init__.py → model_tools.py → toolsets.py → toolset_distributions.py
+                                       ↑
+run_agent.py ──────────────────────────┘
+cli.py → run_agent.py (uses AIAgent with quiet_mode=True)
+batch_runner.py → run_agent.py + toolset_distributions.py
+```
+
+Always ensure consistency between tools, model_tools.py, and toolsets.py when changing any of them.
+
+---
+
+## AIAgent Class
+
+The main agent is implemented in `run_agent.py`:
+
+```python
+class AIAgent:
+    def __init__(
+        self,
+        model: str = "anthropic/claude-sonnet-4",
+        api_key: str = None,
+        base_url: str = "https://openrouter.ai/api/v1",
+        max_turns: int = 20,
+        enabled_toolsets: list = None,
+        disabled_toolsets: list = None,
+        verbose_logging: bool = False,
+    ):
+        # Initialize OpenAI client, load tools based on toolsets
+        ...
+    
+    def chat(self, user_message: str, task_id: str = None) -> str:
+        # Main entry point - runs the agent loop
+        ...
+```
+
+### Agent Loop
+
+The core loop in `_run_agent_loop()` works as follows (a step outline, then simplified pseudocode):
+
+```
+1. Add user message to conversation
+2. Call LLM with tools
+3. If LLM returns tool calls:
+   - Execute each tool
+   - Add tool results to conversation
+   - Go to step 2
+4. If LLM returns text response:
+   - Return response to user
+```
+
+```python
+while turns < max_turns:
+    response = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        tools=tool_schemas,
+    )
+    
+    if response.tool_calls:
+        for tool_call in response.tool_calls:
+            result = await execute_tool(tool_call)
+            messages.append(tool_result_message(result))
+        turns += 1
+    else:
+        return response.content
+```
+
+### Conversation Management
+
+Messages are stored as a list of dicts following OpenAI format:
+
+```python
+messages = [
+    {"role": "system", "content": "You are a helpful assistant..."},
+    {"role": "user", "content": "Search for Python tutorials"},
+    {"role": "assistant", "content": None, "tool_calls": [...]},
+    {"role": "tool", "tool_call_id": "...", "content": "..."},
+    {"role": "assistant", "content": "Here's what I found..."},
+]
+```
+
+### Reasoning Model Support
+
+For models that support chain-of-thought reasoning:
+- Extract `reasoning_content` from API responses
+- Store in `assistant_msg["reasoning"]` for trajectory export
+- Pass back via `reasoning_content` field on subsequent turns
+
+---
+
+## CLI Architecture (cli.py)
+
+The interactive CLI uses:
+- **Rich** - For the welcome banner and styled panels
+- **prompt_toolkit** - For fixed input area with history and `patch_stdout`
+- **KawaiiSpinner** (in run_agent.py) - Animated feedback during API calls and tool execution
+
+Key components:
+- `HermesCLI` class - Main CLI controller with commands and conversation loop
+- `load_cli_config()` - Loads config, sets environment variables for terminal
+- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
+- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
+
+The CLI uses `quiet_mode=True` when creating the `AIAgent` to suppress verbose logging.
+
+### Adding CLI Commands
+
+1. Add to `COMMANDS` dict with description
+2. Add handler in `process_command()` method
+3. For persistent settings, use `save_config_value()` to update config
+
+---
+
+## Hermes CLI Commands
+
+The unified `hermes` command provides all functionality:
+
+| Command | Description |
+|---------|-------------|
+| `hermes` | Interactive chat (default) |
+| `hermes chat -q "..."` | Single query mode |
+| `hermes setup` | Configure API keys and settings |
+| `hermes config` | View current configuration |
+| `hermes config edit` | Open config in editor |
+| `hermes config set KEY VAL` | Set a specific value |
+| `hermes config check` | Check for missing config |
+| `hermes config migrate` | Prompt for missing config interactively |
+| `hermes status` | Show configuration status |
+| `hermes doctor` | Diagnose issues |
+| `hermes update` | Update to latest (checks for new config) |
+| `hermes gateway` | Start messaging gateway |
+| `hermes cron list` | View scheduled jobs |
+| `hermes version` | Show version info |
+
+---
+
+## Configuration System
+
+Configuration files are stored in `~/.hermes/` for easy user access:
+- `~/.hermes/config.yaml` - All settings (model, terminal, compression, etc.)
+- `~/.hermes/.env` - API keys and secrets
+
+### Adding New Configuration Options
+
+When adding new configuration variables, you MUST follow this process:
+
+#### For config.yaml options:
+
+1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
+2. **CRITICAL**: Bump `_config_version` in `DEFAULT_CONFIG` when adding required fields
+3. The version bump triggers migration prompts for existing users on their next `hermes update` or `hermes setup`
+
+Example:
+```python
+DEFAULT_CONFIG = {
+    # ... existing config ...
+    
+    "new_feature": {
+        "enabled": True,
+        "option": "default_value",
+    },
+    
+    # BUMP THIS when adding required fields
+    "_config_version": 2,  # Was 1, now 2
+}
+```
+
+#### For .env variables (API keys/secrets):
+
+1. Add to `REQUIRED_ENV_VARS` or `OPTIONAL_ENV_VARS` in `hermes_cli/config.py`
+2. Include metadata for the migration system:
+
+```python
+OPTIONAL_ENV_VARS = {
+    # ... existing vars ...
+    "NEW_API_KEY": {
+        "description": "What this key is for",
+        "prompt": "Display name in prompts",
+        "url": "https://where-to-get-it.com/",
+        "tools": ["tools_it_enables"],  # What tools need this
+        "password": True,  # Mask input
+    },
+}
+```
+
+#### Update related files:
+
+- `hermes_cli/setup.py` - Add prompts in the setup wizard
+- `cli-config.yaml.example` - Add example with comments
+- Update README.md if user-facing
+
+### Config Version Migration
+
+The system uses `_config_version` to detect outdated configs:
+
+1. `check_for_missing_config()` compares user config to `DEFAULT_CONFIG`
+2. `migrate_config()` interactively prompts for missing values
+3. The migration runs automatically during `hermes update` and optionally during `hermes setup`
+
+---
+
+## Environment Variables
+
+API keys are loaded from `~/.hermes/.env`:
+- `OPENROUTER_API_KEY` - Main LLM API access (primary provider)
+- `FIRECRAWL_API_KEY` - Web search/extract tools
+- `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation
+- `FAL_KEY` - Image generation (FLUX model)
+- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools
+
+Terminal tool configuration (in `~/.hermes/config.yaml`):
+- `terminal.backend` - Backend: local, docker, singularity, modal, or ssh
+- `terminal.cwd` - Working directory ("." = current directory)
+- `terminal.docker_image` - Image for Docker backend
+- `terminal.singularity_image` - Image for Singularity backend
+- `terminal.modal_image` - Image for Modal backend
+- SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in `~/.hermes/.env`
+
+---
+
+## Adding New Tools
+
+Follow this strict order to maintain consistency:
+
+1. Create `tools/your_tool.py` with:
+   - Handler function (sync or async) returning a JSON string via `json.dumps()`
+   - `check_*_requirements()` function to verify dependencies (e.g., API keys)
+   - Schema definition following OpenAI function-calling format
+
+2. Export in `tools/__init__.py`:
+   - Import the handler and check function
+   - Add to `__all__` list
+
+3. Register in `model_tools.py`:
+   - Add to `TOOLSET_REQUIREMENTS` if it needs API keys
+   - Create `get_*_tool_definitions()` function or add to existing
+   - Add routing in `handle_function_call()` dispatcher
+   - Update `get_all_tool_names()` with the tool name
+   - Update `get_toolset_for_tool()` mapping
+   - Update `get_available_toolsets()` and `check_toolset_requirements()`
+
+4. Add to toolset in `toolsets.py`:
+   - Add to existing toolset or create new one in TOOLSETS dict
+
+5. If the tool requires an API key:
+   - Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py`
+   - The tool will be auto-disabled if the key is missing
+
+6. Optionally add to `toolset_distributions.py` for batch processing
+
+### Tool Implementation Pattern
+
+```python
+# tools/example_tool.py
+import json
+import os
+
+def check_example_requirements() -> bool:
+    """Check if required API keys/dependencies are available."""
+    return bool(os.getenv("EXAMPLE_API_KEY"))
+
+def example_tool(param: str, task_id: str = None) -> str:
+    """Execute the tool and return JSON string result."""
+    try:
+        result = {"success": True, "data": "..."}
+        return json.dumps(result, ensure_ascii=False)
+    except Exception as e:
+        return json.dumps({"error": str(e)}, ensure_ascii=False)
+```
+
+All tool handlers MUST return a JSON string. Never return raw dicts.
+
+### Dynamic Tool Availability
+
+Tools are automatically disabled when their API keys are missing:
+
+```python
+# In model_tools.py
+TOOLSET_REQUIREMENTS = {
+    "web": {"env_vars": ["FIRECRAWL_API_KEY"]},
+    "browser": {"env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"]},
+    "creative": {"env_vars": ["FAL_KEY"]},
+}
+```
+
+The `check_tool_availability()` function determines which tools to include.
+
+### Stateful Tools
+
+Tools that maintain state (terminal, browser) require:
+- `task_id` parameter for session isolation between concurrent tasks
+- `cleanup_*()` function to release resources
+- Cleanup is called automatically in `run_agent.py` after the conversation completes
+
+---
+
+## Trajectory Format
+
+Conversations are saved in ShareGPT format for training:
+```json
+{"from": "system", "value": "System prompt with <tools>...</tools>"}
+{"from": "human", "value": "User message"}
+{"from": "gpt", "value": "<think>reasoning</think>\n<tool_call>{...}</tool_call>"}
+{"from": "tool", "value": "<tool_response>{...}</tool_response>"}
+{"from": "gpt", "value": "Final response"}
+```
+
+Tool calls use `<tool_call>` XML tags, responses use `<tool_response>` tags, reasoning uses `<think>` tags.
+
+### Trajectory Export
+
+```python
+agent = AIAgent(save_trajectories=True)
+agent.chat("Do something")
+# Saves to trajectories/*.jsonl in ShareGPT format
+```
+
+---
+
+## Batch Processing (batch_runner.py)
+
+For processing multiple prompts:
+- Parallel execution with multiprocessing
+- Content-based resume for fault tolerance (matches on prompt text, not indices)
+- Toolset distributions control probabilistic tool availability per prompt
+- Output: `data/<run_name>/trajectories.jsonl` (combined) + individual batch files
+
+```bash
+python batch_runner.py \
+    --dataset_file=prompts.jsonl \
+    --batch_size=20 \
+    --num_workers=4 \
+    --run_name=my_run
+```
+
+---
+
+## Skills System
+
+Skills are on-demand knowledge documents the agent can load. Located in `skills/` directory:
+
+```
+skills/
+├── mlops/                    # Category folder
+│   ├── axolotl/             # Skill folder
+│   │   ├── SKILL.md         # Main instructions (required)
+│   │   ├── references/      # Additional docs, API specs
+│   │   └── templates/       # Output formats, configs
+│   └── vllm/
+│       └── SKILL.md
+└── example-skill/
+    └── SKILL.md
+```
+
+**Progressive disclosure** (token-efficient):
+1. `skills_categories()` - List category names (~50 tokens)
+2. `skills_list(category)` - Name + description per skill (~3k tokens)
+3. `skill_view(name)` - Full content + tags + linked files
+
+SKILL.md files use YAML frontmatter:
+```yaml
+---
+name: skill-name
+description: Brief description for listing
+tags: [tag1, tag2]
+related_skills: [other-skill]
+version: 1.0.0
+---
+# Skill Content...
+```
+
+Tool files: `tools/skills_tool.py` → `model_tools.py` → `toolsets.py`
+
+---
+
+## Testing Changes
+
+After making changes:
+
+1. Run `hermes doctor` to check setup
+2. Run `hermes config check` to verify config
+3. Test with `hermes chat -q "test message"`
+4. For new config options, test a fresh install: `rm -rf ~/.hermes && hermes setup` (back up `~/.hermes/` first — this deletes your existing config and API keys)
diff --git a/README.md b/README.md
index aa46830319..6a36cc7669 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,8 @@ All your settings are stored in `~/.hermes/` for easy access:
 hermes config              # View current configuration
 hermes config edit         # Open config.yaml in your editor
 hermes config set KEY VAL  # Set a specific value
+hermes config check        # Check for missing options (after updates)
+hermes config migrate      # Interactively add missing options
 
 # Examples:
 hermes config set model anthropic/claude-opus-4
@@ -83,9 +85,11 @@ hermes                    # Interactive chat (default)
 hermes chat -q "Hello"    # Single query mode
 hermes setup              # Configure API keys and settings
 hermes config             # View/edit configuration
+hermes config check       # Check for missing config (useful after updates)
+hermes config migrate     # Interactively add missing options
 hermes status             # Show configuration status
 hermes doctor             # Diagnose issues
-hermes update             # Update to latest version
+hermes update             # Update to latest version (prompts for new config)
 hermes gateway            # Start messaging gateway
 hermes cron list          # View scheduled jobs
 hermes version            # Show version info
@@ -471,6 +475,7 @@ Common issues:
 - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
 - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
 - **Gateway won't start**: Check `hermes gateway status` and logs
+- **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options
 
 ---
 

From c9011fc7e192a3006843fed56e303a68c7ee6c71 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 22:18:18 -0800
Subject: [PATCH 16/48] Add uninstall command to CLI and update documentation

- Introduced a new `uninstall` command in the CLI for the Hermes Agent, allowing users to remove the agent while optionally retaining configuration files for future reinstallation.
- Updated AGENTS.md and README.md to include the new uninstall functionality, enhancing user guidance on available commands and their purposes.
- Improved command-line interface with detailed help options for the uninstall process, including flags for full removal and confirmation prompts.
---
 AGENTS.md               |   2 +
 README.md               |   1 +
 hermes_cli/main.py      |  28 ++++
 hermes_cli/uninstall.py | 341 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 372 insertions(+)
 create mode 100644 hermes_cli/uninstall.py

diff --git a/AGENTS.md b/AGENTS.md
index 5385219d8b..fd59d3b20e 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -22,6 +22,7 @@ hermes-agent/
 │   ├── status.py         # Status display
 │   ├── doctor.py         # Diagnostics
 │   ├── gateway.py        # Gateway management
+│   ├── uninstall.py      # Uninstaller
 │   └── cron.py           # Cron job management
 ├── tools/                # Tool implementations
 ├── gateway/              # Messaging platform adapters
@@ -172,6 +173,7 @@ The unified `hermes` command provides all functionality:
 | `hermes status` | Show configuration status |
 | `hermes doctor` | Diagnose issues |
 | `hermes update` | Update to latest (checks for new config) |
+| `hermes uninstall` | Uninstall (can keep configs for reinstall) |
 | `hermes gateway` | Start messaging gateway |
 | `hermes cron list` | View scheduled jobs |
 | `hermes version` | Show version info |
diff --git a/README.md b/README.md
index 6a36cc7669..ab2b3bcfe0 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,7 @@ hermes config migrate     # Interactively add missing options
 hermes status             # Show configuration status
 hermes doctor             # Diagnose issues
 hermes update             # Update to latest version (prompts for new config)
+hermes uninstall          # Uninstall (can keep configs for later reinstall)
 hermes gateway            # Start messaging gateway
 hermes cron list          # View scheduled jobs
 hermes version            # Show version info
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index c51ab8d613..a3100279a0 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -18,6 +18,8 @@ Usage:
     hermes cron daemon         # Run cron daemon
     hermes doctor              # Check configuration and dependencies
     hermes version             # Show version
+    hermes update              # Update to latest version
+    hermes uninstall           # Uninstall Hermes Agent
 """
 
 import argparse
@@ -108,6 +110,12 @@ def cmd_version(args):
         print("OpenAI SDK: Not installed")
 
 
+def cmd_uninstall(args):
+    """Handle `hermes uninstall` by delegating to hermes_cli.uninstall.run_uninstall."""
+    from hermes_cli.uninstall import run_uninstall  # imported only when this command runs
+    run_uninstall(args)
+
+
 def cmd_update(args):
     """Update Hermes Agent to the latest version."""
     import subprocess
@@ -448,6 +456,26 @@ For more help on a command:
     )
     update_parser.set_defaults(func=cmd_update)
     
+    # =========================================================================
+    # uninstall command
+    # =========================================================================
+    uninstall_parser = subparsers.add_parser(
+        "uninstall",
+        help="Uninstall Hermes Agent",
+        description="Remove Hermes Agent from your system. Can keep configs/data for reinstall."
+    )
+    uninstall_parser.add_argument(
+        "--full",
+        action="store_true",
+        help="Full uninstall - remove everything including configs and data"
+    )
+    uninstall_parser.add_argument(
+        "--yes", "-y",
+        action="store_true",
+        help="Skip confirmation prompts"
+    )
+    uninstall_parser.set_defaults(func=cmd_uninstall)
+    
     # =========================================================================
     # Parse and execute
     # =========================================================================
diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py
new file mode 100644
index 0000000000..a38c9303c8
--- /dev/null
+++ b/hermes_cli/uninstall.py
@@ -0,0 +1,341 @@
+"""
+Hermes Agent Uninstaller.
+
+Provides options for:
+- Full uninstall: Remove everything including configs and data
+- Keep data: Remove code but keep ~/.hermes/ (configs, sessions, logs)
+"""
+
+import os
+import sys
+import shutil
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+# ANSI escape sequences used to style terminal output (RESET ends the styling)
+class Colors:
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+    DIM = "\033[2m"
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    BLUE = "\033[34m"
+    MAGENTA = "\033[35m"
+    CYAN = "\033[36m"
+
+def color(text: str, *codes) -> str:
+    """Wrap text in the given ANSI codes followed by a reset; plain text when stdout is not a TTY."""
+    if sys.stdout.isatty():
+        return "".join(codes) + text + Colors.RESET
+    return text
+
+def log_info(msg: str) -> None:
+    print(color('→', Colors.CYAN), msg)  # cyan arrow, then the message
+
+def log_success(msg: str) -> None:
+    print(color('✓', Colors.GREEN), msg)  # green check mark, then the message
+
+def log_warn(msg: str) -> None:
+    print(color('⚠', Colors.YELLOW), msg)  # yellow warning sign, then the message
+
+def log_error(msg: str) -> None:
+    print(color('✗', Colors.RED), msg)  # red cross, then the message
+
+
+def get_project_root() -> Path:
+    """Return the resolved directory two levels above this file (the parent of the hermes_cli package)."""
+    return Path(__file__).parent.parent.resolve()
+
+
+def get_hermes_home() -> Path:
+    """Return the Hermes data directory: $HERMES_HOME when set and non-empty, otherwise ~/.hermes."""
+    return Path(os.getenv("HERMES_HOME") or Path.home() / ".hermes")  # "" would mean the current directory
+
+
+def find_shell_configs() -> list:
+    """Return the bash/zsh startup files that exist in the user's home directory.
+    
+    Candidates are checked in a fixed order and the result keeps that order.
+    """
+    home = Path.home()
+    startup_files = (
+        ".bashrc",
+        ".bash_profile",
+        ".profile",
+        ".zshrc",
+        ".zprofile",
+    )
+    
+    # Keep only the candidates that are present on disk
+    candidates = (home / name for name in startup_files)
+    
+    return [path for path in candidates if path.exists()]
+
+
+def remove_path_from_shell_configs():
+    """Remove Hermes PATH entries from shell configs; return the files changed.
+    
+    Drops comment-only "# Hermes Agent" / "# hermes-agent" markers and lines
+    that mention hermes with a PATH assignment. A file is rewritten only when
+    a line was actually removed, so other configs are never reformatted.
+    """
+    removed_from = []
+    
+    for config_path in find_shell_configs():
+        try:
+            kept_lines = []
+            dropped = False
+            after_marker = False
+            
+            for line in config_path.read_text().split('\n'):
+                lowered = line.lower()
+                
+                # Installer marker: must be a comment-only line, so user code
+                # with a trailing "# Hermes Agent" remark is preserved.
+                is_marker = '# Hermes Agent' in line or '# hermes-agent' in line
+                if is_marker and line.lstrip().startswith('#'):
+                    after_marker = dropped = True
+                    continue
+                
+                # Directly after a marker a bare "PATH" mention is enough;
+                # everywhere else a PATH assignment ("path=") is required.
+                path_hit = 'path=' in lowered or (after_marker and 'PATH' in line)
+                after_marker = False
+                if path_hit and 'hermes' in lowered:
+                    dropped = True
+                    continue
+                kept_lines.append(line)
+            
+            if dropped:
+                new_content = '\n'.join(kept_lines)
+                while '\n\n\n' in new_content:
+                    new_content = new_content.replace('\n\n\n', '\n\n')
+                config_path.write_text(new_content)
+                removed_from.append(config_path)
+        except Exception as e:
+            log_warn(f"Could not update {config_path}: {e}")
+    
+    return removed_from
+
+
+def remove_wrapper_script():
+    """Delete known `hermes` launcher files that reference Hermes; return the removed paths."""
+    candidates = (
+        Path.home() / ".local" / "bin" / "hermes",
+        Path("/usr/local/bin/hermes"),
+    )
+    deleted = []
+    for launcher in candidates:
+        if not launcher.exists():
+            continue
+        try:
+            # Only delete files that look like ours (mention hermes_cli or hermes-agent)
+            text = launcher.read_text()
+            if any(tag in text for tag in ('hermes_cli', 'hermes-agent')):
+                launcher.unlink()
+                deleted.append(launcher)
+        except Exception as e:
+            log_warn(f"Could not remove {launcher}: {e}")
+    
+    return deleted
+
+
+def uninstall_gateway_service():
+    """Stop, disable and delete the user-level hermes-gateway systemd unit.
+    
+    Returns True after the unit file has been removed; False on non-Linux
+    systems, when no unit file exists, or when any step raises.
+    """
+    import platform
+    
+    if platform.system() != "Linux":
+        return False
+    
+    unit_file = Path.home() / ".config" / "systemd" / "user" / "hermes-gateway.service"
+    if not unit_file.exists():
+        return False
+    
+    def systemctl_user(*action):
+        # Best effort: output is captured and a non-zero exit status is
+        # ignored (check=False), exactly like each direct call would be.
+        subprocess.run(
+            ["systemctl", "--user", *action],
+            capture_output=True,
+            check=False
+        )
+    
+    try:
+        # Stop the running service
+        systemctl_user("stop", "hermes-gateway")
+        
+        # Keep it from being started again
+        systemctl_user("disable", "hermes-gateway")
+        
+        # Delete the unit file itself
+        unit_file.unlink()
+        
+        # Make systemd pick up the removal
+        systemctl_user("daemon-reload")
+        
+        return True
+        
+    except Exception as e:
+        log_warn(f"Could not fully remove gateway service: {e}")
+        return False
+
+
+def run_uninstall(args):
+    """
+    Run the uninstall process.
+    
+    Modes:
+    - Full uninstall: removes code + ~/.hermes/ (configs, data, logs)
+    - Keep data: removes code but keeps ~/.hermes/ for future reinstall
+    
+    ``args.full`` selects the full uninstall without showing the menu and
+    ``args.yes`` skips every prompt (keep-data mode unless ``--full`` is set).
+    """
+    project_root = get_project_root()
+    hermes_home = get_hermes_home()
+    full_flag = bool(getattr(args, "full", False))
+    assume_yes = bool(getattr(args, "yes", False))
+    
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA, Colors.BOLD))
+    print(color("│            🦋 Hermes Agent Uninstaller                  │", Colors.MAGENTA, Colors.BOLD))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.MAGENTA, Colors.BOLD))
+    print()
+    
+    # Show what will be affected
+    print(color("Current Installation:", Colors.CYAN, Colors.BOLD))
+    print(f"  Code:    {project_root}")
+    print(f"  Config:  {hermes_home / 'config.yaml'}")
+    print(f"  Secrets: {hermes_home / '.env'}")
+    print(f"  Data:    {hermes_home / 'cron/'}, {hermes_home / 'sessions/'}, {hermes_home / 'logs/'}")
+    print()
+    
+    if full_flag or assume_yes:
+        full_uninstall = full_flag  # mode fixed on the command line; menu skipped
+    else:
+        print(color("Uninstall Options:", Colors.YELLOW, Colors.BOLD))
+        print()
+        print("  1) " + color("Keep data", Colors.GREEN) + " - Remove code only, keep configs/sessions/logs")
+        print("     (Recommended - you can reinstall later with your settings intact)")
+        print()
+        print("  2) " + color("Full uninstall", Colors.RED) + " - Remove everything including all data")
+        print("     (Warning: This deletes all configs, sessions, and logs permanently)")
+        print()
+        print("  3) " + color("Cancel", Colors.CYAN) + " - Don't uninstall")
+        print()
+        
+        try:
+            choice = input(color("Select option [1/2/3]: ", Colors.BOLD)).strip()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            print("Cancelled.")
+            return
+        
+        if choice == "3" or choice.lower() in ("c", "cancel", "q", "quit", "n", "no"):
+            print()
+            print("Uninstall cancelled.")
+            return
+        
+        full_uninstall = (choice == "2")
+    
+    # Final confirmation
+    print()
+    if full_uninstall:
+        print(color("⚠️  WARNING: This will permanently delete ALL Hermes data!", Colors.RED, Colors.BOLD))
+        print(color("   Including: configs, API keys, sessions, scheduled jobs, logs", Colors.RED))
+    else:
+        print("This will remove the Hermes code but keep your configuration and data.")
+    
+    if not assume_yes:
+        print()
+        try:
+            confirm = input(f"Type '{color('yes', Colors.YELLOW)}' to confirm: ").strip().lower()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            print("Cancelled.")
+            return
+        
+        if confirm != "yes":
+            print()
+            print("Uninstall cancelled.")
+            return
+    
+    print()
+    print(color("Uninstalling...", Colors.CYAN, Colors.BOLD))
+    print()
+    
+    # 1. Stop and uninstall gateway service
+    log_info("Checking for gateway service...")
+    if uninstall_gateway_service():
+        log_success("Gateway service stopped and removed")
+    else:
+        log_info("No gateway service found")
+    
+    # 2. Remove PATH entries from shell configs
+    log_info("Removing PATH entries from shell configs...")
+    removed_configs = remove_path_from_shell_configs()
+    if removed_configs:
+        for config in removed_configs:
+            log_success(f"Updated {config}")
+    else:
+        log_info("No PATH entries found to remove")
+    
+    # 3. Remove wrapper script
+    log_info("Removing hermes command...")
+    removed_wrappers = remove_wrapper_script()
+    if removed_wrappers:
+        for wrapper in removed_wrappers:
+            log_success(f"Removed {wrapper}")
+    else:
+        log_info("No wrapper script found")
+    
+    # 4. Remove installation directory (code)
+    log_info("Removing installation directory...")
+    try:
+        # Same removal whether the checkout lives inside the Hermes home or elsewhere
+        if project_root.exists():
+            shutil.rmtree(project_root)
+            log_success(f"Removed {project_root}")
+    except Exception as e:
+        log_warn(f"Could not fully remove {project_root}: {e}")
+        log_info("You may need to manually remove it")
+    
+    # 5. Optionally remove ~/.hermes/ data directory
+    if full_uninstall:
+        log_info("Removing configuration and data...")
+        try:
+            if hermes_home.exists():
+                shutil.rmtree(hermes_home)
+                log_success(f"Removed {hermes_home}")
+        except Exception as e:
+            log_warn(f"Could not fully remove {hermes_home}: {e}")
+            log_info("You may need to manually remove it")
+    else:
+        log_info(f"Keeping configuration and data in {hermes_home}")
+    
+    # Done
+    print()
+    print(color("┌─────────────────────────────────────────────────────────┐", Colors.GREEN, Colors.BOLD))
+    print(color("│              ✓ Uninstall Complete!                      │", Colors.GREEN, Colors.BOLD))
+    print(color("└─────────────────────────────────────────────────────────┘", Colors.GREEN, Colors.BOLD))
+    print()
+    
+    if not full_uninstall:
+        print(color("Your configuration and data have been preserved:", Colors.CYAN))
+        print(f"  {hermes_home}/")
+        print()
+        print("To reinstall later with your existing settings:")
+        print(color("  curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash", Colors.DIM))
+        print()
+    
+    print(color("Reload your shell to complete the process:", Colors.YELLOW))
+    print("  source ~/.bashrc  # or ~/.zshrc")
+    print()
+    print("Thank you for using Hermes Agent! 🦋")
+    print()

From be91af7551f657f5856e93bdf11dcf9f908c806e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Mon, 2 Feb 2026 23:08:27 -0800
Subject: [PATCH 17/48] Refactor TODO list and remove completed items

Removed high-priority immediate fixes section and reorganized the TODO list. Updated various sections to reflect new priorities and ideas.
---
 TODO.md | 324 ++------------------------------------------------------
 1 file changed, 7 insertions(+), 317 deletions(-)

diff --git a/TODO.md b/TODO.md
index e25eed631e..3f42923acf 100644
--- a/TODO.md
+++ b/TODO.md
@@ -4,101 +4,6 @@
 
 ---
 
-## 🚨 HIGH PRIORITY - Immediate Fixes
-
-These items need to be addressed ASAP:
-
-### 1. SUDO Breaking Terminal Tool 🔐 ✅ COMPLETE
-- [x] **Problem:** SUDO commands break the terminal tool execution (hangs indefinitely)
-- [x] **Fix:** Created custom environment wrappers in `tools/terminal_tool.py`
-  - `stdin=subprocess.DEVNULL` prevents hanging on interactive prompts
-  - Sudo fails gracefully with clear error if no password configured
-  - Same UX as Claude Code - agent sees error, tells user to run it themselves
-- [x] **All 5 environments now have consistent behavior:**
-  - `_LocalEnvironment` - local execution
-  - `_DockerEnvironment` - Docker containers
-  - `_SingularityEnvironment` - Singularity/Apptainer containers
-  - `_ModalEnvironment` - Modal cloud sandboxes
-  - `_SSHEnvironment` - remote SSH execution
-- [x] **Optional sudo support via `SUDO_PASSWORD` env var:**
-  - Shared `_transform_sudo_command()` helper used by all environments
-  - If set, auto-transforms `sudo cmd` → pipes password via `sudo -S`
-  - Documented in `.env.example`, `cli-config.yaml`, and README
-  - Works for chained commands: `cmd1 && sudo cmd2`
-- [x] **Interactive sudo prompt in CLI mode:**
-  - When sudo detected and no password configured, prompts user
-  - 45-second timeout (auto-skips if no input)
-  - Hidden password input via `getpass` (password not visible)
-  - Password cached for session (don't ask repeatedly)
-  - Spinner pauses during prompt for clean UX
-  - Uses `HERMES_INTERACTIVE` env var to detect CLI mode
-
-### 2. Fix `browser_get_images` Tool 🖼️ ✅ VERIFIED WORKING
-- [x] **Tested:** Tool works correctly on multiple sites
-- [x] **Results:** Successfully extracts image URLs, alt text, dimensions
-- [x] **Note:** Some sites (Pixabay, etc.) have Cloudflare bot protection that blocks headless browsers - this is expected behavior, not a bug
-
-### 3. Better Action Logging for Debugging 📝 ✅ COMPLETE
-- [x] **Problem:** Need better logging of agent actions for debugging
-- [x] **Implementation:**
-  - Save full session trajectories to `logs/` directory as JSON
-  - Each session gets a unique file: `session_YYYYMMDD_HHMMSS_UUID.json`
-  - Logs all messages, tool calls with inputs/outputs, timestamps
-  - Structured JSON format for easy parsing and replay
-  - Automatic on CLI runs (configurable)
-
-### 4. Automatic Context Compression 🗜️ ✅ COMPLETE
-- [x] **Problem:** Long conversations exceed model context limits, causing errors
-- [x] **Solution:** Auto-compress middle turns when approaching limit
-- [x] **Implementation:**
-  - Fetches model context lengths from OpenRouter `/api/v1/models` API (cached 1hr)
-  - Tracks actual token usage from API responses (`usage.prompt_tokens`)
-  - Triggers at 85% of model's context limit (configurable)
-  - Protects first 3 turns (system, initial request, first response)
-  - Protects last 4 turns (recent context most relevant)
-  - Summarizes middle turns using fast model (Gemini Flash)
-  - Inserts summary as user message, conversation continues seamlessly
-  - If context error occurs, attempts compression before failing
-- [x] **Configuration (cli-config.yaml / env vars):**
-  - `CONTEXT_COMPRESSION_ENABLED` (default: true)
-  - `CONTEXT_COMPRESSION_THRESHOLD` (default: 0.85 = 85%)
-  - `CONTEXT_COMPRESSION_MODEL` (default: google/gemini-2.0-flash-001)
-
-### 5. Stream Thinking Summaries in Real-Time 💭 ⏸️ DEFERRED
-- [ ] **Problem:** Thinking/reasoning summaries not shown while streaming
-- [ ] **Complexity:** This is a significant refactor - leaving for later
-
-**OpenRouter Streaming Info:**
-- Uses `stream=True` with OpenAI SDK
-- Reasoning comes in `choices[].delta.reasoning_details` chunks
-- Types: `reasoning.summary`, `reasoning.text`, `reasoning.encrypted`
-- Tool call arguments stream as partial JSON (need accumulation)
-- Items paradigm: same ID emitted multiple times with updated content
-
-**Key Challenges:**
-- Tool call JSON accumulation (partial `{"query": "wea` → `{"query": "weather"}`)
-- Multiple concurrent outputs (thinking + tool calls + text simultaneously)
-- State management for partial responses
-- Error handling if connection drops mid-stream
-- Deciding when tool calls are "complete" enough to execute
-
-**UX Questions to Resolve:**
-- Show raw thinking text or summarized?
-- Live expanding text vs. spinner replacement?
-- Markdown rendering while streaming?
-- How to handle thinking + tool call display simultaneously?
-
-**Implementation Options:**
-- New `run_conversation_streaming()` method (keep non-streaming as fallback)
-- Wrapper that handles streaming internally
-- Big refactor of existing `run_conversation()`
-
-**References:**
-- https://openrouter.ai/docs/api/reference/streaming
-- https://openrouter.ai/docs/guides/best-practices/reasoning-tokens#streaming-response
-
----
-
 ## 1. Subagent Architecture (Context Isolation) 🎯
 
 **Problem:** Long-running tools (terminal commands, browser automation, complex file operations) consume massive context. A single `ls -la` can add hundreds of lines. Browser snapshots, debugging sessions, and iterative terminal work quickly bloat the main conversation, leaving less room for actual reasoning.
@@ -218,38 +123,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 3. Tool Composition & Learning 🔧
-
-**Problem:** Tools are atomic. Complex tasks require repeated manual orchestration of the same tool sequences.
-
-**Ideas:**
-- [ ] **Macro tools / Tool chains** - Define reusable tool sequences:
-  ```yaml
-  research_topic:
-    description: "Deep research on a topic"
-    steps:
-      - web_search: {query: "$topic"}
-      - web_extract: {urls: "$search_results.urls[:3]"}
-      - summarize: {content: "$extracted"}
-  ```
-  - Could be defined in skills or a new `macros/` directory
-  - Agent can invoke macro as single tool call
-  
-- [ ] **Tool failure patterns** - Learn from failures:
-  - Track: tool, input pattern, error type, what worked instead
-  - Before calling a tool, check: "Has this pattern failed before?"
-  - Persistent across sessions (stored in skills or separate DB)
-  
-- [ ] **Parallel tool execution** - When tools are independent, run concurrently:
-  - Detect independence (no data dependencies between calls)
-  - Use `asyncio.gather()` for parallel execution
-  - Already have async support in some tools, just need orchestration
-
-**Files to modify:** `model_tools.py`, `toolsets.py`, new `tool_macros.py`
-
----
-
-## 4. Dynamic Skills Expansion 📚
+## 3. Dynamic Skills Expansion 📚
 
 **Problem:** Skills system is elegant but static. Skills must be manually created and added.
 
@@ -278,7 +152,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 5. Interactive Clarifying Questions Tool ❓
+## 4. Interactive Clarifying Questions Tool ❓
 
 **Problem:** Agent sometimes makes assumptions or guesses when it should ask the user. Currently can only ask via text, which gets lost in long outputs.
 
@@ -314,7 +188,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 6. Collaborative Problem Solving 🤝
+## 5. Collaborative Problem Solving 🤝
 
 **Problem:** Interaction is command/response. Complex problems benefit from dialogue.
 
@@ -333,7 +207,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 7. Project-Local Context 💾
+## 6. Project-Local Context 💾
 
 **Problem:** Valuable context lost between sessions.
 
@@ -351,30 +225,7 @@ These items need to be addressed ASAP:
 
 **Files to modify:** New `project_context.py`, auto-load in `run_agent.py`
 
----
-
-## 8. Graceful Degradation & Robustness 🛡️
-
-**Problem:** When things go wrong, recovery is limited. Should fail gracefully.
-
-**Ideas:**
-- [ ] **Fallback chains** - When primary approach fails, have backups:
-  - `web_extract` fails → try `browser_navigate` → try `web_search` for cached version
-  - Define fallback order per tool type
-  
-- [ ] **Partial progress preservation** - Don't lose work on failure:
-  - Long task fails midway → save what we've got
-  - "I completed 3/5 steps before the error. Here's what I have..."
-  
-- [ ] **Self-healing** - Detect and recover from bad states:
-  - Browser stuck → close and retry
-  - Terminal hung → timeout and reset
-
-**Files to modify:** `model_tools.py`, tool implementations, new `fallback_manager.py`
-
----
-
-## 9. Tools & Skills Wishlist 🧰
+## 6. Tools & Skills Wishlist 🧰
 
 *Things that would need new tool implementations (can't do well with current tools):*
 
@@ -441,7 +292,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 10. Messaging Platform Integrations 💬 ✅ COMPLETE
+## 7. Messaging Platform Integrations 💬 ✅ COMPLETE
 
 **Problem:** Agent currently only works via `cli.py` which requires direct terminal access. Users may want to interact via messaging apps from their phone or other devices.
 
@@ -496,71 +347,7 @@ These items need to be addressed ASAP:
 
 ---
 
-## 11. Scheduled Tasks / Cron Jobs ⏰ ✅ COMPLETE
-
-**Problem:** Agent only runs on-demand. Some tasks benefit from scheduled execution (daily summaries, monitoring, reminders).
-
-**Solution Implemented:**
-
-- [x] **Cron-style scheduler** - Run agent turns on a schedule
-  - Jobs stored in `~/.hermes/cron/jobs.json`
-  - Each job: `{ id, name, prompt, schedule, repeat, enabled, next_run_at, ... }`
-  - Built-in scheduler daemon or system cron integration
-  
-- [x] **Schedule formats:**
-  - Duration: `30m`, `2h`, `1d` (one-shot delay)
-  - Interval: `every 30m`, `every 2h` (recurring)
-  - Cron expression: `0 9 * * *` (requires `croniter` package)
-  - ISO timestamp: `2026-02-03T14:00:00` (one-shot at specific time)
-
-- [x] **Repeat options:**
-  - `repeat=None` (or omit): One-shot schedules run once; intervals/cron run forever
-  - `repeat=1`: Run once then auto-delete
-  - `repeat=N`: Run exactly N times then auto-delete
-  
-- [x] **CLI interface:**
-  ```bash
-  # List scheduled jobs
-  /cron
-  /cron list
-  
-  # Add a one-shot job (runs once in 30 minutes)
-  /cron add 30m "Remind me to check the build status"
-  
-  # Add a recurring job (every 2 hours)
-  /cron add "every 2h" "Check server status at 192.168.1.100"
-  
-  # Add a cron expression (daily at 9am)
-  /cron add "0 9 * * *" "Generate morning briefing"
-  
-  # Remove a job
-  /cron remove <job_id>
-  ```
-
-- [x] **Agent self-scheduling tools** (hermes-cli toolset):
-  - `schedule_cronjob(prompt, schedule, name?, repeat?)` - Create a scheduled task
-  - `list_cronjobs()` - View all scheduled jobs
-  - `remove_cronjob(job_id)` - Cancel a job
-  - Tool descriptions emphasize: **cronjobs run in isolated sessions with NO context**
-
-- [x] **Daemon modes:**
-  ```bash
-  # Built-in daemon (checks every 60 seconds)
-  python cli.py --cron-daemon
-  
-  # Single tick for system cron integration
-  python cli.py --cron-tick-once
-  ```
-
-- [x] **Output storage:** `~/.hermes/cron/output/{job_id}/{timestamp}.md`
-
-**Files created:** `cron/__init__.py`, `cron/jobs.py`, `cron/scheduler.py`, `tools/cronjob_tools.py`
-
-**Toolset:** `hermes-cli` (default for CLI) includes cronjob tools; not in batch runner toolsets
-
----
-
-## 12. Text-to-Speech (TTS) 🔊
+## 8. Text-to-Speech (TTS) 🔊
 
 **Problem:** Agent can only respond with text. Some users prefer audio responses (accessibility, hands-free use, podcasts).
 
@@ -620,103 +407,6 @@ These items need to be addressed ASAP:
 
 **Files to create:** `tools/transcribe_tool.py`, integrate with messaging monitors
 
----
-
-## Priority Order (Suggested)
-
-1. **🎯 Subagent Architecture** - Critical for context management, enables everything else
-2. **Memory & Context Management** - Complements subagents for remaining context
-3. **Self-Reflection** - Improves reliability and reduces wasted tool calls  
-4. **Project-Local Context** - Practical win, keeps useful info across sessions
-5. **Messaging Integrations** - Unlocks mobile access, new interaction patterns
-6. **Scheduled Tasks / Cron Jobs** - Enables automation, reminders, monitoring
-7. **Tool Composition** - Quality of life, builds on other improvements
-8. **Dynamic Skills** - Force multiplier for repeated tasks
-9. **Interactive Clarifying Questions** - Better UX for ambiguous tasks
-10. **TTS / Audio Transcription** - Accessibility, hands-free use
-
----
-
-## Removed Items (Unrealistic)
-
-The following were removed because they're architecturally impossible:
-
-- ~~Proactive suggestions / Prefetching~~ - Agent only runs on user request, can't interject
-- ~~Clipboard integration~~ - No access to user's local system clipboard
-
-The following **moved to active TODO** (now possible with new architecture):
-
-- ~~Session save/restore~~ → See **Messaging Integrations** (session persistence)
-- ~~Voice/TTS playback~~ → See **TTS** (can generate audio files, send via messaging)
-- ~~Set reminders~~ → See **Scheduled Tasks / Cron Jobs**
-
-The following were removed because they're **already possible**:
-
-- ~~HTTP/API Client~~ → Use `curl` or Python `requests` in terminal
-- ~~Structured Data Manipulation~~ → Use `pandas` in terminal
-- ~~Git-Native Operations~~ → Use `git` CLI in terminal
-- ~~Symbolic Math~~ → Use `SymPy` in terminal
-- ~~Code Quality Tools~~ → Run linters (`eslint`, `black`, `mypy`) in terminal
-- ~~Testing Framework~~ → Run `pytest`, `jest`, etc. in terminal
-- ~~Translation~~ → LLM handles this fine, or use translation APIs
-
----
-
----
-
-## 🧪 Brainstorm Ideas (Not Yet Fleshed Out)
-
-*These are early-stage ideas that need more thinking before implementation. Captured here so they don't get lost.*
-
-### Remote/Distributed Execution 🌐
-
-**Concept:** Run agent on a powerful remote server while interacting from a thin client.
-
-**Why interesting:**
-- Run on beefy GPU server for local LLM inference
-- Agent has access to remote machine's resources (files, tools, internet)
-- User interacts via lightweight client (phone, low-power laptop)
-
-**Open questions:**
-- How does this differ from just SSH + running cli.py on remote?
-- Would need secure communication channel (WebSocket? gRPC?)
-- How to handle tool outputs that reference remote paths?
-- Credential management for remote execution
-- Latency considerations for interactive use
-
-**Possible architecture:**
-```
-┌─────────────┐         ┌─────────────────────────┐
-│ Thin Client │ ◄─────► │ Remote Hermes Server    │
-│ (phone/web) │  WS/API │ - Full agent + tools    │
-└─────────────┘         │ - GPU for local LLM     │
-                        │ - Access to server files│
-                        └─────────────────────────┘
-```
-
-**Related to:** Messaging integrations (could be the "server" that monitors receive from)
-
----
-
-### Multi-Agent Parallel Execution 🤖🤖
-
-**Concept:** Extension of Subagent Architecture (Section 1) - run multiple subagents in parallel.
-
-**Why interesting:**
-- Independent subtasks don't need to wait for each other
-- "Research X while setting up Y" - both run simultaneously
-- Faster completion for complex multi-part tasks
-
-**Open questions:**
-- How to detect which tasks are truly independent?
-- Resource management (API rate limits, concurrent connections)
-- How to merge results when parallel tasks have conflicts?
-- Cost implications of multiple parallel LLM calls
-
-*Note: Basic subagent delegation (Section 1) should be implemented first, parallel execution is an optimization on top.*
-
----
-
 ### Plugin/Extension System 🔌
 
 **Concept:** Allow users to add custom tools/skills without modifying core code.

From 76d929e177251e674e670a39e5dc7348be3f4545 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 23:35:18 -0800
Subject: [PATCH 18/48] Implement dangerous command approval system for
 terminal tool

- Added a safety mechanism to detect and approve potentially dangerous commands (e.g., `rm -rf`, `DROP TABLE`).
- Introduced an approval flow for local/SSH backends, prompting users for confirmation with options to allow once, for the session, or permanently.
- Updated configuration to include a `command_allowlist` for storing approved patterns.
- Improved the hint shown when a sudo command fails on messaging platforms.
- Updated relevant documentation in AGENTS.md and TODO.md to reflect these changes.
---
 AGENTS.md              |  29 +++++
 TODO.md                |  18 +++
 hermes_cli/config.py   |   3 +
 model_tools.py         |   8 +-
 tools/terminal_tool.py | 250 ++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 306 insertions(+), 2 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index fd59d3b20e..0e3eab5287 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -262,6 +262,35 @@ Terminal tool configuration (in `~/.hermes/config.yaml`):
 - `terminal.modal_image` - Image for Modal backend
 - SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env
 
+### Dangerous Command Approval
+
+The terminal tool includes safety checks for potentially destructive commands (e.g., `rm -rf`, `DROP TABLE`, `chmod 777`, etc.):
+
+**Behavior by Backend:**
+- **Docker/Singularity/Modal**: Commands run unrestricted (isolated containers)
+- **Local/SSH**: Dangerous commands trigger approval flow
+
+**Approval Flow (CLI):**
+```
+⚠️  Potentially dangerous command detected: recursive delete
+    rm -rf /tmp/test
+
+    [o]nce  |  [s]ession  |  [a]lways  |  [d]eny
+    Choice [o/s/a/D]: 
+```
+
+**Approval Flow (Messaging):**
+- Command is blocked with explanation
+- Agent explains and asks user to confirm
+- If user says "yes/approve/do it", agent retries with `force=True`
+
+**Configuration:**
+- `command_allowlist` in `~/.hermes/config.yaml` stores permanently allowed patterns
+- Add patterns via "always" approval or edit directly
+
+**Sudo Handling (Messaging):**
+- If sudo fails over messaging, output includes tip to add `SUDO_PASSWORD` to `~/.hermes/.env`
+
 ---
 
 ## Adding New Tools
diff --git a/TODO.md b/TODO.md
index 3f42923acf..dc116539b7 100644
--- a/TODO.md
+++ b/TODO.md
@@ -423,4 +423,22 @@
 
 ---
 
+## Recently Completed ✅
+
+### Dangerous Command Approval System
+**Implemented:** Dangerous command detection and approval for terminal tool.
+
+**Features:**
+- Pattern-based detection of dangerous commands (rm -rf, DROP TABLE, chmod 777, etc.)
+- CLI prompt with options: `[o]nce | [s]ession | [a]lways | [d]eny`
+- Session caching (approved patterns don't re-prompt)
+- Permanent allowlist in `~/.hermes/config.yaml`
+- Force flag for agent to bypass after user confirmation
+- Skip check for isolated backends (Docker, Singularity, Modal)
+- Helpful sudo failure messages for messaging platforms
+
+**Files:** `tools/terminal_tool.py`, `model_tools.py`, `hermes_cli/config.py`
+
+---
+
 *Last updated: $(date +%Y-%m-%d)* 🤖
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 6efcaa7f8b..65443d623a 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -99,6 +99,9 @@ DEFAULT_CONFIG = {
         "personality": "kawaii",
     },
     
+    # Permanently allowed dangerous command patterns (added via "always" approval)
+    "command_allowlist": [],
+    
     # Config schema version - bump this when adding new required fields
     "_config_version": 1,
 }
diff --git a/model_tools.py b/model_tools.py
index 138860195f..3bdcbf4f06 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -274,6 +274,11 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
                             "type": "integer",
                             "description": "Command timeout in seconds (optional)",
                             "minimum": 1
+                        },
+                        "force": {
+                            "type": "boolean",
+                            "description": "Skip dangerous command safety check. Only use after user explicitly confirms they want to run a blocked command.",
+                            "default": False
                         }
                     },
                     "required": ["command"]
@@ -776,8 +781,9 @@ def handle_terminal_function_call(function_name: str, function_args: Dict[str, A
         command = function_args.get("command")
         background = function_args.get("background", False)
         timeout = function_args.get("timeout")
+        force = function_args.get("force", False)  # Skip dangerous command check if user confirmed
 
-        return terminal_tool(command=command, background=background, timeout=timeout, task_id=task_id)
+        return terminal_tool(command=command, background=background, timeout=timeout, task_id=task_id, force=force)
 
     else:
         return json.dumps({"error": f"Unknown terminal function: {function_name}"}, ensure_ascii=False)
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index da2b762b15..81578e7ca9 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -207,6 +207,233 @@ def _check_disk_usage_warning():
 # Session-cached sudo password (persists until CLI exits)
 _cached_sudo_password: str = ""
 
+# =============================================================================
+# Dangerous Command Approval System
+# =============================================================================
+
+# Session-cached dangerous command approvals (pattern -> approved)
+_session_approved_patterns: set = set()
+
+# Dangerous command patterns (regex, description)
+DANGEROUS_PATTERNS = [
+    (r'\brm\s+(-[^\s]*\s+)*/', "delete in root path"),
+    (r'\brm\s+(-[^\s]*)?r', "recursive delete"),
+    (r'\bchmod\s+(-[^\s]*\s+)*777\b', "world-writable permissions"),
+    (r'\bchown\s+(-[^\s]*)?R\s+root', "recursive chown to root"),
+    (r'\bmkfs\b', "format filesystem"),
+    (r'\bdd\s+.*if=', "disk copy"),
+    (r'>\s*/dev/sd', "write to block device"),
+    (r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
+    (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
+    (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
+    (r'>\s*/etc/', "overwrite system config"),
+    (r'\bsystemctl\s+(stop|disable|mask)\b', "stop/disable system service"),
+    (r'\bkill\s+-9\s+-1\b', "kill all processes"),
+    (r'\bpkill\s+-9\b', "force kill processes"),
+    (r':()\s*{\s*:\s*\|\s*:&\s*}\s*;:', "fork bomb"),
+]
+
+
+def _load_permanent_allowlist() -> set:
+    """Load permanently allowed command patterns from config."""
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        patterns = config.get("command_allowlist", [])
+        return set(patterns) if patterns else set()
+    except Exception:
+        return set()
+
+
+def _save_permanent_allowlist(patterns: set):
+    """Save permanently allowed command patterns to config."""
+    try:
+        from hermes_cli.config import load_config, save_config
+        config = load_config()
+        config["command_allowlist"] = list(patterns)
+        save_config(config)
+    except Exception as e:
+        print(f"  ⚠️ Could not save allowlist: {e}")
+
+
+def _detect_dangerous_command(command: str) -> tuple:
+    """
+    Check if command matches any dangerous patterns.
+    
+    Returns:
+        (is_dangerous, pattern_key, description) or (False, None, None)
+    """
+    import re
+    command_lower = command.lower()
+    
+    for pattern, description in DANGEROUS_PATTERNS:
+        if re.search(pattern, command_lower, re.IGNORECASE):
+            # Use a simplified pattern key for caching (first word + key chars)
+            pattern_key = pattern.split(r'\b')[1] if r'\b' in pattern else pattern[:20]
+            return (True, pattern_key, description)
+    
+    return (False, None, None)
+
+
+def _is_command_approved(pattern_key: str) -> bool:
+    """Check if a pattern is approved (session or permanent)."""
+    if pattern_key in _session_approved_patterns:
+        return True
+    
+    permanent = _load_permanent_allowlist()
+    if pattern_key in permanent:
+        return True
+    
+    return False
+
+
+def _prompt_dangerous_approval(command: str, description: str, timeout_seconds: int = 60) -> str:
+    """
+    Prompt user to approve a dangerous command (CLI only).
+    
+    Returns: 'once', 'session', 'always', or 'deny'
+    """
+    import sys
+    import threading
+    
+    # Pause spinner if one is running
+    os.environ["HERMES_SPINNER_PAUSE"] = "1"
+    
+    try:
+        print()
+        print(f"  ⚠️  \033[33mPotentially dangerous command detected:\033[0m {description}")
+        print(f"      \033[2m{command[:100]}{'...' if len(command) > 100 else ''}\033[0m")
+        print()
+        print(f"      [\033[32mo\033[0m]nce  |  [\033[33ms\033[0m]ession  |  [\033[36ma\033[0m]lways  |  [\033[31md\033[0m]eny")
+        print()
+        sys.stdout.flush()
+        
+        result = {"choice": ""}
+        
+        def get_input():
+            try:
+                result["choice"] = input("      Choice [o/s/a/D]: ").strip().lower()
+            except:
+                result["choice"] = ""
+        
+        thread = threading.Thread(target=get_input, daemon=True)
+        thread.start()
+        thread.join(timeout=timeout_seconds)
+        
+        if thread.is_alive():
+            print("\n      ⏱ Timeout - denying command")
+            return "deny"
+        
+        choice = result["choice"]
+        
+        if choice in ('o', 'once'):
+            print("      ✓ Allowed once")
+            return "once"
+        elif choice in ('s', 'session'):
+            print("      ✓ Allowed for this session")
+            return "session"
+        elif choice in ('a', 'always'):
+            print("      ✓ Added to permanent allowlist")
+            return "always"
+        else:
+            print("      ✗ Denied")
+            return "deny"
+            
+    except (EOFError, KeyboardInterrupt):
+        print("\n      ✗ Cancelled")
+        return "deny"
+    finally:
+        if "HERMES_SPINNER_PAUSE" in os.environ:
+            del os.environ["HERMES_SPINNER_PAUSE"]
+        print()
+        sys.stdout.flush()
+
+
+def _check_dangerous_command(command: str, env_type: str) -> dict:
+    """
+    Check if command is dangerous and handle approval.
+    
+    Only applies to local/ssh backends in interactive contexts.
+    
+    Args:
+        command: The command to check
+        env_type: The terminal backend type
+        
+    Returns:
+        {"approved": True/False, "message": str or None}
+    """
+    # Skip check for isolated environments (containers are disposable)
+    if env_type in ("docker", "singularity", "modal"):
+        return {"approved": True, "message": None}
+    
+    # Detect dangerous command
+    is_dangerous, pattern_key, description = _detect_dangerous_command(command)
+    
+    if not is_dangerous:
+        return {"approved": True, "message": None}
+    
+    # Check if already approved
+    if _is_command_approved(pattern_key):
+        return {"approved": True, "message": None}
+    
+    # Check context - only prompt in interactive modes
+    is_cli = os.getenv("HERMES_INTERACTIVE")
+    is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
+    
+    if not is_cli and not is_gateway:
+        # Programmatic use - allow (user opted into local backend)
+        return {"approved": True, "message": None}
+    
+    if is_gateway:
+        # Messaging context - return informative denial, agent should ask user
+        return {
+            "approved": False,
+            "pattern_key": pattern_key,
+            "message": f"⚠️ This command was blocked because it's potentially dangerous ({description}). If you want me to run it anyway, please confirm by saying 'yes, run it' or 'approve'."
+        }
+    
+    # CLI context - prompt user
+    choice = _prompt_dangerous_approval(command, description)
+    
+    if choice == "deny":
+        return {"approved": False, "message": "Command denied by user"}
+    
+    # Handle approval
+    if choice == "session":
+        _session_approved_patterns.add(pattern_key)
+    elif choice == "always":
+        _session_approved_patterns.add(pattern_key)
+        permanent = _load_permanent_allowlist()
+        permanent.add(pattern_key)
+        _save_permanent_allowlist(permanent)
+    
+    return {"approved": True, "message": None}
+
+
+def _handle_sudo_failure(output: str, env_type: str) -> str:
+    """
+    Check for sudo failure and add helpful message for messaging contexts.
+    
+    Returns enhanced output if sudo failed in messaging context, else original.
+    """
+    is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
+    
+    if not is_gateway:
+        return output
+    
+    # Check for sudo failure indicators
+    sudo_failures = [
+        "sudo: a password is required",
+        "sudo: no tty present",
+        "sudo: a terminal is required",
+    ]
+    
+    for failure in sudo_failures:
+        if failure in output:
+            return output + "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
+    
+    return output
+
 
 def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
     """
@@ -1050,7 +1277,8 @@ def terminal_tool(
     command: str,
     background: bool = False,
     timeout: Optional[int] = None,
-    task_id: Optional[str] = None
+    task_id: Optional[str] = None,
+    force: bool = False
 ) -> str:
     """
     Execute a command using mini-swe-agent's execution environments.
@@ -1060,6 +1288,7 @@ def terminal_tool(
         background: Whether to run in background (default: False)
         timeout: Command timeout in seconds (default: from config)
         task_id: Unique identifier for environment isolation (optional)
+        force: If True, skip dangerous command check (use after user confirms)
 
     Returns:
         str: JSON string with output, exit_code, and error fields
@@ -1073,6 +1302,9 @@ def terminal_tool(
 
         # With custom timeout
         >>> result = terminal_tool(command="long_task.sh", timeout=300)
+        
+        # Force run after user confirmation
+        >>> result = terminal_tool(command="rm -rf /tmp/old", force=True)
     """
     global _active_environments, _last_activity
 
@@ -1149,6 +1381,19 @@ def terminal_tool(
             _last_activity[effective_task_id] = time.time()
             env = _active_environments[effective_task_id]
 
+        # Check for dangerous commands (only for local/ssh in interactive modes)
+        # Skip check if force=True (user has confirmed they want to run it)
+        if not force:
+            approval = _check_dangerous_command(command, env_type)
+            if not approval["approved"]:
+                # Command was blocked - return informative message
+                return json.dumps({
+                    "output": "",
+                    "exit_code": -1,
+                    "error": approval.get("message", "Command denied - potentially dangerous operation"),
+                    "status": "blocked"
+                }, ensure_ascii=False)
+
         # Prepare command for execution
         if background:
             # Run in background with nohup and redirect output
@@ -1205,6 +1450,9 @@ def terminal_tool(
             output = result.get("output", "")
             returncode = result.get("returncode", 0)
             
+            # Add helpful message for sudo failures in messaging context
+            output = _handle_sudo_failure(output, env_type)
+            
             # Truncate output if too long
             MAX_OUTPUT_CHARS = 50000
             if len(output) > MAX_OUTPUT_CHARS:

From 5d3398aa8a206c94a761ad9f2dd3f44fcbdc1f97 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 2 Feb 2026 23:46:41 -0800
Subject: [PATCH 19/48] Refactor terminal tool command approval process and
 enhance CLI feedback

- Updated the terminal tool's command approval flow to improve user interaction when executing potentially dangerous commands, replacing the previous confirmation method with a clear explanation and instructions for adding commands to the allowlist.
- Removed the `force` parameter from the model-facing tool schema (it remains an internal-only argument of `terminal_tool`), ensuring that dangerous command approvals are handled solely through user prompts.
- Enhanced the CLI to provide better feedback regarding tool availability, including improved messaging for enabled and disabled toolsets.
- Updated AGENTS.md to reflect changes in the command approval process and configuration instructions.
---
 AGENTS.md              |  4 +-
 cli.py                 | 89 ++++++++++++++++++++++++++++++++++++------
 model_tools.py         | 28 +++++++------
 tools/terminal_tool.py | 13 +++---
 4 files changed, 101 insertions(+), 33 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 0e3eab5287..c658ae6e1c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -281,8 +281,8 @@ The terminal tool includes safety checks for potentially destructive commands (e
 
 **Approval Flow (Messaging):**
 - Command is blocked with explanation
-- Agent explains and asks user to confirm
-- If user says "yes/approve/do it", agent retries with `force=True`
+- Agent explains the command was blocked for safety
+- User must add the pattern to their allowlist via `hermes config edit` or run the command directly on their machine
 
 **Configuration:**
 - `command_allowlist` in `~/.hermes/config.yaml` stores permanently allowed patterns
diff --git a/cli.py b/cli.py
index 301c45be75..15d1f8ed60 100755
--- a/cli.py
+++ b/cli.py
@@ -126,8 +126,20 @@ def load_cli_config() -> Dict[str, Any]:
         try:
             with open(config_path, "r") as f:
                 file_config = yaml.safe_load(f) or {}
-            # Deep merge with defaults
+            
+            # Handle model config - can be string (new format) or dict (old format)
+            if "model" in file_config:
+                if isinstance(file_config["model"], str):
+                    # New format: model is just a string, convert to dict structure
+                    defaults["model"]["default"] = file_config["model"]
+                elif isinstance(file_config["model"], dict):
+                    # Old format: model is a dict with default/base_url
+                    defaults["model"].update(file_config["model"])
+            
+            # Deep merge other keys with defaults
             for key in defaults:
+                if key == "model":
+                    continue  # Already handled above
                 if key in file_config:
                     if isinstance(defaults[key], dict) and isinstance(file_config[key], dict):
                         defaults[key].update(file_config[key])
@@ -306,9 +318,17 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic
         enabled_toolsets: List of enabled toolset names
         session_id: Unique session identifier for logging
     """
+    from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
+    
     tools = tools or []
     enabled_toolsets = enabled_toolsets or []
     
+    # Get unavailable tools info for coloring
+    _, unavailable_toolsets = check_tool_availability(quiet=True)
+    disabled_tools = set()
+    for item in unavailable_toolsets:
+        disabled_tools.update(item.get("tools", []))
+    
     # Build the side-by-side content using a table for precise control
     layout_table = Table.grid(padding=(0, 2))
     layout_table.add_column("left", justify="center")
@@ -334,8 +354,10 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic
     right_lines = []
     right_lines.append("[bold #FFBF00]Available Tools[/]")
     
-    # Group tools by toolset
+    # Group tools by toolset (include all possible tools, both enabled and disabled)
     toolsets_dict = {}
+    
+    # First, add all enabled tools
     for tool in tools:
         tool_name = tool["function"]["name"]
         toolset = get_toolset_for_tool(tool_name) or "other"
@@ -343,6 +365,17 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic
             toolsets_dict[toolset] = []
         toolsets_dict[toolset].append(tool_name)
     
+    # Also add disabled toolsets so they show in the banner
+    for item in unavailable_toolsets:
+        # Map the internal toolset ID to display name
+        toolset_id = item["id"]
+        display_name = f"{toolset_id}_tools" if not toolset_id.endswith("_tools") else toolset_id
+        if display_name not in toolsets_dict:
+            toolsets_dict[display_name] = []
+        for tool_name in item.get("tools", []):
+            if tool_name not in toolsets_dict[display_name]:
+                toolsets_dict[display_name].append(tool_name)
+    
     # Display tools grouped by toolset (compact format, max 8 groups)
     sorted_toolsets = sorted(toolsets_dict.keys())
     display_toolsets = sorted_toolsets[:8]
@@ -350,11 +383,38 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic
     
     for toolset in display_toolsets:
         tool_names = toolsets_dict[toolset]
-        # Join tool names with commas, wrap if too long
-        tools_str = ", ".join(sorted(tool_names))
-        if len(tools_str) > 45:
-            tools_str = tools_str[:42] + "..."
-        right_lines.append(f"[dim #B8860B]{toolset}:[/] [#FFF8DC]{tools_str}[/]")
+        # Color each tool name - red if disabled, normal if enabled
+        colored_names = []
+        for name in sorted(tool_names):
+            if name in disabled_tools:
+                colored_names.append(f"[red]{name}[/]")
+            else:
+                colored_names.append(f"[#FFF8DC]{name}[/]")
+        
+        tools_str = ", ".join(colored_names)
+        # Truncate if too long (accounting for markup)
+        if len(", ".join(sorted(tool_names))) > 45:
+            # Rebuild with truncation
+            short_names = []
+            length = 0
+            for name in sorted(tool_names):
+                if length + len(name) + 2 > 42:
+                    short_names.append("...")
+                    break
+                short_names.append(name)
+                length += len(name) + 2
+            # Re-color the truncated list
+            colored_names = []
+            for name in short_names:
+                if name == "...":
+                    colored_names.append("[dim]...[/]")
+                elif name in disabled_tools:
+                    colored_names.append(f"[red]{name}[/]")
+                else:
+                    colored_names.append(f"[#FFF8DC]{name}[/]")
+            tools_str = ", ".join(colored_names)
+        
+        right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}")
     
     if remaining_toolsets > 0:
         right_lines.append(f"[dim #B8860B](and {remaining_toolsets} more toolsets...)[/]")
@@ -509,9 +569,14 @@ class HermesCLI:
         self.verbose = verbose if verbose is not None else CLI_CONFIG["agent"].get("verbose", False)
         
         # Configuration - priority: CLI args > env vars > config file
-        self.model = model or os.getenv("LLM_MODEL", CLI_CONFIG["model"]["default"])
-        self.base_url = base_url or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
-        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
+        # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config
+        self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"]
+        
+        # Base URL: custom endpoint (OPENAI_BASE_URL) takes precedence over OpenRouter
+        self.base_url = base_url or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
+        
+        # API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
         self.max_turns = max_turns if max_turns != 20 else CLI_CONFIG["agent"].get("max_turns", 20)
         
         # Parse and validate toolsets
@@ -641,7 +706,7 @@ class HermesCLI:
     def _show_status(self):
         """Show current status bar."""
         # Get tool count
-        tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets)
+        tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
         tool_count = len(tools) if tools else 0
         
         # Format model name (shorten if needed)
@@ -684,7 +749,7 @@ class HermesCLI:
     
     def show_tools(self):
         """Display available tools with kawaii ASCII art."""
-        tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets)
+        tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
         
         if not tools:
             print("(;_;) No tools available")
diff --git a/model_tools.py b/model_tools.py
index 3bdcbf4f06..0b48855725 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -274,11 +274,6 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
                             "type": "integer",
                             "description": "Command timeout in seconds (optional)",
                             "minimum": 1
-                        },
-                        "force": {
-                            "type": "boolean",
-                            "description": "Skip dangerous command safety check. Only use after user explicitly confirms they want to run a blocked command.",
-                            "default": False
                         }
                     },
                     "required": ["command"]
@@ -644,7 +639,8 @@ def get_tool_definitions(
             if validate_toolset(toolset_name):
                 resolved_tools = resolve_toolset(toolset_name)
                 tools_to_include.update(resolved_tools)
-                print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
+                if not quiet_mode:
+                    print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
             else:
                 # Try legacy compatibility
                 if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools", "cronjob_tools"]:
@@ -666,9 +662,11 @@ def get_tool_definitions(
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.update(legacy_tools)
-                    print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
+                    if not quiet_mode:
+                        print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
                 else:
-                    print(f"⚠️  Unknown toolset: {toolset_name}")
+                    if not quiet_mode:
+                        print(f"⚠️  Unknown toolset: {toolset_name}")
     elif disabled_toolsets:
         # Start with all tools from all toolsets, then remove disabled ones
         # Note: Only tools that are part of toolsets are accessible
@@ -687,7 +685,8 @@ def get_tool_definitions(
             if validate_toolset(toolset_name):
                 resolved_tools = resolve_toolset(toolset_name)
                 tools_to_include.difference_update(resolved_tools)
-                print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
+                if not quiet_mode:
+                    print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
             else:
                 # Try legacy compatibility
                 if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools", "cronjob_tools"]:
@@ -708,9 +707,11 @@ def get_tool_definitions(
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.difference_update(legacy_tools)
-                    print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
+                    if not quiet_mode:
+                        print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
                 else:
-                    print(f"⚠️  Unknown toolset: {toolset_name}")
+                    if not quiet_mode:
+                        print(f"⚠️  Unknown toolset: {toolset_name}")
     else:
         # No filtering - include all tools from all defined toolsets
         from toolsets import get_all_toolsets
@@ -781,9 +782,10 @@ def handle_terminal_function_call(function_name: str, function_args: Dict[str, A
         command = function_args.get("command")
         background = function_args.get("background", False)
         timeout = function_args.get("timeout")
-        force = function_args.get("force", False)  # Skip dangerous command check if user confirmed
+        # Note: force parameter exists internally but is NOT exposed to the model
+        # Dangerous command approval is handled via user prompts only
 
-        return terminal_tool(command=command, background=background, timeout=timeout, task_id=task_id, force=force)
+        return terminal_tool(command=command, background=background, timeout=timeout, task_id=task_id)
 
     else:
         return json.dumps({"error": f"Unknown terminal function: {function_name}"}, ensure_ascii=False)
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 81578e7ca9..72301ed199 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -300,11 +300,12 @@ def _prompt_dangerous_approval(command: str, description: str, timeout_seconds:
     os.environ["HERMES_SPINNER_PAUSE"] = "1"
     
     try:
+        # Use simple ASCII art for compatibility (no ANSI color codes)
         print()
-        print(f"  ⚠️  \033[33mPotentially dangerous command detected:\033[0m {description}")
-        print(f"      \033[2m{command[:100]}{'...' if len(command) > 100 else ''}\033[0m")
+        print(f"  ⚠️  DANGEROUS COMMAND: {description}")
+        print(f"      {command[:80]}{'...' if len(command) > 80 else ''}")
         print()
-        print(f"      [\033[32mo\033[0m]nce  |  [\033[33ms\033[0m]ession  |  [\033[36ma\033[0m]lways  |  [\033[31md\033[0m]eny")
+        print(f"      [o]nce  |  [s]ession  |  [a]lways  |  [d]eny")
         print()
         sys.stdout.flush()
         
@@ -389,14 +390,14 @@ def _check_dangerous_command(command: str, env_type: str) -> dict:
         return {
             "approved": False,
             "pattern_key": pattern_key,
-            "message": f"⚠️ This command was blocked because it's potentially dangerous ({description}). If you want me to run it anyway, please confirm by saying 'yes, run it' or 'approve'."
+            "message": f"BLOCKED: This command is potentially dangerous ({description}). Tell the user and ask if they want to add this command pattern to their allowlist. They can do this via 'hermes config edit' or by running the command directly on their machine."
         }
     
     # CLI context - prompt user
     choice = _prompt_dangerous_approval(command, description)
     
     if choice == "deny":
-        return {"approved": False, "message": "Command denied by user"}
+        return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
     
     # Handle approval
     if choice == "session":
@@ -1304,7 +1305,7 @@ def terminal_tool(
         >>> result = terminal_tool(command="long_task.sh", timeout=300)
         
         # Force run after user confirmation
-        >>> result = terminal_tool(command="rm -rf /tmp/old", force=True)
+        # Note: force parameter is internal only, not exposed to model API
     """
     global _active_environments, _last_activity
 

From 3e634aa7e4f505312a25f39456cc05316d82371b Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 07:02:59 -0800
Subject: [PATCH 20/48] Update requirements and enhance environment variable
 loading in gateway

- Updated requirements.txt to uncomment and ensure the installation of `python-telegram-bot` and `discord.py` packages.
- Enhanced the gateway run script to load environment variables from `~/.hermes/.env` (falling back to the project `.env`), improving configuration management and flexibility for different environments.
---
 gateway/run.py   | 8 ++++++++
 requirements.txt | 4 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index dfa97c4574..9c354ebd43 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -24,6 +24,14 @@ from typing import Dict, Optional, Any, List
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
+# Load environment variables from ~/.hermes/.env
+from dotenv import load_dotenv
+_env_path = Path.home() / '.hermes' / '.env'
+if _env_path.exists():
+    load_dotenv(_env_path)
+# Also try project .env as fallback
+load_dotenv()
+
 from gateway.config import (
     Platform,
     GatewayConfig,
diff --git a/requirements.txt b/requirements.txt
index 68a31e4479..98db357c93 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -35,10 +35,10 @@ croniter
 
 # Optional: For messaging platform integrations (gateway)
 # Telegram: pip install python-telegram-bot
-# python-telegram-bot>=20.0
+python-telegram-bot>=20.0
 
 # Discord: pip install discord.py
-# discord.py>=2.0
+discord.py>=2.0
 
 # WhatsApp: Requires Node.js bridge (see docs/messaging.md)
 # aiohttp  # For WhatsApp bridge communication
\ No newline at end of file

From 17a5efb416b521b0a89191dd1324e4f161ff9a0b Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 10:46:23 -0800
Subject: [PATCH 21/48] Enhance messaging gateway configuration and security
 features

- Added new environment variables for Telegram and Discord bot configurations, including `TELEGRAM_ALLOWED_USERS` and `DISCORD_ALLOWED_USERS`, to restrict bot access to specific users.
- Updated documentation in AGENTS.md and README.md to include detailed setup instructions for the messaging gateway, emphasizing the importance of user allowlists for security.
- Improved the CLI setup wizard to prompt for allowed user IDs during configuration, enhancing user guidance and security awareness.
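- Added user authorization checks to the gateway run script: when a platform-specific or global (`GATEWAY_ALLOWED_USERS`) allowlist is set, only listed users can interact with the bot; when no allowlist is configured, all users are still allowed.
- Enabled the `terminal` tool in the `hermes-telegram` toolset (guarded by the dangerous-command approval system).
- Made the gateway run in quiet mode and set the terminal working directory from `MESSAGING_CWD` (default: the home directory).
- Made `hermes gateway stop|restart|status` fall back to finding and signalling manually started gateway processes when no systemd/launchd service is installed.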
---
 .env.example            |   1 +
 AGENTS.md               |  37 ++++++++++
 README.md               |  62 +++++++++++++---
 cli-config.yaml.example |   5 +-
 gateway/run.py          |  65 +++++++++++++++--
 hermes_cli/config.py    |  38 ++++++++++
 hermes_cli/gateway.py   | 152 +++++++++++++++++++++++++++++++++++-----
 hermes_cli/setup.py     |  69 ++++++++++++++++++
 toolsets.py             |   6 +-
 9 files changed, 397 insertions(+), 38 deletions(-)

diff --git a/.env.example b/.env.example
index 9c73f74e9b..98c5ea1922 100644
--- a/.env.example
+++ b/.env.example
@@ -40,6 +40,7 @@ FAL_KEY=
 # - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account)
 TERMINAL_ENV=local
 
+
 # Container images (for singularity/docker/modal backends)
 TERMINAL_DOCKER_IMAGE=python:3.11
 TERMINAL_SINGULARITY_IMAGE=docker://python:3.11
diff --git a/AGENTS.md b/AGENTS.md
index c658ae6e1c..b495704dd7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -180,6 +180,43 @@ The unified `hermes` command provides all functionality:
 
 ---
 
+## Messaging Gateway
+
+The gateway connects Hermes to Telegram, Discord, and WhatsApp.
+
+### Configuration (in `~/.hermes/.env`):
+
+```bash
+# Telegram
+TELEGRAM_BOT_TOKEN=123456:ABC-DEF...      # From @BotFather
+TELEGRAM_ALLOWED_USERS=123456789,987654   # Comma-separated user IDs (from @userinfobot)
+
+# Discord  
+DISCORD_BOT_TOKEN=MTIz...                 # From Developer Portal
+DISCORD_ALLOWED_USERS=123456789012345678  # Comma-separated user IDs
+```
+
+### Security (User Allowlists):
+
+**IMPORTANT**: Without an allowlist, anyone who finds your bot can use it!
+
+The gateway checks `{PLATFORM}_ALLOWED_USERS` environment variables:
+- If set: Only listed user IDs can interact with the bot
+- If unset: All users are allowed (dangerous with terminal access!)
+
+Users can find their IDs:
+- **Telegram**: Message [@userinfobot](https://t.me/userinfobot)
+- **Discord**: Enable Developer Mode, right-click name → Copy ID
+
+### Platform Toolsets:
+
+Each platform has a dedicated toolset in `toolsets.py`:
+- `hermes-telegram`: Full tools including terminal (with safety checks)
+- `hermes-discord`: Full tools including terminal
+- `hermes-whatsapp`: Full tools including terminal
+
+---
+
 ## Configuration System
 
 Configuration files are stored in `~/.hermes/` for easy user access:
diff --git a/README.md b/README.md
index ab2b3bcfe0..047bec2e0c 100644
--- a/README.md
+++ b/README.md
@@ -187,21 +187,61 @@ hermes config set terminal.backend modal
 
 ### 📱 Messaging Gateway
 
-Chat with Hermes from Telegram, Discord, or WhatsApp:
+Chat with Hermes from Telegram, Discord, or WhatsApp.
+
+#### Telegram Setup
+
+1. **Create a bot:** Message [@BotFather](https://t.me/BotFather) on Telegram, use `/newbot`
+2. **Get your user ID:** Message [@userinfobot](https://t.me/userinfobot) - it replies with your numeric ID
+3. **Configure:**
 
 ```bash
-# Configure your bot token
-hermes config set TELEGRAM_BOT_TOKEN "your_token"
-
-# Start the gateway
-hermes gateway
-
-# Or install as a service
-hermes gateway install
-hermes gateway start
+# Add to ~/.hermes/.env:
+TELEGRAM_BOT_TOKEN=123456:ABC-DEF...
+TELEGRAM_ALLOWED_USERS=YOUR_USER_ID    # Comma-separated for multiple users
 ```
 
-See [docs/messaging.md](docs/messaging.md) for full setup.
+4. **Start the gateway:**
+
+```bash
+hermes gateway              # Run in foreground
+hermes gateway install      # Install as systemd service (Linux)
+hermes gateway start        # Start the service
+```
+
+#### Discord Setup
+
+1. **Create a bot:** Go to [Discord Developer Portal](https://discord.com/developers/applications)
+2. **Get your user ID:** Enable Developer Mode in Discord settings, right-click your name → Copy ID
+3. **Configure:**
+
+```bash
+# Add to ~/.hermes/.env:
+DISCORD_BOT_TOKEN=MTIz...
+DISCORD_ALLOWED_USERS=YOUR_USER_ID
+```
+
+#### Security (Important!)
+
+**Without an allowlist, anyone who finds your bot can use it!**
+
+```bash
+# Restrict to specific users (recommended):
+TELEGRAM_ALLOWED_USERS=123456789,987654321
+DISCORD_ALLOWED_USERS=123456789012345678
+
+# Or allow all users in a specific platform:
+# (Leave the variable unset - NOT recommended for bots with terminal access)
+```
+
+#### Gateway Commands
+
+| Command | Description |
+|---------|-------------|
+| `/new` or `/reset` | Start fresh conversation |
+| `/status` | Show session info |
+
+See [docs/messaging.md](docs/messaging.md) for WhatsApp and advanced setup.
 
 ### ⏰ Scheduled Tasks (Cron)
 
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 81be7a4d71..5b4f16ada6 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -23,9 +23,12 @@ model:
 # OPTION 1: Local execution (default)
 # Commands run directly on your machine in the current directory
 # -----------------------------------------------------------------------------
+# Working directory behavior:
+#   - CLI (`hermes` command): Uses "." (current directory where you run hermes)
+#   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
 terminal:
   env_type: "local"
-  cwd: "."  # Use "." for current directory, or specify absolute path
+  cwd: "."  # CLI working directory - "." means current directory
   timeout: 180
   lifetime_seconds: 300
   # sudo_password: ""  # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!
diff --git a/gateway/run.py b/gateway/run.py
index 9c354ebd43..de7dd8447a 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -24,7 +24,7 @@ from typing import Dict, Optional, Any, List
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-# Load environment variables from ~/.hermes/.env
+# Load environment variables from ~/.hermes/.env first
 from dotenv import load_dotenv
 _env_path = Path.home() / '.hermes' / '.env'
 if _env_path.exists():
@@ -32,6 +32,15 @@ if _env_path.exists():
 # Also try project .env as fallback
 load_dotenv()
 
+# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
+os.environ["HERMES_QUIET"] = "1"
+
+# Set terminal working directory for messaging platforms
+# Uses MESSAGING_CWD if set, otherwise defaults to home directory
+# This is separate from CLI which uses the directory where `hermes` is run
+messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
+os.environ["TERMINAL_CWD"] = messaging_cwd
+
 from gateway.config import (
     Platform,
     GatewayConfig,
@@ -163,19 +172,63 @@ class GatewayRunner:
         
         return None
     
+    def _is_user_authorized(self, source: SessionSource) -> bool:
+        """
+        Check if a user is authorized to use the bot.
+        
+        Authorization is checked via environment variables:
+        - GATEWAY_ALLOWED_USERS: Comma-separated list of user IDs (all platforms)
+        - TELEGRAM_ALLOWED_USERS: Telegram-specific user IDs
+        - DISCORD_ALLOWED_USERS: Discord-specific user IDs
+        
+        If no allowlist is configured, all users are allowed (open access).
+        """
+        user_id = source.user_id
+        if not user_id:
+            return False  # Can't verify unknown users
+        
+        # Check platform-specific allowlist first
+        platform_env_map = {
+            Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
+            Platform.DISCORD: "DISCORD_ALLOWED_USERS",
+            Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
+        }
+        
+        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""))
+        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "")
+        
+        # If no allowlists configured, allow all (backward compatible)
+        if not platform_allowlist and not global_allowlist:
+            return True
+        
+        # Check if user is in any allowlist
+        allowed_ids = set()
+        if platform_allowlist:
+            allowed_ids.update(uid.strip() for uid in platform_allowlist.split(","))
+        if global_allowlist:
+            allowed_ids.update(uid.strip() for uid in global_allowlist.split(","))
+        
+        return user_id in allowed_ids
+    
     async def _handle_message(self, event: MessageEvent) -> Optional[str]:
         """
         Handle an incoming message from any platform.
         
         This is the core message processing pipeline:
-        1. Check for commands (/new, /reset, etc.)
-        2. Get or create session
-        3. Build context for agent
-        4. Run agent conversation
-        5. Return response
+        1. Check user authorization
+        2. Check for commands (/new, /reset, etc.)
+        3. Get or create session
+        4. Build context for agent
+        5. Run agent conversation
+        6. Return response
         """
         source = event.source
         
+        # Check if user is authorized
+        if not self._is_user_authorized(source):
+            print(f"[gateway] Unauthorized user: {source.user_id} ({source.user_name}) on {source.platform.value}")
+            return None  # Silently ignore unauthorized users
+        
         # Check for reset commands
         command = event.get_command()
         if command in ["new", "reset"]:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 65443d623a..e24e2a3a4d 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -163,6 +163,44 @@ OPTIONAL_ENV_VARS = {
         "url": None,
         "password": True,
     },
+    # Messaging platform tokens
+    "TELEGRAM_BOT_TOKEN": {
+        "description": "Telegram bot token from @BotFather",
+        "prompt": "Telegram bot token",
+        "url": "https://t.me/BotFather",
+        "password": True,
+    },
+    "TELEGRAM_ALLOWED_USERS": {
+        "description": "Comma-separated Telegram user IDs allowed to use the bot (get ID from @userinfobot)",
+        "prompt": "Allowed Telegram user IDs (comma-separated)",
+        "url": "https://t.me/userinfobot",
+        "password": False,
+    },
+    "DISCORD_BOT_TOKEN": {
+        "description": "Discord bot token from Developer Portal",
+        "prompt": "Discord bot token",
+        "url": "https://discord.com/developers/applications",
+        "password": True,
+    },
+    "DISCORD_ALLOWED_USERS": {
+        "description": "Comma-separated Discord user IDs allowed to use the bot",
+        "prompt": "Allowed Discord user IDs (comma-separated)",
+        "url": None,
+        "password": False,
+    },
+    # Terminal configuration
+    "MESSAGING_CWD": {
+        "description": "Working directory for terminal commands via messaging (Telegram/Discord/etc). CLI always uses current directory.",
+        "prompt": "Messaging working directory (default: home)",
+        "url": None,
+        "password": False,
+    },
+    "SUDO_PASSWORD": {
+        "description": "Sudo password for terminal commands requiring root access",
+        "prompt": "Sudo password",
+        "url": None,
+        "password": True,
+    },
 }
 
 
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 678a68927a..579ea5f1e0 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -6,6 +6,7 @@ Handles: hermes gateway [run|start|stop|restart|status|install|uninstall]
 
 import asyncio
 import os
+import signal
 import subprocess
 import sys
 from pathlib import Path
@@ -13,6 +14,70 @@ from pathlib import Path
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
 
 
+# =============================================================================
+# Process Management (for manual gateway runs)
+# =============================================================================
+
+def find_gateway_pids() -> list:
+    """Find PIDs of running gateway processes."""
+    pids = []
+    try:
+        # Look for gateway processes with multiple patterns
+        patterns = [
+            "hermes_cli.main gateway",
+            "hermes gateway",
+            "gateway/run.py",
+        ]
+        
+        result = subprocess.run(
+            ["ps", "aux"],
+            capture_output=True,
+            text=True
+        )
+        
+        for line in result.stdout.split('\n'):
+            # Skip grep and current process
+            if 'grep' in line or str(os.getpid()) in line:
+                continue
+            
+            for pattern in patterns:
+                if pattern in line:
+                    parts = line.split()
+                    if len(parts) > 1:
+                        try:
+                            pid = int(parts[1])
+                            if pid not in pids:
+                                pids.append(pid)
+                        except ValueError:
+                            continue
+                    break
+    except Exception:
+        pass
+    
+    return pids
+
+
+def kill_gateway_processes(force: bool = False) -> int:
+    """Kill any running gateway processes. Returns count killed."""
+    pids = find_gateway_pids()
+    killed = 0
+    
+    for pid in pids:
+        try:
+            if force:
+                os.kill(pid, signal.SIGKILL)
+            else:
+                os.kill(pid, signal.SIGTERM)
+            killed += 1
+        except ProcessLookupError:
+            # Process already gone
+            pass
+        except PermissionError:
+            print(f"⚠ Permission denied to kill PID {pid}")
+    
+    return killed
+
+
 def is_linux() -> bool:
     return sys.platform.startswith('linux')
 
@@ -343,29 +408,80 @@ def gateway_command(args):
             sys.exit(1)
     
     elif subcmd == "stop":
-        if is_linux():
-            systemd_stop()
-        elif is_macos():
-            launchd_stop()
-        else:
-            print("Not supported on this platform.")
-            sys.exit(1)
+        # Try service first, fall back to killing processes directly
+        service_available = False
+        
+        if is_linux() and get_systemd_unit_path().exists():
+            try:
+                systemd_stop()
+                service_available = True
+            except subprocess.CalledProcessError:
+                pass  # Fall through to process kill
+        elif is_macos() and get_launchd_plist_path().exists():
+            try:
+                launchd_stop()
+                service_available = True
+            except subprocess.CalledProcessError:
+                pass
+        
+        if not service_available:
+            # Kill gateway processes directly
+            killed = kill_gateway_processes()
+            if killed:
+                print(f"✓ Stopped {killed} gateway process(es)")
+            else:
+                print("✗ No gateway processes found")
     
     elif subcmd == "restart":
-        if is_linux():
-            systemd_restart()
-        elif is_macos():
-            launchd_restart()
-        else:
-            print("Not supported on this platform.")
-            sys.exit(1)
+        # Try service first, fall back to killing and restarting
+        service_available = False
+        
+        if is_linux() and get_systemd_unit_path().exists():
+            try:
+                systemd_restart()
+                service_available = True
+            except subprocess.CalledProcessError:
+                pass
+        elif is_macos() and get_launchd_plist_path().exists():
+            try:
+                launchd_restart()
+                service_available = True
+            except subprocess.CalledProcessError:
+                pass
+        
+        if not service_available:
+            # Manual restart: kill existing processes
+            killed = kill_gateway_processes()
+            if killed:
+                print(f"✓ Stopped {killed} gateway process(es)")
+            
+            import time
+            time.sleep(2)
+            
+            # Start fresh
+            print("Starting gateway...")
+            run_gateway(verbose=False)
     
     elif subcmd == "status":
         deep = getattr(args, 'deep', False)
-        if is_linux():
+        
+        # Check for service first
+        if is_linux() and get_systemd_unit_path().exists():
             systemd_status(deep)
-        elif is_macos():
+        elif is_macos() and get_launchd_plist_path().exists():
             launchd_status(deep)
         else:
-            print("Not supported on this platform.")
-            sys.exit(1)
+            # Check for manually running processes
+            pids = find_gateway_pids()
+            if pids:
+                print(f"✓ Gateway is running (PID: {', '.join(map(str, pids))})")
+                print("  (Running manually, not as a system service)")
+                print()
+                print("To install as a service:")
+                print("  hermes gateway install")
+            else:
+                print("✗ Gateway is not running")
+                print()
+                print("To start:")
+                print("  hermes gateway          # Run in foreground")
+                print("  hermes gateway install  # Install as service")
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 4b4e5f3b0e..98420725e7 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -591,6 +591,23 @@ def run_setup_wizard(args):
         if is_windows:
             print_info("Note: On Windows, commands run via cmd.exe or PowerShell")
         
+        # Messaging working directory configuration
+        print_info("")
+        print_info("Working Directory for Messaging (Telegram/Discord/etc):")
+        print_info("  The CLI always uses the directory you run 'hermes' from")
+        print_info("  But messaging bots need a static starting directory")
+        
+        current_cwd = get_env_value('MESSAGING_CWD') or str(Path.home())
+        print_info(f"  Current: {current_cwd}")
+        
+        cwd_input = prompt("  Messaging working directory", current_cwd)
+        # Expand ~ to full path
+        if cwd_input.startswith('~'):
+            cwd_expanded = str(Path.home()) + cwd_input[1:]
+        else:
+            cwd_expanded = cwd_input
+        save_env_value("MESSAGING_CWD", cwd_expanded)
+        
         if prompt_yes_no("  Enable sudo support? (allows agent to run sudo commands)", False):
             print_warning("  SECURITY WARNING: Sudo password will be stored in plaintext")
             sudo_pass = prompt("  Sudo password (leave empty to skip)", password=True)
@@ -720,10 +737,36 @@ def run_setup_wizard(args):
             save_env_value("TELEGRAM_BOT_TOKEN", token)
             print_success("Telegram token saved")
             
+            # Allowed users (security)
+            print()
+            print_info("🔒 Security: Restrict who can use your bot")
+            print_info("   To find your Telegram user ID:")
+            print_info("   1. Message @userinfobot on Telegram")
+            print_info("   2. It will reply with your numeric ID (e.g., 123456789)")
+            print()
+            allowed_users = prompt("Allowed user IDs (comma-separated, leave empty for open access)")
+            if allowed_users:
+                save_env_value("TELEGRAM_ALLOWED_USERS", allowed_users.replace(" ", ""))
+                print_success("Telegram allowlist configured - only listed users can use the bot")
+            else:
+                print_info("⚠️  No allowlist set - anyone who finds your bot can use it!")
+            
             home_channel = prompt("Home channel ID (optional, for cron delivery)")
             if home_channel:
                 save_env_value("TELEGRAM_HOME_CHANNEL", home_channel)
     
+    # Check/update existing Telegram allowlist
+    elif existing_telegram:
+        existing_allowlist = get_env_value('TELEGRAM_ALLOWED_USERS')
+        if not existing_allowlist:
+            print_info("⚠️  Telegram has no user allowlist - anyone can use your bot!")
+            if prompt_yes_no("Add allowed users now?", True):
+                print_info("   To find your Telegram user ID: message @userinfobot")
+                allowed_users = prompt("Allowed user IDs (comma-separated)")
+                if allowed_users:
+                    save_env_value("TELEGRAM_ALLOWED_USERS", allowed_users.replace(" ", ""))
+                    print_success("Telegram allowlist configured")
+    
     # Discord
     existing_discord = get_env_value('DISCORD_BOT_TOKEN')
     if existing_discord:
@@ -738,10 +781,36 @@ def run_setup_wizard(args):
             save_env_value("DISCORD_BOT_TOKEN", token)
             print_success("Discord token saved")
             
+            # Allowed users (security)
+            print()
+            print_info("🔒 Security: Restrict who can use your bot")
+            print_info("   To find your Discord user ID:")
+            print_info("   1. Enable Developer Mode in Discord settings")
+            print_info("   2. Right-click your name → Copy ID")
+            print()
+            allowed_users = prompt("Allowed user IDs (comma-separated, leave empty for open access)")
+            if allowed_users:
+                save_env_value("DISCORD_ALLOWED_USERS", allowed_users.replace(" ", ""))
+                print_success("Discord allowlist configured")
+            else:
+                print_info("⚠️  No allowlist set - anyone in servers with your bot can use it!")
+            
             home_channel = prompt("Home channel ID (optional, for cron delivery)")
             if home_channel:
                 save_env_value("DISCORD_HOME_CHANNEL", home_channel)
     
+    # Check/update existing Discord allowlist
+    elif existing_discord:
+        existing_allowlist = get_env_value('DISCORD_ALLOWED_USERS')
+        if not existing_allowlist:
+            print_info("⚠️  Discord has no user allowlist - anyone can use your bot!")
+            if prompt_yes_no("Add allowed users now?", True):
+                print_info("   To find Discord ID: Enable Developer Mode, right-click name → Copy ID")
+                allowed_users = prompt("Allowed user IDs (comma-separated)")
+                if allowed_users:
+                    save_env_value("DISCORD_ALLOWED_USERS", allowed_users.replace(" ", ""))
+                    print_success("Discord allowlist configured")
+    
     # =========================================================================
     # Step 7: Additional Tools (Optional)
     # =========================================================================
diff --git a/toolsets.py b/toolsets.py
index bd6b22dcee..5d08731ec0 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -139,9 +139,11 @@ TOOLSETS = {
     # ==========================================================================
     
     "hermes-telegram": {
-        "description": "Telegram bot toolset - web research, skills, cronjobs (no terminal/browser for security)",
+        "description": "Telegram bot toolset - full access for personal use (terminal has safety checks)",
         "tools": [
-            # Web tools - safe for messaging
+            # Terminal - enabled with dangerous command approval system
+            "terminal",
+            # Web tools
             "web_search", "web_extract",
             # Vision - analyze images sent by users
             "vision_analyze",

From 7eac4ee9fe9feb9cd23b26345171473dddf44a30 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 14:48:19 -0800
Subject: [PATCH 22/48] Update agent configuration for maximum tool-calling
 iterations

- Increased the default maximum tool-calling iterations from 20 to 60 in the CLI configuration and related files, allowing for more complex tasks.
- Updated documentation and comments to reflect the new recommended range for iterations, enhancing user guidance.
- Implemented backward compatibility for a root-level `max_turns` key in cli-config.yaml: it is copied to `agent.max_turns` when the file has no `agent` section, ensuring a smooth transition for existing users.
- Adjusted the setup wizard to prompt for the maximum iterations setting, improving user experience during configuration.
---
 TODO.md                 | 145 ++++++++++++++++++++++++++++++++++++++++
 cli-config.yaml.example |   6 +-
 cli.py                  |  26 +++++--
 gateway/run.py          |   4 ++
 hermes_cli/config.py    |   7 ++
 hermes_cli/setup.py     |  27 +++++++-
 run_agent.py            |  48 +++++++++++--
 7 files changed, 246 insertions(+), 17 deletions(-)

diff --git a/TODO.md b/TODO.md
index dc116539b7..454c7ff4bc 100644
--- a/TODO.md
+++ b/TODO.md
@@ -441,4 +441,149 @@
 
 ---
 
+## 14. Learning Machine / Dynamic Memory System 🧠
+
+*Inspired by [Dash](~/agent-codebases/dash) - a self-learning data agent.*
+
+**Problem:** Agent starts fresh every session. Valuable learnings from debugging, error patterns, successful approaches, and user preferences are lost.
+
+**Dash's Key Insight:** Separate **Knowledge** (static, curated) from **Learnings** (dynamic, discovered):
+
+| System | What It Stores | How It Evolves |
+|--------|---------------|----------------|
+| **Knowledge** (Skills) | Validated approaches, templates, best practices | Curated by user |
+| **Learnings** | Error patterns, gotchas, discovered fixes | Managed automatically |
+
+**Tools to implement:**
+- [ ] `save_learning(topic, learning, context?)` - Record a discovered pattern
+  ```python
+  save_learning(
+    topic="python-ssl",
+    learning="On Ubuntu 22.04, SSL certificate errors often fixed by: apt install ca-certificates",
+    context="Debugging requests SSL failure"
+  )
+  ```
+- [ ] `search_learnings(query)` - Find relevant past learnings
+  ```python
+  search_learnings("SSL certificate error Python")
+  # Returns: "On Ubuntu 22.04, SSL certificate errors often fixed by..."
+  ```
+
+**User Profile & Memory:**
+- [ ] `user_profile` - Structured facts about user preferences
+  ```yaml
+  # ~/.hermes/user_profile.yaml
+  coding_style:
+    python_formatter: black
+    type_hints: always
+    test_framework: pytest
+  preferences:
+    verbosity: detailed
+    confirm_destructive: true
+  environment:
+    os: linux
+    shell: bash
+    default_python: 3.11
+  ```
+- [ ] `user_memory` - Unstructured observations the agent learns
+  ```yaml
+  # ~/.hermes/user_memory.yaml
+  - "User prefers tabs over spaces despite black's defaults"
+  - "User's main project is ~/work/myapp - a Django app"
+  - "User often works late - don't ask about timezone"
+  ```
+
+**When to learn:**
+- After fixing an error that took multiple attempts
+- When user corrects the agent's approach
+- When a workaround is discovered for a tool limitation
+- When user expresses a preference
+
+**Storage:** Vector database (ChromaDB) or simple YAML with embedding search.
+
+**Files to create:** `tools/learning_tools.py`, `learning/store.py`, `~/.hermes/learnings/`
+
+---
+
+## 15. Layered Context Architecture 📊
+
+*Inspired by Dash's "Six Layers of Context" - grounding responses in multiple sources.*
+
+**Problem:** Context sources are ad-hoc. No clear hierarchy or strategy for what context to include when.
+
+**Proposed Layers for Hermes:**
+
+| Layer | Source | When Loaded | Example |
+|-------|--------|-------------|---------|
+| 1. **Project Context** | `.hermes/context.md` | Auto on cwd | "This is a FastAPI project using PostgreSQL" |
+| 2. **Skills** | `skills/*.md` | On request | "How to set up React project" |
+| 3. **User Profile** | `~/.hermes/user_profile.yaml` | Always | "User prefers pytest, uses black" |
+| 4. **Learnings** | `~/.hermes/learnings/` | Semantic search | "SSL fix for Ubuntu" |
+| 5. **External Knowledge** | Web search, docs | On demand | Current API docs, Stack Overflow |
+| 6. **Runtime Introspection** | Tool calls | Real-time | File contents, terminal output |
+
+**Benefits:**
+- Clear mental model for what context is available
+- Prioritization: local > learned > external
+- Debugging: "Why did agent do X?" → check which layers contributed
+
+**Files to modify:** `run_agent.py` (context loading), new `context/layers.py`
+
+---
+
+## 16. Evaluation System with LLM Grading 📏
+
+*Inspired by Dash's evaluation framework.*
+
+**Problem:** `batch_runner.py` runs test cases but lacks quality assessment.
+
+**Dash's Approach:**
+- **String matching** (default) - Check if expected strings appear
+- **LLM grader** (-g flag) - GPT evaluates response quality
+- **Result comparison** (-r flag) - Compare against golden output
+
+**Implementation for Hermes:**
+
+- [ ] **Test case format:**
+  ```python
+  TestCase(
+    name="create_python_project",
+    prompt="Create a new Python project with FastAPI and tests",
+    expected_strings=["requirements.txt", "main.py", "test_"],  # Basic check
+    golden_actions=["write:main.py", "write:requirements.txt", "terminal:pip install"],
+    grader_criteria="Should create complete project structure with working code"
+  )
+  ```
+
+- [ ] **LLM grader mode:**
+  ```python
+  def grade_response(response: str, criteria: str) -> Grade:
+      """Use GPT to evaluate response quality."""
+      prompt = f"""
+      Evaluate this agent response against the criteria.
+      Criteria: {criteria}
+      Response: {response}
+      
+      Score (1-5) and explain why.
+      """
+      # Returns: Grade(score=4, explanation="Created all files but tests are minimal")
+  ```
+
+- [ ] **Action comparison mode:**
+  - Record tool calls made during test
+  - Compare against expected actions
+  - "Expected terminal call to pip install, got npm install"
+
+- [ ] **CLI flags:**
+  ```bash
+  python batch_runner.py eval test_cases.yaml       # String matching
+  python batch_runner.py eval test_cases.yaml -g    # + LLM grading
+  python batch_runner.py eval test_cases.yaml -r    # + Result comparison
+  python batch_runner.py eval test_cases.yaml -v    # Verbose (show responses)
+  ```
+
+**Files to modify:** `batch_runner.py`, new `evals/test_cases.py`, new `evals/grader.py`
+
+---
+
 *Last updated: $(date +%Y-%m-%d)* 🤖
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 5b4f16ada6..63e4f75556 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -146,8 +146,10 @@ compression:
 # Agent Behavior
 # =============================================================================
 agent:
-  # Maximum conversation turns before stopping
-  max_turns: 20
+  # Maximum tool-calling iterations per conversation
+  # Higher = more room for complex tasks, but costs more tokens
+  # Recommended: 20-30 for focused tasks, 50-100 for open exploration
+  max_turns: 60
   
   # Enable verbose logging
   verbose: false
diff --git a/cli.py b/cli.py
index 15d1f8ed60..9718ebea01 100755
--- a/cli.py
+++ b/cli.py
@@ -95,7 +95,7 @@ def load_cli_config() -> Dict[str, Any]:
             "summary_model": "google/gemini-2.0-flash-001",  # Fast/cheap model for summaries
         },
         "agent": {
-            "max_turns": 20,
+            "max_turns": 60,  # Default max tool-calling iterations
             "verbose": False,
             "system_prompt": "",
             "personalities": {
@@ -145,6 +145,10 @@ def load_cli_config() -> Dict[str, Any]:
                         defaults[key].update(file_config[key])
                     else:
                         defaults[key] = file_config[key]
+            
+            # Handle root-level max_turns (backwards compat) - copy to agent.max_turns
+            if "max_turns" in file_config and "agent" not in file_config:
+                defaults["agent"]["max_turns"] = file_config["max_turns"]
         except Exception as e:
             print(f"[Warning] Failed to load cli-config.yaml: {e}")
     
@@ -547,7 +551,7 @@ class HermesCLI:
         toolsets: List[str] = None,
         api_key: str = None,
         base_url: str = None,
-        max_turns: int = 20,
+        max_turns: int = 60,
         verbose: bool = False,
         compact: bool = False,
     ):
@@ -559,7 +563,7 @@ class HermesCLI:
             toolsets: List of toolsets to enable (default: all)
             api_key: API key (default: from environment)
             base_url: API base URL (default: OpenRouter)
-            max_turns: Maximum conversation turns
+            max_turns: Maximum tool-calling iterations (default: 60)
             verbose: Enable verbose logging
             compact: Use compact display mode
         """
@@ -577,7 +581,17 @@ class HermesCLI:
         
         # API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter
         self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
-        self.max_turns = max_turns if max_turns != 20 else CLI_CONFIG["agent"].get("max_turns", 20)
+        # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
+        if max_turns != 60:  # CLI arg was explicitly set
+            self.max_turns = max_turns
+        elif os.getenv("HERMES_MAX_ITERATIONS"):
+            self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
+        elif CLI_CONFIG["agent"].get("max_turns"):
+            self.max_turns = CLI_CONFIG["agent"]["max_turns"]
+        elif CLI_CONFIG.get("max_turns"):  # Backwards compat: root-level max_turns
+            self.max_turns = CLI_CONFIG["max_turns"]
+        else:
+            self.max_turns = 60
         
         # Parse and validate toolsets
         self.enabled_toolsets = toolsets
@@ -1377,7 +1391,7 @@ def main(
     model: str = None,
     api_key: str = None,
     base_url: str = None,
-    max_turns: int = 20,
+    max_turns: int = 60,
     verbose: bool = False,
     compact: bool = False,
     list_tools: bool = False,
@@ -1396,7 +1410,7 @@ def main(
         model: Model to use (default: anthropic/claude-opus-4-20250514)
         api_key: API key for authentication
         base_url: Base URL for the API
-        max_turns: Maximum conversation turns (default: 20)
+        max_turns: Maximum tool-calling iterations (default: 60)
         verbose: Enable verbose logging
         compact: Use compact display mode
         list_tools: List available tools and exit
diff --git a/gateway/run.py b/gateway/run.py
index de7dd8447a..3bb32239f5 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -360,8 +360,12 @@ class GatewayRunner:
         toolset = toolset_map.get(source.platform, "hermes-telegram")
         
         def run_sync():
+            # Read from env var or use default (same as CLI)
+            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))
+            
             agent = AIAgent(
                 model=os.getenv("HERMES_MODEL", "anthropic/claude-sonnet-4"),
+                max_iterations=max_iterations,
                 quiet_mode=True,
                 enabled_toolsets=[toolset],
                 ephemeral_system_prompt=context_prompt,
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e24e2a3a4d..2a5833fd49 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -201,6 +201,13 @@ OPTIONAL_ENV_VARS = {
         "url": None,
         "password": True,
     },
+    # Agent configuration
+    "HERMES_MAX_ITERATIONS": {
+        "description": "Maximum tool-calling iterations per conversation (default: 25 for messaging, 10 for CLI)",
+        "prompt": "Max iterations",
+        "url": None,
+        "password": False,
+    },
 }
 
 
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 98420725e7..fddf8e5837 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -693,7 +693,28 @@ def run_setup_wizard(args):
     # else: Keep current (selected_backend is None)
     
     # =========================================================================
-    # Step 5: Context Compression
+    # Step 5: Agent Settings
+    # =========================================================================
+    print_header("Agent Settings")
+    
+    # Max iterations
+    current_max = get_env_value('HERMES_MAX_ITERATIONS') or '60'
+    print_info("Maximum tool-calling iterations per conversation.")
+    print_info("Higher = more complex tasks, but costs more tokens.")
+    print_info("Recommended: 30-60 for most tasks, 100+ for open exploration.")
+    
+    max_iter_str = prompt("Max iterations", current_max)
+    try:
+        max_iter = int(max_iter_str)
+        if max_iter > 0:
+            save_env_value("HERMES_MAX_ITERATIONS", str(max_iter))
+            config['max_turns'] = max_iter
+            print_success(f"Max iterations set to {max_iter}")
+    except ValueError:
+        print_warning("Invalid number, keeping current value")
+    
+    # =========================================================================
+    # Step 6: Context Compression
     # =========================================================================
     print_header("Context Compression")
     print_info("Automatically summarize old messages when context gets too long.")
@@ -718,7 +739,7 @@ def run_setup_wizard(args):
         config.setdefault('compression', {})['enabled'] = False
     
     # =========================================================================
-    # Step 6: Messaging Platforms (Optional)
+    # Step 7: Messaging Platforms (Optional)
     # =========================================================================
     print_header("Messaging Platforms (Optional)")
     print_info("Connect to messaging platforms to chat with Hermes from anywhere.")
@@ -812,7 +833,7 @@ def run_setup_wizard(args):
                     print_success("Discord allowlist configured")
     
     # =========================================================================
-    # Step 7: Additional Tools (Optional)
+    # Step 8: Additional Tools (Optional)
     # =========================================================================
     print_header("Additional Tools (Optional)")
     print_info("These tools extend the agent's capabilities.")
diff --git a/run_agent.py b/run_agent.py
index 963a9db4f8..b8cbad5813 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -585,7 +585,7 @@ class AIAgent:
         base_url: str = None,
         api_key: str = None,
         model: str = "anthropic/claude-sonnet-4-20250514",  # OpenRouter format
-        max_iterations: int = 10,
+        max_iterations: int = 60,  # Default tool-calling iterations
         tool_delay: float = 1.0,
         enabled_toolsets: List[str] = None,
         disabled_toolsets: List[str] = None,
@@ -1966,11 +1966,47 @@ class AIAgent:
                     final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
                     break
         
-        # Handle max iterations reached
-        if api_call_count >= self.max_iterations:
-            print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Stopping to prevent infinite loop.")
-            if final_response is None:
-                final_response = "I've reached the maximum number of iterations. Here's what I found so far."
+        # Handle max iterations reached - ask model to summarize what it found
+        if api_call_count >= self.max_iterations and final_response is None:
+            print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Requesting summary...")
+            
+            # Inject a user message asking for a summary
+            summary_request = (
+                "You've reached the maximum number of tool-calling iterations allowed. "
+                "Please provide a final response summarizing what you've found and accomplished so far, "
+                "without calling any more tools."
+            )
+            messages.append({"role": "user", "content": summary_request})
+            
+            # Make one final API call WITHOUT tools to force a text response
+            try:
+                api_messages = messages.copy()
+                if self.ephemeral_system_prompt:
+                    api_messages = [{"role": "system", "content": self.ephemeral_system_prompt}] + api_messages
+                
+                summary_response = self.client.chat.completions.create(
+                    model=self.model,
+                    messages=api_messages,
+                    # No tools parameter - forces text response
+                    extra_headers=self.extra_headers,
+                    extra_body=self.extra_body,
+                )
+                
+                if summary_response.choices and summary_response.choices[0].message.content:
+                    final_response = summary_response.choices[0].message.content
+                    # Strip think blocks from final response
+                    if "<think>" in final_response:
+                        import re
+                        final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
+                    
+                    # Add to messages for session continuity
+                    messages.append({"role": "assistant", "content": final_response})
+                else:
+                    final_response = "I reached the iteration limit and couldn't generate a summary."
+                    
+            except Exception as e:
+                logging.warning(f"Failed to get summary response: {e}")
+                final_response = f"I reached the maximum iterations ({self.max_iterations}) but couldn't summarize. Error: {str(e)}"
         
         # Determine if conversation completed successfully
         completed = final_response is not None and api_call_count < self.max_iterations

From a09b018bd50e6ff2909a5194bcdcafbb0002311c Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 14:51:31 -0800
Subject: [PATCH 23/48] Implement continuous typing indicator in message
 handling

- Added a new private method `_keep_typing` that re-sends the typing indicator every 4 seconds while a message is being processed, because the Telegram/Discord typing status expires after about 5 seconds.
- Updated the `handle_message` method to start the typing task before calling the message handler and to cancel it in a `finally` block, so the indicator stops whether the handler succeeds or raises.
---
 gateway/platforms/base.py | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index e9a5f828e8..6601c52de4 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -171,20 +171,34 @@ class BasePlatformAdapter(ABC):
         """
         pass
     
+    async def _keep_typing(self, chat_id: str, interval: float = 4.0) -> None:
+        """
+        Continuously send typing indicator until cancelled.
+        
+        Telegram/Discord typing status expires after ~5 seconds, so we refresh every 4.
+        """
+        try:
+            while True:
+                await self.send_typing(chat_id)
+                await asyncio.sleep(interval)
+        except asyncio.CancelledError:
+            pass  # Normal cancellation when handler completes
+    
     async def handle_message(self, event: MessageEvent) -> None:
         """
         Process an incoming message.
         
         Calls the registered message handler and sends the response.
+        Keeps typing indicator active throughout processing.
         """
         if not self._message_handler:
             return
         
+        # Start continuous typing indicator (refreshes every 4 seconds)
+        typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id))
+        
         try:
-            # Send typing indicator
-            await self.send_typing(event.source.chat_id)
-            
-            # Call the handler
+            # Call the handler (this can take a while with tool calls)
             response = await self._message_handler(event)
             
             # Send response if any
@@ -196,6 +210,13 @@ class BasePlatformAdapter(ABC):
                 )
         except Exception as e:
             print(f"[{self.name}] Error handling message: {e}")
+        finally:
+            # Stop typing indicator
+            typing_task.cancel()
+            try:
+                await typing_task
+            except asyncio.CancelledError:
+                pass
     
     def build_source(
         self,

From e7f0ffbf5d1e51e7a3cbefcc43e5b6180a37303b Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 14:54:43 -0800
Subject: [PATCH 24/48] Add tool progress notifications for messaging channels

- Introduced a new callback mechanism in the AIAgent class to send tool progress messages during execution, enhancing user feedback in messaging platforms.
- Updated the GatewayRunner to support tool progress notifications, allowing users to enable or disable this feature via environment variables.
- Enhanced the CLI setup wizard to prompt users for enabling tool progress messages and selecting the notification mode (all or new), improving configuration options.
- Registered `HERMES_TOOL_PROGRESS` and `HERMES_TOOL_PROGRESS_MODE` in `OPTIONAL_ENV_VARS` with descriptions, and corrected the documented `HERMES_MAX_ITERATIONS` default to 60.
---
 gateway/run.py       | 92 ++++++++++++++++++++++++++++++++++++++++++--
 hermes_cli/config.py | 14 ++++++-
 hermes_cli/setup.py  | 22 +++++++++++
 run_agent.py         | 17 ++++++++
 4 files changed, 141 insertions(+), 4 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 3bb32239f5..06937d5450 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -349,6 +349,7 @@ class GatewayRunner:
         This is run in a thread pool to not block the event loop.
         """
         from run_agent import AIAgent
+        import queue
         
         # Determine toolset based on platform
         toolset_map = {
@@ -359,6 +360,76 @@ class GatewayRunner:
         }
         toolset = toolset_map.get(source.platform, "hermes-telegram")
         
+        # Check if tool progress notifications are enabled
+        tool_progress_enabled = os.getenv("HERMES_TOOL_PROGRESS", "").lower() in ("1", "true", "yes")
+        progress_mode = os.getenv("HERMES_TOOL_PROGRESS_MODE", "new")  # "all" or "new" (only new tools)
+        
+        # Queue for progress messages (thread-safe)
+        progress_queue = queue.Queue() if tool_progress_enabled else None
+        last_tool = [None]  # Mutable container for tracking in closure
+        
+        def progress_callback(tool_name: str, preview: str = None):
+            """Callback invoked by agent when a tool is called."""
+            if not progress_queue:
+                return
+            
+            # "new" mode: only report when tool changes
+            if progress_mode == "new" and tool_name == last_tool[0]:
+                return
+            last_tool[0] = tool_name
+            
+            # Build progress message
+            tool_emojis = {
+                "terminal": "💻",
+                "web_search": "🔍",
+                "web_extract": "📄",
+                "read_file": "📖",
+                "write_file": "✍️",
+                "list_directory": "📂",
+                "image_generate": "🎨",
+                "browser_navigate": "🌐",
+                "browser_click": "👆",
+                "moa_query": "🧠",
+            }
+            emoji = tool_emojis.get(tool_name, "⚙️")
+            
+            if tool_name == "terminal" and preview:
+                msg = f"{emoji} `{preview}`..."
+            else:
+                msg = f"{emoji} {tool_name}..."
+            
+            progress_queue.put(msg)
+        
+        # Background task to send progress messages
+        async def send_progress_messages():
+            if not progress_queue:
+                return
+            
+            adapter = self.adapters.get(source.platform)
+            if not adapter:
+                return
+            
+            while True:
+                try:
+                    # Non-blocking poll; the sleep in the queue.Empty branch paces retries
+                    msg = progress_queue.get_nowait()
+                    await adapter.send(chat_id=source.chat_id, content=msg)
+                    await asyncio.sleep(0.5)  # Small delay between messages
+                except queue.Empty:
+                    await asyncio.sleep(0.3)  # Check again soon
+                except asyncio.CancelledError:
+                    # Drain remaining messages
+                    while not progress_queue.empty():
+                        try:
+                            msg = progress_queue.get_nowait()
+                            await adapter.send(chat_id=source.chat_id, content=msg)
+                        except:
+                            break
+                    return
+                except Exception as e:
+                    print(f"[Gateway] Progress message error: {e}")
+                    await asyncio.sleep(1)
+        
         def run_sync():
             # Read from env var or use default (same as CLI)
             max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))
@@ -370,6 +441,7 @@ class GatewayRunner:
                 enabled_toolsets=[toolset],
                 ephemeral_system_prompt=context_prompt,
                 session_id=session_id,
+                tool_progress_callback=progress_callback if tool_progress_enabled else None,
             )
             
             # If we have history, we need to restore it
@@ -379,9 +451,23 @@ class GatewayRunner:
             result = agent.run_conversation(message)
             return result.get("final_response", "(No response)")
         
-        # Run in thread pool to not block
-        loop = asyncio.get_event_loop()
-        response = await loop.run_in_executor(None, run_sync)
+        # Start progress message sender if enabled
+        progress_task = None
+        if tool_progress_enabled:
+            progress_task = asyncio.create_task(send_progress_messages())
+        
+        try:
+            # Run in thread pool to not block
+            loop = asyncio.get_event_loop()
+            response = await loop.run_in_executor(None, run_sync)
+        finally:
+            # Stop progress sender
+            if progress_task:
+                progress_task.cancel()
+                try:
+                    await progress_task
+                except asyncio.CancelledError:
+                    pass
         
         return response
 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 2a5833fd49..a0d98b6acb 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -203,11 +203,23 @@ OPTIONAL_ENV_VARS = {
     },
     # Agent configuration
     "HERMES_MAX_ITERATIONS": {
-        "description": "Maximum tool-calling iterations per conversation (default: 25 for messaging, 10 for CLI)",
+        "description": "Maximum tool-calling iterations per conversation (default: 60)",
         "prompt": "Max iterations",
         "url": None,
         "password": False,
     },
+    "HERMES_TOOL_PROGRESS": {
+        "description": "Send tool progress messages in messaging channels (true/false)",
+        "prompt": "Enable tool progress messages",
+        "url": None,
+        "password": False,
+    },
+    "HERMES_TOOL_PROGRESS_MODE": {
+        "description": "Progress mode: 'all' (every tool) or 'new' (only when tool changes)",
+        "prompt": "Progress mode (all/new)",
+        "url": None,
+        "password": False,
+    },
 }
 
 
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index fddf8e5837..06668d4e94 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -713,6 +713,28 @@ def run_setup_wizard(args):
     except ValueError:
         print_warning("Invalid number, keeping current value")
     
+    # Tool progress notifications (for messaging)
+    print_info("")
+    print_info("Tool Progress Notifications (Messaging only)")
+    print_info("Send status messages when the agent uses tools.")
+    print_info("Example: '💻 ls -la...' or '🔍 web_search...'")
+    
+    current_progress = get_env_value('HERMES_TOOL_PROGRESS') or 'false'
+    if prompt_yes_no("Enable tool progress messages?", current_progress.lower() in ('1', 'true', 'yes')):
+        save_env_value("HERMES_TOOL_PROGRESS", "true")
+        
+        # Progress mode
+        current_mode = get_env_value('HERMES_TOOL_PROGRESS_MODE') or 'new'
+        print_info("  Mode options:")
+        print_info("    'new' - Only when switching tools (less spam)")
+        print_info("    'all' - Every tool call")
+        mode = prompt("  Progress mode", current_mode)
+        if mode.lower() in ('all', 'new'):
+            save_env_value("HERMES_TOOL_PROGRESS_MODE", mode.lower())
+        print_success("Tool progress enabled")
+    else:
+        save_env_value("HERMES_TOOL_PROGRESS", "false")
+    
     # =========================================================================
     # Step 6: Context Compression
     # =========================================================================
diff --git a/run_agent.py b/run_agent.py
index b8cbad5813..c88d2e60f0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -600,6 +600,7 @@ class AIAgent:
         providers_order: List[str] = None,
         provider_sort: str = None,
         session_id: str = None,
+        tool_progress_callback: callable = None,
     ):
         """
         Initialize the AI Agent.
@@ -623,6 +624,7 @@ class AIAgent:
             providers_order (List[str]): OpenRouter providers to try in order (optional)
             provider_sort (str): Sort providers by price/throughput/latency (optional)
             session_id (str): Pre-generated session ID for logging (optional, auto-generated if not provided)
+            tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
         """
         self.model = model
         self.max_iterations = max_iterations
@@ -634,6 +636,8 @@ class AIAgent:
         self.log_prefix_chars = log_prefix_chars
         self.log_prefix = f"{log_prefix} " if log_prefix else ""
         self.base_url = base_url or ""  # Store for OpenRouter detection
+        self.tool_progress_callback = tool_progress_callback
+        self._last_reported_tool = None  # Track for "new tool" mode
         
         # Store OpenRouter provider preferences
         self.providers_allowed = providers_allowed
@@ -1793,6 +1797,19 @@ class AIAgent:
                             args_str = json.dumps(function_args, ensure_ascii=False)
                             args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
                             print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
+                        
+                        # Fire progress callback if registered (for messaging platforms)
+                        if self.tool_progress_callback:
+                            try:
+                                # Build preview for terminal commands
+                                if function_name == "terminal":
+                                    cmd = function_args.get("command", "")
+                                    preview = cmd[:50] + "..." if len(cmd) > 50 else cmd
+                                else:
+                                    preview = None
+                                self.tool_progress_callback(function_name, preview)
+                            except Exception as cb_err:
+                                logging.debug(f"Tool progress callback error: {cb_err}")
 
                         tool_start_time = time.time()
 

From 9d9eea9ac970670699ae6f68707d7660dd62f153 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 14:57:27 -0800
Subject: [PATCH 25/48] Enhance agent configuration and documentation for tool
 progress and working directory

- Updated the `AIAgent` signature shown in AGENTS.md to list the `max_iterations`, `quiet_mode`, and `tool_progress_callback` parameters (documentation only; this commit changes no code).
- Added detailed documentation on working directory behavior for CLI and messaging platforms, clarifying the use of `MESSAGING_CWD`.
- Documented tool progress notifications in messaging, which let users receive real-time updates during tool execution.
- Updated relevant sections in AGENTS.md, README.md, and messaging.md to reflect these enhancements and provide clearer setup instructions.
---
 AGENTS.md         | 42 +++++++++++++++++++++++++++++--
 README.md         | 37 +++++++++++++++++++++++++++
 docs/messaging.md | 64 +++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index b495704dd7..aa64b42dd6 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -65,10 +65,12 @@ class AIAgent:
         model: str = "anthropic/claude-sonnet-4",
         api_key: str = None,
         base_url: str = "https://openrouter.ai/api/v1",
-        max_turns: int = 20,
+        max_iterations: int = 60,        # Max tool-calling loops
         enabled_toolsets: list = None,
         disabled_toolsets: list = None,
         verbose_logging: bool = False,
+        quiet_mode: bool = False,         # Suppress progress output
+        tool_progress_callback: callable = None,  # Called on each tool use
     ):
         # Initialize OpenAI client, load tools based on toolsets
         ...
@@ -194,8 +196,23 @@ TELEGRAM_ALLOWED_USERS=123456789,987654   # Comma-separated user IDs (from @user
 # Discord  
 DISCORD_BOT_TOKEN=MTIz...                 # From Developer Portal
 DISCORD_ALLOWED_USERS=123456789012345678  # Comma-separated user IDs
+
+# Agent Behavior
+HERMES_MAX_ITERATIONS=60                  # Max tool-calling iterations
+MESSAGING_CWD=/home/myuser                # Terminal working directory for messaging
+
+# Tool Progress (optional)
+HERMES_TOOL_PROGRESS=true                 # Send progress messages
+HERMES_TOOL_PROGRESS_MODE=new             # "new" or "all"
 ```
 
+### Working Directory Behavior
+
+- **CLI (`hermes` command)**: Uses current directory (`.` → `os.getcwd()`)
+- **Messaging (Telegram/Discord)**: Uses `MESSAGING_CWD` (default: home directory)
+
+This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location.
+
 ### Security (User Allowlists):
 
 **IMPORTANT**: Without an allowlist, anyone who finds your bot can use it!
@@ -208,6 +225,21 @@ Users can find their IDs:
 - **Telegram**: Message [@userinfobot](https://t.me/userinfobot)
 - **Discord**: Enable Developer Mode, right-click name → Copy ID
 
+### Tool Progress Notifications
+
+When `HERMES_TOOL_PROGRESS=true`, the bot sends status messages as it works:
+- `` 💻 `ls -la`... `` (terminal commands show the actual command)
+- `🔍 web_search...`
+- `📄 web_extract...`
+
+Modes:
+- `new`: Only when switching to a different tool (less spam)
+- `all`: Every single tool call
+
+### Typing Indicator
+
+The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences.
+
 ### Platform Toolsets:
 
 Each platform has a dedicated toolset in `toolsets.py`:
@@ -293,12 +325,18 @@ API keys are loaded from `~/.hermes/.env`:
 
 Terminal tool configuration (in `~/.hermes/config.yaml`):
 - `terminal.backend` - Backend: local, docker, singularity, modal, or ssh
-- `terminal.cwd` - Working directory ("." = current directory)
+- `terminal.cwd` - Working directory for CLI ("." = current directory)
 - `terminal.docker_image` - Image for Docker backend
 - `terminal.singularity_image` - Image for Singularity backend
 - `terminal.modal_image` - Image for Modal backend
 - SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env
 
+Agent behavior (in `~/.hermes/.env`):
+- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 60)
+- `MESSAGING_CWD` - Working directory for messaging platforms (default: ~)
+- `HERMES_TOOL_PROGRESS` - Enable tool progress messages (`true`/`false`)
+- `HERMES_TOOL_PROGRESS_MODE` - Progress mode: `new` (tool changes) or `all`
+
 ### Dangerous Command Approval
 
 The terminal tool includes safety checks for potentially destructive commands (e.g., `rm -rf`, `DROP TABLE`, `chmod 777`, etc.):
diff --git a/README.md b/README.md
index 047bec2e0c..8a999cb1a6 100644
--- a/README.md
+++ b/README.md
@@ -241,6 +241,33 @@ DISCORD_ALLOWED_USERS=123456789012345678
 | `/new` or `/reset` | Start fresh conversation |
 | `/status` | Show session info |
 
+#### Working Directory
+
+- **CLI (`hermes`)**: Uses current directory where you run the command
+- **Messaging**: Uses `MESSAGING_CWD` (default: home directory `~`)
+
+```bash
+# Set custom messaging working directory in ~/.hermes/.env
+MESSAGING_CWD=/home/myuser/projects
+```
+
+#### Tool Progress Notifications
+
+Get real-time updates as the agent works:
+
+```bash
+# Enable in ~/.hermes/.env
+HERMES_TOOL_PROGRESS=true
+HERMES_TOOL_PROGRESS_MODE=new    # or "all" for every tool call
+```
+
+When enabled, you'll see messages like:
+```
+💻 `ls -la`...
+🔍 web_search...
+📄 web_extract...
+```
+
 See [docs/messaging.md](docs/messaging.md) for WhatsApp and advanced setup.
 
 ### ⏰ Scheduled Tasks (Cron)
@@ -473,9 +500,19 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t
 | Variable | Description |
 |----------|-------------|
 | `TELEGRAM_BOT_TOKEN` | Telegram bot token (@BotFather) |
+| `TELEGRAM_ALLOWED_USERS` | Comma-separated user IDs allowed to use bot |
 | `TELEGRAM_HOME_CHANNEL` | Default channel for cron delivery |
 | `DISCORD_BOT_TOKEN` | Discord bot token |
+| `DISCORD_ALLOWED_USERS` | Comma-separated user IDs allowed to use bot |
 | `DISCORD_HOME_CHANNEL` | Default channel for cron delivery |
+| `MESSAGING_CWD` | Working directory for terminal in messaging (default: ~) |
+
+**Agent Behavior:**
+| Variable | Description |
+|----------|-------------|
+| `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 60) |
+| `HERMES_TOOL_PROGRESS` | Send progress messages when using tools (`true`/`false`) |
+| `HERMES_TOOL_PROGRESS_MODE` | `new` (only when tool changes) or `all` (every call) |
 
 **Context Compression:**
 | Variable | Description |
diff --git a/docs/messaging.md b/docs/messaging.md
index 5059401c21..fa3d447ddb 100644
--- a/docs/messaging.md
+++ b/docs/messaging.md
@@ -164,7 +164,7 @@ There are **three ways** to configure the gateway (in order of precedence):
 
 ### 1. Environment Variables (`.env` file) - Recommended for Quick Setup
 
-Add to your `.env` file in the project root:
+Add to your `~/.hermes/.env` file:
 
 ```bash
 # =============================================================================
@@ -173,6 +173,7 @@ Add to your `.env` file in the project root:
 
 # Telegram - get from @BotFather on Telegram
 TELEGRAM_BOT_TOKEN=your_telegram_bot_token
+TELEGRAM_ALLOWED_USERS=123456789,987654321    # Security: restrict to these user IDs
 
 # Optional: Default channel for cron job delivery
 TELEGRAM_HOME_CHANNEL=-1001234567890
@@ -180,6 +181,7 @@ TELEGRAM_HOME_CHANNEL_NAME="My Notes"
 
 # Discord - get from Discord Developer Portal
 DISCORD_BOT_TOKEN=your_discord_bot_token
+DISCORD_ALLOWED_USERS=123456789012345678      # Security: restrict to these user IDs
 
 # Optional: Default channel for cron job delivery
 DISCORD_HOME_CHANNEL=123456789012345678
@@ -188,6 +190,26 @@ DISCORD_HOME_CHANNEL_NAME="#bot-updates"
 # WhatsApp - requires Node.js bridge setup
 WHATSAPP_ENABLED=true
 
+# =============================================================================
+# AGENT SETTINGS
+# =============================================================================
+
+# Max tool-calling iterations per conversation (default: 60)
+HERMES_MAX_ITERATIONS=60
+
+# Working directory for terminal commands (default: home ~)
+MESSAGING_CWD=/home/myuser
+
+# =============================================================================
+# TOOL PROGRESS NOTIFICATIONS
+# =============================================================================
+
+# Show progress messages as agent uses tools
+HERMES_TOOL_PROGRESS=true
+
+# Mode: "new" (only when tool changes) or "all" (every tool call)
+HERMES_TOOL_PROGRESS_MODE=new
+
 # =============================================================================
 # SESSION SETTINGS
 # =============================================================================
@@ -247,11 +269,43 @@ Each platform has its own toolset for security:
 | Platform | Toolset | Capabilities |
 |----------|---------|--------------|
 | CLI | `hermes-cli` | Full access (terminal, browser, etc.) |
-| Telegram | `hermes-telegram` | Web, vision, skills, cronjobs |
-| Discord | `hermes-discord` | Web search, vision, skills, cronjobs |
-| WhatsApp | `hermes-whatsapp` | Web, terminal, vision, skills, cronjobs |
+| Telegram | `hermes-telegram` | Full tools including terminal |
+| Discord | `hermes-discord` | Full tools including terminal |
+| WhatsApp | `hermes-whatsapp` | Full tools including terminal |
 
-Discord has a more limited toolset because it often runs in public servers.
+## User Experience Features
+
+### Typing Indicator
+
+The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences.
+
+### Tool Progress Notifications
+
+When `HERMES_TOOL_PROGRESS=true`, the bot sends status messages as it works:
+
+```
+💻 `ls -la`...
+🔍 web_search...
+📄 web_extract...
+🎨 image_generate...
+```
+
+Terminal commands show the actual command (truncated to 50 chars). Other tools just show the tool name.
+
+**Modes:**
+- `new`: Only sends message when switching to a different tool (less spam)
+- `all`: Sends message for every single tool call
+
+### Working Directory
+
+- **CLI (`hermes` command)**: Uses current directory where you run the command
+- **Messaging**: Uses `MESSAGING_CWD` (default: home directory `~`)
+
+This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location.
+
+### Max Iterations
+
+If the agent hits the max iteration limit while working, instead of a generic error, it asks the model to summarize what it found so far. This gives you a useful response even when the task couldn't be fully completed.
 
 ## Cron Job Delivery
 

From 488deb04a4f9ab3433aefbb4077aebbe51e3191d Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 15:02:41 -0800
Subject: [PATCH 26/48] fix telegram, import asyncio

---
 gateway/platforms/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 6601c52de4..839561b507 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -5,6 +5,7 @@ All platform adapters (Telegram, Discord, WhatsApp) inherit from this
 and implement the required methods.
 """
 
+import asyncio
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from datetime import datetime

From 221fb17c5e3956520852f19a83ade4411fe70c07 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 15:06:18 -0800
Subject: [PATCH 27/48] Refine typing indicator behavior in message handling

- Adjusted the `_keep_typing` method to refresh the typing indicator every 2 seconds instead of 4, improving responsiveness after progress messages.
- Updated the `GatewayRunner` to restore the typing indicator after sending progress messages, enhancing user experience during message processing.
---
 gateway/platforms/base.py | 5 +++--
 gateway/run.py            | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 839561b507..21a806c1c8 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -172,11 +172,12 @@ class BasePlatformAdapter(ABC):
         """
         pass
     
-    async def _keep_typing(self, chat_id: str, interval: float = 4.0) -> None:
+    async def _keep_typing(self, chat_id: str, interval: float = 2.0) -> None:
         """
         Continuously send typing indicator until cancelled.
         
-        Telegram/Discord typing status expires after ~5 seconds, so we refresh every 4.
+        Telegram/Discord typing status expires after ~5 seconds, so we refresh every 2
+        to recover quickly after progress messages interrupt it.
         """
         try:
             while True:
diff --git a/gateway/run.py b/gateway/run.py
index 06937d5450..113707f652 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -414,7 +414,9 @@ class GatewayRunner:
                     # Non-blocking check with small timeout
                     msg = progress_queue.get_nowait()
                     await adapter.send(chat_id=source.chat_id, content=msg)
-                    await asyncio.sleep(0.5)  # Small delay between messages
+                    # Restore typing indicator after sending progress message
+                    await asyncio.sleep(0.3)
+                    await adapter.send_typing(source.chat_id)
                 except queue.Empty:
                     await asyncio.sleep(0.3)  # Check again soon
                 except asyncio.CancelledError:

From 212460289b51fe1ff64c0cf5deb450f4c9b709fe Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 15:26:59 -0800
Subject: [PATCH 28/48] Enhance skills tool to have an arg so it is more
 reliably called, and error handling in agent

- Updated the `skills_categories` function to accept a `verbose` parameter so the tool has an explicit argument and is called more reliably; skill counts per category are currently always included regardless of its value.
- Modified the `handle_skills_function_call` method to pass the `verbose` argument to `skills_categories`.
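- Improved error handling in the `AIAgent` class: when invalid JSON arguments are still detected after the retries are exhausted, a recovery message is injected into the conversation to guide the model on how to correct its tool call, instead of returning a partial result.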
- Enhanced the `GatewayRunner` to return a user-friendly error message if the agent fails to generate a final response, improving overall user experience.
---
 gateway/run.py       | 11 ++++++++++-
 model_tools.py       | 12 +++++++++---
 run_agent.py         | 24 ++++++++++++++----------
 tools/skills_tool.py |  3 ++-
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 113707f652..b2159e28e1 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -451,7 +451,16 @@ class GatewayRunner:
             # TODO: Implement proper history restoration
             
             result = agent.run_conversation(message)
-            return result.get("final_response", "(No response)")
+            
+            # Return final response, or a message if something went wrong
+            final_response = result.get("final_response")
+            if final_response:
+                return final_response
+            elif result.get("error"):
+                # Agent couldn't recover - show the error
+                return f"⚠️ {result['error']}"
+            else:
+                return "(No response generated)"
         
         # Start progress message sender if enabled
         progress_task = None
diff --git a/model_tools.py b/model_tools.py
index 0b48855725..e78323f60e 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -406,10 +406,15 @@ def get_skills_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "skills_categories",
-                "description": "List available skill categories. Call first if you want to discover categories, then use skills_list(category) to filter, or call skills_list if unsure.",
+                "description": "List available skill categories. Call this first to discover what skill categories exist, then use skills_list(category) to see skills in a category.",
                 "parameters": {
                     "type": "object",
-                    "properties": {},
+                    "properties": {
+                        "verbose": {
+                            "type": "boolean",
+                            "description": "If true, include skill counts per category. Default: false."
+                        }
+                    },
                     "required": []
                 }
             }
@@ -907,7 +912,8 @@ def handle_skills_function_call(function_name: str, function_args: Dict[str, Any
         str: Function result as JSON string
     """
     if function_name == "skills_categories":
-        return skills_categories()
+        verbose = function_args.get("verbose", False)
+        return skills_categories(verbose=verbose)
     
     elif function_name == "skills_list":
         category = function_args.get("category")
diff --git a/run_agent.py b/run_agent.py
index c88d2e60f0..502e6f60f7 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1725,16 +1725,20 @@ class AIAgent:
                             # Don't add anything to messages, just retry the API call
                             continue
                         else:
-                            print(f"{self.log_prefix}❌ Max retries (3) for invalid JSON arguments exceeded. Stopping as partial.")
-                            self._invalid_json_retries = 0  # Reset for next conversation
-                            return {
-                                "final_response": None,
-                                "messages": messages,  # Messages up to last valid point
-                                "api_calls": api_call_count,
-                                "completed": False,
-                                "partial": True,
-                                "error": f"Model generated invalid JSON arguments for tool '{tool_name}': {error_msg}"
-                            }
+                            # Instead of returning partial, inject a helpful message and let model recover
+                            print(f"{self.log_prefix}⚠️  Injecting recovery message for invalid JSON...")
+                            self._invalid_json_retries = 0  # Reset for next attempt
+                            
+                            # Add a user message explaining the issue
+                            recovery_msg = (
+                                f"Your tool call to '{tool_name}' had invalid JSON arguments. "
+                                f"Error: {error_msg}. "
+                                f"For tools with no required parameters, use an empty object: {{}}. "
+                                f"Please either retry the tool call with valid JSON, or respond without using that tool."
+                            )
+                            messages.append({"role": "user", "content": recovery_msg})
+                            # Continue the loop - model will see this message and can recover
+                            continue
                     
                     # Reset retry counter on successful JSON validation
                     self._invalid_json_retries = 0
diff --git a/tools/skills_tool.py b/tools/skills_tool.py
index 258284ad0b..a275c58dee 100644
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -349,7 +349,7 @@ def _load_category_description(category_dir: Path) -> Optional[str]:
         return None
 
 
-def skills_categories(task_id: str = None) -> str:
+def skills_categories(verbose: bool = False, task_id: str = None) -> str:
     """
     List available skill categories with descriptions (progressive disclosure tier 0).
     
@@ -358,6 +358,7 @@ def skills_categories(task_id: str = None) -> str:
     or first paragraph to explain what skills are in that category.
     
     Args:
+        verbose: If True, include skill counts per category (default: False, but currently always included)
         task_id: Optional task identifier (unused, for API consistency)
         
     Returns:

From beeb7896e07e3f6608042857585349f0ec6cb8b8 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 15:42:54 -0800
Subject: [PATCH 29/48] Refactor message handling and error logging in agent
 and gateway

- Updated the AIAgent class to extract the first user message for trajectory formatting, improving the accuracy of user queries in the trajectory format.
- Enhanced the GatewayRunner to convert transcript history into the agent format, ensuring proper handling of message roles and content.
- Updated the typing indicator comment in the base platform adapter to match the 2-second refresh interval introduced earlier.
- Improved error handling in the message sending process for the Telegram adapter, implementing a fallback mechanism for Markdown parsing failures, and logging send failures for better debugging.
---
 gateway/platforms/base.py     | 18 ++++++++++++++++--
 gateway/platforms/telegram.py | 28 +++++++++++++++++++++-------
 gateway/run.py                | 13 +++++++++----
 run_agent.py                  | 13 ++++++++++---
 4 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 21a806c1c8..16e7daf3b2 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -196,7 +196,7 @@ class BasePlatformAdapter(ABC):
         if not self._message_handler:
             return
         
-        # Start continuous typing indicator (refreshes every 4 seconds)
+        # Start continuous typing indicator (refreshes every 2 seconds)
         typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id))
         
         try:
@@ -205,13 +205,27 @@ class BasePlatformAdapter(ABC):
             
             # Send response if any
             if response:
-                await self.send(
+                result = await self.send(
                     chat_id=event.source.chat_id,
                     content=response,
                     reply_to=event.message_id
                 )
+                
+                # Log send failures (don't raise - user already saw tool progress)
+                if not result.success:
+                    print(f"[{self.name}] Failed to send response: {result.error}")
+                    # Try sending without markdown as fallback
+                    fallback_result = await self.send(
+                        chat_id=event.source.chat_id,
+                        content=f"(Response formatting failed, plain text:)\n\n{response[:3500]}",
+                        reply_to=event.message_id
+                    )
+                    if not fallback_result.success:
+                        print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
         except Exception as e:
             print(f"[{self.name}] Error handling message: {e}")
+            import traceback
+            traceback.print_exc()
         finally:
             # Stop typing indicator
             typing_task.cancel()
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index ef46351c6a..10c67c96b9 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -142,13 +142,27 @@ class TelegramAdapter(BasePlatformAdapter):
             thread_id = metadata.get("thread_id") if metadata else None
             
             for i, chunk in enumerate(chunks):
-                msg = await self._bot.send_message(
-                    chat_id=int(chat_id),
-                    text=chunk,
-                    parse_mode=ParseMode.MARKDOWN,
-                    reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
-                    message_thread_id=int(thread_id) if thread_id else None,
-                )
+                # Try Markdown first, fall back to plain text if it fails
+                try:
+                    msg = await self._bot.send_message(
+                        chat_id=int(chat_id),
+                        text=chunk,
+                        parse_mode=ParseMode.MARKDOWN,
+                        reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                        message_thread_id=int(thread_id) if thread_id else None,
+                    )
+                except Exception as md_error:
+                    # Markdown parsing failed, try plain text
+                    if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
+                        msg = await self._bot.send_message(
+                            chat_id=int(chat_id),
+                            text=chunk,
+                            parse_mode=None,  # Plain text
+                            reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                            message_thread_id=int(thread_id) if thread_id else None,
+                        )
+                    else:
+                        raise  # Re-raise if not a parse error
                 message_ids.append(str(msg.message_id))
             
             return SendResult(
diff --git a/gateway/run.py b/gateway/run.py
index b2159e28e1..b10f4cf54e 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -446,11 +446,16 @@ class GatewayRunner:
                 tool_progress_callback=progress_callback if tool_progress_enabled else None,
             )
             
-            # If we have history, we need to restore it
-            # For now, we pass the message directly
-            # TODO: Implement proper history restoration
+            # Convert transcript history to agent format
+            # Transcript has timestamps; agent expects {"role": ..., "content": ...}
+            agent_history = []
+            for msg in history:
+                role = msg.get("role")
+                content = msg.get("content")
+                if role and content:
+                    agent_history.append({"role": role, "content": content})
             
-            result = agent.run_conversation(message)
+            result = agent.run_conversation(message, conversation_history=agent_history)
             
             # Return final response, or a message if something went wrong
             final_response = result.get("final_response")
diff --git a/run_agent.py b/run_agent.py
index 502e6f60f7..72541f6bdb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1272,9 +1272,16 @@ class AIAgent:
             return
         
         try:
-            # Convert to trajectory format (reuse existing method)
-            # Use empty string as user_query since it's embedded in messages
-            trajectory = self._convert_to_trajectory_format(messages, "", True)
+            # Extract the first user message for the trajectory format
+            # The first message should be the user's initial query
+            first_user_query = ""
+            for msg in messages:
+                if msg.get("role") == "user":
+                    first_user_query = msg.get("content", "")
+                    break
+            
+            # Convert to trajectory format
+            trajectory = self._convert_to_trajectory_format(messages, first_user_query, True)
             
             # Build the session log entry
             entry = {

From 9bfe185a2e31bcc5743cf30cc195d7b35a14b565 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 16:15:49 -0800
Subject: [PATCH 30/48] Implement interrupt handling for agent and CLI input
 and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.
---
 cli.py         | 200 ++++++++++++++++++++++++++++++++++++++++++-------
 gateway/run.py | 104 +++++++++++++++++++++++--
 run_agent.py   |  66 +++++++++++++++-
 3 files changed, 336 insertions(+), 34 deletions(-)

diff --git a/cli.py b/cli.py
index 9718ebea01..795c880bee 100755
--- a/cli.py
+++ b/cli.py
@@ -33,6 +33,15 @@ from prompt_toolkit.history import FileHistory
 from prompt_toolkit.styles import Style as PTStyle
 from prompt_toolkit.formatted_text import HTML
 from prompt_toolkit.patch_stdout import patch_stdout
+from prompt_toolkit.application import Application, get_app
+from prompt_toolkit.buffer import Buffer
+from prompt_toolkit.layout import Layout, HSplit, Window, FormattedTextControl
+from prompt_toolkit.layout.processors import BeforeInput
+from prompt_toolkit.widgets import TextArea
+from prompt_toolkit.key_binding import KeyBindings
+import asyncio
+import threading
+import queue
 
 # Load environment variables first
 from dotenv import load_dotenv
@@ -1284,17 +1293,52 @@ class HermesCLI:
         print("─" * 60, flush=True)
         
         try:
-            # Run the conversation
-            result = self.agent.run_conversation(
-                user_message=message,
-                conversation_history=self.conversation_history[:-1],  # Exclude the message we just added
-            )
+            # Run the conversation with interrupt monitoring
+            result = None
+            
+            def run_agent():
+                nonlocal result
+                result = self.agent.run_conversation(
+                    user_message=message,
+                    conversation_history=self.conversation_history[:-1],  # Exclude the message we just added
+                )
+            
+            # Start agent in background thread
+            agent_thread = threading.Thread(target=run_agent)
+            agent_thread.start()
+            
+            # Monitor for new input in the pending queue while agent runs
+            interrupt_msg = None
+            while agent_thread.is_alive():
+                # Check if there's new input in the queue (from the persistent input area)
+                if hasattr(self, '_pending_input'):
+                    try:
+                        interrupt_msg = self._pending_input.get(timeout=0.1)
+                        if interrupt_msg:
+                            print(f"\n⚡ New message detected, interrupting...")
+                            self.agent.interrupt(interrupt_msg)
+                            break
+                    except:
+                        pass  # Queue empty or timeout, continue waiting
+                else:
+                    # Fallback if no queue (shouldn't happen)
+                    agent_thread.join(0.1)
+            
+            agent_thread.join()  # Ensure agent thread completes
             
             # Update history with full conversation
-            self.conversation_history = result.get("messages", self.conversation_history)
+            self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history
             
             # Get the final response
-            response = result.get("final_response", "")
+            response = result.get("final_response", "") if result else ""
+            
+            # Handle interrupt - check if we were interrupted
+            pending_message = None
+            if result and result.get("interrupted"):
+                pending_message = result.get("interrupt_message") or interrupt_msg
+                # Add indicator that we were interrupted
+                if response and pending_message:
+                    response = response + "\n\n---\n_[Interrupted - processing new message]_"
             
             if response:
                 # Use simple print for compatibility with prompt_toolkit's patch_stdout
@@ -1307,6 +1351,11 @@ class HermesCLI:
                 print()
                 print("─" * 60)
             
+            # If we have a pending message from interrupt, process it immediately
+            if pending_message:
+                print(f"\n📨 Processing: '{pending_message[:50]}{'...' if len(pending_message) > 50 else ''}'")
+                return self.chat(pending_message)  # Recursive call to handle the new message
+            
             return response
             
         except Exception as e:
@@ -1345,22 +1394,101 @@ class HermesCLI:
             return None
     
     def run(self):
-        """Run the interactive CLI loop with fixed input at bottom."""
+        """Run the interactive CLI loop with persistent input at bottom."""
         self.show_banner()
-        
-        # These Rich prints work fine BEFORE patch_stdout
         self.console.print("[#FFF8DC]Welcome to Hermes Agent! Type your message or /help for commands.[/]")
         self.console.print()
         
-        # Use patch_stdout to ensure all output appears above the input prompt
-        with patch_stdout():
-            while True:
+        # State for async operation
+        self._agent_running = False
+        self._pending_input = queue.Queue()
+        self._should_exit = False
+        
+        # Create a persistent input area using prompt_toolkit Application
+        input_buffer = Buffer()
+        
+        # Key bindings for the input area
+        kb = KeyBindings()
+        
+        @kb.add('enter')
+        def handle_enter(event):
+            """Handle Enter key - submit input."""
+            text = event.app.current_buffer.text.strip()
+            if text:
+                # Store the input
+                self._pending_input.put(text)
+                # Clear the buffer
+                event.app.current_buffer.reset()
+        
+        @kb.add('c-c')
+        def handle_ctrl_c(event):
+            """Handle Ctrl+C - interrupt or exit."""
+            if self._agent_running and self.agent:
+                print("\n⚡ Interrupting agent...")
+                self.agent.interrupt()
+            else:
+                self._should_exit = True
+                event.app.exit()
+        
+        @kb.add('c-d')
+        def handle_ctrl_d(event):
+            """Handle Ctrl+D - exit."""
+            self._should_exit = True
+            event.app.exit()
+        
+        # Create the input area widget
+        input_area = TextArea(
+            height=1,
+            prompt='❯ ',
+            style='class:input-area',
+            multiline=False,
+            wrap_lines=False,
+        )
+        
+        # Create a status line that shows when agent is working
+        def get_status_text():
+            if self._agent_running:
+                return [('class:status', ' 🔄 Agent working... (type to interrupt) ')]
+            return [('class:status', '')]
+        
+        status_window = Window(
+            content=FormattedTextControl(get_status_text),
+            height=1,
+        )
+        
+        # Layout with status and input at bottom
+        layout = Layout(
+            HSplit([
+                Window(height=0),  # Spacer that expands
+                status_window,
+                input_area,
+            ])
+        )
+        
+        # Style for the application
+        style = PTStyle.from_dict({
+            'input-area': '#FFF8DC',
+            'status': 'bg:#333333 #FFD700',
+        })
+        
+        # Create the application
+        app = Application(
+            layout=layout,
+            key_bindings=kb,
+            style=style,
+            full_screen=False,
+            mouse_support=False,
+        )
+        
+        # Background thread to process inputs and run agent
+        def process_loop():
+            while not self._should_exit:
                 try:
-                    user_input = self.get_input()
-                    
-                    if user_input is None:
-                        print("\nGoodbye! ⚕")
-                        break
+                    # Check for pending input with timeout
+                    try:
+                        user_input = self._pending_input.get(timeout=0.1)
+                    except queue.Empty:
+                        continue
                     
                     if not user_input:
                         continue
@@ -1368,16 +1496,38 @@ class HermesCLI:
                     # Check for commands
                     if user_input.startswith("/"):
                         if not self.process_command(user_input):
-                            print("\nGoodbye! ⚕")
-                            break
+                            self._should_exit = True
+                            # Schedule app exit
+                            if app.is_running:
+                                app.exit()
                         continue
                     
-                    # Regular chat message
-                    self.chat(user_input)
+                    # Regular chat - run agent
+                    self._agent_running = True
+                    app.invalidate()  # Refresh status line
                     
-                except KeyboardInterrupt:
-                    print("\nInterrupted. Type /quit to exit.")
-                    continue
+                    try:
+                        self.chat(user_input)
+                    finally:
+                        self._agent_running = False
+                        app.invalidate()  # Refresh status line
+                    
+                except Exception as e:
+                    print(f"Error: {e}")
+        
+        # Start processing thread
+        process_thread = threading.Thread(target=process_loop, daemon=True)
+        process_thread.start()
+        
+        # Run the application with patch_stdout for proper output handling
+        try:
+            with patch_stdout():
+                app.run()
+        except (EOFError, KeyboardInterrupt):
+            pass
+        finally:
+            self._should_exit = True
+            print("\nGoodbye! ⚕")
 
 
 # ============================================================================
diff --git a/gateway/run.py b/gateway/run.py
index b10f4cf54e..76f9d4db41 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -72,6 +72,11 @@ class GatewayRunner:
         self.delivery_router = DeliveryRouter(self.config)
         self._running = False
         self._shutdown_event = asyncio.Event()
+        
+        # Track running agents per session for interrupt support
+        # Key: session_key, Value: AIAgent instance
+        self._running_agents: Dict[str, Any] = {}
+        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
     
     async def start(self) -> bool:
         """
@@ -217,10 +222,11 @@ class GatewayRunner:
         This is the core message processing pipeline:
         1. Check user authorization
         2. Check for commands (/new, /reset, etc.)
-        3. Get or create session
-        4. Build context for agent
-        5. Run agent conversation
-        6. Return response
+        3. Check for running agent and interrupt if needed
+        4. Get or create session
+        5. Build context for agent
+        6. Run agent conversation
+        7. Return response
         """
         source = event.source
         
@@ -229,7 +235,7 @@ class GatewayRunner:
             print(f"[gateway] Unauthorized user: {source.user_id} ({source.user_name}) on {source.platform.value}")
             return None  # Silently ignore unauthorized users
         
-        # Check for reset commands
+        # Check for commands
         command = event.get_command()
         if command in ["new", "reset"]:
             return await self._handle_reset_command(event)
@@ -237,8 +243,21 @@ class GatewayRunner:
         if command == "status":
             return await self._handle_status_command(event)
         
+        if command == "stop":
+            return await self._handle_stop_command(event)
+        
         # Get or create session
         session_entry = self.session_store.get_or_create_session(source)
+        session_key = session_entry.session_key
+        
+        # Check if there's already a running agent for this session
+        if session_key in self._running_agents:
+            running_agent = self._running_agents[session_key]
+            print(f"[gateway] ⚡ Interrupting running agent for session {session_key[:20]}...")
+            running_agent.interrupt(event.text)
+            # Store the new message to be processed after current agent finishes
+            self._pending_messages[session_key] = event.text
+            return None  # Don't respond yet - let the interrupt handle it
         
         # Build session context
         context = build_session_context(source, self.config, session_entry)
@@ -259,7 +278,8 @@ class GatewayRunner:
                 context_prompt=context_prompt,
                 history=history,
                 source=source,
-                session_id=session_entry.session_id
+                session_id=session_entry.session_id,
+                session_key=session_key
             )
             
             # Append to transcript
@@ -309,6 +329,10 @@ class GatewayRunner:
         
         connected_platforms = [p.value for p in self.adapters.keys()]
         
+        # Check if there's an active agent
+        session_key = session_entry.session_key
+        is_running = session_key in self._running_agents
+        
         lines = [
             "📊 **Hermes Gateway Status**",
             "",
@@ -316,12 +340,26 @@ class GatewayRunner:
             f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}",
             f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}",
             f"**Tokens:** {session_entry.total_tokens:,}",
+            f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}",
             "",
             f"**Connected Platforms:** {', '.join(connected_platforms)}",
         ]
         
         return "\n".join(lines)
     
+    async def _handle_stop_command(self, event: MessageEvent) -> str:
+        """Handle /stop command - interrupt a running agent."""
+        source = event.source
+        session_entry = self.session_store.get_or_create_session(source)
+        session_key = session_entry.session_key
+        
+        if session_key in self._running_agents:
+            agent = self._running_agents[session_key]
+            agent.interrupt()
+            return "⚡ Stopping the current task... The agent will finish its current step and respond."
+        else:
+            return "No active task to stop."
+    
     def _set_session_env(self, context: SessionContext) -> None:
         """Set environment variables for the current session."""
         os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
@@ -341,12 +379,14 @@ class GatewayRunner:
         context_prompt: str,
         history: List[Dict[str, Any]],
         source: SessionSource,
-        session_id: str
+        session_id: str,
+        session_key: str = None
     ) -> str:
         """
         Run the agent with the given message and context.
         
         This is run in a thread pool to not block the event loop.
+        Supports interruption via new messages.
         """
         from run_agent import AIAgent
         import queue
@@ -432,6 +472,10 @@ class GatewayRunner:
                     print(f"[Gateway] Progress message error: {e}")
                     await asyncio.sleep(1)
         
+        # We need to share the agent instance for interrupt support
+        agent_holder = [None]  # Mutable container for the agent instance
+        result_holder = [None]  # Mutable container for the result
+        
         def run_sync():
             # Read from env var or use default (same as CLI)
             max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))
@@ -446,6 +490,9 @@ class GatewayRunner:
                 tool_progress_callback=progress_callback if tool_progress_enabled else None,
             )
             
+            # Store agent reference for interrupt support
+            agent_holder[0] = agent
+            
             # Convert transcript history to agent format
             # Transcript has timestamps; agent expects {"role": ..., "content": ...}
             agent_history = []
@@ -456,6 +503,7 @@ class GatewayRunner:
                     agent_history.append({"role": role, "content": content})
             
             result = agent.run_conversation(message, conversation_history=agent_history)
+            result_holder[0] = result
             
             # Return final response, or a message if something went wrong
             final_response = result.get("final_response")
@@ -472,14 +520,56 @@ class GatewayRunner:
         if tool_progress_enabled:
             progress_task = asyncio.create_task(send_progress_messages())
         
+        # Track this agent as running for this session (for interrupt support)
+        # We do this from a polling task once the agent has been created
+        async def track_agent():
+            # Wait for agent to be created
+            while agent_holder[0] is None:
+                await asyncio.sleep(0.05)
+            if session_key:
+                self._running_agents[session_key] = agent_holder[0]
+        
+        tracking_task = asyncio.create_task(track_agent())
+        
         try:
             # Run in thread pool to not block
             loop = asyncio.get_event_loop()
             response = await loop.run_in_executor(None, run_sync)
+            
+            # Check if we were interrupted and have a pending message
+            result = result_holder[0]
+            if result and result.get("interrupted") and session_key:
+                pending = self._pending_messages.pop(session_key, None)
+                if pending:
+                    print(f"[gateway] 📨 Processing interrupted message: '{pending[:40]}...'")
+                    # Add an indicator to the response
+                    if response:
+                        response = response + "\n\n---\n_[Interrupted - processing your new message]_"
+                    
+                    # Send the interrupted response first
+                    adapter = self.adapters.get(source.platform)
+                    if adapter and response:
+                        await adapter.send(chat_id=source.chat_id, content=response)
+                    
+                    # Now process the pending message with updated history
+                    updated_history = result.get("messages", history)
+                    return await self._run_agent(
+                        message=pending,
+                        context_prompt=context_prompt,
+                        history=updated_history,
+                        source=source,
+                        session_id=session_id,
+                        session_key=session_key
+                    )
         finally:
             # Stop progress sender
             if progress_task:
                 progress_task.cancel()
+            
+            # Clean up tracking
+            tracking_task.cancel()
+            if session_key and session_key in self._running_agents:
+                del self._running_agents[session_key]
                 try:
                     await progress_task
                 except asyncio.CancelledError:
diff --git a/run_agent.py b/run_agent.py
index 72541f6bdb..7b70289fff 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -639,6 +639,10 @@ class AIAgent:
         self.tool_progress_callback = tool_progress_callback
         self._last_reported_tool = None  # Track for "new tool" mode
         
+        # Interrupt mechanism for breaking out of tool loops
+        self._interrupt_requested = False
+        self._interrupt_message = None  # Optional message that triggered interrupt
+        
         # Store OpenRouter provider preferences
         self.providers_allowed = providers_allowed
         self.providers_ignored = providers_ignored
@@ -1302,6 +1306,42 @@ class AIAgent:
             if self.verbose_logging:
                 logging.warning(f"Failed to save session log: {e}")
     
+    def interrupt(self, message: str = None) -> None:
+        """
+        Request the agent to interrupt its current tool-calling loop.
+        
+        Call this from another thread (e.g., input handler, message receiver)
+        to gracefully stop the agent and process a new message.
+        
+        Args:
+            message: Optional new message that triggered the interrupt.
+                     If provided, it is returned as "interrupt_message" in the interrupted run's result.
+        
+        Example (CLI):
+            # In a separate input thread:
+            if user_typed_something:
+                agent.interrupt(user_input)
+        
+        Example (Messaging):
+            # When new message arrives for active session:
+            if session_has_running_agent:
+                running_agent.interrupt(new_message.text)
+        """
+        self._interrupt_requested = True
+        self._interrupt_message = message
+        if not self.quiet_mode:
+            print(f"\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else ""))
+    
+    def clear_interrupt(self) -> None:
+        """Clear any pending interrupt request."""
+        self._interrupt_requested = False
+        self._interrupt_message = None
+    
+    @property
+    def is_interrupted(self) -> bool:
+        """Check if an interrupt has been requested."""
+        return self._interrupt_requested
+    
     def run_conversation(
         self,
         user_message: str,
@@ -1359,8 +1399,19 @@ class AIAgent:
         # Main conversation loop
         api_call_count = 0
         final_response = None
+        interrupted = False
+        
+        # Clear any stale interrupt state at start
+        self.clear_interrupt()
         
         while api_call_count < self.max_iterations:
+            # Check for interrupt request (e.g., user sent new message)
+            if self._interrupt_requested:
+                interrupted = True
+                if not self.quiet_mode:
+                    print(f"\n⚡ Breaking out of tool loop due to interrupt...")
+                break
+            
             api_call_count += 1
             
             # Prepare messages for API call
@@ -2059,13 +2110,24 @@ class AIAgent:
         self._session_messages = messages
         self._save_session_log(messages)
         
-        return {
+        # Build result with interrupt info if applicable
+        result = {
             "final_response": final_response,
             "messages": messages,
             "api_calls": api_call_count,
             "completed": completed,
-            "partial": False  # True only when stopped due to invalid tool calls
+            "partial": False,  # True only when stopped due to invalid tool calls
+            "interrupted": interrupted,
         }
+        
+        # Include interrupt message if one triggered the interrupt
+        if interrupted and self._interrupt_message:
+            result["interrupt_message"] = self._interrupt_message
+        
+        # Clear interrupt state after handling
+        self.clear_interrupt()
+        
+        return result
     
     def chat(self, message: str) -> str:
         """

From 51a6b7d2b5dcbe06a8edce9b0c8d176d91068276 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 20:10:15 -0800
Subject: [PATCH 31/48] Implement interrupt handling for message processing in
 GatewayRunner and BasePlatformAdapter

- Introduced a monitoring mechanism in GatewayRunner to detect incoming messages while an agent is active, allowing for graceful interruption and processing of new messages.
- Enhanced BasePlatformAdapter to manage active sessions and pending messages, ensuring that new messages can interrupt ongoing tasks effectively.
- Improved the handling of pending messages by checking for interrupts and processing them in the correct order, enhancing user experience during message interactions.
- Updated the cleanup process for active tasks to ensure proper resource management after interruptions.
---
 gateway/platforms/base.py | 58 ++++++++++++++++++++++++-
 gateway/run.py            | 90 +++++++++++++++++++++++++++------------
 2 files changed, 118 insertions(+), 30 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 16e7daf3b2..aea73bb09d 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -108,6 +108,11 @@ class BasePlatformAdapter(ABC):
         self.platform = platform
         self._message_handler: Optional[MessageHandler] = None
         self._running = False
+        
+        # Track active message handlers per session for interrupt support
+        # Key: session_key (e.g., chat_id), Value: asyncio.Event that is set to signal an interrupt
+        self._active_sessions: Dict[str, asyncio.Event] = {}
+        self._pending_messages: Dict[str, MessageEvent] = {}
     
     @property
     def name(self) -> str:
@@ -190,12 +195,33 @@ class BasePlatformAdapter(ABC):
         """
         Process an incoming message.
         
-        Calls the registered message handler and sends the response.
-        Keeps typing indicator active throughout processing.
+        This method returns quickly by spawning background tasks.
+        This allows new messages to be processed even while an agent is running,
+        enabling interruption support.
         """
         if not self._message_handler:
             return
         
+        session_key = event.source.chat_id
+        
+        # Check if there's already an active handler for this session
+        if session_key in self._active_sessions:
+            # Store this as a pending message - it will interrupt the running agent
+            print(f"[{self.name}] ⚡ New message while session {session_key} is active - triggering interrupt")
+            self._pending_messages[session_key] = event
+            # Signal the interrupt (the processing task checks this)
+            self._active_sessions[session_key].set()
+            return  # Don't process now - will be handled after current task finishes
+        
+        # Spawn background task to process this message
+        asyncio.create_task(self._process_message_background(event, session_key))
+    
+    async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
+        """Background task that actually processes the message."""
+        # Create interrupt event for this session
+        interrupt_event = asyncio.Event()
+        self._active_sessions[session_key] = interrupt_event
+        
         # Start continuous typing indicator (refreshes every 2 seconds)
         typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id))
         
@@ -222,6 +248,23 @@ class BasePlatformAdapter(ABC):
                     )
                     if not fallback_result.success:
                         print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
+            
+            # Check if there's a pending message that was queued during our processing
+            if session_key in self._pending_messages:
+                pending_event = self._pending_messages.pop(session_key)
+                print(f"[{self.name}] 📨 Processing queued message from interrupt")
+                # Clean up current session before processing pending
+                if session_key in self._active_sessions:
+                    del self._active_sessions[session_key]
+                typing_task.cancel()
+                try:
+                    await typing_task
+                except asyncio.CancelledError:
+                    pass
+                # Process pending message by awaiting the background handler again (inline, not a new task)
+                await self._process_message_background(pending_event, session_key)
+                return  # Already cleaned up
+                
         except Exception as e:
             print(f"[{self.name}] Error handling message: {e}")
             import traceback
@@ -233,6 +276,17 @@ class BasePlatformAdapter(ABC):
                 await typing_task
             except asyncio.CancelledError:
                 pass
+            # Clean up session tracking
+            if session_key in self._active_sessions:
+                del self._active_sessions[session_key]
+    
+    def has_pending_interrupt(self, session_key: str) -> bool:
+        """Check if there's a pending interrupt for a session."""
+        return session_key in self._active_sessions and self._active_sessions[session_key].is_set()
+    
+    def get_pending_message(self, session_key: str) -> Optional[MessageEvent]:
+        """Return any pending message for a session without removing it."""
+        return self._pending_messages.get(session_key)
     
     def build_source(
         self,
diff --git a/gateway/run.py b/gateway/run.py
index 76f9d4db41..08de429c43 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -531,6 +531,27 @@ class GatewayRunner:
         
         tracking_task = asyncio.create_task(track_agent())
         
+        # Monitor for interrupts from the adapter (new messages arriving)
+        async def monitor_for_interrupt():
+            adapter = self.adapters.get(source.platform)
+            if not adapter:
+                return
+            
+            chat_id = source.chat_id
+            while True:
+                await asyncio.sleep(0.2)  # Check every 200ms
+                # Check if adapter has a pending interrupt for this session
+                if hasattr(adapter, 'has_pending_interrupt') and adapter.has_pending_interrupt(chat_id):
+                    agent = agent_holder[0]
+                    if agent:
+                        pending_event = adapter.get_pending_message(chat_id)
+                        pending_text = pending_event.text if pending_event else None
+                        print(f"[gateway] ⚡ Interrupt detected from adapter, signaling agent...")
+                        agent.interrupt(pending_text)
+                        break
+        
+        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
+        
         try:
             # Run in thread pool to not block
             loop = asyncio.get_event_loop()
@@ -538,42 +559,55 @@ class GatewayRunner:
             
             # Check if we were interrupted and have a pending message
             result = result_holder[0]
-            if result and result.get("interrupted") and session_key:
-                pending = self._pending_messages.pop(session_key, None)
-                if pending:
-                    print(f"[gateway] 📨 Processing interrupted message: '{pending[:40]}...'")
-                    # Add an indicator to the response
-                    if response:
-                        response = response + "\n\n---\n_[Interrupted - processing your new message]_"
-                    
-                    # Send the interrupted response first
-                    adapter = self.adapters.get(source.platform)
-                    if adapter and response:
-                        await adapter.send(chat_id=source.chat_id, content=response)
-                    
-                    # Now process the pending message with updated history
-                    updated_history = result.get("messages", history)
-                    return await self._run_agent(
-                        message=pending,
-                        context_prompt=context_prompt,
-                        history=updated_history,
-                        source=source,
-                        session_id=session_id,
-                        session_key=session_key
-                    )
+            adapter = self.adapters.get(source.platform)
+            
+            # Get pending message from adapter if interrupted
+            pending = None
+            if result and result.get("interrupted") and adapter:
+                pending_event = adapter.get_pending_message(source.chat_id)
+                if pending_event:
+                    pending = pending_event.text
+                elif result.get("interrupt_message"):
+                    pending = result.get("interrupt_message")
+            
+            if pending:
+                print(f"[gateway] 📨 Processing interrupted message: '{pending[:40]}...'")
+                # Add an indicator to the response
+                if response:
+                    response = response + "\n\n---\n_[Interrupted - processing your new message]_"
+                
+                # Send the interrupted response first
+                if adapter and response:
+                    await adapter.send(chat_id=source.chat_id, content=response)
+                
+                # Now process the pending message with updated history
+                updated_history = result.get("messages", history)
+                return await self._run_agent(
+                    message=pending,
+                    context_prompt=context_prompt,
+                    history=updated_history,
+                    source=source,
+                    session_id=session_id,
+                    session_key=session_key
+                )
         finally:
-            # Stop progress sender
+            # Stop progress sender and interrupt monitor
             if progress_task:
                 progress_task.cancel()
+            interrupt_monitor.cancel()
             
             # Clean up tracking
             tracking_task.cancel()
             if session_key and session_key in self._running_agents:
                 del self._running_agents[session_key]
-                try:
-                    await progress_task
-                except asyncio.CancelledError:
-                    pass
+            
+            # Wait for cancelled tasks
+            for task in [progress_task, interrupt_monitor, tracking_task]:
+                if task:
+                    try:
+                        await task
+                    except asyncio.CancelledError:
+                        pass
         
         return response
 

From f018999da97862bfc919a8eaddfec57ce0cdea18 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 3 Feb 2026 23:41:26 -0800
Subject: [PATCH 32/48] initial RL training tools and loop

---
 model_tools.py            | 360 ++++++++++++++++++++++++++++++-
 rl_cli.py                 | 363 +++++++++++++++++++++++++++++++
 tools/__init__.py         |  31 +++
 tools/rl_training_tool.py | 436 ++++++++++++++++++++++++++++++++++++++
 toolsets.py               |  12 ++
 5 files changed, 1199 insertions(+), 3 deletions(-)
 create mode 100644 rl_cli.py
 create mode 100644 tools/rl_training_tool.py

diff --git a/model_tools.py b/model_tools.py
index e78323f60e..ebabaf5646 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -39,6 +39,21 @@ from tools.vision_tools import vision_analyze_tool, check_vision_requirements
 from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
 from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
 from tools.skills_tool import skills_categories, skills_list, skill_view, check_skills_requirements, SKILLS_TOOL_DESCRIPTION
+# RL Training tools (Tinker-Atropos)
+from tools.rl_training_tool import (
+    rl_list_environments,
+    rl_select_environment,
+    rl_get_current_config,
+    rl_edit_config,
+    rl_start_training,
+    rl_check_status,
+    rl_stop_training,
+    rl_get_results,
+    rl_test_inference,
+    rl_list_runs,
+    rl_health_check,
+    check_rl_api_keys,
+)
 # Cronjob management tools (CLI-only)
 from tools.cronjob_tools import (
     schedule_cronjob,
@@ -128,6 +143,19 @@ TOOLSET_REQUIREMENTS = {
         "setup_url": None,
         "tools": ["skills_categories", "skills_list", "skill_view"],
     },
+    "rl": {
+        "name": "RL Training (Tinker-Atropos)",
+        "env_vars": ["TINKER_API_KEY", "WANDB_API_KEY"],
+        "check_fn": check_rl_api_keys,
+        "setup_url": "https://wandb.ai/authorize",
+        "tools": [
+            "rl_list_environments", "rl_select_environment",
+            "rl_get_current_config", "rl_edit_config",
+            "rl_start_training", "rl_check_status",
+            "rl_stop_training", "rl_get_results",
+            "rl_test_inference", "rl_list_runs",
+        ],
+    },
 }
 
 
@@ -471,6 +499,199 @@ def get_cronjob_tool_definitions_formatted() -> List[Dict[str, Any]]:
     ]]
 
 
+def get_rl_tool_definitions() -> List[Dict[str, Any]]:
+    """
+    Get tool definitions for RL training tools in OpenAI's expected format.
+    
+    These tools enable running RL training through Tinker-Atropos.
+    
+    Returns:
+        List[Dict]: List of RL tool definitions compatible with OpenAI API
+    """
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_list_environments",
+                "description": "List all available RL environments. Returns environment names, paths, and descriptions. TIP: Read the file_path with file tools to understand how each environment works (verifiers, data loading, rewards).",
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": []
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_select_environment",
+                "description": "Select an RL environment for training. Loads the environment's default configuration. After selecting, use rl_get_current_config() to see settings and rl_edit_config() to modify them.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "name": {
+                            "type": "string",
+                            "description": "Name of the environment to select (from rl_list_environments)"
+                        }
+                    },
+                    "required": ["name"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_get_current_config",
+                "description": "Get the current environment configuration. Returns only fields that can be modified: group_size, max_token_length, total_steps, steps_per_eval, use_wandb, wandb_name, max_num_workers.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": []
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_edit_config",
+                "description": "Update a configuration field. Valid fields: group_size (int), max_token_length (int), total_steps (int), steps_per_eval (int), use_wandb (bool), wandb_name (str), max_num_workers (int).",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "field": {
+                            "type": "string",
+                            "description": "Name of the field to update"
+                        },
+                        "value": {
+                            "description": "New value for the field"
+                        }
+                    },
+                    "required": ["field", "value"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_start_training",
+                "description": "Start a new RL training run. WARNING: Training can take hours. Use rl_check_status() to monitor (30-minute intervals recommended). Test with rl_test_inference() first!",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "wandb_project": {
+                            "type": "string",
+                            "description": "WandB project name for logging",
+                            "default": "rl-training"
+                        },
+                        "lora_rank": {
+                            "type": "integer",
+                            "description": "LoRA rank for training",
+                            "default": 32
+                        },
+                        "learning_rate": {
+                            "type": "number",
+                            "description": "Learning rate",
+                            "default": 4e-5
+                        }
+                    },
+                    "required": []
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_check_status",
+                "description": "Get status and metrics for a training run. RATE LIMITED: enforces 30-minute minimum between checks for the same run. Returns WandB metrics: step, state, reward_mean, loss, percent_correct.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "run_id": {
+                            "type": "string",
+                            "description": "The run ID from rl_start_training()"
+                        }
+                    },
+                    "required": ["run_id"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_stop_training",
+                "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "run_id": {
+                            "type": "string",
+                            "description": "The run ID to stop"
+                        }
+                    },
+                    "required": ["run_id"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_get_results",
+                "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "run_id": {
+                            "type": "string",
+                            "description": "The run ID to get results for"
+                        }
+                    },
+                    "required": ["run_id"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_test_inference",
+                "description": "Test inference + verifier on sample prompts WITHOUT full training. Use to validate environments before committing to long training runs. Tests data loading, inference, and verifier logic.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "prompts": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "List of test prompts to run through the environment"
+                        },
+                        "max_tokens": {
+                            "type": "integer",
+                            "description": "Maximum tokens to generate per prompt",
+                            "default": 256
+                        },
+                        "temperature": {
+                            "type": "number",
+                            "description": "Sampling temperature",
+                            "default": 1.0
+                        }
+                    },
+                    "required": ["prompts"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "rl_list_runs",
+                "description": "List all training runs (active and completed) with their status.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": []
+                }
+            }
+        }
+    ]
+
+
 def get_all_tool_names() -> List[str]:
     """
     Get the names of all available tools across all toolsets.
@@ -519,6 +740,16 @@ def get_all_tool_names() -> List[str]:
             "schedule_cronjob", "list_cronjobs", "remove_cronjob"
         ])
     
+    # RL Training tools
+    if check_rl_api_keys():
+        tool_names.extend([
+            "rl_list_environments", "rl_select_environment",
+            "rl_get_current_config", "rl_edit_config",
+            "rl_start_training", "rl_check_status",
+            "rl_stop_training", "rl_get_results",
+            "rl_test_inference", "rl_list_runs"
+        ])
+    
     return tool_names
 
 
@@ -557,7 +788,18 @@ def get_toolset_for_tool(tool_name: str) -> str:
         # Cronjob management tools
         "schedule_cronjob": "cronjob_tools",
         "list_cronjobs": "cronjob_tools",
-        "remove_cronjob": "cronjob_tools"
+        "remove_cronjob": "cronjob_tools",
+        # RL Training tools
+        "rl_list_environments": "rl_tools",
+        "rl_select_environment": "rl_tools",
+        "rl_get_current_config": "rl_tools",
+        "rl_edit_config": "rl_tools",
+        "rl_start_training": "rl_tools",
+        "rl_check_status": "rl_tools",
+        "rl_stop_training": "rl_tools",
+        "rl_get_results": "rl_tools",
+        "rl_test_inference": "rl_tools",
+        "rl_list_runs": "rl_tools",
     }
     
     return toolset_mapping.get(tool_name, "unknown")
@@ -635,6 +877,11 @@ def get_tool_definitions(
         for tool in get_cronjob_tool_definitions_formatted():
             all_available_tools_map[tool["function"]["name"]] = tool
     
+    # RL Training tools
+    if check_rl_api_keys():
+        for tool in get_rl_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
+    
     # Determine which tools to include based on toolsets
     tools_to_include = set()
     
@@ -663,7 +910,14 @@ def get_tool_definitions(
                             "browser_press", "browser_close", "browser_get_images",
                             "browser_vision"
                         ],
-                        "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"]
+                        "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
+                        "rl_tools": [
+                            "rl_list_environments", "rl_select_environment",
+                            "rl_get_current_config", "rl_edit_config",
+                            "rl_start_training", "rl_check_status",
+                            "rl_stop_training", "rl_get_results",
+                            "rl_test_inference", "rl_list_runs"
+                        ]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.update(legacy_tools)
@@ -708,7 +962,14 @@ def get_tool_definitions(
                             "browser_press", "browser_close", "browser_get_images",
                             "browser_vision"
                         ],
-                        "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"]
+                        "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
+                        "rl_tools": [
+                            "rl_list_environments", "rl_select_environment",
+                            "rl_get_current_config", "rl_edit_config",
+                            "rl_start_training", "rl_check_status",
+                            "rl_stop_training", "rl_get_results",
+                            "rl_test_inference", "rl_list_runs"
+                        ]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.difference_update(legacy_tools)
@@ -1018,6 +1279,89 @@ def handle_cronjob_function_call(
     return json.dumps({"error": f"Unknown cronjob function: {function_name}"}, ensure_ascii=False)
 
 
+def handle_rl_function_call(
+    function_name: str,
+    function_args: Dict[str, Any]
+) -> str:
+    """
+    Dispatch an RL training tool call and return its JSON result.
+
+    Each RL tool is an async function that talks to the RL API server; this
+    synchronous wrapper picks the matching coroutine, fills in defaults for
+    omitted arguments, and drives it to completion on an event loop.
+
+    Args:
+        function_name (str): Name of the RL function to call
+        function_args (Dict): Arguments for the function (None is treated
+            as an empty dict)
+
+    Returns:
+        str: Function result as JSON string, or a JSON ``{"error": ...}``
+            object when ``function_name`` is not an RL tool
+
+    Raises:
+        RuntimeError: If called while an event loop is already running in
+            this thread (``run_until_complete`` cannot be nested).
+    """
+    import asyncio
+
+    args = function_args or {}
+
+    # Coroutine factories keyed by tool name. Lambdas defer both the argument
+    # lookup and the coroutine creation until the tool is actually selected,
+    # so no un-awaited coroutine objects are ever created.
+    dispatch = {
+        "rl_list_environments": lambda: rl_list_environments(),
+        "rl_select_environment": lambda: rl_select_environment(
+            name=args.get("name", "")
+        ),
+        "rl_get_current_config": lambda: rl_get_current_config(),
+        "rl_edit_config": lambda: rl_edit_config(
+            field=args.get("field", ""),
+            value=args.get("value")
+        ),
+        "rl_start_training": lambda: rl_start_training(
+            wandb_project=args.get("wandb_project", "rl-training"),
+            lora_rank=args.get("lora_rank", 32),
+            learning_rate=args.get("learning_rate", 4e-5)
+        ),
+        "rl_check_status": lambda: rl_check_status(
+            run_id=args.get("run_id", "")
+        ),
+        "rl_stop_training": lambda: rl_stop_training(
+            run_id=args.get("run_id", "")
+        ),
+        "rl_get_results": lambda: rl_get_results(
+            run_id=args.get("run_id", "")
+        ),
+        "rl_test_inference": lambda: rl_test_inference(
+            prompts=args.get("prompts", []),
+            max_tokens=args.get("max_tokens", 256),
+            temperature=args.get("temperature", 1.0)
+        ),
+        "rl_list_runs": lambda: rl_list_runs(),
+    }
+
+    make_coroutine = dispatch.get(function_name)
+    if make_coroutine is None:
+        # Unknown tool: answer without touching the event loop at all.
+        return json.dumps({"error": f"Unknown RL function: {function_name}"}, ensure_ascii=False)
+
+    try:
+        loop = asyncio.get_event_loop()
+        if loop.is_closed():
+            # A loop that other code already closed cannot be reused; fall
+            # through to the handler below, which installs a fresh one.
+            raise RuntimeError("event loop is closed")
+    except RuntimeError:
+        # No usable loop in this thread: create one and make it current so
+        # later tool calls can reuse it.
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+    return loop.run_until_complete(make_coroutine())
+
+
 def handle_function_call(
     function_name: str, 
     function_args: Dict[str, Any], 
@@ -1081,6 +1425,16 @@ def handle_function_call(
         elif function_name in ["schedule_cronjob", "list_cronjobs", "remove_cronjob"]:
             return handle_cronjob_function_call(function_name, function_args, task_id)
 
+        # Route RL training tools
+        elif function_name in [
+            "rl_list_environments", "rl_select_environment",
+            "rl_get_current_config", "rl_edit_config",
+            "rl_start_training", "rl_check_status",
+            "rl_stop_training", "rl_get_results",
+            "rl_test_inference", "rl_list_runs"
+        ]:
+            return handle_rl_function_call(function_name, function_args)
+
         else:
             error_msg = f"Unknown function: {function_name}"
             print(f"❌ {error_msg}")
diff --git a/rl_cli.py b/rl_cli.py
new file mode 100644
index 0000000000..cd76c91d67
--- /dev/null
+++ b/rl_cli.py
@@ -0,0 +1,363 @@
+#!/usr/bin/env python3
+"""
+RL Training CLI Runner
+
+Dedicated CLI runner for RL training workflows with:
+- Extended timeouts for long-running training
+- RL-focused system prompts
+- Full toolset including RL training tools
+- Special handling for 30-minute check intervals
+
+Usage:
+    python rl_cli.py "Train a model on GSM8k for math reasoning"
+    python rl_cli.py --interactive
+    python rl_cli.py --list-environments
+
+Environment Variables:
+    TINKER_API_KEY: API key for Tinker service (required)
+    WANDB_API_KEY: API key for WandB metrics (required)
+    RL_API_URL: URL of RL API server (default: http://localhost:8080)
+    OPENROUTER_API_KEY: API key for OpenRouter (required for agent)
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+import fire
+
+# Load environment variables from .env file
+from dotenv import load_dotenv
+
+env_path = Path(__file__).parent / '.env'
+if env_path.exists():
+    load_dotenv(dotenv_path=env_path)
+    print(f"✅ Loaded environment variables from {env_path}")
+
+# Import agent and tools
+from run_agent import AIAgent
+from model_tools import get_tool_definitions, check_toolset_requirements
+from tools.rl_training_tool import check_rl_api_keys, get_missing_keys, rl_health_check
+
+
+# ============================================================================
+# RL-Specific Configuration
+# ============================================================================
+
+# Extended timeouts for long-running RL operations
+RL_MAX_ITERATIONS = 200  # Allow many more iterations for long workflows
+
+# RL-focused system prompt
+RL_SYSTEM_PROMPT = """You are an automated post-training engineer specializing in reinforcement learning for language models.
+
+## Your Capabilities
+
+You have access to RL training tools for running reinforcement learning on models through Tinker-Atropos:
+
+1. **DISCOVER**: Use `rl_list_environments` to see available RL environments
+2. **INSPECT**: Read environment files to understand how they work (verifiers, data loading, rewards)
+3. **INSPECT DATA**: Use terminal to explore HuggingFace datasets and understand their format
+4. **CREATE**: Copy existing environments as templates, modify for your needs
+5. **CONFIGURE**: Use `rl_select_environment` and `rl_edit_config` to set up training
+6. **TEST**: Always use `rl_test_inference` before full training to validate your setup
+7. **TRAIN**: Use `rl_start_training` to begin, `rl_check_status` to monitor
+8. **EVALUATE**: Use `rl_get_results` and analyze WandB metrics to assess performance
+
+## Environment Files
+
+Environment files are located in: `tinker-atropos/tinker_atropos/environments/`
+
+Study existing environments to learn patterns. Look for:
+- `load_dataset()` calls - how data is loaded
+- `score_answer()` / `score()` - verification logic
+- `get_next_item()` - prompt formatting
+- `system_prompt` - instruction format
+- `config_init()` - default configuration
+
+## Creating New Environments
+
+To create a new environment:
+1. Read an existing environment file (e.g., gsm8k_tinker.py)
+2. Use terminal to explore the target dataset format
+3. Copy the environment file as a template
+4. Modify the dataset loading, prompt formatting, and verifier logic
+5. Test with `rl_test_inference` before training
+
+## Important Guidelines
+
+- **Always test before training**: Training runs take hours - verify everything works first
+- **Monitor metrics**: Check WandB for reward/mean and percent_correct
+- **Status check intervals**: Wait at least 30 minutes between status checks
+- **Early stopping**: Stop training early if metrics look bad or stagnant
+- **Iterate quickly**: Start with small total_steps to validate, then scale up
+
+## Available Toolsets
+
+You have access to:
+- **RL tools**: Environment discovery, config management, training, testing
+- **Terminal**: Run commands, inspect files, explore datasets
+- **Web**: Search for information, documentation, papers
+- **File tools**: Read and modify code files
+
+When asked to train a model, follow this workflow:
+1. List available environments
+2. Select and configure the appropriate environment
+3. Test with sample prompts
+4. Start training with conservative settings
+5. Monitor progress and adjust as needed
+"""
+
+# Toolsets to enable for RL workflows
+RL_TOOLSETS = ["base", "terminal", "web", "rl"]
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+def check_requirements():
+    """Report missing API keys on stdout; return True only when none are missing."""
+    problems = []
+
+    # The agent itself needs an OpenRouter key
+    if not os.getenv("OPENROUTER_API_KEY"):
+        problems.append("OPENROUTER_API_KEY not set - required for agent")
+
+    # The RL tooling reports which of its own keys are absent
+    rl_keys_absent = get_missing_keys()
+    if rl_keys_absent:
+        problems.append(f"Missing RL API keys: {', '.join(rl_keys_absent)}")
+
+    if not problems:
+        return True
+
+    print("❌ Missing requirements:")
+    print("\n".join(f"   - {problem}" for problem in problems))
+    print("\nPlease set these environment variables in your .env file or shell.")
+    return False
+
+
+async def check_rl_server():
+    """Probe the RL API server; return (ok, health payload or error text)."""
+    import json
+
+    try:
+        payload = json.loads(await rl_health_check())
+        failed = "error" in payload
+        outcome = payload["error"] if failed else payload
+    except Exception as exc:
+        return False, str(exc)
+    return not failed, outcome
+
+
+def list_environments_sync():
+    """Blocking helper: fetch the RL environment listing and decode its JSON."""
+    import json
+    from tools.rl_training_tool import rl_list_environments
+
+    async def _fetch_and_decode():
+        raw_listing = await rl_list_environments()
+        return json.loads(raw_listing)
+
+    return asyncio.run(_fetch_and_decode())
+
+
+# ============================================================================
+# Main CLI
+# ============================================================================
+
+def main(
+    task: str = None,
+    model: str = "anthropic/claude-sonnet-4-20250514",
+    api_key: str = None,
+    base_url: str = "https://openrouter.ai/api/v1",
+    max_iterations: int = RL_MAX_ITERATIONS,
+    interactive: bool = False,
+    list_environments: bool = False,
+    check_server: bool = False,
+    verbose: bool = False,
+    save_trajectories: bool = True,
+):
+    """
+    RL Training CLI - Dedicated runner for RL training workflows.
+
+    Args:
+        task: The training task/goal (e.g., "Train a model on GSM8k for math")
+        model: Model to use for the agent (default: claude-sonnet-4)
+        api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided)
+        base_url: API base URL (default: OpenRouter)
+        max_iterations: Maximum agent iterations (default: 200 for long workflows)
+        interactive: Run in interactive mode (multiple conversations; Ctrl-C or EOF ends it)
+        list_environments: Just list available RL environments and exit
+        check_server: Check if RL API server is running and exit
+        verbose: Enable verbose logging
+        save_trajectories: Save conversation trajectories (default: True for RL)
+
+    Examples:
+        # Train on a specific environment
+        python rl_cli.py "Train a model on GSM8k math problems"
+
+        # Interactive mode
+        python rl_cli.py --interactive
+
+        # List available environments
+        python rl_cli.py --list-environments
+
+        # Check server status
+        python rl_cli.py --check-server
+    """
+    print("🎯 RL Training Agent")
+    print("=" * 60)
+
+    # Handle server check
+    if check_server:
+        print("\n🔍 Checking RL API server...")
+        ok, result = asyncio.run(check_rl_server())
+        if ok:
+            print("✅ RL API server is running")
+            print(f"   Environments discovered: {result.get('environments_discovered', 'unknown')}")
+            print(f"   Current environment: {result.get('current_environment', 'none')}")
+            print(f"   Active runs: {result.get('active_runs', 0)}")
+        else:
+            print(f"❌ RL API server not accessible: {result}")
+            print("\nTo start the server:")
+            print("  cd tinker-atropos && uvicorn rl_api_server:app --port 8080")
+        return
+
+    # Handle environment listing
+    if list_environments:
+        print("\n📋 Available RL Environments:")
+        print("-" * 40)
+        try:
+            data = list_environments_sync()
+            if "error" in data:
+                print(f"❌ Error: {data['error']}")
+                return
+
+            envs = data.get("environments", [])
+            if not envs:
+                print("No environments found.")
+                print("\nMake sure the RL API server is running:")
+                print("  cd tinker-atropos && uvicorn rl_api_server:app --port 8080")
+                return
+
+            for env in envs:
+                print(f"\n  📦 {env['name']}")
+                print(f"     Class: {env['class_name']}")
+                print(f"     Path: {env['file_path']}")
+                if env.get('description'):
+                    desc = env['description'][:100] + "..." if len(env.get('description', '')) > 100 else env.get('description', '')
+                    print(f"     Description: {desc}")
+
+            print(f"\n📊 Total: {len(envs)} environments")
+            print("\nUse `rl_select_environment(name)` to select an environment for training.")
+        except Exception as e:
+            print(f"❌ Error listing environments: {e}")
+            print("\nMake sure the RL API server is running:")
+            print("  cd tinker-atropos && uvicorn rl_api_server:app --port 8080")
+        return
+
+    # Check requirements
+    if not check_requirements():
+        sys.exit(1)
+
+    # Nothing to do without a task unless the user asked for interactive mode
+    if not task and not interactive:
+        print("\n⚠️  No task provided. Use --interactive for interactive mode or provide a task.")
+        print("\nExamples:")
+        print('  python rl_cli.py "Train a model on GSM8k math problems"')
+        print('  python rl_cli.py "Create an RL environment for code generation"')
+        print('  python rl_cli.py --interactive')
+        return
+
+    # Get API key
+    api_key = api_key or os.getenv("OPENROUTER_API_KEY")
+    if not api_key:
+        print("❌ No API key provided. Set OPENROUTER_API_KEY or pass --api-key")
+        sys.exit(1)
+
+    print(f"\n🤖 Model: {model}")
+    print(f"🔧 Max iterations: {max_iterations}")
+    print(f"📁 Toolsets: {', '.join(RL_TOOLSETS)}")
+    print("=" * 60)
+
+    # Create agent with RL configuration
+    agent = AIAgent(
+        base_url=base_url,
+        api_key=api_key,
+        model=model,
+        max_iterations=max_iterations,
+        enabled_toolsets=RL_TOOLSETS,
+        save_trajectories=save_trajectories,
+        verbose_logging=verbose,
+        quiet_mode=False,
+        ephemeral_system_prompt=RL_SYSTEM_PROMPT,
+    )
+
+    if interactive:
+        # Interactive mode - multiple conversations
+        print("\n🔄 Interactive RL Training Mode")
+        print("Type 'quit' or 'exit' to end the session.")
+        print("Type 'status' to check active training runs.")
+        print("-" * 40)
+
+        while True:
+            try:
+                user_input = input("\n🎯 RL Task> ").strip()
+
+                if not user_input:
+                    continue
+
+                if user_input.lower() in ('quit', 'exit', 'q'):
+                    print("\n👋 Goodbye!")
+                    break
+
+                if user_input.lower() == 'status':
+                    from tools.rl_training_tool import rl_list_runs
+                    import json
+                    runs = json.loads(asyncio.run(rl_list_runs()))
+                    if isinstance(runs, list) and runs:
+                        print("\n📊 Active Runs:")
+                        for run in runs:
+                            print(f"  - {run['run_id']}: {run['environment']} ({run['status']})")
+                    elif isinstance(runs, dict) and "error" in runs:
+                        print(f"\n❌ Could not list runs: {runs['error']}")
+                    else:
+                        print("\nNo active runs.")
+                    continue
+
+                # Run the agent
+                print("\n" + "=" * 60)
+                agent.run_conversation(user_input)
+                print("\n" + "=" * 60)
+
+            except (KeyboardInterrupt, EOFError):  # on EOF input() would fail on every retry
+                print("\n\n👋 Interrupted. Goodbye!")
+                break
+            except Exception as e:
+                print(f"\n❌ Error: {e}")
+                if verbose:
+                    import traceback
+                    traceback.print_exc()
+    else:
+        # Single task mode
+        print(f"\n📝 Task: {task}")
+        print("-" * 40)
+
+        try:
+            agent.run_conversation(task)
+            print("\n" + "=" * 60)
+            print("✅ Task completed")
+        except KeyboardInterrupt:
+            print("\n\n⚠️ Interrupted by user")
+        except Exception as e:
+            print(f"\n❌ Error: {e}")
+            if verbose:
+                import traceback
+                traceback.print_exc()
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    fire.Fire(main)
diff --git a/tools/__init__.py b/tools/__init__.py
index 3365dab441..dd8bb4dacc 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -95,6 +95,23 @@ from .cronjob_tools import (
     REMOVE_CRONJOB_SCHEMA
 )
 
+# RL Training tools (Tinker-Atropos)
+from .rl_training_tool import (
+    rl_list_environments,
+    rl_select_environment,
+    rl_get_current_config,
+    rl_edit_config,
+    rl_start_training,
+    rl_check_status,
+    rl_stop_training,
+    rl_get_results,
+    rl_test_inference,
+    rl_list_runs,
+    rl_health_check,
+    check_rl_api_keys,
+    get_missing_keys,
+)
+
 __all__ = [
     # Web tools
     'web_search_tool',
@@ -152,5 +169,19 @@ __all__ = [
     'SCHEDULE_CRONJOB_SCHEMA',
     'LIST_CRONJOBS_SCHEMA',
     'REMOVE_CRONJOB_SCHEMA',
+    # RL Training tools
+    'rl_list_environments',
+    'rl_select_environment',
+    'rl_get_current_config',
+    'rl_edit_config',
+    'rl_start_training',
+    'rl_check_status',
+    'rl_stop_training',
+    'rl_get_results',
+    'rl_test_inference',
+    'rl_list_runs',
+    'rl_health_check',
+    'check_rl_api_keys',
+    'get_missing_keys',
 ]
 
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
new file mode 100644
index 0000000000..1b7401c1c4
--- /dev/null
+++ b/tools/rl_training_tool.py
@@ -0,0 +1,436 @@
+#!/usr/bin/env python3
+"""
+RL Training Tools Module
+
+This module provides tools for running RL training through Tinker-Atropos.
+Communicates with the RL API server (rl_api_server.py) to manage:
+- Environment discovery and selection
+- Configuration management
+- Training run lifecycle
+- WandB metrics monitoring
+- Inference-only testing
+
+Required environment variables:
+- TINKER_API_KEY: API key for Tinker service
+- WANDB_API_KEY: API key for Weights & Biases metrics
+
+Optional environment variables:
+- RL_API_URL: URL of the RL API server (default: http://localhost:8080)
+- WANDB_ENTITY: WandB entity/team name
+- WANDB_PROJECT: Default WandB project name
+
+Usage:
+    from tools.rl_training_tool import (
+        rl_list_environments,
+        rl_select_environment,
+        rl_get_current_config,
+        rl_edit_config,
+        rl_start_training,
+        rl_check_status,
+        rl_stop_training,
+        rl_get_results,
+        rl_test_inference,
+    )
+"""
+
+import json
+import os
+import time
+from typing import Any, Dict, List, Optional
+
+import aiohttp
+
+# ============================================================================
+# Configuration
+# ============================================================================
+
+# Default RL API server URL (can be overridden via environment variable)
+RL_API_URL = os.getenv("RL_API_URL", "http://localhost:8080")
+
+# Rate limiting for status checks (30 minutes in seconds)
+MIN_STATUS_CHECK_INTERVAL = 30 * 60
+_last_status_check: Dict[str, float] = {}
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+async def _make_request(
+    method: str,
+    endpoint: str,
+    data: Optional[Dict] = None,
+    timeout: int = 30,
+) -> Dict[str, Any]:
+    """
+    Make an HTTP request to the RL API server and return the decoded JSON.
+
+    Failures are reported, not raised: a non-200 status, an unsupported
+    ``method`` (only "GET" and "POST" are accepted) or any exception yields
+    ``{"error": "..."}``. ``data`` is the POST body; ``timeout`` is in seconds.
+    """
+    if method not in ("GET", "POST"):
+        # Explicit error instead of silently falling through and returning None
+        return {"error": f"Unsupported HTTP method: {method}"}
+    url = f"{RL_API_URL}{endpoint}"
+    body_kwargs = {"json": data} if method == "POST" else {}
+    try:
+        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=timeout)) as session:
+            async with session.request(method, url, **body_kwargs) as response:
+                if response.status == 200:
+                    return await response.json()
+                error_text = await response.text()
+                return {"error": f"HTTP {response.status}: {error_text}"}
+    except aiohttp.ClientConnectorError:
+        return {"error": (
+            f"Cannot connect to RL API server at {RL_API_URL}. "
+            "Make sure the server is running: "
+            "cd tinker-atropos && uvicorn rl_api_server:app --port 8080"
+        )}
+    except Exception as e:
+        return {"error": f"Request failed: {str(e)}"}
+
+
+# ============================================================================
+# Environment Discovery Tools
+# ============================================================================
+
+async def rl_list_environments() -> str:
+    """
+    List all available RL environments.
+
+    Scans tinker-atropos/tinker_atropos/environments/ for Python files
+    containing classes that inherit from BaseEnv.
+
+    Returns information about each environment including:
+    - name: Environment identifier
+    - class_name: Python class name
+    - file_path: Path to the environment file
+    - description: Brief description if available
+
+    TIP: To create or modify RL environments:
+    1. Use terminal/file tools to inspect existing environments
+    2. Study how they load datasets, define verifiers, and structure rewards
+    3. Inspect HuggingFace datasets to understand data formats
+    4. Copy an existing environment as a template
+    5. Test with rl_test_inference before running full training
+
+    Returns:
+        JSON string with list of environments or error message
+    """
+    result = await _make_request("GET", "/environments")
+    # Only a dict can be an error payload; anything else is wrapped below as the listing
+    if isinstance(result, dict) and "error" in result:
+        return json.dumps(result, indent=2)
+
+    # Add helpful tips to the response
+    response = {
+        "environments": result,
+        "count": len(result),
+        "tips": [
+            "Use rl_select_environment(name) to select an environment",
+            "Read the file_path with file tools to understand how each environment works",
+            "Look for load_dataset(), score_answer(), get_next_item() methods",
+        ]
+    }
+
+    return json.dumps(response, indent=2)
+
+
+async def rl_select_environment(name: str) -> str:
+    """
+    Select an RL environment for training.
+
+    This loads the environment's default configuration into the config state.
+    After selecting, use rl_get_current_config() to see the configuration
+    and rl_edit_config() to modify specific fields.
+
+    Args:
+        name: Name of the environment to select (from rl_list_environments)
+
+    Returns:
+        JSON string with selection result, file path, and current config
+
+    TIP: Read the returned file_path to understand how the environment works
+    (load_dataset calls, score_answer method, system_prompt, get_next_item).
+    """
+    from urllib.parse import quote
+    # Percent-encode so "/", "?" or "#" in the name cannot re-target the request
+    segment = quote(str(name), safe="")
+    return json.dumps(await _make_request("POST", f"/environments/{segment}/select"), indent=2)
+
+
+# ============================================================================
+# Configuration Tools
+# ============================================================================
+
+async def rl_get_current_config() -> str:
+    """
+    Fetch the current environment configuration from the RL API server.
+
+    Only the fields that are safe to modify are returned. Other fields
+    (tokenizer_name, rollout_server_url, etc.) are fixed by the system.
+
+    Available fields:
+    - group_size: Rollouts per prompt (4-16 typical)
+    - max_token_length: Max generation tokens (2048-16384)
+    - total_steps: Training steps (50-2000)
+    - steps_per_eval: Steps between evaluations
+    - use_wandb: Enable WandB logging
+    - wandb_name: WandB run name prefix
+    - max_num_workers: Concurrent workers (-1 = auto)
+
+    Returns:
+        JSON string with current config fields and their values
+        (or the error payload when the request fails)
+    """
+    return json.dumps(await _make_request("GET", "/config"), indent=2)
+
+
+async def rl_edit_config(field: str, value: Any) -> str:
+    """
+    Update a single configuration field.
+
+    Only exposed fields can be modified; the pair is posted to the server as-is.
+
+    Args:
+        field: Name of the field to update (e.g., "group_size", "total_steps")
+        value: New value for the field
+
+    Valid fields:
+    - group_size (int): Rollouts per prompt
+    - max_token_length (int): Max generation tokens
+    - total_steps (int): Training steps
+    - steps_per_eval (int): Eval frequency
+    - use_wandb (bool): Enable logging
+    - wandb_name (str): Run name prefix
+    - max_num_workers (int): Workers count
+
+    Returns:
+        JSON string with updated config or error message
+    """
+    update = {"field": field, "value": value}
+    return json.dumps(await _make_request("POST", "/config", update), indent=2)
+
+
+# ============================================================================
+# Training Management Tools
+# ============================================================================
+
+async def rl_start_training(
+    wandb_project: str = "rl-training",
+    lora_rank: int = 32,
+    learning_rate: float = 4e-5,
+) -> str:
+    """
+    Launch a new RL training run using the current environment and config.
+
+    An environment must already be selected via rl_select_environment().
+
+    WARNING: Training runs can take hours to days. Use rl_check_status() to
+    monitor progress (recommended: check every 30 minutes at most).
+
+    Args:
+        wandb_project: WandB project name for logging
+        lora_rank: LoRA rank for training (default: 32)
+        learning_rate: Learning rate (default: 4e-5)
+
+    Returns:
+        JSON string with run_id and initial status
+
+    TIP: Before starting training:
+    1. Test with rl_test_inference() to verify the environment works
+    2. Start with fewer total_steps to validate the setup
+    3. Monitor WandB metrics for reward/mean and percent_correct
+    """
+    run_request = dict(
+        wandb_project=wandb_project,
+        lora_rank=lora_rank,
+        learning_rate=learning_rate,
+    )
+    return json.dumps(await _make_request("POST", "/runs", run_request), indent=2)
+
+
+async def rl_check_status(run_id: str) -> str:
+    """
+    Get status and metrics for a training run.
+
+    RATE LIMITED: at most one successful check per run_id every 30 minutes;
+    a failed request does not start the interval and may be retried at once.
+
+    Fetches latest metrics from WandB if available:
+    - step: Current training step
+    - state: Run state (running, finished, crashed)
+    - reward_mean: Average reward across batches
+    - loss: Training loss
+    - percent_correct: Training accuracy
+    - eval_percent_correct: Evaluation accuracy
+
+    Args:
+        run_id: The run ID returned by rl_start_training()
+
+    Returns:
+        JSON string with run status and metrics, or rate limit message
+    """
+    from urllib.parse import quote
+
+    now = time.time()
+    elapsed = now - _last_status_check.get(run_id, float("-inf"))
+    if elapsed < MIN_STATUS_CHECK_INTERVAL:
+        remaining = MIN_STATUS_CHECK_INTERVAL - elapsed
+        return json.dumps({
+            "rate_limited": True,
+            "run_id": run_id,
+            "message": f"Rate limited. Next check available in {remaining/60:.0f} minutes.",
+            "next_check_in_seconds": remaining,
+        }, indent=2)
+
+    # Encode run_id so "/" or "?" in it cannot re-target the request
+    result = await _make_request("GET", f"/runs/{quote(str(run_id), safe='')}")
+    if not (isinstance(result, dict) and "error" in result):
+        _last_status_check[run_id] = now  # failed checks don't consume the window
+    return json.dumps(result, indent=2)
+
+
+async def rl_stop_training(run_id: str) -> str:
+    """
+    Stop a running training job.
+    
+    Use this if:
+    - Metrics look bad or training is stagnant
+    - You want to try different settings
+    - You need to free up resources
+    
+    Args:
+        run_id: The run ID to stop
+    
+    Returns:
+        JSON string with stop confirmation
+    """
+    result = await _make_request("POST", f"/runs/{run_id}/stop")
+    return json.dumps(result, indent=2)
+
+
+async def rl_get_results(run_id: str) -> str:
+    """
+    Get final results and metrics for a completed training run.
+    
+    Returns:
+    - Final metrics (reward, loss, accuracy)
+    - WandB run URL for detailed analysis
+    - Path to trained weights (tinker:// URL)
+    
+    Args:
+        run_id: The run ID to get results for
+    
+    Returns:
+        JSON string with final results and weights path
+    """
+    result = await _make_request("GET", f"/runs/{run_id}/metrics")
+    return json.dumps(result, indent=2)
+
+
+# ============================================================================
+# Inference Testing Tools
+# ============================================================================
+
+async def rl_test_inference(
+    prompts: List[str],
+    max_tokens: int = 256,
+    temperature: float = 1.0,
+) -> str:
+    """
+    Test inference + verifier on sample prompts WITHOUT full training.
+    
+    Use this to validate environments before committing to long training runs.
+    Tests:
+    - Data loading and formatting
+    - Model inference through Tinker
+    - Verifier/reward function logic
+    
+    NOTE: This still requires the RL API server to be running with
+    Tinker access for the Sample() method.
+    
+    Args:
+        prompts: List of test prompts to run through the environment
+        max_tokens: Maximum tokens to generate per prompt
+        temperature: Sampling temperature
+    
+    Returns:
+        JSON string with responses and verifier scores for each prompt
+    
+    TIP: Include prompts with known correct/incorrect answers to verify
+    the reward function is working correctly.
+    """
+    result = await _make_request("POST", "/test/inference", {
+        "prompts": prompts,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+    })
+    return json.dumps(result, indent=2)
+
+
+# ============================================================================
+# Utility Tools
+# ============================================================================
+
+async def rl_list_runs() -> str:
+    """
+    List all training runs (active and completed).
+    
+    Returns:
+        JSON string with list of runs and their status
+    """
+    result = await _make_request("GET", "/runs")
+    return json.dumps(result, indent=2)
+
+
+# ============================================================================
+# Requirements Check
+# ============================================================================
+
+def check_rl_api_keys() -> bool:
+    """
+    Check if required API keys are available in environment variables.
+    
+    Required:
+    - TINKER_API_KEY: For Tinker training service
+    - WANDB_API_KEY: For metrics logging and fetching
+    
+    Returns:
+        bool: True if all required keys are set, False otherwise
+    """
+    tinker_key = os.getenv("TINKER_API_KEY")
+    wandb_key = os.getenv("WANDB_API_KEY")
+    
+    return bool(tinker_key) and bool(wandb_key)
+
+
+def get_missing_keys() -> List[str]:
+    """
+    Get list of missing required API keys.
+    
+    Returns:
+        List of missing key names
+    """
+    missing = []
+    if not os.getenv("TINKER_API_KEY"):
+        missing.append("TINKER_API_KEY")
+    if not os.getenv("WANDB_API_KEY"):
+        missing.append("WANDB_API_KEY")
+    return missing
+
+
+# ============================================================================
+# Debug/Status
+# ============================================================================
+
+async def rl_health_check() -> str:
+    """
+    Check if the RL API server is running and accessible.
+    
+    Returns:
+        JSON string with server health status
+    """
+    result = await _make_request("GET", "/health")
+    return json.dumps(result, indent=2)
diff --git a/toolsets.py b/toolsets.py
index 5d08731ec0..e4644251c7 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -90,6 +90,18 @@ TOOLSETS = {
         "includes": []
     },
     
+    "rl": {
+        "description": "RL training tools for running reinforcement learning on Tinker-Atropos",
+        "tools": [
+            "rl_list_environments", "rl_select_environment",
+            "rl_get_current_config", "rl_edit_config",
+            "rl_start_training", "rl_check_status",
+            "rl_stop_training", "rl_get_results",
+            "rl_test_inference", "rl_list_runs"
+        ],
+        "includes": []
+    },
+    
     # Scenario-specific toolsets
     
     "debugging": {

From 8380895ae31f4c6e7f2e2fd5b33c869325f8e7ff Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 4 Feb 2026 00:35:45 -0800
Subject: [PATCH 33/48] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 8a999cb1a6..4f19cf9722 100644
--- a/README.md
+++ b/README.md
@@ -64,13 +64,13 @@ You need at least one LLM provider:
 | Provider | Get Key | Env Variable |
 |----------|---------|--------------|
 | **OpenRouter** (recommended) | [openrouter.ai/keys](https://openrouter.ai/keys) | `OPENROUTER_API_KEY` |
-| Anthropic | [console.anthropic.com](https://console.anthropic.com/) | `ANTHROPIC_API_KEY` |
-| OpenAI | [platform.openai.com](https://platform.openai.com/api-keys) | `OPENAI_API_KEY` |
+
 
 ### Optional API Keys
 
 | Feature | Provider | Env Variable |
 |---------|----------|--------------|
+| Custom OpenAI-compatible endpoint (OpenAI, vLLM, or SGLang) | [platform.openai.com](https://platform.openai.com/api-keys) | `OPENAI_API_KEY` |
 | Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
 | Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
 | Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |

From f6574978de39c6ccae8a06d13ddabbb2c72c9ce1 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 4 Feb 2026 09:36:51 -0800
Subject: [PATCH 34/48] Add RL training configuration and tools

- Updated `.env.example` to include Tinker and WandB API keys for reinforcement learning training.
- Enhanced `model_tools.py` to clarify configuration options and streamline the RL training process.
- Expanded `README.md` with detailed instructions for setting up RL training using Tinker and WandB.
- Modified `hermes_cli` files to integrate RL training tools and ensure proper configuration checks.
- Improved `rl_training_tool.py` to reflect changes in training parameters and configuration management.
---
 .env.example              | 18 +++++++++++
 README.md                 | 56 ++++++++++++++++++++++++++++++++++
 hermes_cli/config.py      | 14 +++++++++
 hermes_cli/setup.py       | 49 ++++++++++++++++++++++++++++++
 hermes_cli/status.py      |  2 ++
 model_tools.py            | 32 ++++----------------
 tools/rl_training_tool.py | 63 ++++++++++++++++-----------------------
 7 files changed, 169 insertions(+), 65 deletions(-)

diff --git a/.env.example b/.env.example
index 98c5ea1922..85ecf09d73 100644
--- a/.env.example
+++ b/.env.example
@@ -165,3 +165,21 @@ IMAGE_TOOLS_DEBUG=false
 # CONTEXT_COMPRESSION_ENABLED=true        # Enable auto-compression (default: true)
 # CONTEXT_COMPRESSION_THRESHOLD=0.85      # Compress at 85% of context limit
 # CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001  # Fast model for summaries
+
+# =============================================================================
+# RL TRAINING (Tinker + Atropos)
+# =============================================================================
+# Run reinforcement learning training on language models using the Tinker API.
+# Requires the rl-server to be running (from tinker-atropos package).
+
+# Tinker API Key - RL training service
+# Get at: https://tinker-console.thinkingmachines.ai/keys
+TINKER_API_KEY=
+
+# Weights & Biases API Key - Experiment tracking and metrics
+# Get at: https://wandb.ai/authorize
+WANDB_API_KEY=
+
+# RL API Server URL (default: http://localhost:8080)
+# Change if running the rl-server on a different host/port
+# RL_API_URL=http://localhost:8080
diff --git a/README.md b/README.md
index 8a999cb1a6..f49ae26afd 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,7 @@ You need at least one LLM provider:
 | Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
 | Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
 | Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |
+| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` |
 | Messaging | Telegram, Discord | `TELEGRAM_BOT_TOKEN`, `DISCORD_BOT_TOKEN` |
 
 ---
@@ -270,6 +271,61 @@ When enabled, you'll see messages like:
 
 See [docs/messaging.md](docs/messaging.md) for WhatsApp and advanced setup.
 
+### 🤖 RL Training (Tinker + Atropos)
+
+Train language models with reinforcement learning using the Tinker API and Atropos framework.
+
+#### Requirements
+
+1. **API Keys:** Add to `~/.hermes/.env`:
+```bash
+TINKER_API_KEY=your-tinker-key      # Get from https://tinker-console.thinkingmachines.ai/keys
+WANDB_API_KEY=your-wandb-key        # Get from https://wandb.ai/authorize
+```
+
+2. **Install tinker-atropos:** (in a separate directory)
+```bash
+cd ~/tinker-atropos
+pip install -e .
+```
+
+3. **Start the RL API server:**
+```bash
+rl-server    # Runs on port 8080 by default
+```
+
+#### Using RL Tools
+
+The agent can now use RL training tools:
+
+```
+You: Start training on GSM8k with group_size=16
+
+Agent: I'll set up an RL training run on the GSM8k environment...
+[Uses rl_list_environments, rl_select_environment, rl_edit_config, rl_start_training]
+```
+
+#### Available RL Tools
+
+| Tool | Description |
+|------|-------------|
+| `rl_list_environments` | List available RL environments |
+| `rl_select_environment` | Select an environment for training |
+| `rl_get_current_config` | View all configurable options |
+| `rl_edit_config` | Change a configuration value |
+| `rl_start_training` | Start a training run |
+| `rl_check_status` | Check training progress |
+| `rl_stop_training` | Stop a running training job |
+| `rl_get_results` | Fetch WandB metrics |
+
+#### Dedicated RL CLI
+
+For extended RL workflows with longer timeouts:
+
+```bash
+python rl_cli.py --model "anthropic/claude-sonnet-4-20250514"
+```
+
 ### ⏰ Scheduled Tasks (Cron)
 
 Schedule tasks to run automatically:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index a0d98b6acb..82ce6ae7ec 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -151,6 +151,20 @@ OPTIONAL_ENV_VARS = {
         "tools": ["image_generate"],
         "password": True,
     },
+    "TINKER_API_KEY": {
+        "description": "Tinker API key for RL training",
+        "prompt": "Tinker API key",
+        "url": "https://tinker-console.thinkingmachines.ai/keys",
+        "tools": ["rl_start_training", "rl_check_status", "rl_stop_training"],
+        "password": True,
+    },
+    "WANDB_API_KEY": {
+        "description": "Weights & Biases API key for experiment tracking",
+        "prompt": "WandB API key",
+        "url": "https://wandb.ai/authorize",
+        "tools": ["rl_get_results", "rl_check_status"],
+        "password": True,
+    },
     "OPENAI_BASE_URL": {
         "description": "Custom OpenAI-compatible API endpoint URL",
         "prompt": "API base URL (e.g., https://api.example.com/v1)",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 06668d4e94..83f42730c9 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -186,6 +186,14 @@ def _print_setup_summary(config: dict, hermes_home):
     else:
         tool_status.append(("Image Generation", False, "FAL_KEY"))
     
+    # Tinker + WandB (RL training)
+    if get_env_value('TINKER_API_KEY') and get_env_value('WANDB_API_KEY'):
+        tool_status.append(("RL Training (Tinker)", True, None))
+    elif get_env_value('TINKER_API_KEY'):
+        tool_status.append(("RL Training (Tinker)", False, "WANDB_API_KEY"))
+    else:
+        tool_status.append(("RL Training (Tinker)", False, "TINKER_API_KEY"))
+    
     # Terminal (always available if system deps met)
     tool_status.append(("Terminal/Commands", True, None))
     
@@ -932,6 +940,47 @@ def run_setup_wizard(args):
             if api_key:
                 save_env_value("FAL_KEY", api_key)
                 print_success("    Configured ✓")
+    print()
+    
+    # Tinker + WandB - RL Training
+    print_info("─" * 50)
+    print(color("  RL Training (Tinker + WandB)", Colors.CYAN))
+    print_info("  Enables: rl_start_training, rl_check_status, rl_get_results tools")
+    print_info("  Use case: Run reinforcement learning training via Tinker API")
+    tinker_configured = get_env_value('TINKER_API_KEY')
+    wandb_configured = get_env_value('WANDB_API_KEY')
+    
+    if tinker_configured and wandb_configured:
+        print_success("  Status: Configured ✓")
+        if prompt_yes_no("  Update RL training credentials?", False):
+            api_key = prompt("    Tinker API key", password=True)
+            if api_key:
+                save_env_value("TINKER_API_KEY", api_key)
+            wandb_key = prompt("    WandB API key", password=True)
+            if wandb_key:
+                save_env_value("WANDB_API_KEY", wandb_key)
+            print_success("    Updated")
+    else:
+        if tinker_configured:
+            print_warning("  Status: Tinker configured, WandB missing")
+        elif wandb_configured:
+            print_warning("  Status: WandB configured, Tinker missing")
+        else:
+            print_warning("  Status: Not configured (tools will be disabled)")
+        
+        if prompt_yes_no("  Set up RL Training?", False):
+            print_info("    Get Tinker key at: https://tinker-console.thinkingmachines.ai/keys")
+            print_info("    Get WandB key at: https://wandb.ai/authorize")
+            api_key = prompt("    Tinker API key", password=True)
+            if api_key:
+                save_env_value("TINKER_API_KEY", api_key)
+            wandb_key = prompt("    WandB API key", password=True)
+            if wandb_key:
+                save_env_value("WANDB_API_KEY", wandb_key)
+            if api_key and wandb_key:
+                print_success("    Configured ✓")
+            else:
+                print_warning("    Partially configured (both keys required)")
     
     # =========================================================================
     # Save config and show summary
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index 2d24bb50a5..bbbdc2af56 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -74,6 +74,8 @@ def show_status(args):
         "Firecrawl": "FIRECRAWL_API_KEY",
         "Browserbase": "BROWSERBASE_API_KEY",
         "FAL": "FAL_KEY",
+        "Tinker": "TINKER_API_KEY",
+        "WandB": "WANDB_API_KEY",
     }
     
     for name, env_var in keys.items():
diff --git a/model_tools.py b/model_tools.py
index ebabaf5646..d84c329611 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -554,13 +554,13 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "rl_edit_config",
-                "description": "Update a configuration field. Valid fields: group_size (int), max_token_length (int), total_steps (int), steps_per_eval (int), use_wandb (bool), wandb_name (str), max_num_workers (int).",
+                "description": "Update a configuration field. Use rl_get_current_config() first to see all available fields for the selected environment. Each environment has different configurable options. Infrastructure settings (tokenizer, URLs, lora_rank, learning_rate) are locked.",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "field": {
                             "type": "string",
-                            "description": "Name of the field to update"
+                            "description": "Name of the field to update (get available fields from rl_get_current_config)"
                         },
                         "value": {
                             "description": "New value for the field"
@@ -574,26 +574,10 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "rl_start_training",
-                "description": "Start a new RL training run. WARNING: Training can take hours. Use rl_check_status() to monitor (30-minute intervals recommended). Test with rl_test_inference() first!",
+                "description": "Start a new RL training run with the current environment and config. Most training parameters (lora_rank, learning_rate, etc.) are fixed. Use rl_edit_config() to set group_size, batch_size, wandb_project before starting. WARNING: Training takes hours. Test with rl_test_inference() first!",
                 "parameters": {
                     "type": "object",
-                    "properties": {
-                        "wandb_project": {
-                            "type": "string",
-                            "description": "WandB project name for logging",
-                            "default": "rl-training"
-                        },
-                        "lora_rank": {
-                            "type": "integer",
-                            "description": "LoRA rank for training",
-                            "default": 32
-                        },
-                        "learning_rate": {
-                            "type": "number",
-                            "description": "Learning rate",
-                            "default": 4e-5
-                        }
-                    },
+                    "properties": {},
                     "required": []
                 }
             }
@@ -1324,13 +1308,7 @@ def handle_rl_function_call(
         )
     
     elif function_name == "rl_start_training":
-        return loop.run_until_complete(
-            rl_start_training(
-                wandb_project=function_args.get("wandb_project", "rl-training"),
-                lora_rank=function_args.get("lora_rank", 32),
-                learning_rate=function_args.get("learning_rate", 4e-5)
-            )
-        )
+        return loop.run_until_complete(rl_start_training())
     
     elif function_name == "rl_check_status":
         return loop.run_until_complete(
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 1b7401c1c4..7c40bc7243 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -168,20 +168,22 @@ async def rl_get_current_config() -> str:
     """
     Get the current environment configuration.
     
-    Returns only the fields that are safe to modify. Other fields
-    (tokenizer_name, rollout_server_url, etc.) are fixed by the system.
+    Returns all configurable fields for the selected environment.
+    Each environment may have different configuration options.
     
-    Available fields:
-    - group_size: Rollouts per prompt (4-16 typical)
-    - max_token_length: Max generation tokens (2048-16384)
-    - total_steps: Training steps (50-2000)
-    - steps_per_eval: Steps between evaluations
-    - use_wandb: Enable WandB logging
+    Fields are divided into:
+    - configurable_fields: Can be changed with rl_edit_config()
+    - locked_fields: Infrastructure settings that cannot be changed
+    
+    Common configurable fields include:
+    - group_size: Rollouts per prompt
+    - batch_size: Training batch size
     - wandb_name: WandB run name prefix
-    - max_num_workers: Concurrent workers (-1 = auto)
+    - system_prompt: Model instructions
+    - And any environment-specific options
     
     Returns:
-        JSON string with current config fields and their values
+        JSON string with configurable and locked fields
     """
     result = await _make_request("GET", "/config")
     return json.dumps(result, indent=2)
@@ -191,21 +193,15 @@ async def rl_edit_config(field: str, value: Any) -> str:
     """
     Update a configuration field.
     
-    Only exposed fields can be modified. Validates field name and type.
+    Use rl_get_current_config() first to see available fields for the
+    selected environment. Each environment has different options.
+    
+    Locked fields (infrastructure settings) cannot be changed.
     
     Args:
-        field: Name of the field to update (e.g., "group_size", "total_steps")
+        field: Name of the field to update (from rl_get_current_config)
         value: New value for the field
     
-    Valid fields:
-    - group_size (int): Rollouts per prompt
-    - max_token_length (int): Max generation tokens
-    - total_steps (int): Training steps
-    - steps_per_eval (int): Eval frequency
-    - use_wandb (bool): Enable logging
-    - wandb_name (str): Run name prefix
-    - max_num_workers (int): Workers count
-    
     Returns:
         JSON string with updated config or error message
     """
@@ -217,37 +213,28 @@ async def rl_edit_config(field: str, value: Any) -> str:
 # Training Management Tools
 # ============================================================================
 
-async def rl_start_training(
-    wandb_project: str = "rl-training",
-    lora_rank: int = 32,
-    learning_rate: float = 4e-5,
-) -> str:
+async def rl_start_training() -> str:
     """
     Start a new RL training run with the current environment and config.
     
     Requires an environment to be selected first using rl_select_environment().
+    Use rl_edit_config() to set group_size, batch_size, wandb_project before starting.
     
-    WARNING: Training runs can take hours to days. Use rl_check_status() to
-    monitor progress (recommended: check every 30 minutes at most).
+    Most training parameters are fixed (lora_rank=32, learning_rate=4e-5, etc.)
+    and cannot be changed.
     
-    Args:
-        wandb_project: WandB project name for logging
-        lora_rank: LoRA rank for training (default: 32)
-        learning_rate: Learning rate (default: 4e-5)
+    WARNING: Training runs take hours. Use rl_check_status() to monitor
+    progress (recommended: check every 30 minutes at most).
     
     Returns:
         JSON string with run_id and initial status
     
     TIP: Before starting training:
     1. Test with rl_test_inference() to verify the environment works
-    2. Start with fewer total_steps to validate the setup
+    2. Configure group_size and batch_size appropriately
     3. Monitor WandB metrics for reward/mean and percent_correct
     """
-    result = await _make_request("POST", "/runs", {
-        "wandb_project": wandb_project,
-        "lora_rank": lora_rank,
-        "learning_rate": learning_rate,
-    })
+    result = await _make_request("POST", "/runs", {})
     return json.dumps(result, indent=2)
 
 

From 12bbca95ecf4bbca5e3d4056526584ae3624e3c7 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 4 Feb 2026 10:36:01 -0800
Subject: [PATCH 35/48] Add tinker-atropos submodule and update RL training
 tools

- Added the tinker-atropos submodule for enhanced RL training capabilities.
- Updated model_tools.py to reorder RL function definitions and improve descriptions.
- Modified rl_cli.py to include checks for the tinker-atropos setup and provide user guidance.
- Adjusted toolsets.py and __init__.py to reflect changes in RL function availability.
- Enhanced rl_training_tool.py to manage training processes directly without a separate API server.
---
 .gitmodules               |    3 +
 model_tools.py            |   74 ++-
 rl_cli.py                 |   66 ++-
 tinker-atropos            |    1 +
 tools/__init__.py         |    6 +-
 tools/rl_training_tool.py | 1163 +++++++++++++++++++++++++++++++------
 toolsets.py               |    2 +-
 7 files changed, 1059 insertions(+), 256 deletions(-)
 create mode 160000 tinker-atropos

diff --git a/.gitmodules b/.gitmodules
index f08f6745bf..6a494f4bc2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "mini-swe-agent"]
 	path = mini-swe-agent
 	url = https://github.com/SWE-agent/mini-swe-agent
+[submodule "tinker-atropos"]
+	path = tinker-atropos
+	url = https://github.com/nousresearch/tinker-atropos
diff --git a/model_tools.py b/model_tools.py
index d84c329611..847e56effe 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -49,9 +49,8 @@ from tools.rl_training_tool import (
     rl_check_status,
     rl_stop_training,
     rl_get_results,
-    rl_test_inference,
     rl_list_runs,
-    rl_health_check,
+    rl_test_inference,
     check_rl_api_keys,
 )
 # Cronjob management tools (CLI-only)
@@ -153,7 +152,7 @@ TOOLSET_REQUIREMENTS = {
             "rl_get_current_config", "rl_edit_config",
             "rl_start_training", "rl_check_status",
             "rl_stop_training", "rl_get_results",
-            "rl_test_inference", "rl_list_runs",
+            "rl_list_runs", "rl_test_inference",
         ],
     },
 }
@@ -574,7 +573,7 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "rl_start_training",
-                "description": "Start a new RL training run with the current environment and config. Most training parameters (lora_rank, learning_rate, etc.) are fixed. Use rl_edit_config() to set group_size, batch_size, wandb_project before starting. WARNING: Training takes hours. Test with rl_test_inference() first!",
+                "description": "Start a new RL training run with the current environment and config. Most training parameters (lora_rank, learning_rate, etc.) are fixed. Use rl_edit_config() to set group_size, batch_size, wandb_project before starting. WARNING: Training takes hours.",
                 "parameters": {
                     "type": "object",
                     "properties": {},
@@ -636,39 +635,39 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
         {
             "type": "function",
             "function": {
-                "name": "rl_test_inference",
-                "description": "Test inference + verifier on sample prompts WITHOUT full training. Use to validate environments before committing to long training runs. Tests data loading, inference, and verifier logic.",
+                "name": "rl_list_runs",
+                "description": "List all training runs (active and completed) with their status.",
                 "parameters": {
                     "type": "object",
-                    "properties": {
-                        "prompts": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "List of test prompts to run through the environment"
-                        },
-                        "max_tokens": {
-                            "type": "integer",
-                            "description": "Maximum tokens to generate per prompt",
-                            "default": 256
-                        },
-                        "temperature": {
-                            "type": "number",
-                            "description": "Sampling temperature",
-                            "default": 1.0
-                        }
-                    },
-                    "required": ["prompts"]
+                    "properties": {},
+                    "required": []
                 }
             }
         },
         {
             "type": "function",
             "function": {
-                "name": "rl_list_runs",
-                "description": "List all training runs (active and completed) with their status.",
+                "name": "rl_test_inference",
+                "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps × 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.",
                 "parameters": {
                     "type": "object",
-                    "properties": {},
+                    "properties": {
+                        "num_steps": {
+                            "type": "integer",
+                            "description": "Number of steps to run (default: 3, recommended max for testing)",
+                            "default": 3
+                        },
+                        "group_size": {
+                            "type": "integer",
+                            "description": "Completions per step (default: 16, like training)",
+                            "default": 16
+                        },
+                        "models": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, zhipu-ai/glm-4-flash, minimax/minimax-m1"
+                        }
+                    },
                     "required": []
                 }
             }
@@ -731,7 +730,7 @@ def get_all_tool_names() -> List[str]:
             "rl_get_current_config", "rl_edit_config",
             "rl_start_training", "rl_check_status",
             "rl_stop_training", "rl_get_results",
-            "rl_test_inference", "rl_list_runs"
+            "rl_list_runs"
         ])
     
     return tool_names
@@ -782,7 +781,6 @@ def get_toolset_for_tool(tool_name: str) -> str:
         "rl_check_status": "rl_tools",
         "rl_stop_training": "rl_tools",
         "rl_get_results": "rl_tools",
-        "rl_test_inference": "rl_tools",
         "rl_list_runs": "rl_tools",
     }
     
@@ -900,7 +898,7 @@ def get_tool_definitions(
                             "rl_get_current_config", "rl_edit_config",
                             "rl_start_training", "rl_check_status",
                             "rl_stop_training", "rl_get_results",
-                            "rl_test_inference", "rl_list_runs"
+                            "rl_list_runs"
                         ]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
@@ -952,7 +950,7 @@ def get_tool_definitions(
                             "rl_get_current_config", "rl_edit_config",
                             "rl_start_training", "rl_check_status",
                             "rl_stop_training", "rl_get_results",
-                            "rl_test_inference", "rl_list_runs"
+                            "rl_list_runs"
                         ]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
@@ -1325,18 +1323,18 @@ def handle_rl_function_call(
             rl_get_results(run_id=function_args.get("run_id", ""))
         )
     
+    elif function_name == "rl_list_runs":
+        return loop.run_until_complete(rl_list_runs())
+    
     elif function_name == "rl_test_inference":
         return loop.run_until_complete(
             rl_test_inference(
-                prompts=function_args.get("prompts", []),
-                max_tokens=function_args.get("max_tokens", 256),
-                temperature=function_args.get("temperature", 1.0)
+                num_steps=function_args.get("num_steps", 3),
+                group_size=function_args.get("group_size", 16),
+                models=function_args.get("models"),
             )
         )
     
-    elif function_name == "rl_list_runs":
-        return loop.run_until_complete(rl_list_runs())
-    
     return json.dumps({"error": f"Unknown RL function: {function_name}"}, ensure_ascii=False)
 
 
@@ -1409,7 +1407,7 @@ def handle_function_call(
             "rl_get_current_config", "rl_edit_config",
             "rl_start_training", "rl_check_status",
             "rl_stop_training", "rl_get_results",
-            "rl_test_inference", "rl_list_runs"
+            "rl_list_runs"
         ]:
             return handle_rl_function_call(function_name, function_args)
 
diff --git a/rl_cli.py b/rl_cli.py
index cd76c91d67..fe0eecfd48 100644
--- a/rl_cli.py
+++ b/rl_cli.py
@@ -16,7 +16,6 @@ Usage:
 Environment Variables:
     TINKER_API_KEY: API key for Tinker service (required)
     WANDB_API_KEY: API key for WandB metrics (required)
-    RL_API_URL: URL of RL API server (default: http://localhost:8080)
     OPENROUTER_API_KEY: API key for OpenRouter (required for agent)
 """
 
@@ -38,7 +37,7 @@ if env_path.exists():
 # Import agent and tools
 from run_agent import AIAgent
 from model_tools import get_tool_definitions, check_toolset_requirements
-from tools.rl_training_tool import check_rl_api_keys, get_missing_keys, rl_health_check
+from tools.rl_training_tool import check_rl_api_keys, get_missing_keys
 
 
 # ============================================================================
@@ -138,17 +137,21 @@ def check_requirements():
     return True
 
 
-async def check_rl_server():
-    """Check if the RL API server is running."""
-    try:
-        result = await rl_health_check()
-        import json
-        data = json.loads(result)
-        if "error" in data:
-            return False, data["error"]
-        return True, data
-    except Exception as e:
-        return False, str(e)
+def check_tinker_atropos():
+    """Verify the tinker-atropos checkout; returns (ok, details dict or error string)."""
+    submodule_dir = Path(__file__).parent / "tinker-atropos"
+    if not submodule_dir.exists():
+        return False, "tinker-atropos submodule not found. Run: git submodule update --init"
+
+    environments_dir = submodule_dir / "tinker_atropos" / "environments"
+    if not environments_dir.exists():
+        return False, f"environments directory not found at {environments_dir}"
+
+    # Only public modules count as environments; underscore-prefixed files are skipped.
+    public_env_count = sum(
+        1 for candidate in environments_dir.glob("*.py") if not candidate.name.startswith("_")
+    )
+    return True, {"path": str(submodule_dir), "environments_count": public_env_count}
 
 
 def list_environments_sync():
@@ -210,19 +213,27 @@ def main(
     print("🎯 RL Training Agent")
     print("=" * 60)
     
-    # Handle server check
+    # Handle setup check
     if check_server:
-        print("\n🔍 Checking RL API server...")
-        ok, result = asyncio.run(check_rl_server())
+        print("\n🔍 Checking tinker-atropos setup...")
+        ok, result = check_tinker_atropos()
         if ok:
-            print("✅ RL API server is running")
-            print(f"   Environments discovered: {result.get('environments_discovered', 'unknown')}")
-            print(f"   Current environment: {result.get('current_environment', 'none')}")
-            print(f"   Active runs: {result.get('active_runs', 0)}")
+            print("✅ tinker-atropos submodule found")
+            print(f"   Path: {result.get('path')}")
+            print(f"   Environments found: {result.get('environments_count', 0)}")
+            
+            # Also check API keys
+            missing = get_missing_keys()
+            if missing:
+                print(f"\n⚠️  Missing API keys: {', '.join(missing)}")
+                print("   Add them to ~/.hermes/.env")
+            else:
+                print("✅ API keys configured")
         else:
-            print(f"❌ RL API server not accessible: {result}")
-            print("\nTo start the server:")
-            print("  cd tinker-atropos && uvicorn rl_api_server:app --port 8080")
+            print(f"❌ tinker-atropos not set up: {result}")
+            print("\nTo set up:")
+            print("  git submodule update --init")
+            print("  pip install -e ./tinker-atropos")
         return
     
     # Handle environment listing
@@ -238,8 +249,8 @@ def main(
             envs = data.get("environments", [])
             if not envs:
                 print("No environments found.")
-                print("\nMake sure the RL API server is running:")
-                print("  cd tinker-atropos && uvicorn rl_api_server:app --port 8080")
+                print("\nMake sure tinker-atropos is set up:")
+                print("  git submodule update --init")
                 return
             
             for env in envs:
@@ -254,8 +265,9 @@ def main(
             print("\nUse `rl_select_environment(name)` to select an environment for training.")
         except Exception as e:
             print(f"❌ Error listing environments: {e}")
-            print("\nMake sure the RL API server is running:")
-            print("  cd tinker-atropos && uvicorn rl_api_server:app --port 8080")
+            print("\nMake sure tinker-atropos is set up:")
+            print("  git submodule update --init")
+            print("  pip install -e ./tinker-atropos")
         return
     
     # Check requirements
diff --git a/tinker-atropos b/tinker-atropos
new file mode 160000
index 0000000000..65f084ee80
--- /dev/null
+++ b/tinker-atropos
@@ -0,0 +1 @@
+Subproject commit 65f084ee8054a5d02aeac76e24ed60388511c82b
diff --git a/tools/__init__.py b/tools/__init__.py
index dd8bb4dacc..0b6bcdcc96 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -105,9 +105,8 @@ from .rl_training_tool import (
     rl_check_status,
     rl_stop_training,
     rl_get_results,
-    rl_test_inference,
     rl_list_runs,
-    rl_health_check,
+    rl_test_inference,
     check_rl_api_keys,
     get_missing_keys,
 )
@@ -178,9 +177,8 @@ __all__ = [
     'rl_check_status',
     'rl_stop_training',
     'rl_get_results',
-    'rl_test_inference',
     'rl_list_runs',
-    'rl_health_check',
+    'rl_test_inference',
     'check_rl_api_keys',
     'get_missing_keys',
 ]
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 7c40bc7243..3c257c4c58 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -3,22 +3,18 @@
 RL Training Tools Module
 
 This module provides tools for running RL training through Tinker-Atropos.
-Communicates with the RL API server (rl_api_server.py) to manage:
-- Environment discovery and selection
-- Configuration management
-- Training run lifecycle
+Directly manages training processes without requiring a separate API server.
+
+Features:
+- Environment discovery (AST-based scanning for BaseEnv subclasses)
+- Configuration management with locked infrastructure settings
+- Training run lifecycle via subprocess management
 - WandB metrics monitoring
-- Inference-only testing
 
 Required environment variables:
 - TINKER_API_KEY: API key for Tinker service
 - WANDB_API_KEY: API key for Weights & Biases metrics
 
-Optional environment variables:
-- RL_API_URL: URL of the RL API server (default: http://localhost:8080)
-- WANDB_ENTITY: WandB entity/team name
-- WANDB_PROJECT: Default WandB project name
-
 Usage:
     from tools.rl_training_tool import (
         rl_list_environments,
@@ -29,66 +25,429 @@ Usage:
         rl_check_status,
         rl_stop_training,
         rl_get_results,
-        rl_test_inference,
     )
 """
 
+import ast
+import asyncio
+import importlib.util
 import json
 import os
+import subprocess
+import sys
 import time
+import uuid
+import yaml
+from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-import aiohttp
-
 # ============================================================================
-# Configuration
+# Path Configuration
 # ============================================================================
 
-# Default RL API server URL (can be overridden via environment variable)
-RL_API_URL = os.getenv("RL_API_URL", "http://localhost:8080")
+# Path to tinker-atropos submodule (relative to hermes-agent root)
+HERMES_ROOT = Path(__file__).parent.parent
+TINKER_ATROPOS_ROOT = HERMES_ROOT / "tinker-atropos"
+ENVIRONMENTS_DIR = TINKER_ATROPOS_ROOT / "tinker_atropos" / "environments"
+CONFIGS_DIR = TINKER_ATROPOS_ROOT / "configs"
+LOGS_DIR = TINKER_ATROPOS_ROOT / "logs"
 
-# Rate limiting for status checks (30 minutes in seconds)
-MIN_STATUS_CHECK_INTERVAL = 30 * 60
+# Ensure logs directory exists
+LOGS_DIR.mkdir(exist_ok=True)
+
+
+# ============================================================================
+# Locked Configuration (Infrastructure Settings)
+# ============================================================================
+
+# These fields cannot be changed by the model - they're tuned for our infrastructure
+LOCKED_FIELDS = {
+    "env": {
+        "tokenizer_name": "Qwen/Qwen3-8B",
+        "rollout_server_url": "http://localhost:8000",
+        "use_wandb": True,
+        "max_token_length": 8192,
+        "max_num_workers": 2048,
+        "worker_timeout": 3600,
+        "total_steps": 2500,
+        "steps_per_eval": 25,
+        "max_batches_offpolicy": 3,
+        "inference_weight": 1.0,
+        "eval_limit_ratio": 0.1,
+    },
+    "openai": [
+        {
+            "model_name": "Qwen/Qwen3-8B",
+            "base_url": "http://localhost:8001/v1",
+            "api_key": "x",
+            "weight": 1.0,
+            "num_requests_for_eval": 256,
+            "timeout": 3600,
+        }
+    ],
+    "tinker": {
+        "lora_rank": 32,
+        "learning_rate": 0.00004,
+        "max_token_trainer_length": 9000,
+        "checkpoint_dir": "./temp/",
+        "save_checkpoint_interval": 25,
+    },
+    "slurm": False,
+    "testing": False,
+}
+
+LOCKED_FIELD_NAMES = set(LOCKED_FIELDS.get("env", {}).keys())  # only the "env" section's keys are treated as locked
+
+
+# ============================================================================
+# State Management
+# ============================================================================
+
+@dataclass
+class EnvironmentInfo:
+    """Metadata for one environment class found by scanning the environments directory."""
+    name: str  # class-level `name` constant if present, otherwise the file stem
+    class_name: str
+    file_path: str
+    description: str = ""  # filled by the scanner from the class docstring or a fallback
+    config_class: str = "BaseEnvConfig"  # identifier assigned to `env_config_cls`, if any
+
+
+@dataclass
+class RunState:
+    """Mutable record for one training run: identity, config, status and child processes."""
+    run_id: str
+    environment: str
+    config: Dict[str, Any]
+    status: str = "pending"  # pending, starting, running, stopping, stopped, completed, failed
+    error_message: str = ""
+    wandb_project: str = ""
+    wandb_run_name: str = ""
+    start_time: float = 0.0  # set from time.time() once the run reaches "running"
+    # Process handles (None until the corresponding process has been spawned)
+    api_process: Optional[subprocess.Popen] = None
+    trainer_process: Optional[subprocess.Popen] = None
+    env_process: Optional[subprocess.Popen] = None
+
+
+# Global state
+_environments: List[EnvironmentInfo] = []
+_current_env: Optional[str] = None
+_current_config: Dict[str, Any] = {}
+_env_config_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
+_active_runs: Dict[str, RunState] = {}
 _last_status_check: Dict[str, float] = {}
 
+# Rate limiting for status checks (30 minutes)
+MIN_STATUS_CHECK_INTERVAL = 30 * 60
+
 
 # ============================================================================
-# Helper Functions
+# Environment Discovery
 # ============================================================================
 
-async def _make_request(
-    method: str,
-    endpoint: str,
-    data: Optional[Dict] = None,
-    timeout: int = 30,
-) -> Dict[str, Any]:
-    """Make an HTTP request to the RL API server."""
-    url = f"{RL_API_URL}{endpoint}"
+def _scan_environments() -> List[EnvironmentInfo]:
+    """
+    Scan the environments directory for direct BaseEnv subclasses using AST (files are parsed, never imported).
+    """
+    environments = []
     
-    async with aiohttp.ClientSession() as session:
+    if not ENVIRONMENTS_DIR.exists():
+        return environments
+
+    for py_file in ENVIRONMENTS_DIR.glob("*.py"):
+        if py_file.name.startswith("_"):
+            continue
+
         try:
-            if method == "GET":
-                async with session.get(url, timeout=timeout) as response:
-                    if response.status == 200:
-                        return await response.json()
-                    else:
-                        error_text = await response.text()
-                        return {"error": f"HTTP {response.status}: {error_text}"}
-            elif method == "POST":
-                async with session.post(url, json=data, timeout=timeout) as response:
-                    if response.status == 200:
-                        return await response.json()
-                    else:
-                        error_text = await response.text()
-                        return {"error": f"HTTP {response.status}: {error_text}"}
-        except aiohttp.ClientConnectorError:
-            return {
-                "error": f"Cannot connect to RL API server at {RL_API_URL}. "
-                         "Make sure the server is running: "
-                         "cd tinker-atropos && uvicorn rl_api_server:app --port 8080"
-            }
+            with open(py_file, "r", encoding="utf-8") as f:  # explicit: do not depend on the locale
+                tree = ast.parse(f.read())
+
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    # Check if class has BaseEnv as base (bare name or dotted attribute)
+                    for base in node.bases:
+                        base_name = ""
+                        if isinstance(base, ast.Name):
+                            base_name = base.id
+                        elif isinstance(base, ast.Attribute):
+                            base_name = base.attr
+
+                        if base_name == "BaseEnv":
+                            # Defaults, overridden below by class-level assignments if present
+                            env_name = py_file.stem
+                            description = ""
+                            config_class = "BaseEnvConfig"
+
+                            for item in node.body:
+                                if isinstance(item, ast.Assign):
+                                    for target in item.targets:
+                                        if isinstance(target, ast.Name):
+                                            if target.id == "name" and isinstance(item.value, ast.Constant):
+                                                env_name = item.value.value
+                                            elif target.id == "env_config_cls" and isinstance(item.value, ast.Name):
+                                                config_class = item.value.id
+
+                                # Get docstring: first non-empty line (strip first, it may start with a newline)
+                                if isinstance(item, ast.Expr) and isinstance(item.value, ast.Constant):
+                                    if isinstance(item.value.value, str) and not description:
+                                        description = item.value.value.strip().split("\n")[0].strip()
+
+                            environments.append(EnvironmentInfo(
+                                name=env_name,
+                                class_name=node.name,
+                                file_path=str(py_file),
+                                description=description or f"Environment from {py_file.name}",
+                                config_class=config_class,
+                            ))
+                            break
         except Exception as e:
-            return {"error": f"Request failed: {str(e)}"}
+            print(f"Warning: Could not parse {py_file}: {e}")
+
+    return environments
+
+
+def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
+    """
+    Dynamically import an environment and extract its config fields.
+
+    Returns {field_name: {type, default, description, locked, current_value}}.
+    Best-effort: any failure prints a warning and returns {}.
+    """
+    try:
+        # Load the environment module
+        spec = importlib.util.spec_from_file_location("env_module", env_file_path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"no module loader available for {env_file_path}")
+        module = importlib.util.module_from_spec(spec)
+        sys.modules["env_module"] = module
+        spec.loader.exec_module(module)
+
+        # Find the BaseEnv subclass: first class not named "BaseEnv" with a callable config_init
+        env_class = None
+        for name, obj in vars(module).items():
+            if isinstance(obj, type) and name != "BaseEnv" and callable(getattr(obj, "config_init", None)):
+                env_class = obj
+                break
+        if not env_class:
+            return {}
+
+        # Call config_init to get the actual config; only its class is inspected here
+        env_config, _server_configs = env_class.config_init()
+        locked_env_values = LOCKED_FIELDS.get("env", {})
+
+        # Extract fields from the Pydantic model
+        fields = {}
+        for field_name, field_info in type(env_config).model_fields.items():
+            field_type = field_info.annotation
+            default = field_info.default
+            is_locked = field_name in LOCKED_FIELD_NAMES
+            # Generic aliases (anything with __origin__) are rendered via str()
+            if hasattr(field_type, "__origin__"):
+                type_name = str(field_type)
+            else:
+                type_name = getattr(field_type, "__name__", str(field_type))
+
+            fields[field_name] = {
+                "type": type_name,
+                "default": default,
+                "description": field_info.description or "",
+                "locked": is_locked,
+                "current_value": locked_env_values.get(field_name, default) if is_locked else default,
+            }
+
+        return fields
+    except Exception as e:
+        print(f"Warning: Could not introspect environment config: {e}")
+        return {}
+
+
+def _initialize_environments():
+    """Populate the module-level environment list, scanning only while it is still empty."""
+    global _environments
+    # An empty scan result is falsy, so the scan is simply retried on the next call.
+    _environments = _environments or _scan_environments()
+
+
+# ============================================================================
+# Subprocess Management
+# ============================================================================
+
+async def _spawn_training_run(run_state: RunState, config_path: Path):
+    """
+    Spawn the three processes needed for training:
+    1. run-api (Atropos API server)
+    2. launch_training.py (Tinker trainer + inference server)
+    3. environment.py serve (the Atropos environment)
+
+    Updates run_state in place: status becomes "running" on success, or
+    "failed" (with error_message set) after stopping anything already started.
+    """
+    run_id = run_state.run_id
+
+    # Log file paths
+    api_log = LOGS_DIR / f"api_{run_id}.log"
+    trainer_log = LOGS_DIR / f"trainer_{run_id}.log"
+    env_log = LOGS_DIR / f"env_{run_id}.log"
+
+    # Resolve the environment before spawning anything, so an unknown name
+    # fails immediately instead of leaving two orphaned processes behind.
+    env_info = next((env for env in _environments if env.name == run_state.environment), None)
+    if not env_info:
+        run_state.status = "failed"
+        run_state.error_message = f"Environment '{run_state.environment}' not found"
+        return
+
+    try:
+        # Step 1: Start the Atropos API server (run-api)
+        print(f"[{run_id}] Starting Atropos API server (run-api)...")
+
+        # The child gets its own copy of the descriptor, so the parent's
+        # handle is closed as soon as Popen returns (no leaked file objects).
+        with open(api_log, "w") as api_log_file:
+            run_state.api_process = subprocess.Popen(
+                ["run-api"],
+                stdout=api_log_file,
+                stderr=subprocess.STDOUT,
+                cwd=str(TINKER_ATROPOS_ROOT),
+            )
+
+        # Wait for API to start
+        await asyncio.sleep(5)
+
+        if run_state.api_process.poll() is not None:
+            run_state.status = "failed"
+            run_state.error_message = f"API server exited with code {run_state.api_process.returncode}. Check {api_log}"
+            return
+
+        print(f"[{run_id}] Atropos API server started")
+
+        # Step 2: Start the Tinker trainer
+        print(f"[{run_id}] Starting Tinker trainer: launch_training.py --config {config_path}")
+
+        # sys.executable keeps the child on this process's interpreter/virtualenv;
+        # a bare "python" resolves to whatever happens to be first on PATH.
+        with open(trainer_log, "w") as trainer_log_file:
+            run_state.trainer_process = subprocess.Popen(
+                [sys.executable, "launch_training.py", "--config", str(config_path)],
+                stdout=trainer_log_file,
+                stderr=subprocess.STDOUT,
+                cwd=str(TINKER_ATROPOS_ROOT),
+                env={**os.environ, "TINKER_API_KEY": os.getenv("TINKER_API_KEY", "")},
+            )
+
+        # Wait for trainer to initialize (it starts FastAPI inference server on 8001)
+        print(f"[{run_id}] Waiting 30 seconds for trainer to initialize...")
+        await asyncio.sleep(30)
+
+        if run_state.trainer_process.poll() is not None:
+            run_state.status = "failed"
+            run_state.error_message = f"Trainer exited with code {run_state.trainer_process.returncode}. Check {trainer_log}"
+            _stop_training_run(run_state)  # terminates and waits for the API server
+            return
+
+        print(f"[{run_id}] Trainer started, inference server on port 8001")
+
+        # Step 3: Start the environment
+        print(f"[{run_id}] Waiting 90 more seconds before starting environment...")
+        await asyncio.sleep(90)
+
+        print(f"[{run_id}] Starting environment: {env_info.file_path} serve")
+
+        with open(env_log, "w") as env_log_file:
+            run_state.env_process = subprocess.Popen(
+                [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
+                stdout=env_log_file,
+                stderr=subprocess.STDOUT,
+                cwd=str(TINKER_ATROPOS_ROOT),
+            )
+
+        # Wait for environment to connect
+        await asyncio.sleep(10)
+
+        if run_state.env_process.poll() is not None:
+            run_state.status = "failed"
+            run_state.error_message = f"Environment exited with code {run_state.env_process.returncode}. Check {env_log}"
+            _stop_training_run(run_state)  # stops the trainer, then the API server
+            return
+
+        run_state.status = "running"
+        run_state.start_time = time.time()
+        print(f"[{run_id}] Training run started successfully!")
+
+        # Start background monitoring; keep a strong reference on the run record,
+        # as the asyncio docs advise, so the task cannot be garbage-collected early.
+        run_state.monitor_task = asyncio.create_task(_monitor_training_run(run_state))
+
+    except Exception as e:
+        run_state.status = "failed"
+        run_state.error_message = str(e)
+        _stop_training_run(run_state)
+
+
+async def _monitor_training_run(run_state: RunState):
+    """Watch a running training run and finalize it once one of its processes exits."""
+    while run_state.status == "running":
+        await asyncio.sleep(30)  # Check every 30 seconds
+
+        # Environment and trainer may finish cleanly (exit code 0 => "completed").
+        # They are polled in this order; the first one found dead decides the outcome.
+        outcome = None
+        for label, proc in (
+            ("Environment", run_state.env_process),
+            ("Trainer", run_state.trainer_process),
+        ):
+            if proc and proc.poll() is not None:
+                exit_code = proc.returncode
+                if exit_code == 0:
+                    outcome = ("completed", None)
+                else:
+                    outcome = ("failed", f"{label} process exited with code {exit_code}")
+                break
+
+        # The API server exiting at all, whatever its exit code, fails the run.
+        if outcome is None and run_state.api_process and run_state.api_process.poll() is not None:
+            outcome = ("failed", f"API server exited unexpectedly")
+
+        if outcome is None:
+            continue
+        run_state.status, failure_message = outcome
+        if failure_message is not None:
+            run_state.error_message = failure_message
+        _stop_training_run(run_state)
+        break
+
+
+def _stop_training_run(run_state: RunState):
+    """
+    Stop all processes for a training run.
+
+    Each live process gets terminate() and up to 10 seconds to exit before
+    being killed. A status of "running" becomes "stopped"; any other status
+    (e.g. "failed", "completed") is left untouched.
+    """
+    # Stop in reverse order: env -> trainer -> api
+    shutdown_order = (
+        ("environment process", run_state.env_process),
+        ("trainer process", run_state.trainer_process),
+        ("API server", run_state.api_process),
+    )
+    for label, proc in shutdown_order:
+        # Skip processes that were never started or have already exited.
+        if not proc or proc.poll() is not None:
+            continue
+        print(f"[{run_state.run_id}] Stopping {label}...")
+        proc.terminate()
+        try:
+            proc.wait(timeout=10)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            # Reap the killed child so it does not linger as a zombie.
+            proc.wait()
+
+    if run_state.status == "running":
+        run_state.status = "stopped"
 
 
 # ============================================================================
@@ -113,20 +472,23 @@ async def rl_list_environments() -> str:
     2. Study how they load datasets, define verifiers, and structure rewards
     3. Inspect HuggingFace datasets to understand data formats
     4. Copy an existing environment as a template
-    5. Test with rl_test_inference before running full training
     
     Returns:
-        JSON string with list of environments or error message
+        JSON string with list of environments
     """
-    result = await _make_request("GET", "/environments")
+    _initialize_environments()
     
-    if "error" in result:
-        return json.dumps(result, indent=2)
-    
-    # Add helpful tips to the response
     response = {
-        "environments": result,
-        "count": len(result),
+        "environments": [
+            {
+                "name": env.name,
+                "class_name": env.class_name,
+                "file_path": env.file_path,
+                "description": env.description,
+            }
+            for env in _environments
+        ],
+        "count": len(_environments),
         "tips": [
             "Use rl_select_environment(name) to select an environment",
             "Read the file_path with file tools to understand how each environment works",
@@ -141,23 +503,58 @@ async def rl_select_environment(name: str) -> str:
     """
     Select an RL environment for training.
     
-    This loads the environment's default configuration into the config state.
-    After selecting, use rl_get_current_config() to see the configuration
+    This loads the environment's configuration fields into memory.
+    After selecting, use rl_get_current_config() to see all configurable options
     and rl_edit_config() to modify specific fields.
     
     Args:
         name: Name of the environment to select (from rl_list_environments)
     
     Returns:
-        JSON string with selection result, file path, and current config
+        JSON string with selection result, file path, and configurable field count
     
-    TIP: Read the returned file_path to understand how the environment works:
-    - How it loads data (load_dataset calls)
-    - How it verifies answers (score_answer method)
-    - What prompts it uses (system_prompt, get_next_item)
+    TIP: Read the returned file_path to understand how the environment works.
     """
-    result = await _make_request("POST", f"/environments/{name}/select")
-    return json.dumps(result, indent=2)
+    global _current_env, _current_config, _env_config_cache
+    
+    _initialize_environments()
+    
+    env_info = None
+    for env in _environments:
+        if env.name == name:
+            env_info = env
+            break
+    
+    if not env_info:
+        return json.dumps({
+            "error": f"Environment '{name}' not found",
+            "available": [e.name for e in _environments],
+        }, indent=2)
+    
+    _current_env = name
+    
+    # Dynamically discover config fields
+    config_fields = _get_env_config_fields(env_info.file_path)
+    _env_config_cache[name] = config_fields
+    
+    # Initialize current config with defaults for non-locked fields
+    _current_config = {}
+    for field_name, field_info in config_fields.items():
+        if not field_info.get("locked", False):
+            _current_config[field_name] = field_info.get("default")
+    
+    configurable_count = sum(1 for f in config_fields.values() if not f.get("locked", False))
+    locked_count = sum(1 for f in config_fields.values() if f.get("locked", False))
+    
+    return json.dumps({
+        "message": f"Selected environment: {name}",
+        "environment": name,
+        "file_path": env_info.file_path,
+        "configurable_fields": configurable_count,
+        "locked_fields": locked_count,
+        "config": _current_config,
+        "tip": f"Use rl_get_current_config() to see all {configurable_count} configurable fields.",
+    }, indent=2)
 
 
 # ============================================================================
@@ -175,18 +572,40 @@ async def rl_get_current_config() -> str:
     - configurable_fields: Can be changed with rl_edit_config()
     - locked_fields: Infrastructure settings that cannot be changed
     
-    Common configurable fields include:
-    - group_size: Rollouts per prompt
-    - batch_size: Training batch size
-    - wandb_name: WandB run name prefix
-    - system_prompt: Model instructions
-    - And any environment-specific options
-    
     Returns:
         JSON string with configurable and locked fields
     """
-    result = await _make_request("GET", "/config")
-    return json.dumps(result, indent=2)
+    if not _current_env:
+        return json.dumps({
+            "error": "No environment selected. Use rl_select_environment(name) first.",
+        }, indent=2)
+    
+    config_fields = _env_config_cache.get(_current_env, {})
+    
+    configurable = []
+    locked = []
+    
+    for field_name, field_info in config_fields.items():
+        field_data = {
+            "name": field_name,
+            "type": field_info.get("type", "unknown"),
+            "default": field_info.get("default"),
+            "description": field_info.get("description", ""),
+            "current_value": _current_config.get(field_name, field_info.get("default")),
+        }
+        
+        if field_info.get("locked", False):
+            field_data["locked_value"] = LOCKED_FIELDS.get("env", {}).get(field_name)
+            locked.append(field_data)
+        else:
+            configurable.append(field_data)
+    
+    return json.dumps({
+        "environment": _current_env,
+        "configurable_fields": configurable,
+        "locked_fields": locked,
+        "tip": "Use rl_edit_config(field, value) to change any configurable field.",
+    }, indent=2)
 
 
 async def rl_edit_config(field: str, value: Any) -> str:
@@ -205,8 +624,36 @@ async def rl_edit_config(field: str, value: Any) -> str:
     Returns:
         JSON string with updated config or error message
     """
-    result = await _make_request("POST", "/config", {"field": field, "value": value})
-    return json.dumps(result, indent=2)
+    global _current_config
+    
+    if not _current_env:
+        return json.dumps({
+            "error": "No environment selected. Use rl_select_environment(name) first.",
+        }, indent=2)
+    
+    config_fields = _env_config_cache.get(_current_env, {})
+    
+    if field not in config_fields:
+        return json.dumps({
+            "error": f"Unknown field '{field}'",
+            "available_fields": list(config_fields.keys()),
+        }, indent=2)
+    
+    field_info = config_fields[field]
+    if field_info.get("locked", False):
+        return json.dumps({
+            "error": f"Field '{field}' is locked and cannot be changed",
+            "locked_value": LOCKED_FIELDS.get("env", {}).get(field),
+        }, indent=2)
+    
+    _current_config[field] = value
+    
+    return json.dumps({
+        "message": f"Updated {field} = {value}",
+        "field": field,
+        "value": value,
+        "config": _current_config,
+    }, indent=2)
 
 
 # ============================================================================
@@ -218,24 +665,106 @@ async def rl_start_training() -> str:
     Start a new RL training run with the current environment and config.
     
     Requires an environment to be selected first using rl_select_environment().
-    Use rl_edit_config() to set group_size, batch_size, wandb_project before starting.
+    Use rl_edit_config() to adjust configuration before starting.
     
-    Most training parameters are fixed (lora_rank=32, learning_rate=4e-5, etc.)
-    and cannot be changed.
+    This spawns three processes:
+    1. run-api (Atropos trajectory API)
+    2. launch_training.py (Tinker trainer + inference server)
+    3. environment.py serve (the selected environment)
     
     WARNING: Training runs take hours. Use rl_check_status() to monitor
     progress (recommended: check every 30 minutes at most).
     
     Returns:
         JSON string with run_id and initial status
-    
-    TIP: Before starting training:
-    1. Test with rl_test_inference() to verify the environment works
-    2. Configure group_size and batch_size appropriately
-    3. Monitor WandB metrics for reward/mean and percent_correct
     """
-    result = await _make_request("POST", "/runs", {})
-    return json.dumps(result, indent=2)
+    global _active_runs
+    
+    if not _current_env:
+        return json.dumps({
+            "error": "No environment selected. Use rl_select_environment(name) first.",
+        }, indent=2)
+    
+    # Check API keys
+    if not os.getenv("TINKER_API_KEY"):
+        return json.dumps({
+            "error": "TINKER_API_KEY not set. Add it to ~/.hermes/.env",
+        }, indent=2)
+    
+    # Find environment file
+    env_info = None
+    for env in _environments:
+        if env.name == _current_env:
+            env_info = env
+            break
+    
+    if not env_info or not Path(env_info.file_path).exists():
+        return json.dumps({
+            "error": f"Environment file not found for '{_current_env}'",
+        }, indent=2)
+    
+    # Generate run ID
+    run_id = str(uuid.uuid4())[:8]
+    
+    # Create config YAML
+    CONFIGS_DIR.mkdir(exist_ok=True)
+    config_path = CONFIGS_DIR / f"run_{run_id}.yaml"
+    
+    # Start with locked config as base
+    import copy
+    run_config = copy.deepcopy(LOCKED_FIELDS)
+    
+    if "env" not in run_config:
+        run_config["env"] = {}
+    
+    # Apply configurable fields
+    for field_name, value in _current_config.items():
+        if value is not None and value != "":
+            run_config["env"][field_name] = value
+    
+    # Set WandB settings
+    wandb_project = _current_config.get("wandb_project", "atropos-tinker")
+    if "tinker" not in run_config:
+        run_config["tinker"] = {}
+    run_config["tinker"]["wandb_project"] = wandb_project
+    run_config["tinker"]["wandb_run_name"] = f"{_current_env}-{run_id}"
+    
+    if "wandb_name" in _current_config and _current_config["wandb_name"]:
+        run_config["env"]["wandb_name"] = _current_config["wandb_name"]
+    
+    with open(config_path, "w") as f:
+        yaml.dump(run_config, f, default_flow_style=False)
+    
+    # Create run state
+    run_state = RunState(
+        run_id=run_id,
+        environment=_current_env,
+        config=_current_config.copy(),
+        status="starting",
+        wandb_project=wandb_project,
+        wandb_run_name=f"{_current_env}-{run_id}",
+    )
+    
+    _active_runs[run_id] = run_state
+    
+    # Start training in background
+    asyncio.create_task(_spawn_training_run(run_state, config_path))
+    
+    return json.dumps({
+        "run_id": run_id,
+        "status": "starting",
+        "environment": _current_env,
+        "config": _current_config,
+        "wandb_project": wandb_project,
+        "wandb_run_name": f"{_current_env}-{run_id}",
+        "config_path": str(config_path),
+        "logs": {
+            "api": str(LOGS_DIR / f"api_{run_id}.log"),
+            "trainer": str(LOGS_DIR / f"trainer_{run_id}.log"),
+            "env": str(LOGS_DIR / f"env_{run_id}.log"),
+        },
+        "message": "Training starting. Use rl_check_status(run_id) to monitor (recommended: every 30 minutes).",
+    }, indent=2)
 
 
 async def rl_check_status(run_id: str) -> str:
@@ -245,19 +774,11 @@ async def rl_check_status(run_id: str) -> str:
     RATE LIMITED: For long-running training, this function enforces a
     minimum 30-minute interval between checks for the same run_id.
     
-    Fetches latest metrics from WandB if available:
-    - step: Current training step
-    - state: Run state (running, finished, crashed)
-    - reward_mean: Average reward across batches
-    - loss: Training loss
-    - percent_correct: Training accuracy
-    - eval_percent_correct: Evaluation accuracy
-    
     Args:
         run_id: The run ID returned by rl_start_training()
     
     Returns:
-        JSON string with run status and metrics, or rate limit message
+        JSON string with run status and metrics
     """
     global _last_status_check
     
@@ -275,7 +796,65 @@ async def rl_check_status(run_id: str) -> str:
             }, indent=2)
     
     _last_status_check[run_id] = now
-    result = await _make_request("GET", f"/runs/{run_id}")
+    
+    if run_id not in _active_runs:
+        return json.dumps({
+            "error": f"Run '{run_id}' not found",
+            "active_runs": list(_active_runs.keys()),
+        }, indent=2)
+    
+    run_state = _active_runs[run_id]
+    
+    # Check process status
+    processes = {
+        "api": run_state.api_process.poll() if run_state.api_process else None,
+        "trainer": run_state.trainer_process.poll() if run_state.trainer_process else None,
+        "env": run_state.env_process.poll() if run_state.env_process else None,
+    }
+    
+    running_time = time.time() - run_state.start_time if run_state.start_time else 0
+    
+    result = {
+        "run_id": run_id,
+        "status": run_state.status,
+        "environment": run_state.environment,
+        "running_time_minutes": running_time / 60,
+        "processes": {
+            name: "running" if code is None else f"exited ({code})"
+            for name, code in processes.items()
+        },
+        "wandb_project": run_state.wandb_project,
+        "wandb_run_name": run_state.wandb_run_name,
+        "logs": {
+            "api": str(LOGS_DIR / f"api_{run_id}.log"),
+            "trainer": str(LOGS_DIR / f"trainer_{run_id}.log"),
+            "env": str(LOGS_DIR / f"env_{run_id}.log"),
+        },
+    }
+    
+    if run_state.error_message:
+        result["error"] = run_state.error_message
+    
+    # Try to get WandB metrics if available
+    try:
+        import wandb
+        api = wandb.Api()
+        runs = api.runs(
+            f"{os.getenv('WANDB_ENTITY', 'nousresearch')}/{run_state.wandb_project}",
+            filters={"display_name": run_state.wandb_run_name}
+        )
+        if runs:
+            wandb_run = runs[0]
+            result["wandb_url"] = wandb_run.url
+            result["metrics"] = {
+                "step": wandb_run.summary.get("_step", 0),
+                "reward_mean": wandb_run.summary.get("train/reward_mean"),
+                "percent_correct": wandb_run.summary.get("train/percent_correct"),
+                "eval_percent_correct": wandb_run.summary.get("eval/percent_correct"),
+            }
+    except Exception as e:
+        result["wandb_error"] = str(e)
+    
     return json.dumps(result, indent=2)
 
 
@@ -283,84 +862,78 @@ async def rl_stop_training(run_id: str) -> str:
     """
     Stop a running training job.
     
-    Use this if:
-    - Metrics look bad or training is stagnant
-    - You want to try different settings
-    - You need to free up resources
-    
     Args:
         run_id: The run ID to stop
     
     Returns:
         JSON string with stop confirmation
     """
-    result = await _make_request("POST", f"/runs/{run_id}/stop")
-    return json.dumps(result, indent=2)
+    if run_id not in _active_runs:
+        return json.dumps({
+            "error": f"Run '{run_id}' not found",
+            "active_runs": list(_active_runs.keys()),
+        }, indent=2)
+    
+    run_state = _active_runs[run_id]
+    
+    if run_state.status not in ("running", "starting"):
+        return json.dumps({
+            "message": f"Run '{run_id}' is not running (status: {run_state.status})",
+        }, indent=2)
+    
+    _stop_training_run(run_state)
+    
+    return json.dumps({
+        "message": f"Stopped training run '{run_id}'",
+        "run_id": run_id,
+        "status": run_state.status,
+    }, indent=2)
 
 
 async def rl_get_results(run_id: str) -> str:
     """
-    Get final results and metrics for a completed training run.
-    
-    Returns:
-    - Final metrics (reward, loss, accuracy)
-    - WandB run URL for detailed analysis
-    - Path to trained weights (tinker:// URL)
+    Get final results and metrics for a training run.
     
     Args:
         run_id: The run ID to get results for
     
     Returns:
-        JSON string with final results and weights path
+        JSON string with final results
     """
-    result = await _make_request("GET", f"/runs/{run_id}/metrics")
+    if run_id not in _active_runs:
+        return json.dumps({
+            "error": f"Run '{run_id}' not found",
+        }, indent=2)
+    
+    run_state = _active_runs[run_id]
+    
+    result = {
+        "run_id": run_id,
+        "status": run_state.status,
+        "environment": run_state.environment,
+        "wandb_project": run_state.wandb_project,
+        "wandb_run_name": run_state.wandb_run_name,
+    }
+    
+    # Get WandB metrics
+    try:
+        import wandb
+        api = wandb.Api()
+        runs = api.runs(
+            f"{os.getenv('WANDB_ENTITY', 'nousresearch')}/{run_state.wandb_project}",
+            filters={"display_name": run_state.wandb_run_name}
+        )
+        if runs:
+            wandb_run = runs[0]
+            result["wandb_url"] = wandb_run.url
+            result["final_metrics"] = dict(wandb_run.summary)
+            result["history"] = [dict(row) for row in wandb_run.history(samples=10)]
+    except Exception as e:
+        result["wandb_error"] = str(e)
+    
     return json.dumps(result, indent=2)
 
 
-# ============================================================================
-# Inference Testing Tools
-# ============================================================================
-
-async def rl_test_inference(
-    prompts: List[str],
-    max_tokens: int = 256,
-    temperature: float = 1.0,
-) -> str:
-    """
-    Test inference + verifier on sample prompts WITHOUT full training.
-    
-    Use this to validate environments before committing to long training runs.
-    Tests:
-    - Data loading and formatting
-    - Model inference through Tinker
-    - Verifier/reward function logic
-    
-    NOTE: This still requires the RL API server to be running with
-    Tinker access for the Sample() method.
-    
-    Args:
-        prompts: List of test prompts to run through the environment
-        max_tokens: Maximum tokens to generate per prompt
-        temperature: Sampling temperature
-    
-    Returns:
-        JSON string with responses and verifier scores for each prompt
-    
-    TIP: Include prompts with known correct/incorrect answers to verify
-    the reward function is working correctly.
-    """
-    result = await _make_request("POST", "/test/inference", {
-        "prompts": prompts,
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-    })
-    return json.dumps(result, indent=2)
-
-
-# ============================================================================
-# Utility Tools
-# ============================================================================
-
 async def rl_list_runs() -> str:
     """
     List all training runs (active and completed).
@@ -368,8 +941,252 @@ async def rl_list_runs() -> str:
     Returns:
         JSON string with list of runs and their status
     """
-    result = await _make_request("GET", "/runs")
-    return json.dumps(result, indent=2)
+    runs = []
+    for run_id, run_state in _active_runs.items():
+        runs.append({
+            "run_id": run_id,
+            "environment": run_state.environment,
+            "status": run_state.status,
+            "wandb_run_name": run_state.wandb_run_name,
+        })
+    
+    return json.dumps({
+        "runs": runs,
+        "count": len(runs),
+    }, indent=2)
+
+
+# ============================================================================
+# Inference Testing (via Atropos `process` mode with OpenRouter)
+# ============================================================================
+
+# Test models at different scales for robustness testing
+TEST_MODELS = [
+    {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
+    {"id": "zhipu-ai/glm-4-flash", "name": "GLM-4 Flash", "scale": "medium"},
+    {"id": "minimax/minimax-m1", "name": "MiniMax M1", "scale": "large"},
+]
+
+# Default test parameters - quick but representative
+DEFAULT_NUM_STEPS = 3       # Number of steps (items) to test
+DEFAULT_GROUP_SIZE = 16     # Completions per item (like training)
+
+
+async def rl_test_inference(
+    num_steps: int = DEFAULT_NUM_STEPS,
+    group_size: int = DEFAULT_GROUP_SIZE,
+    models: Optional[List[str]] = None,
+) -> str:
+    """
+    Quick inference test for any environment using Atropos's `process` mode.
+    
+    Runs a few steps of inference + scoring to validate:
+    - Environment loads correctly
+    - Prompt construction works
+    - Inference parsing is robust (tested with multiple model scales)
+    - Verifier/scoring logic works
+    
+    Default: 3 steps × 16 completions = 48 total rollouts per model.
+    Tests 3 models = 144 total rollouts. Quick sanity check.
+    
+    Test models (varying intelligence levels for robustness):
+    - qwen/qwen3-8b (small)
+    - zhipu-ai/glm-4-flash (medium)
+    - minimax/minimax-m1 (large)
+    
+    Args:
+        num_steps: Steps to run (default: 3, max recommended for testing)
+        group_size: Completions per step (default: 16, like training)
+        models: Optional model IDs to test. If None, uses all 3 test models.
+    
+    Returns:
+        JSON with results per model: steps_tested, accuracy, scores
+    """
+    if not _current_env:
+        return json.dumps({
+            "error": "No environment selected. Use rl_select_environment(name) first.",
+        }, indent=2)
+    
+    api_key = os.getenv("OPENROUTER_API_KEY")
+    if not api_key:
+        return json.dumps({
+            "error": "OPENROUTER_API_KEY not set. Required for inference testing.",
+        }, indent=2)
+    
+    # Find environment info
+    env_info = None
+    for env in _environments:
+        if env.name == _current_env:
+            env_info = env
+            break
+    
+    if not env_info:
+        return json.dumps({
+            "error": f"Environment '{_current_env}' not found",
+        }, indent=2)
+    
+    # Determine which models to test
+    if models:
+        test_models = [m for m in TEST_MODELS if m["id"] in models]
+        if not test_models:
+            test_models = [{"id": m, "name": m, "scale": "custom"} for m in models]
+    else:
+        test_models = TEST_MODELS
+    
+    # Calculate total rollouts for logging
+    total_rollouts_per_model = num_steps * group_size
+    total_rollouts = total_rollouts_per_model * len(test_models)
+    
+    results = {
+        "environment": _current_env,
+        "environment_file": env_info.file_path,
+        "test_config": {
+            "num_steps": num_steps,
+            "group_size": group_size,
+            "rollouts_per_model": total_rollouts_per_model,
+            "total_rollouts": total_rollouts,
+        },
+        "models_tested": [],
+    }
+    
+    # Create output directory for test results
+    test_output_dir = LOGS_DIR / "inference_tests"
+    test_output_dir.mkdir(exist_ok=True)
+    
+    for model_info in test_models:
+        model_id = model_info["id"]
+        model_safe_name = model_id.replace("/", "_")
+        
+        print(f"\n{'='*60}")
+        print(f"Testing with {model_info['name']} ({model_id})")
+        print(f"{'='*60}")
+        
+        # Output file for this test run
+        output_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.jsonl"
+        
+        # Build the process command using Atropos's built-in CLI
+        # This runs the environment's actual code with OpenRouter as the inference backend
+        cmd = [
+            "python", env_info.file_path, "process",
+            "--env.total_steps", str(num_steps),
+            "--env.group_size", str(group_size),
+            "--env.use_wandb", "false",
+            "--env.data_path_to_save_groups", str(output_file),
+            "--openai.base_url", "https://openrouter.ai/api/v1",
+            "--openai.api_key", api_key,
+            "--openai.model_name", model_id,
+        ]
+        
+        print(f"Running: python {Path(env_info.file_path).name} process ...")
+        print(f"  {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts")
+        
+        model_results = {
+            "model": model_id,
+            "name": model_info["name"],
+            "scale": model_info["scale"],
+            "output_file": str(output_file),
+            "steps": [],
+            "steps_tested": 0,
+            "total_completions": 0,
+            "correct_completions": 0,
+        }
+        
+        try:
+            # Run the process command
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(TINKER_ATROPOS_ROOT),
+            )
+            
+            stdout, stderr = await asyncio.wait_for(
+                process.communicate(),
+                timeout=600,  # 10 minute timeout per model
+            )
+            
+            if process.returncode != 0:
+                model_results["error"] = f"Process exited with code {process.returncode}"
+                model_results["stderr"] = stderr.decode()[-1000:]
+                print(f"  Error: {model_results['error']}")
+            else:
+                print(f"  Process completed successfully")
+                
+                # Parse the output JSONL file
+                if output_file.exists():
+                    # Read JSONL file (one JSON object per line = one step)
+                    with open(output_file, "r") as f:
+                        for line in f:
+                            line = line.strip()
+                            if not line:
+                                continue
+                            try:
+                                item = json.loads(line)
+                                scores = item.get("scores", [])
+                                model_results["steps_tested"] += 1
+                                model_results["total_completions"] += len(scores)
+                                correct = sum(1 for s in scores if s > 0)
+                                model_results["correct_completions"] += correct
+                                
+                                model_results["steps"].append({
+                                    "step": model_results["steps_tested"],
+                                    "completions": len(scores),
+                                    "correct": correct,
+                                    "scores": scores,
+                                })
+                            except json.JSONDecodeError:
+                                continue
+                    
+                    print(f"  Completed {model_results['steps_tested']} steps")
+                else:
+                    model_results["error"] = f"Output file not created: {output_file}"
+                    
+        except asyncio.TimeoutError:
+            model_results["error"] = "Process timed out after 10 minutes"
+            print(f"  Timeout!")
+        except Exception as e:
+            model_results["error"] = str(e)
+            print(f"  Error: {e}")
+        
+        # Calculate stats
+        if model_results["total_completions"] > 0:
+            model_results["accuracy"] = round(
+                model_results["correct_completions"] / model_results["total_completions"], 3
+            )
+        else:
+            model_results["accuracy"] = 0
+            
+        if model_results["steps_tested"] > 0:
+            steps_with_correct = sum(1 for s in model_results["steps"] if s.get("correct", 0) > 0)
+            model_results["steps_with_correct"] = steps_with_correct
+            model_results["step_success_rate"] = round(
+                steps_with_correct / model_results["steps_tested"], 3
+            )
+        else:
+            model_results["steps_with_correct"] = 0
+            model_results["step_success_rate"] = 0
+        
+        print(f"  Results: {model_results['correct_completions']}/{model_results['total_completions']} correct")
+        print(f"  Accuracy: {model_results['accuracy']:.1%}")
+        
+        results["models_tested"].append(model_results)
+    
+    # Overall summary
+    working_models = [m for m in results["models_tested"] if m.get("steps_tested", 0) > 0]
+    
+    results["summary"] = {
+        "steps_requested": num_steps,
+        "models_tested": len(test_models),
+        "models_succeeded": len(working_models),
+        "best_model": max(working_models, key=lambda x: x.get("accuracy", 0))["model"] if working_models else None,
+        "avg_accuracy": round(
+            sum(m.get("accuracy", 0) for m in working_models) / len(working_models), 3
+        ) if working_models else 0,
+        "environment_working": len(working_models) > 0,
+        "output_directory": str(test_output_dir),
+    }
+    
+    return json.dumps(results, indent=2)
 
 
 # ============================================================================
@@ -378,27 +1195,16 @@ async def rl_list_runs() -> str:
 
 def check_rl_api_keys() -> bool:
     """
-    Check if required API keys are available in environment variables.
-    
-    Required:
-    - TINKER_API_KEY: For Tinker training service
-    - WANDB_API_KEY: For metrics logging and fetching
-    
-    Returns:
-        bool: True if all required keys are set, False otherwise
+    Check if required API keys are available.
     """
     tinker_key = os.getenv("TINKER_API_KEY")
     wandb_key = os.getenv("WANDB_API_KEY")
-    
     return bool(tinker_key) and bool(wandb_key)
 
 
 def get_missing_keys() -> List[str]:
     """
     Get list of missing required API keys.
-    
-    Returns:
-        List of missing key names
     """
     missing = []
     if not os.getenv("TINKER_API_KEY"):
@@ -406,18 +1212,3 @@ def get_missing_keys() -> List[str]:
     if not os.getenv("WANDB_API_KEY"):
         missing.append("WANDB_API_KEY")
     return missing
-
-
-# ============================================================================
-# Debug/Status
-# ============================================================================
-
-async def rl_health_check() -> str:
-    """
-    Check if the RL API server is running and accessible.
-    
-    Returns:
-        JSON string with server health status
-    """
-    result = await _make_request("GET", "/health")
-    return json.dumps(result, indent=2)
diff --git a/toolsets.py b/toolsets.py
index e4644251c7..abd6192a98 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -97,7 +97,7 @@ TOOLSETS = {
             "rl_get_current_config", "rl_edit_config",
             "rl_start_training", "rl_check_status",
             "rl_stop_training", "rl_get_results",
-            "rl_test_inference", "rl_list_runs"
+            "rl_list_runs", "rl_test_inference"
         ],
         "includes": []
     },

From 3c0d0dba49f99da4b4e363545dfe1e2fac0417e6 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 4 Feb 2026 13:57:59 -0800
Subject: [PATCH 36/48] Update RL tools and enhance configuration management

- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`.
- Enhanced `README.md` with installation instructions for submodules and updated API key usage.
- Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools.
- Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation.
- Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.
---
 README.md                 |  23 ++++---
 model_tools.py            |  10 ++--
 rl_cli.py                 |  91 +++++++++++++++++++++++++---
 run_agent.py              |   8 ++-
 scripts/install.ps1       |  42 ++++++++++++-
 scripts/install.sh        |  34 +++++++++--
 tools/rl_training_tool.py | 122 +++++++++++++++++++++++++++++++-------
 7 files changed, 274 insertions(+), 56 deletions(-)

diff --git a/README.md b/README.md
index f49ae26afd..a1673c912e 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins
 ```
 
 The installer will:
-- Clone to `~/.hermes-agent`
+- Clone to `~/.hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
 - Create a virtual environment
 - Install all dependencies
 - Run the interactive setup wizard
@@ -281,18 +281,10 @@ Train language models with reinforcement learning using the Tinker API and Atrop
 ```bash
 TINKER_API_KEY=your-tinker-key      # Get from https://tinker-console.thinkingmachines.ai/keys
 WANDB_API_KEY=your-wandb-key        # Get from https://wandb.ai/authorize
+OPENROUTER_API_KEY=your-key         # Optional: for rl_test_inference
 ```
 
-2. **Install tinker-atropos:** (in a separate directory)
-```bash
-cd ~/tinker-atropos
-pip install -e .
-```
-
-3. **Start the RL API server:**
-```bash
-rl-server    # Runs on port 8080 by default
-```
+2. **That's it!** tinker-atropos is included as a submodule - no separate installation needed.
 
 #### Using RL Tools
 
@@ -313,10 +305,12 @@ Agent: I'll set up an RL training run on the GSM8k environment...
 | `rl_select_environment` | Select an environment for training |
 | `rl_get_current_config` | View all configurable options |
 | `rl_edit_config` | Change a configuration value |
+| `rl_test_inference` | Test environment with OpenRouter (pre-training validation) |
 | `rl_start_training` | Start a training run |
 | `rl_check_status` | Check training progress |
 | `rl_stop_training` | Stop a running training |
 | `rl_get_results` | Fetch WandB metrics |
+| `rl_list_runs` | List all training runs (active and completed) |
 
 #### Dedicated RL CLI
 
@@ -434,7 +428,7 @@ skills/
 If you prefer not to use the installer:
 
 ```bash
-# Clone the repository
+# Clone the repository (with submodules)
 git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
 cd hermes-agent
 
@@ -445,6 +439,11 @@ cd hermes-agent
 python3 -m venv venv
 source venv/bin/activate
 pip install -e ".[all]"
+
+# Install submodules (required for terminal and RL tools)
+pip install -e "./mini-swe-agent"    # Terminal tool backend
+pip install -e "./tinker-atropos"    # RL training backend
+
 hermes setup
 ```
 
diff --git a/model_tools.py b/model_tools.py
index 847e56effe..e95a595c8a 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -665,7 +665,7 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
                         "models": {
                             "type": "array",
                             "items": {"type": "string"},
-                            "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, zhipu-ai/glm-4-flash, minimax/minimax-m1"
+                            "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"
                         }
                     },
                     "required": []
@@ -730,7 +730,7 @@ def get_all_tool_names() -> List[str]:
             "rl_get_current_config", "rl_edit_config",
             "rl_start_training", "rl_check_status",
             "rl_stop_training", "rl_get_results",
-            "rl_list_runs"
+            "rl_list_runs", "rl_test_inference"
         ])
     
     return tool_names
@@ -898,7 +898,7 @@ def get_tool_definitions(
                             "rl_get_current_config", "rl_edit_config",
                             "rl_start_training", "rl_check_status",
                             "rl_stop_training", "rl_get_results",
-                            "rl_list_runs"
+                            "rl_list_runs", "rl_test_inference"
                         ]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
@@ -950,7 +950,7 @@ def get_tool_definitions(
                             "rl_get_current_config", "rl_edit_config",
                             "rl_start_training", "rl_check_status",
                             "rl_stop_training", "rl_get_results",
-                            "rl_list_runs"
+                            "rl_list_runs", "rl_test_inference"
                         ]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
@@ -1407,7 +1407,7 @@ def handle_function_call(
             "rl_get_current_config", "rl_edit_config",
             "rl_start_training", "rl_check_status",
             "rl_stop_training", "rl_get_results",
-            "rl_list_runs"
+            "rl_list_runs", "rl_test_inference"
         ]:
             return handle_rl_function_call(function_name, function_args)
 
diff --git a/rl_cli.py b/rl_cli.py
index fe0eecfd48..a45c365b4d 100644
--- a/rl_cli.py
+++ b/rl_cli.py
@@ -25,14 +25,34 @@ import sys
 from pathlib import Path
 
 import fire
+import yaml
 
 # Load environment variables from .env file
 from dotenv import load_dotenv
 
-env_path = Path(__file__).parent / '.env'
-if env_path.exists():
-    load_dotenv(dotenv_path=env_path)
-    print(f"✅ Loaded environment variables from {env_path}")
+# Load from ~/.hermes/.env if it exists; otherwise fall back to the local .env
+hermes_env_path = Path.home() / '.hermes' / '.env'
+local_env_path = Path(__file__).parent / '.env'
+
+if hermes_env_path.exists():
+    load_dotenv(dotenv_path=hermes_env_path)
+    print(f"✅ Loaded environment variables from {hermes_env_path}")
+elif local_env_path.exists():
+    load_dotenv(dotenv_path=local_env_path)
+    print(f"✅ Loaded environment variables from {local_env_path}")
+
+# Set terminal working directory to tinker-atropos submodule
+# This ensures terminal commands run in the right context for RL work
+tinker_atropos_dir = Path(__file__).parent / 'tinker-atropos'
+if tinker_atropos_dir.exists():
+    os.environ['TERMINAL_CWD'] = str(tinker_atropos_dir)
+    os.environ['HERMES_QUIET'] = '1'  # Disable temp subdirectory creation
+    print(f"📂 Terminal working directory: {tinker_atropos_dir}")
+else:
+    # Fall back to hermes-agent directory if submodule not found
+    os.environ['TERMINAL_CWD'] = str(Path(__file__).parent)
+    os.environ['HERMES_QUIET'] = '1'
+    print(f"⚠️  tinker-atropos submodule not found, using: {Path(__file__).parent}")
 
 # Import agent and tools
 from run_agent import AIAgent
@@ -40,6 +60,50 @@ from model_tools import get_tool_definitions, check_toolset_requirements
 from tools.rl_training_tool import check_rl_api_keys, get_missing_keys
 
 
+# ============================================================================
+# Config Loading
+# ============================================================================
+
+DEFAULT_MODEL = "anthropic/claude-opus-4.5"
+DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"
+
+
+def load_hermes_config() -> dict:
+    """
+    Read "model" and "base_url" from ~/.hermes/config.yaml, starting from the built-in defaults.
+    
+    Returns:
+        dict: Keys "model" and "base_url"; defaults remain if the file is missing or cannot be read/parsed.
+    """
+    config_path = Path.home() / '.hermes' / 'config.yaml'
+    
+    config = {
+        "model": DEFAULT_MODEL,
+        "base_url": DEFAULT_BASE_URL,
+    }
+    
+    if config_path.exists():
+        try:
+            with open(config_path, "r") as f:
+                file_config = yaml.safe_load(f) or {}  # an empty file parses to None
+            
+            # "model" may be a plain string or a mapping with a "default" entry
+            if "model" in file_config:
+                if isinstance(file_config["model"], str):
+                    config["model"] = file_config["model"]
+                elif isinstance(file_config["model"], dict):
+                    config["model"] = file_config["model"].get("default", DEFAULT_MODEL)
+            
+            # A top-level "base_url" overrides the default when present
+            if "base_url" in file_config:
+                config["base_url"] = file_config["base_url"]
+                
+        except Exception as e:  # best-effort: warn and keep whatever values are already set
+            print(f"⚠️  Warning: Failed to load config.yaml: {e}")
+    
+    return config
+
+
 # ============================================================================
 # RL-Specific Configuration
 # ============================================================================
@@ -108,7 +172,7 @@ When asked to train a model, follow this workflow:
 """
 
 # Toolsets to enable for RL workflows
-RL_TOOLSETS = ["base", "terminal", "web", "rl"]
+RL_TOOLSETS = ["terminal", "web", "rl"]
 
 
 # ============================================================================
@@ -172,9 +236,9 @@ def list_environments_sync():
 
 def main(
     task: str = None,
-    model: str = "anthropic/claude-sonnet-4-20250514",
+    model: str = None,
     api_key: str = None,
-    base_url: str = "https://openrouter.ai/api/v1",
+    base_url: str = None,
     max_iterations: int = RL_MAX_ITERATIONS,
     interactive: bool = False,
     list_environments: bool = False,
@@ -187,9 +251,9 @@ def main(
     
     Args:
         task: The training task/goal (e.g., "Train a model on GSM8k for math")
-        model: Model to use for the agent (default: claude-sonnet-4)
+        model: Model to use for the agent (reads from ~/.hermes/config.yaml if not provided)
         api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided)
-        base_url: API base URL (default: OpenRouter)
+        base_url: API base URL (reads from config or defaults to OpenRouter)
         max_iterations: Maximum agent iterations (default: 200 for long workflows)
         interactive: Run in interactive mode (multiple conversations)
         list_environments: Just list available RL environments and exit
@@ -210,6 +274,15 @@ def main(
         # Check server status
         python rl_cli.py --check-server
     """
+    # Load config from ~/.hermes/config.yaml
+    config = load_hermes_config()
+    
+    # Use config values if not explicitly provided
+    if model is None:
+        model = config["model"]
+    if base_url is None:
+        base_url = config["base_url"]
+    
     print("🎯 RL Training Agent")
     print("=" * 60)
     
diff --git a/run_agent.py b/run_agent.py
index 7b70289fff..1aceb5b589 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1764,10 +1764,16 @@ class AIAgent:
                         self._invalid_tool_retries = 0
                     
                     # Validate tool call arguments are valid JSON
+                    # Handle empty strings as empty objects (common model quirk)
                     invalid_json_args = []
                     for tc in assistant_message.tool_calls:
+                        args = tc.function.arguments
+                        # Treat empty/whitespace strings as empty object
+                        if not args or not args.strip():
+                            tc.function.arguments = "{}"
+                            continue
                         try:
-                            json.loads(tc.function.arguments)
+                            json.loads(args)
                         except json.JSONDecodeError as e:
                             invalid_json_args.append((tc.function.name, str(e)))
                     
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index caf80288d4..3666b21b58 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -150,14 +150,15 @@ function Install-Repository {
         }
     } else {
         # Try SSH first (for private repo access), fall back to HTTPS
+        # Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
         Write-Info "Trying SSH clone..."
-        $sshResult = git clone --branch $Branch $RepoUrlSsh $InstallDir 2>&1
+        $sshResult = git clone --branch $Branch --recurse-submodules $RepoUrlSsh $InstallDir 2>&1
         
         if ($LASTEXITCODE -eq 0) {
             Write-Success "Cloned via SSH"
         } else {
             Write-Info "SSH failed, trying HTTPS..."
-            $httpsResult = git clone --branch $Branch $RepoUrlHttps $InstallDir 2>&1
+            $httpsResult = git clone --branch $Branch --recurse-submodules $RepoUrlHttps $InstallDir 2>&1
             
             if ($LASTEXITCODE -eq 0) {
                 Write-Success "Cloned via HTTPS"
@@ -171,6 +172,13 @@ function Install-Repository {
         }
     }
     
+    # Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
+    Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
+    Push-Location $InstallDir
+    git submodule update --init --recursive
+    Pop-Location
+    Write-Success "Submodules ready"
+    
     Write-Success "Repository ready"
 }
 
@@ -208,15 +216,43 @@ function Install-Dependencies {
         & .\venv\Scripts\Activate.ps1
     }
     
+    # Install main package
     try {
         pip install -e ".[all]" 2>&1 | Out-Null
     } catch {
         pip install -e "." | Out-Null
     }
     
+    Write-Success "Main package installed"
+    
+    # Install submodules
+    Write-Info "Installing mini-swe-agent (terminal tool backend)..."
+    if (Test-Path "mini-swe-agent\pyproject.toml") {
+        try {
+            pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
+            Write-Success "mini-swe-agent installed"
+        } catch {
+            Write-Warning "mini-swe-agent install failed (terminal tools may not work)"
+        }
+    } else {
+        Write-Warning "mini-swe-agent not found (run: git submodule update --init)"
+    }
+    
+    Write-Info "Installing tinker-atropos (RL training backend)..."
+    if (Test-Path "tinker-atropos\pyproject.toml") {
+        try {
+            pip install -e ".\tinker-atropos" 2>&1 | Out-Null
+            Write-Success "tinker-atropos installed"
+        } catch {
+            Write-Warning "tinker-atropos install failed (RL tools may not work)"
+        }
+    } else {
+        Write-Warning "tinker-atropos not found (run: git submodule update --init)"
+    }
+    
     Pop-Location
     
-    Write-Success "Dependencies installed"
+    Write-Success "All dependencies installed"
 }
 
 function Set-PathVariable {
diff --git a/scripts/install.sh b/scripts/install.sh
index 463a0d5bea..4b8affaa6e 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -292,12 +292,13 @@ clone_repo() {
         fi
     else
         # Try SSH first (for private repo access), fall back to HTTPS
+        # Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
         log_info "Trying SSH clone..."
-        if git clone --branch "$BRANCH" "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
+        if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
             log_success "Cloned via SSH"
         else
             log_info "SSH failed, trying HTTPS..."
-            if git clone --branch "$BRANCH" "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
+            if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
                 log_success "Cloned via HTTPS"
             else
                 log_error "Failed to clone repository"
@@ -310,6 +311,12 @@ clone_repo() {
     fi
     
     cd "$INSTALL_DIR"
+    
+    # Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
+    log_info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
+    git submodule update --init --recursive
+    log_success "Submodules ready"
+    
     log_success "Repository ready"
 }
 
@@ -343,10 +350,29 @@ install_deps() {
         source venv/bin/activate
     fi
     
-    # Install the package in editable mode with all extras
+    # Install the main package in editable mode with all extras
     pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
     
-    log_success "Dependencies installed"
+    log_success "Main package installed"
+    
+    # Install submodules
+    log_info "Installing mini-swe-agent (terminal tool backend)..."
+    if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
+        pip install -e "./mini-swe-agent" > /dev/null 2>&1 || log_warn "mini-swe-agent install failed (terminal tools may not work)"
+        log_success "mini-swe-agent installed"
+    else
+        log_warn "mini-swe-agent not found (run: git submodule update --init)"
+    fi
+    
+    log_info "Installing tinker-atropos (RL training backend)..."
+    if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
+        pip install -e "./tinker-atropos" > /dev/null 2>&1 || log_warn "tinker-atropos install failed (RL tools may not work)"
+        log_success "tinker-atropos installed"
+    else
+        log_warn "tinker-atropos not found (run: git submodule update --init)"
+    fi
+    
+    log_success "All dependencies installed"
 }
 
 setup_path() {
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 3c257c4c58..8c18bee670 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -37,6 +37,7 @@ import subprocess
 import sys
 import time
 import uuid
+from datetime import datetime
 import yaml
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -84,6 +85,7 @@ LOCKED_FIELDS = {
             "weight": 1.0,
             "num_requests_for_eval": 256,
             "timeout": 3600,
+            "server_type": "sglang",  # Tinker uses sglang for actual training
         }
     ],
     "tinker": {
@@ -211,6 +213,9 @@ def _scan_environments() -> List[EnvironmentInfo]:
 def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
     """
     Dynamically import an environment and extract its config fields.
+    
+    Uses config_init() to get the actual config class, with fallback to
+    directly importing BaseEnvConfig if config_init fails.
     """
     try:
         # Load the environment module
@@ -230,15 +235,38 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
         if not env_class:
             return {}
         
-        # Call config_init to get the actual config
-        env_config, server_configs = env_class.config_init()
-        config_class = type(env_config)
+        # Try calling config_init to get the actual config class
+        config_class = None
+        try:
+            env_config, server_configs = env_class.config_init()
+            config_class = type(env_config)
+        except Exception as config_error:
+            # Fallback: try to import BaseEnvConfig directly from atroposlib
+            print(f"Note: config_init failed ({config_error}), using BaseEnvConfig defaults")
+            try:
+                from atroposlib.envs.base import BaseEnvConfig
+                config_class = BaseEnvConfig
+            except ImportError:
+                return {}
+        
+        if not config_class:
+            return {}
+        
+        # Helper to make values JSON-serializable (handle enums, etc.)
+        def make_serializable(val):
+            if val is None:
+                return None
+            if hasattr(val, 'value'):  # Enum
+                return val.value
+            if hasattr(val, 'name') and hasattr(val, '__class__') and 'Enum' in str(type(val)):
+                return val.name
+            return val
         
         # Extract fields from the Pydantic model
         fields = {}
         for field_name, field_info in config_class.model_fields.items():
             field_type = field_info.annotation
-            default = field_info.default
+            default = make_serializable(field_info.default)
             description = field_info.description or ""
             
             is_locked = field_name in LOCKED_FIELD_NAMES
@@ -248,12 +276,15 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
             if hasattr(field_type, "__origin__"):
                 type_name = str(field_type)
             
+            locked_value = LOCKED_FIELDS.get("env", {}).get(field_name, default)
+            current_value = make_serializable(locked_value) if is_locked else default
+            
             fields[field_name] = {
                 "type": type_name,
-                "default": default if default is not None else None,
+                "default": default,
                 "description": description,
                 "locked": is_locked,
-                "current_value": LOCKED_FIELDS.get("env", {}).get(field_name, default) if is_locked else default,
+                "current_value": current_value,
             }
         
         return fields
@@ -315,7 +346,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         
         trainer_log_file = open(trainer_log, "w")
         run_state.trainer_process = subprocess.Popen(
-            ["python", "launch_training.py", "--config", str(config_path)],
+            [sys.executable, "launch_training.py", "--config", str(config_path)],
             stdout=trainer_log_file,
             stderr=subprocess.STDOUT,
             cwd=str(TINKER_ATROPOS_ROOT),
@@ -355,7 +386,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         
         env_log_file = open(env_log, "w")
         run_state.env_process = subprocess.Popen(
-            ["python", str(env_info.file_path), "serve", "--config", str(config_path)],
+            [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
             stdout=env_log_file,
             stderr=subprocess.STDOUT,
             cwd=str(TINKER_ATROPOS_ROOT),
@@ -543,17 +574,14 @@ async def rl_select_environment(name: str) -> str:
         if not field_info.get("locked", False):
             _current_config[field_name] = field_info.get("default")
     
-    configurable_count = sum(1 for f in config_fields.values() if not f.get("locked", False))
-    locked_count = sum(1 for f in config_fields.values() if f.get("locked", False))
+    # Auto-set wandb_name to "{env_name}-DATETIME" to avoid overlaps
+    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+    _current_config["wandb_name"] = f"{name}-{timestamp}"
     
     return json.dumps({
         "message": f"Selected environment: {name}",
         "environment": name,
         "file_path": env_info.file_path,
-        "configurable_fields": configurable_count,
-        "locked_fields": locked_count,
-        "config": _current_config,
-        "tip": f"Use rl_get_current_config() to see all {configurable_count} configurable fields.",
     }, indent=2)
 
 
@@ -961,10 +989,11 @@ async def rl_list_runs() -> str:
 # ============================================================================
 
 # Test models at different scales for robustness testing
+# These are cheap, capable models on OpenRouter for testing parsing/scoring
 TEST_MODELS = [
     {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
-    {"id": "zhipu-ai/glm-4-flash", "name": "GLM-4 Flash", "scale": "medium"},
-    {"id": "minimax/minimax-m1", "name": "MiniMax M1", "scale": "large"},
+    {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
+    {"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"},
 ]
 
 # Default test parameters - quick but representative
@@ -1066,18 +1095,35 @@ async def rl_test_inference(
         
         # Build the process command using Atropos's built-in CLI
         # This runs the environment's actual code with OpenRouter as the inference backend
+        # We pass our locked settings + test-specific overrides via CLI args
         cmd = [
-            "python", env_info.file_path, "process",
+            sys.executable, env_info.file_path, "process",
+            # Test-specific overrides
             "--env.total_steps", str(num_steps),
             "--env.group_size", str(group_size),
-            "--env.use_wandb", "false",
+            "--env.use_wandb", "false",  # No wandb for quick tests
             "--env.data_path_to_save_groups", str(output_file),
+            # Use locked settings from our config
+            "--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"],
+            "--env.max_token_length", str(LOCKED_FIELDS["env"]["max_token_length"]),
+            "--env.max_num_workers", str(LOCKED_FIELDS["env"]["max_num_workers"]),
+            "--env.max_batches_offpolicy", str(LOCKED_FIELDS["env"]["max_batches_offpolicy"]),
+            # OpenRouter config for inference testing
+            # IMPORTANT: Use server_type=openai for OpenRouter (not sglang)
+            # sglang is only for actual training with Tinker's inference server
             "--openai.base_url", "https://openrouter.ai/api/v1",
             "--openai.api_key", api_key,
             "--openai.model_name", model_id,
+            "--openai.server_type", "openai",  # OpenRouter is OpenAI-compatible
+            "--openai.health_check", "false",  # OpenRouter doesn't have health endpoint
         ]
         
-        print(f"Running: python {Path(env_info.file_path).name} process ...")
+        # Debug: Print the full command
+        cmd_str = " ".join(str(c) for c in cmd)
+        # Hide API key in printed output
+        cmd_display = cmd_str.replace(api_key, "***API_KEY***")
+        print(f"Command: {cmd_display}")
+        print(f"Working dir: {TINKER_ATROPOS_ROOT}")
         print(f"  {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts")
         
         model_results = {
@@ -1105,12 +1151,44 @@ async def rl_test_inference(
                 timeout=600,  # 10 minute timeout per model
             )
             
+            # Decode output
+            stdout_text = stdout.decode() if stdout else ""
+            stderr_text = stderr.decode() if stderr else ""
+            
+            # Write logs to files for inspection outside CLI
+            log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log"
+            with open(log_file, "w") as f:
+                f.write(f"Command: {cmd_display}\n")
+                f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
+                f.write(f"Return code: {process.returncode}\n")
+                f.write(f"\n{'='*60}\n")
+                f.write(f"STDOUT:\n{'='*60}\n")
+                f.write(stdout_text or "(empty)\n")
+                f.write(f"\n{'='*60}\n")
+                f.write(f"STDERR:\n{'='*60}\n")
+                f.write(stderr_text or "(empty)\n")
+            
+            print(f"  Log file: {log_file}")
+            
+            # Print to console for immediate debugging
+            if stdout_text.strip():
+                print(f"\n--- STDOUT ---")
+                print(stdout_text[-2000:])  # Last 2000 chars
+            
+            if stderr_text.strip():
+                print(f"\n--- STDERR ---")
+                print(stderr_text[-2000:])  # Last 2000 chars
+            
             if process.returncode != 0:
                 model_results["error"] = f"Process exited with code {process.returncode}"
-                model_results["stderr"] = stderr.decode()[-1000:]
-                print(f"  Error: {model_results['error']}")
+                model_results["stderr"] = stderr_text[-1000:]
+                model_results["stdout"] = stdout_text[-1000:]
+                model_results["log_file"] = str(log_file)
+                print(f"\n  ❌ Error: {model_results['error']}")
             else:
-                print(f"  Process completed successfully")
+                print(f"\n  ✅ Process completed successfully")
+                print(f"  Output file: {output_file}")
+                print(f"  File exists: {output_file.exists()}")
                 
                 # Parse the output JSONL file
                 if output_file.exists():

From 5c3105b4376c7422b7c0c0f76e487f14a72a3e38 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 4 Feb 2026 21:07:07 -0800
Subject: [PATCH 37/48] Enhance RL test inference with WandB integration and
 real-time output streaming

- Added unique run ID generation for WandB tracking during test inference.
- Enabled WandB usage for test tracking and updated command-line arguments accordingly.
- Implemented real-time output streaming for process execution, improving log visibility and debugging.
- Enhanced error handling to display last few lines of stderr for better troubleshooting.
---
 tools/rl_training_tool.py | 67 ++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 8c18bee670..770c542c7b 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -1093,6 +1093,10 @@ async def rl_test_inference(
         # Output file for this test run
         output_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.jsonl"
         
+        # Generate unique run ID for wandb
+        test_run_id = str(uuid.uuid4())[:8]
+        wandb_run_name = f"test_inference_RSIAgent_{_current_env}_{test_run_id}"
+        
         # Build the process command using Atropos's built-in CLI
         # This runs the environment's actual code with OpenRouter as the inference backend
         # We pass our locked settings + test-specific overrides via CLI args
@@ -1101,7 +1105,8 @@ async def rl_test_inference(
             # Test-specific overrides
             "--env.total_steps", str(num_steps),
             "--env.group_size", str(group_size),
-            "--env.use_wandb", "false",  # No wandb for quick tests
+            "--env.use_wandb", "true",  # Enable wandb for test tracking
+            "--env.wandb_name", wandb_run_name,
             "--env.data_path_to_save_groups", str(output_file),
             # Use locked settings from our config
             "--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"],
@@ -1124,12 +1129,14 @@ async def rl_test_inference(
         cmd_display = cmd_str.replace(api_key, "***API_KEY***")
         print(f"Command: {cmd_display}")
         print(f"Working dir: {TINKER_ATROPOS_ROOT}")
+        print(f"WandB run: {wandb_run_name}")
         print(f"  {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts")
         
         model_results = {
             "model": model_id,
             "name": model_info["name"],
             "scale": model_info["scale"],
+            "wandb_run": wandb_run_name,
             "output_file": str(output_file),
             "steps": [],
             "steps_tested": 0,
@@ -1138,7 +1145,7 @@ async def rl_test_inference(
         }
         
         try:
-            # Run the process command
+            # Run the process command with real-time output streaming
             process = await asyncio.create_subprocess_exec(
                 *cmd,
                 stdout=asyncio.subprocess.PIPE,
@@ -1146,17 +1153,43 @@ async def rl_test_inference(
                 cwd=str(TINKER_ATROPOS_ROOT),
             )
             
-            stdout, stderr = await asyncio.wait_for(
-                process.communicate(),
-                timeout=600,  # 10 minute timeout per model
-            )
+            # Stream output in real-time while collecting for logs
+            stdout_lines = []
+            stderr_lines = []
+            log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log"
             
-            # Decode output
-            stdout_text = stdout.decode() if stdout else ""
-            stderr_text = stderr.decode() if stderr else ""
+            async def read_stream(stream, lines_list, prefix=""):
+                """Read stream line by line and print in real-time."""
+                while True:
+                    line = await stream.readline()
+                    if not line:
+                        break
+                    decoded = line.decode().rstrip()
+                    lines_list.append(decoded)
+                    # Print progress-related lines in real-time
+                    if any(kw in decoded.lower() for kw in ['processing', 'group', 'step', 'progress', '%', 'completed']):
+                        print(f"  {prefix}{decoded}")
+            
+            # Read both streams concurrently with timeout
+            try:
+                await asyncio.wait_for(
+                    asyncio.gather(
+                        read_stream(process.stdout, stdout_lines, "📊 "),
+                        read_stream(process.stderr, stderr_lines, "⚠️ "),
+                    ),
+                    timeout=600,  # 10 minute timeout per model
+                )
+            except asyncio.TimeoutError:
+                process.kill()
+                raise
+            
+            await process.wait()
+            
+            # Combine output for logging
+            stdout_text = "\n".join(stdout_lines)
+            stderr_text = "\n".join(stderr_lines)
             
             # Write logs to files for inspection outside CLI
-            log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log"
             with open(log_file, "w") as f:
                 f.write(f"Command: {cmd_display}\n")
                 f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
@@ -1170,21 +1203,17 @@ async def rl_test_inference(
             
             print(f"  Log file: {log_file}")
             
-            # Print to console for immediate debugging
-            if stdout_text.strip():
-                print(f"\n--- STDOUT ---")
-                print(stdout_text[-2000:])  # Last 2000 chars
-            
-            if stderr_text.strip():
-                print(f"\n--- STDERR ---")
-                print(stderr_text[-2000:])  # Last 2000 chars
-            
             if process.returncode != 0:
                 model_results["error"] = f"Process exited with code {process.returncode}"
                 model_results["stderr"] = stderr_text[-1000:]
                 model_results["stdout"] = stdout_text[-1000:]
                 model_results["log_file"] = str(log_file)
                 print(f"\n  ❌ Error: {model_results['error']}")
+                # Print last few lines of stderr for debugging
+                if stderr_lines:
+                    print(f"  Last errors:")
+                    for line in stderr_lines[-5:]:
+                        print(f"    {line}")
             else:
                 print(f"\n  ✅ Process completed successfully")
                 print(f"  Output file: {output_file}")

From 533c064269417d4c213aa8393e3a6098a78fb5d1 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Thu, 5 Feb 2026 03:49:46 -0800
Subject: [PATCH 38/48] Add file manipulation tools and enhance setup scripts

- Introduced file manipulation capabilities in `model_tools.py`, including functions for reading, writing, patching, and searching files.
- Added a new `file` toolset in `toolsets.py` and updated distributions to include file tools.
- Enhanced `setup-hermes.sh` and `install.sh` scripts to check for and optionally install `ripgrep` for faster file searching.
- Implemented a new `file_operations.py` module to encapsulate file operations using shell commands.
- Updated `doctor.py` and `install.ps1` to check for `ripgrep` and provide installation guidance if not found.
- Added fuzzy matching and patch parsing capabilities to improve file manipulation accuracy and flexibility.
---
 hermes_cli/doctor.py     |   7 +
 model_tools.py           | 266 ++++++++++-
 scripts/install.ps1      |  89 +++-
 scripts/install.sh       | 124 ++++++
 setup-hermes.sh          |  47 ++
 tools/__init__.py        |  24 +
 tools/file_operations.py | 937 +++++++++++++++++++++++++++++++++++++++
 tools/file_tools.py      | 113 +++++
 tools/fuzzy_match.py     | 478 ++++++++++++++++++++
 tools/patch_parser.py    | 439 ++++++++++++++++++
 toolset_distributions.py |  24 +-
 toolsets.py              |  14 +-
 12 files changed, 2549 insertions(+), 13 deletions(-)
 create mode 100644 tools/file_operations.py
 create mode 100644 tools/file_tools.py
 create mode 100644 tools/fuzzy_match.py
 create mode 100644 tools/patch_parser.py

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 82b7e5414b..5e0ee39fa7 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -167,6 +167,13 @@ def run_doctor(args):
     else:
         check_warn("git not found", "(optional)")
     
+    # ripgrep (optional, for faster file search)
+    if shutil.which("rg"):
+        check_ok("ripgrep (rg)", "(faster file search)")
+    else:
+        check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
+        check_info("Install for faster search: sudo apt install ripgrep")
+    
     # Docker (optional)
     terminal_env = os.getenv("TERMINAL_ENV", "local")
     if terminal_env == "docker":
diff --git a/model_tools.py b/model_tools.py
index e95a595c8a..203a6669d5 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -33,6 +33,9 @@ from typing import Dict, Any, List, Optional, Tuple
 
 from tools.web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_firecrawl_api_key
 from tools.terminal_tool import terminal_tool, check_terminal_requirements, TERMINAL_TOOL_DESCRIPTION, cleanup_vm
+# File manipulation tools (read, write, patch, search)
+from tools.file_tools import read_file_tool, write_file_tool, patch_tool, search_tool
+from tools import check_file_requirements
 # Hecate/MorphCloud terminal tool (cloud VMs) - available as alternative backend
 from tools.terminal_hecate import terminal_hecate_tool, check_hecate_requirements, TERMINAL_HECATE_DESCRIPTION
 from tools.vision_tools import vision_analyze_tool, check_vision_requirements
@@ -155,6 +158,13 @@ TOOLSET_REQUIREMENTS = {
             "rl_list_runs", "rl_test_inference",
         ],
     },
+    "file": {
+        "name": "File Operations (read, write, patch, search)",
+        "env_vars": [],  # Uses terminal backend, no additional requirements
+        "check_fn": check_file_requirements,
+        "setup_url": None,
+        "tools": ["read_file", "write_file", "patch", "search"],
+    },
 }
 
 
@@ -675,6 +685,163 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
     ]
 
 
+def get_file_tool_definitions() -> List[Dict[str, Any]]:
+    """
+    Get tool definitions for the file tools (read_file, write_file, patch, search).
+    
+    File tools operate via the terminal backend and support any environment
+    (local, docker, singularity, ssh, modal). Defaults match handle_file_function_call.
+    
+    Returns:
+        List[Dict]: One {"type": "function", ...} entry per tool, OpenAI API compatible
+    """
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "read_file",
+                "description": "Read a file with pagination support. Returns content with line numbers in 'LINE_NUM|CONTENT' format. For binary files (images), returns base64-encoded data. If file not found, suggests similar filenames.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "path": {
+                            "type": "string",
+                            "description": "Path to the file to read (absolute or relative)"
+                        },
+                        "offset": {
+                            "type": "integer",
+                            "description": "Line number to start reading from (1-indexed, default: 1)",
+                            "default": 1,
+                            "minimum": 1
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of lines to read (default: 500, max: 2000)",
+                            "default": 500,
+                            "maximum": 2000
+                        }
+                    },
+                    "required": ["path"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "write_file",
+                "description": "Write content to a file. Creates parent directories automatically. Returns bytes written and lint check results for supported languages.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "path": {
+                            "type": "string",
+                            "description": "Path to the file to write (will be created if doesn't exist)"
+                        },
+                        "content": {
+                            "type": "string",
+                            "description": "Content to write to the file"
+                        }
+                    },
+                    "required": ["path", "content"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "patch",
+                "description": "Modify files using either simple string replacement or V4A patch format. Mode 'replace' does find-and-replace with fuzzy matching. Mode 'patch' applies multi-file changes using V4A format (*** Begin/End Patch). Auto-runs syntax checks on modified files.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "mode": {
+                            "type": "string",
+                            "enum": ["replace", "patch"],
+                            "description": "Edit mode: 'replace' for string replacement, 'patch' for V4A patch format",
+                            "default": "replace"
+                        },
+                        "path": {
+                            "type": "string",
+                            "description": "File path (required for 'replace' mode)"
+                        },
+                        "old_string": {
+                            "type": "string",
+                            "description": "Text to find and replace (required for 'replace' mode). Must be unique in file unless replace_all=true"
+                        },
+                        "new_string": {
+                            "type": "string",
+                            "description": "Replacement text (required for 'replace' mode)"
+                        },
+                        "replace_all": {
+                            "type": "boolean",
+                            "description": "Replace all occurrences instead of requiring unique match (default: false)",
+                            "default": False
+                        },
+                        "patch": {
+                            "type": "string",
+                            "description": "V4A format patch content (required for 'patch' mode). Format: *** Begin Patch / *** Update File: path / @@ context @@ / -removed / +added / *** End Patch"
+                        }
+                    },
+                    "required": ["mode"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "search",
+                "description": "Search for content in files or search for files by name. Use target='content' to search inside files (like grep), or target='files' to find files by name pattern (like glob/find). Results sorted by modification time (newest first).",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "pattern": {
+                            "type": "string",
+                            "description": "For target='content': regex pattern to search for. For target='files': glob pattern (e.g., '*.py', '*config*')"
+                        },
+                        "target": {
+                            "type": "string",
+                            "enum": ["content", "files"],
+                            "description": "Search mode: 'content' searches inside files, 'files' searches for files by name",
+                            "default": "content"
+                        },
+                        "path": {
+                            "type": "string",
+                            "description": "Directory or file to search in (default: current directory)",
+                            "default": "."
+                        },
+                        "file_glob": {
+                            "type": "string",
+                            "description": "Filter files by pattern when target='content' (e.g., '*.py' to only search Python files)"
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results (default: 50)",
+                            "default": 50
+                        },
+                        "offset": {
+                            "type": "integer",
+                            "description": "Skip first N results for pagination (default: 0)",
+                            "default": 0
+                        },
+                        "output_mode": {
+                            "type": "string",
+                            "enum": ["content", "files_only", "count"],
+                            "description": "For target='content': 'content' shows matches, 'files_only' shows file paths, 'count' shows match counts per file",
+                            "default": "content"
+                        },
+                        "context": {
+                            "type": "integer",
+                            "description": "Lines of context around matches (only for target='content', output_mode='content')",
+                            "default": 0
+                        }
+                    },
+                    "required": ["pattern"]
+                }
+            }
+        }
+    ]
+
+
 def get_all_tool_names() -> List[str]:
     """
     Get the names of all available tools across all toolsets.
@@ -733,6 +900,12 @@ def get_all_tool_names() -> List[str]:
             "rl_list_runs", "rl_test_inference"
         ])
     
+    # File manipulation tools (use terminal backend)
+    if check_file_requirements():
+        tool_names.extend([
+            "read_file", "write_file", "patch", "search"
+        ])
+    
     return tool_names
 
 
@@ -782,6 +955,11 @@ def get_toolset_for_tool(tool_name: str) -> str:
         "rl_stop_training": "rl_tools",
         "rl_get_results": "rl_tools",
         "rl_list_runs": "rl_tools",
+        # File manipulation tools
+        "read_file": "file_tools",
+        "write_file": "file_tools",
+        "patch": "file_tools",
+        "search": "file_tools",
     }
     
     return toolset_mapping.get(tool_name, "unknown")
@@ -864,6 +1042,11 @@ def get_tool_definitions(
         for tool in get_rl_tool_definitions():
             all_available_tools_map[tool["function"]["name"]] = tool
     
+    # File manipulation tools (use terminal backend)
+    if check_file_requirements():
+        for tool in get_file_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
+    
     # Determine which tools to include based on toolsets
     tools_to_include = set()
     
@@ -899,7 +1082,8 @@ def get_tool_definitions(
                             "rl_start_training", "rl_check_status",
                             "rl_stop_training", "rl_get_results",
                             "rl_list_runs", "rl_test_inference"
-                        ]
+                        ],
+                        "file_tools": ["read_file", "write_file", "patch", "search"]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.update(legacy_tools)
@@ -951,7 +1135,8 @@ def get_tool_definitions(
                             "rl_start_training", "rl_check_status",
                             "rl_stop_training", "rl_get_results",
                             "rl_list_runs", "rl_test_inference"
-                        ]
+                        ],
+                        "file_tools": ["read_file", "write_file", "patch", "search"]
                     }
                     legacy_tools = legacy_map.get(toolset_name, [])
                     tools_to_include.difference_update(legacy_tools)
@@ -1338,6 +1523,70 @@ def handle_rl_function_call(
     return json.dumps({"error": f"Unknown RL function: {function_name}"}, ensure_ascii=False)
 
 
+def handle_file_function_call(
+    function_name: str,
+    function_args: Dict[str, Any],
+    task_id: Optional[str] = None
+) -> str:
+    """
+    Handle function calls for file manipulation tools.
+    
+    These tools use the terminal backend for all operations, supporting
+    local, docker, singularity, ssh, and modal environments.
+    
+    Args:
+        function_name (str): One of "read_file", "write_file", "patch", "search"
+        function_args (Dict): Tool arguments; missing keys fall back to the defaults below
+        task_id (str): Task identifier for environment isolation ("default" when falsy)
+    
+    Returns:
+        str: Result of the matching tool function, or a JSON {"error": ...} for unknown names
+    """
+    # Use "default" when task_id is None or empty
+    tid = task_id or "default"
+    
+    if function_name == "read_file":
+        return read_file_tool(
+            path=function_args.get("path", ""),
+            offset=function_args.get("offset", 1),
+            limit=function_args.get("limit", 500),
+            task_id=tid
+        )
+    
+    elif function_name == "write_file":
+        return write_file_tool(
+            path=function_args.get("path", ""),
+            content=function_args.get("content", ""),
+            task_id=tid
+        )
+    
+    elif function_name == "patch":
+        return patch_tool(
+            mode=function_args.get("mode", "replace"),
+            path=function_args.get("path"),
+            old_string=function_args.get("old_string"),
+            new_string=function_args.get("new_string"),
+            replace_all=function_args.get("replace_all", False),
+            patch=function_args.get("patch"),
+            task_id=tid
+        )
+    
+    elif function_name == "search":
+        return search_tool(
+            pattern=function_args.get("pattern", ""),
+            target=function_args.get("target", "content"),
+            path=function_args.get("path", "."),
+            file_glob=function_args.get("file_glob"),
+            limit=function_args.get("limit", 50),
+            offset=function_args.get("offset", 0),
+            output_mode=function_args.get("output_mode", "content"),
+            context=function_args.get("context", 0),
+            task_id=tid
+        )
+    
+    return json.dumps({"error": f"Unknown file function: {function_name}"}, ensure_ascii=False)
+
+
 def handle_function_call(
     function_name: str, 
     function_args: Dict[str, Any], 
@@ -1411,6 +1660,10 @@ def handle_function_call(
         ]:
             return handle_rl_function_call(function_name, function_args)
 
+        # Route file manipulation tools
+        elif function_name in ["read_file", "write_file", "patch", "search"]:
+            return handle_file_function_call(function_name, function_args, task_id)
+
         else:
             error_msg = f"Unknown function: {function_name}"
             print(f"❌ {error_msg}")
@@ -1482,6 +1735,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
             "tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
             "description": "Schedule and manage automated tasks (cronjobs) - only available in interactive CLI mode",
             "requirements": ["HERMES_INTERACTIVE=1 (set automatically by cli.py)"]
+        },
+        "file_tools": {
+            "available": check_file_requirements(),
+            "tools": ["read_file", "write_file", "patch", "search"],
+            "description": "File manipulation tools: read/write files, search content/files, patch with fuzzy matching",
+            "requirements": ["Terminal backend available (local/docker/ssh/singularity/modal)"]
         }
     }
     
@@ -1502,7 +1761,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
         "image_tools": check_image_generation_requirements(),
         "skills_tools": check_skills_requirements(),
         "browser_tools": check_browser_requirements(),
-        "cronjob_tools": check_cronjob_requirements()
+        "cronjob_tools": check_cronjob_requirements(),
+        "file_tools": check_file_requirements()
     }
 
 if __name__ == "__main__":
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 3666b21b58..8170abba69 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -128,6 +128,78 @@ function Test-Node {
     return $true  # Don't fail - Node is optional
 }
 
+function Test-Ripgrep {  # Detect ripgrep and optionally install it; every path returns $true
+    Write-Info "Checking ripgrep (optional, for faster file search)..."
+    
+    if (Get-Command rg -ErrorAction SilentlyContinue) {
+        $version = rg --version | Select-Object -First 1
+        Write-Success "$version found"
+        $script:HasRipgrep = $true  # read later by Write-Completion
+        return $true
+    }
+    
+    Write-Warning "ripgrep not found (file search will use findstr fallback)"
+    
+    # Detect available package managers (tried below in order: winget, choco, scoop)
+    $hasWinget = Get-Command winget -ErrorAction SilentlyContinue
+    $hasChoco = Get-Command choco -ErrorAction SilentlyContinue
+    $hasScoop = Get-Command scoop -ErrorAction SilentlyContinue
+    
+    # Offer to install; an empty answer (just Enter) counts as yes
+    Write-Host ""
+    $response = Read-Host "Would you like to install ripgrep? (faster search, recommended) [Y/n]"
+    
+    if ($response -eq "" -or $response -match "^[Yy]") {
+        Write-Info "Installing ripgrep..."
+        
+        if ($hasWinget) {
+            try {
+                winget install BurntSushi.ripgrep.MSVC --silent 2>&1 | Out-Null
+                if ($LASTEXITCODE -eq 0) {
+                    Write-Success "ripgrep installed via winget"
+                    $script:HasRipgrep = $true
+                    return $true
+                }
+            } catch { }  # ignore the error and fall through to the next package manager
+        }
+        
+        if ($hasChoco) {
+            try {
+                choco install ripgrep -y 2>&1 | Out-Null
+                if ($LASTEXITCODE -eq 0) {
+                    Write-Success "ripgrep installed via chocolatey"
+                    $script:HasRipgrep = $true
+                    return $true
+                }
+            } catch { }  # ignore the error and fall through to the next package manager
+        }
+        
+        if ($hasScoop) {
+            try {
+                scoop install ripgrep 2>&1 | Out-Null
+                if ($LASTEXITCODE -eq 0) {
+                    Write-Success "ripgrep installed via scoop"
+                    $script:HasRipgrep = $true
+                    return $true
+                }
+            } catch { }  # ignore the error; the manual instructions are shown below
+        }
+        
+        Write-Warning "Auto-install failed. You can install manually:"
+    } else {
+        Write-Info "Skipping ripgrep installation. To install manually:"
+    }
+    
+    # Show manual install instructions (reached on decline or when no installer succeeded)
+    Write-Info "  winget install BurntSushi.ripgrep.MSVC"
+    Write-Info "  Or: choco install ripgrep"
+    Write-Info "  Or: scoop install ripgrep"
+    Write-Info "  Or download from: https://github.com/BurntSushi/ripgrep/releases"
+    
+    $script:HasRipgrep = $false
+    return $true  # Don't fail - ripgrep is optional
+}
+
 # ============================================================================
 # Installation
 # ============================================================================
@@ -405,6 +477,20 @@ function Write-Completion {
     Write-Host ""
     Write-Host "⚡ Restart your terminal for PATH changes to take effect" -ForegroundColor Yellow
     Write-Host ""
+    
+    # Show notes about optional tools
+    if (-not $HasNode) {
+        Write-Host "Note: Node.js was not found. Browser automation tools" -ForegroundColor Yellow
+        Write-Host "will have limited functionality." -ForegroundColor Yellow
+        Write-Host ""
+    }
+    
+    if (-not $HasRipgrep) {
+        Write-Host "Note: ripgrep (rg) was not found. File search will use" -ForegroundColor Yellow
+        Write-Host "findstr as a fallback. For faster search:" -ForegroundColor Yellow
+        Write-Host "  winget install BurntSushi.ripgrep.MSVC" -ForegroundColor Yellow
+        Write-Host ""
+    }
 }
 
 # ============================================================================
@@ -416,7 +502,8 @@ function Main {
     
     if (-not (Test-Python)) { exit 1 }
     if (-not (Test-Git)) { exit 1 }
-    Test-Node  # Optional, doesn't fail
+    Test-Node      # Optional, doesn't fail
+    Test-Ripgrep   # Optional, doesn't fail
     
     Install-Repository
     Install-Venv
diff --git a/scripts/install.sh b/scripts/install.sh
index 4b8affaa6e..c3ff5a7931 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -271,6 +271,120 @@ check_node() {
     # Don't exit - Node is optional
 }
 
+check_ripgrep() {
+    log_info "Checking ripgrep (optional, for faster file search)..."
+    # Records the outcome in HAS_RIPGREP, which print_success reads later.
+    if command -v rg &> /dev/null; then
+        RG_VERSION=$(rg --version | head -1)
+        log_success "$RG_VERSION found"
+        HAS_RIPGREP=true
+        return 0
+    fi
+    
+    log_warn "ripgrep not found (file search will use grep fallback)"
+    
+    # Offer to install; a bare Enter (empty REPLY) counts as yes
+    echo ""
+    read -p "Would you like to install ripgrep? (faster search, recommended) [Y/n] " -n 1 -r
+    echo
+    
+    if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
+        log_info "Installing ripgrep..."
+        
+        # Check if we can use sudo
+        CAN_SUDO=false
+        if command -v sudo &> /dev/null; then
+            # Try non-interactive sudo first; `sudo -v` may prompt for a password
+            if sudo -n true 2>/dev/null || sudo -v 2>/dev/null; then
+                CAN_SUDO=true
+            fi
+        fi
+        
+        case "$OS" in
+            linux)
+                if [ "$CAN_SUDO" = true ]; then
+                    case "$DISTRO" in
+                        ubuntu|debian)
+                            if sudo apt install -y ripgrep 2>/dev/null; then
+                                log_success "ripgrep installed"
+                                HAS_RIPGREP=true
+                                return 0
+                            fi
+                            ;;
+                        fedora)
+                            if sudo dnf install -y ripgrep 2>/dev/null; then
+                                log_success "ripgrep installed"
+                                HAS_RIPGREP=true
+                                return 0
+                            fi
+                            ;;
+                        arch)
+                            if sudo pacman -S --noconfirm ripgrep 2>/dev/null; then
+                                log_success "ripgrep installed"
+                                HAS_RIPGREP=true
+                                return 0
+                            fi
+                            ;;
+                    esac
+                else
+                    log_warn "sudo not available - cannot auto-install system packages"
+                fi
+                # Fall back to cargo whenever the system package manager did not succeed
+                if command -v cargo &> /dev/null; then
+                    log_info "Trying cargo install (no sudo required)..."
+                    if cargo install ripgrep 2>/dev/null; then
+                        log_success "ripgrep installed via cargo"
+                        HAS_RIPGREP=true
+                        return 0
+                    fi
+                fi
+                ;;
+            macos)
+                if command -v brew &> /dev/null; then
+                    if brew install ripgrep 2>/dev/null; then
+                        log_success "ripgrep installed"
+                        HAS_RIPGREP=true
+                        return 0
+                    fi
+                fi
+                ;;
+        esac
+        log_warn "Auto-install failed. You can install manually later:"
+    else
+        log_info "Skipping ripgrep installation. To install manually:"
+    fi
+    
+    # Show manual install instructions (reached on decline or failed auto-install)
+    case "$OS" in
+        linux)
+            case "$DISTRO" in
+                ubuntu|debian)
+                    log_info "  sudo apt install ripgrep"
+                    ;;
+                fedora)
+                    log_info "  sudo dnf install ripgrep"
+                    ;;
+                arch)
+                    log_info "  sudo pacman -S ripgrep"
+                    ;;
+                *)
+                    log_info "  https://github.com/BurntSushi/ripgrep#installation"
+                    ;;
+            esac
+            # Show cargo alternative for users without sudo
+            if command -v cargo &> /dev/null; then
+                log_info "  Or without sudo: cargo install ripgrep"
+            fi
+            ;;
+        macos)
+            log_info "  brew install ripgrep"
+            ;;
+    esac
+    
+    HAS_RIPGREP=false
+    # Don't exit - ripgrep is optional (grep fallback exists)
+}
+
 # ============================================================================
 # Installation
 # ============================================================================
@@ -540,6 +654,15 @@ print_success() {
         echo "if you need full browser support."
         echo -e "${NC}"
     fi
+    
+    # Show ripgrep note if not installed
+    if [ "$HAS_RIPGREP" = false ]; then
+        echo -e "${YELLOW}"
+        echo "Note: ripgrep (rg) was not found. File search will use"
+        echo "grep as a fallback. For faster search in large codebases,"
+        echo "install ripgrep: sudo apt install ripgrep (or brew install ripgrep)"
+        echo -e "${NC}"
+    fi
 }
 
 # ============================================================================
@@ -553,6 +676,7 @@ main() {
     check_python
     check_git
     check_node
+    check_ripgrep
     
     clone_repo
     setup_venv
diff --git a/setup-hermes.sh b/setup-hermes.sh
index 4cffdc7313..e22511b393 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -80,6 +80,53 @@ pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
 
 echo -e "${GREEN}✓${NC} Dependencies installed"
 
+# ============================================================================
+# Optional: ripgrep (for faster file search)
+# ============================================================================
+
+echo -e "${CYAN}→${NC} Checking ripgrep (optional, for faster search)..."
+
+if command -v rg &> /dev/null; then
+    echo -e "${GREEN}✓${NC} ripgrep found"
+else
+    echo -e "${YELLOW}⚠${NC} ripgrep not found (file search will use grep fallback)"
+    read -p "Install ripgrep for faster search? [Y/n] " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then  # empty reply (Enter) means yes
+        INSTALLED=false  # set to true by the first installer that succeeds
+        
+        # Use apt/dnf only when sudo works without prompting (sudo -n)
+        if command -v sudo &> /dev/null && sudo -n true 2>/dev/null; then
+            if command -v apt &> /dev/null; then
+                sudo apt install -y ripgrep && INSTALLED=true
+            elif command -v dnf &> /dev/null; then
+                sudo dnf install -y ripgrep && INSTALLED=true
+            fi
+        fi
+        
+        # Otherwise try brew (no sudo needed)
+        if [ "$INSTALLED" = false ] && command -v brew &> /dev/null; then
+            brew install ripgrep && INSTALLED=true
+        fi
+        
+        # Last resort: cargo (no sudo needed)
+        if [ "$INSTALLED" = false ] && command -v cargo &> /dev/null; then
+            echo -e "${CYAN}→${NC} Trying cargo install (no sudo required)..."
+            cargo install ripgrep && INSTALLED=true
+        fi
+        
+        if [ "$INSTALLED" = true ]; then
+            echo -e "${GREEN}✓${NC} ripgrep installed"
+        else
+            echo -e "${YELLOW}⚠${NC} Auto-install failed. Install options:"
+            echo "    sudo apt install ripgrep     # Debian/Ubuntu"
+            echo "    brew install ripgrep         # macOS"
+            echo "    cargo install ripgrep        # With Rust (no sudo)"
+            echo "    https://github.com/BurntSushi/ripgrep#installation"
+        fi
+    fi
+fi
+
 # ============================================================================
 # Environment file
 # ============================================================================
diff --git a/tools/__init__.py b/tools/__init__.py
index 0b6bcdcc96..004a6add1c 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -111,6 +111,22 @@ from .rl_training_tool import (
     get_missing_keys,
 )
 
+# File manipulation tools (read, write, patch, search)
+from .file_tools import (
+    read_file_tool,
+    write_file_tool,
+    patch_tool,
+    search_tool,
+    get_file_tools,
+    clear_file_ops_cache,
+)
+
+# File tools have no external requirements - they use the terminal backend
+def check_file_requirements():
+    """Return check_terminal_requirements(): file tools need only the terminal backend."""
+    from .terminal_tool import check_terminal_requirements  # function-local import
+    return check_terminal_requirements()
+
 __all__ = [
     # Web tools
     'web_search_tool',
@@ -181,5 +197,13 @@ __all__ = [
     'rl_test_inference',
     'check_rl_api_keys',
     'get_missing_keys',
+    # File manipulation tools
+    'read_file_tool',
+    'write_file_tool',
+    'patch_tool',
+    'search_tool',
+    'get_file_tools',
+    'clear_file_ops_cache',
+    'check_file_requirements',
 ]
 
diff --git a/tools/file_operations.py b/tools/file_operations.py
new file mode 100644
index 0000000000..2509df3c57
--- /dev/null
+++ b/tools/file_operations.py
@@ -0,0 +1,937 @@
+#!/usr/bin/env python3
+"""
+File Operations Module
+
+Provides file manipulation capabilities (read, write, patch, search) that work
+across all terminal backends (local, docker, singularity, ssh, modal).
+
+The key insight is that all file operations can be expressed as shell commands,
+so we wrap the terminal backend's execute() interface to provide a unified file API.
+
+Usage:
+    from tools.file_operations import ShellFileOperations
+    from tools.terminal_tool import _active_environments
+    
+    # Get file operations for a terminal environment
+    file_ops = ShellFileOperations(terminal_env)
+    
+    # Read a file
+    result = file_ops.read_file("/path/to/file.py")
+    
+    # Write a file
+    result = file_ops.write_file("/path/to/new.py", "print('hello')")
+    
+    # Search for content
+    result = file_ops.search("TODO", path=".", file_glob="*.py")
+"""
+
+import os
+import re
+import json
+import uuid
+import difflib
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Optional, List, Dict, Any, Tuple
+from pathlib import Path
+
+
+# =============================================================================
+# Result Data Classes
+# =============================================================================
+
+@dataclass
+class ReadResult:
+    """Result from reading a file."""
+    content: str = ""
+    total_lines: int = 0
+    file_size: int = 0
+    truncated: bool = False
+    hint: Optional[str] = None
+    is_binary: bool = False
+    is_image: bool = False
+    base64_content: Optional[str] = None
+    mime_type: Optional[str] = None
+    dimensions: Optional[str] = None  # For images: "WIDTHxHEIGHT"
+    error: Optional[str] = None
+    similar_files: List[str] = field(default_factory=list)
+    
+    def to_dict(self) -> dict:
+        return {k: v for k, v in self.__dict__.items() if v is not None and v != [] and v != ""}
+
+
+@dataclass
+class WriteResult:
+    """Result from writing a file."""
+    bytes_written: int = 0
+    dirs_created: bool = False
+    error: Optional[str] = None
+    warning: Optional[str] = None
+    
+    def to_dict(self) -> dict:
+        return {k: v for k, v in self.__dict__.items() if v is not None}
+
+
+@dataclass
+class PatchResult:
+    """Result from patching a file."""
+    success: bool = False
+    diff: str = ""
+    files_modified: List[str] = field(default_factory=list)
+    files_created: List[str] = field(default_factory=list)
+    files_deleted: List[str] = field(default_factory=list)
+    lint: Optional[Dict[str, Any]] = None
+    error: Optional[str] = None
+    
+    def to_dict(self) -> dict:
+        result = {"success": self.success}
+        if self.diff:
+            result["diff"] = self.diff
+        if self.files_modified:
+            result["files_modified"] = self.files_modified
+        if self.files_created:
+            result["files_created"] = self.files_created
+        if self.files_deleted:
+            result["files_deleted"] = self.files_deleted
+        if self.lint:
+            result["lint"] = self.lint
+        if self.error:
+            result["error"] = self.error
+        return result
+
+
+@dataclass
+class SearchMatch:
+    """A single search match."""
+    path: str
+    line_number: int
+    content: str
+    mtime: float = 0.0  # Modification time for sorting
+
+
+@dataclass
+class SearchResult:
+    """Result from searching."""
+    matches: List[SearchMatch] = field(default_factory=list)
+    files: List[str] = field(default_factory=list)
+    counts: Dict[str, int] = field(default_factory=dict)
+    total_count: int = 0
+    truncated: bool = False
+    error: Optional[str] = None
+    
+    def to_dict(self) -> dict:
+        result = {"total_count": self.total_count}
+        if self.matches:
+            result["matches"] = [
+                {"path": m.path, "line": m.line_number, "content": m.content}
+                for m in self.matches
+            ]
+        if self.files:
+            result["files"] = self.files
+        if self.counts:
+            result["counts"] = self.counts
+        if self.truncated:
+            result["truncated"] = True
+        if self.error:
+            result["error"] = self.error
+        return result
+
+
+@dataclass
+class LintResult:
+    """Result from linting a file."""
+    success: bool = True
+    skipped: bool = False
+    output: str = ""
+    message: str = ""
+    
+    def to_dict(self) -> dict:
+        if self.skipped:
+            return {"status": "skipped", "message": self.message}
+        return {
+            "status": "ok" if self.success else "error",
+            "output": self.output
+        }
+
+
+@dataclass
+class ExecuteResult:
+    """Result from executing a shell command."""
+    stdout: str = ""
+    exit_code: int = 0
+
+
+# =============================================================================
+# Abstract Interface
+# =============================================================================
+
+class FileOperations(ABC):
+    """Abstract interface for file operations across terminal backends."""
+    
+    @abstractmethod
+    def read_file(self, path: str, offset: int = 1, limit: int = 500) -> ReadResult:
+        """Read a file with pagination support."""
+        ...
+    
+    @abstractmethod
+    def write_file(self, path: str, content: str) -> WriteResult:
+        """Write content to a file, creating directories as needed."""
+        ...
+    
+    @abstractmethod
+    def patch_replace(self, path: str, old_string: str, new_string: str, 
+                      replace_all: bool = False) -> PatchResult:
+        """Replace text in a file using fuzzy matching."""
+        ...
+    
+    @abstractmethod
+    def patch_v4a(self, patch_content: str) -> PatchResult:
+        """Apply a V4A format patch."""
+        ...
+    
+    @abstractmethod
+    def search(self, pattern: str, path: str = ".", target: str = "content",
+               file_glob: Optional[str] = None, limit: int = 50, offset: int = 0,
+               output_mode: str = "content", context: int = 0) -> SearchResult:
+        """Search for content or files."""
+        ...
+
+
+# =============================================================================
+# Shell-based Implementation
+# =============================================================================
+
+# Binary file extensions (fast path check)
+BINARY_EXTENSIONS = {
+    # Images
+    '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.tiff', '.tif',
+    '.svg',  # SVG is text but often treated as binary
+    # Audio/Video
+    '.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv', '.flac', '.ogg', '.webm',
+    # Archives
+    '.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar',
+    # Documents
+    '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
+    # Compiled/Binary
+    '.exe', '.dll', '.so', '.dylib', '.o', '.a', '.pyc', '.pyo', '.class',
+    '.wasm', '.bin',
+    # Fonts
+    '.ttf', '.otf', '.woff', '.woff2', '.eot',
+    # Other
+    '.db', '.sqlite', '.sqlite3',
+}
+
+# Image extensions (subset of binary that we can return as base64)
+IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico'}
+
+# Linters by file extension
+LINTERS = {
+    '.py': 'python -m py_compile {file} 2>&1',
+    '.js': 'node --check {file} 2>&1',
+    '.ts': 'npx tsc --noEmit {file} 2>&1',
+    '.go': 'go vet {file} 2>&1',
+    '.rs': 'rustfmt --check {file} 2>&1',
+}
+
+# Max limits for read operations
+MAX_LINES = 2000
+MAX_LINE_LENGTH = 2000
+MAX_FILE_SIZE = 50 * 1024  # 50KB
+
+
+class ShellFileOperations(FileOperations):
+    """
+    File operations implemented via shell commands.
+    
+    Works with ANY terminal backend that has execute(command, cwd) method.
+    This includes local, docker, singularity, ssh, and modal environments.
+    """
+    
+    def __init__(self, terminal_env, cwd: str = None):
+        """
+        Initialize file operations with a terminal environment.
+        
+        Args:
+            terminal_env: Any object with execute(command, cwd) method.
+                         Returns {"output": str, "returncode": int}
+            cwd: Working directory (defaults to env's cwd or /tmp)
+        """
+        self.env = terminal_env
+        # Determine cwd from various possible sources
+        self.cwd = cwd or getattr(terminal_env, 'cwd', None) or \
+                   getattr(getattr(terminal_env, 'config', None), 'cwd', None) or '/tmp'
+        
+        # Cache for command availability checks
+        self._command_cache: Dict[str, bool] = {}
+    
+    def _exec(self, command: str, cwd: str = None, timeout: int = None) -> ExecuteResult:
+        """Execute command via terminal backend."""
+        kwargs = {}
+        if timeout:
+            kwargs['timeout'] = timeout
+        
+        result = self.env.execute(command, cwd=cwd or self.cwd, **kwargs)
+        return ExecuteResult(
+            stdout=result.get("output", ""),
+            exit_code=result.get("returncode", 0)
+        )
+    
+    def _has_command(self, cmd: str) -> bool:
+        """Check if a command exists in the environment (cached)."""
+        if cmd not in self._command_cache:
+            result = self._exec(f"command -v {cmd} >/dev/null 2>&1 && echo 'yes'")
+            self._command_cache[cmd] = result.stdout.strip() == 'yes'
+        return self._command_cache[cmd]
+    
+    def _is_likely_binary(self, path: str, content_sample: str = None) -> bool:
+        """
+        Check if a file is likely binary.
+        
+        Uses extension check (fast) + content analysis (fallback).
+        """
+        ext = os.path.splitext(path)[1].lower()
+        if ext in BINARY_EXTENSIONS:
+            return True
+        
+        # Content analysis: >30% non-printable chars = binary
+        if content_sample:
+            # Only the first 1000 characters are inspected; a falsy sample
+            # (None or "") skips the analysis and is treated as text.
+            sample = content_sample[:1000]
+            non_printable = sum(1 for c in sample if ord(c) < 32 and c not in '\n\r\t')
+            return non_printable / len(sample) > 0.30
+        
+        return False
+    
+    def _is_image(self, path: str) -> bool:
+        """Check if file is an image we can return as base64."""
+        ext = os.path.splitext(path)[1].lower()
+        return ext in IMAGE_EXTENSIONS
+    
+    def _add_line_numbers(self, content: str, start_line: int = 1) -> str:
+        """Add line numbers to content in LINE_NUM|CONTENT format."""
+        # Drop one trailing newline so the terminator isn't numbered as an extra line
+        lines = (content[:-1] if content.endswith('\n') else content).split('\n')
+        numbered = []
+        for i, line in enumerate(lines, start=start_line):
+            if len(line) > MAX_LINE_LENGTH:  # keep oversized lines bounded
+                line = line[:MAX_LINE_LENGTH] + "... [truncated]"
+            numbered.append(f"{i:6d}|{line}")
+        return '\n'.join(numbered)
+    
+    def _expand_path(self, path: str) -> str:
+        """
+        Expand shell-style paths like ~ and ~user to absolute paths.
+        
+        This must be done BEFORE shell escaping, since ~ doesn't expand
+        inside single quotes.
+        """
+        if not path:
+            return path
+        
+        # Handle ~ and ~user; $HOME is resolved inside the terminal environment
+        if path.startswith('~'):
+            result = self._exec("echo $HOME")
+            if result.exit_code == 0 and result.stdout.strip():
+                home = result.stdout.strip()
+                if path == '~':
+                    return home
+                elif path.startswith('~/'):
+                    return home + path[1:]  # Replace ~ with home
+                # ~user: only a validated "~name" prefix reaches the shell (no injection)
+                user, sep, rest = path.partition('/')
+                expanded = self._exec(f"echo {user}") if re.fullmatch(r'~[\w.-]+', user) else None
+                if expanded is not None and expanded.exit_code == 0:
+                    return expanded.stdout.strip() + sep + rest
+        
+        return path
+    
+    def _escape_shell_arg(self, arg: str) -> str:
+        """Escape a string for safe use in shell commands."""
+        # Use single quotes and escape any single quotes in the string
+        return "'" + arg.replace("'", "'\"'\"'") + "'"
+    
+    def _unified_diff(self, old_content: str, new_content: str, filename: str) -> str:
+        """Generate unified diff between old and new content."""
+        old_lines = old_content.splitlines(keepends=True)
+        new_lines = new_content.splitlines(keepends=True)
+        diff = difflib.unified_diff(
+            old_lines, new_lines,
+            fromfile=f"a/{filename}",
+            tofile=f"b/{filename}"
+        )
+        return ''.join(diff)
+    
+    # =========================================================================
+    # READ Implementation
+    # =========================================================================
+    
+    def read_file(self, path: str, offset: int = 1, limit: int = 500) -> ReadResult:
+        """
+        Read a file with pagination, binary detection, and line numbers.
+        
+        Args:
+            path: File path (absolute or relative to cwd)
+            offset: Line number to start from (1-indexed, default 1; values < 1 become 1)
+            limit: Maximum lines to return (default 500, clamped to 1..2000)
+        
+        Returns:
+            ReadResult with content, metadata, or error info
+        """
+        # Expand ~ and other shell paths
+        path = self._expand_path(path)
+        
+        # Clamp pagination: sed rejects line address 0; limit is capped at MAX_LINES
+        offset, limit = max(offset, 1), max(1, min(limit, MAX_LINES))
+        
+        # Check if file exists and get metadata
+        stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
+        stat_result = self._exec(stat_cmd)
+        
+        if stat_result.exit_code != 0:
+            # File not found - try to suggest similar files
+            return self._suggest_similar_files(path)
+        
+        try:
+            file_size = int(stat_result.stdout.strip())
+        except ValueError:
+            file_size = 0
+        
+        # NOTE: files larger than MAX_FILE_SIZE are deliberately still read;
+        # pagination (limit <= MAX_LINES) bounds the output instead of a hard
+        # size cutoff. file_size is reported in the result so the caller can
+        # decide whether to page through the rest.
+        
+        # Check if it's an image - return base64
+        if self._is_image(path):
+            return self._read_image(path)
+        
+        # Read a sample to check for binary content
+        sample_cmd = f"head -c 1000 {self._escape_shell_arg(path)} 2>/dev/null"
+        sample_result = self._exec(sample_cmd)
+        
+        if self._is_likely_binary(path, sample_result.stdout):
+            return ReadResult(
+                is_binary=True,
+                file_size=file_size,
+                error="Binary file - cannot display as text. Use appropriate tools to handle this file type."
+            )
+        
+        # Read with pagination using sed
+        end_line = offset + limit - 1
+        read_cmd = f"sed -n '{offset},{end_line}p' {self._escape_shell_arg(path)}"
+        read_result = self._exec(read_cmd)
+        
+        if read_result.exit_code != 0:
+            return ReadResult(error=f"Failed to read file: {read_result.stdout}")
+        
+        # Get total line count
+        wc_cmd = f"wc -l < {self._escape_shell_arg(path)}"
+        wc_result = self._exec(wc_cmd)
+        try:
+            total_lines = int(wc_result.stdout.strip())
+        except ValueError:
+            total_lines = 0
+        
+        # Check if truncated
+        truncated = total_lines > end_line
+        hint = None
+        if truncated:
+            hint = f"Use offset={end_line + 1} to continue reading (showing {offset}-{end_line} of {total_lines} lines)"
+        
+        return ReadResult(
+            content=self._add_line_numbers(read_result.stdout, offset),
+            total_lines=total_lines,
+            file_size=file_size,
+            truncated=truncated,
+            hint=hint
+        )
+    
+    def _read_image(self, path: str) -> ReadResult:
+        """Read an image file, returning base64 content."""
+        # Get file size
+        stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
+        stat_result = self._exec(stat_cmd)
+        try:
+            file_size = int(stat_result.stdout.strip())
+        except ValueError:
+            file_size = 0
+        
+        # Get base64 content
+        b64_cmd = f"base64 -w 0 {self._escape_shell_arg(path)} 2>/dev/null"
+        b64_result = self._exec(b64_cmd, timeout=30)
+        
+        if b64_result.exit_code != 0:
+            return ReadResult(
+                is_image=True,
+                is_binary=True,
+                file_size=file_size,
+                error=f"Failed to read image: {b64_result.stdout}"
+            )
+        
+        # Try to get dimensions (requires ImageMagick)
+        dimensions = None
+        if self._has_command('identify'):
+            dim_cmd = f"identify -format '%wx%h' {self._escape_shell_arg(path)} 2>/dev/null"
+            dim_result = self._exec(dim_cmd)
+            if dim_result.exit_code == 0:
+                dimensions = dim_result.stdout.strip()
+        
+        # Determine MIME type from extension
+        ext = os.path.splitext(path)[1].lower()
+        mime_types = {
+            '.png': 'image/png',
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.gif': 'image/gif',
+            '.webp': 'image/webp',
+            '.bmp': 'image/bmp',
+            '.ico': 'image/x-icon',
+        }
+        mime_type = mime_types.get(ext, 'application/octet-stream')
+        
+        return ReadResult(
+            is_image=True,
+            is_binary=True,
+            file_size=file_size,
+            base64_content=b64_result.stdout,
+            mime_type=mime_type,
+            dimensions=dimensions
+        )
+    
+    def _suggest_similar_files(self, path: str) -> ReadResult:
+        """Suggest similar files when the requested file is not found."""
+        # Get directory and filename
+        dir_path = os.path.dirname(path) or "."
+        filename = os.path.basename(path)
+        
+        # List files in directory
+        ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -20"
+        ls_result = self._exec(ls_cmd)
+        
+        similar = []
+        if ls_result.exit_code == 0 and ls_result.stdout.strip():
+            files = ls_result.stdout.strip().split('\n')
+            # Simple similarity: files that share some characters with the target
+            for f in files:
+                # Check if filenames share significant overlap
+                common = set(filename.lower()) & set(f.lower())
+                if len(common) >= len(filename) * 0.5:  # 50% character overlap
+                    similar.append(os.path.join(dir_path, f))
+        
+        return ReadResult(
+            error=f"File not found: {path}",
+            similar_files=similar[:5]  # Limit to 5 suggestions
+        )
+    
+    # =========================================================================
+    # WRITE Implementation
+    # =========================================================================
+    
+    def write_file(self, path: str, content: str) -> WriteResult:
+        """
+        Write content to a file, creating parent directories as needed.
+        
+        Uses a quoted heredoc; a final newline is added only if content lacks one.
+        
+        Args:
+            path: File path to write
+            content: Content to write
+        
+        Returns:
+            WriteResult with bytes written or error
+        """
+        # Expand ~ and other shell paths
+        path = self._expand_path(path)
+        
+        # Create parent directories
+        parent = os.path.dirname(path)
+        dirs_created = False
+        
+        if parent:
+            mkdir_cmd = f"mkdir -p {self._escape_shell_arg(parent)}"
+            mkdir_result = self._exec(mkdir_cmd)
+            if mkdir_result.exit_code == 0:
+                dirs_created = True
+        
+        # Generate unique marker for heredoc that won't appear in content
+        marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
+        while marker in content:
+            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
+        
+        # Quoted marker = no expansion; body ends with exactly one newline (none if empty)
+        body = content if (not content or content.endswith('\n')) else content + '\n'
+        write_cmd = f"cat > {self._escape_shell_arg(path)} << '{marker}'\n{body}{marker}"
+        write_result = self._exec(write_cmd)
+        
+        if write_result.exit_code != 0:
+            return WriteResult(error=f"Failed to write file: {write_result.stdout}")
+        
+        # Get bytes written
+        stat_cmd = f"stat -c '%s' {self._escape_shell_arg(path)} 2>/dev/null"
+        stat_result = self._exec(stat_cmd)
+        
+        try:
+            bytes_written = int(stat_result.stdout.strip())
+        except ValueError:
+            bytes_written = len(content.encode('utf-8'))
+        
+        return WriteResult(
+            bytes_written=bytes_written,
+            dirs_created=dirs_created
+        )
+    
+    # =========================================================================
+    # PATCH Implementation (Replace Mode)
+    # =========================================================================
+    
+    def patch_replace(self, path: str, old_string: str, new_string: str,
+                      replace_all: bool = False) -> PatchResult:
+        """
+        Replace text in a file using fuzzy matching.
+        
+        Args:
+            path: File path to modify
+            old_string: Text to find (must be unique unless replace_all=True)
+            new_string: Replacement text
+            replace_all: If True, replace all occurrences
+        
+        Returns:
+            PatchResult with diff and lint results
+        """
+        # Expand ~ and other shell paths
+        path = self._expand_path(path)
+        
+        # Read current content
+        read_cmd = f"cat {self._escape_shell_arg(path)} 2>/dev/null"
+        read_result = self._exec(read_cmd)
+        
+        if read_result.exit_code != 0:
+            return PatchResult(error=f"Failed to read file: {path}")
+        
+        content = read_result.stdout
+        
+        # Import and use fuzzy matching
+        from tools.fuzzy_match import fuzzy_find_and_replace
+        
+        new_content, match_count, error = fuzzy_find_and_replace(
+            content, old_string, new_string, replace_all
+        )
+        
+        if error:
+            return PatchResult(error=error)
+        
+        if match_count == 0:
+            return PatchResult(error=f"Could not find match for old_string in {path}")
+        
+        # Write back
+        write_result = self.write_file(path, new_content)
+        if write_result.error:
+            return PatchResult(error=f"Failed to write changes: {write_result.error}")
+        
+        # Generate diff
+        diff = self._unified_diff(content, new_content, path)
+        
+        # Auto-lint
+        lint_result = self._check_lint(path)
+        
+        return PatchResult(
+            success=True,
+            diff=diff,
+            files_modified=[path],
+            lint=lint_result.to_dict() if lint_result else None
+        )
+    
+    def patch_v4a(self, patch_content: str) -> PatchResult:
+        """
+        Apply a V4A format patch.
+        
+        V4A format:
+            *** Begin Patch
+            *** Update File: path/to/file.py
+            @@ context hint @@
+             context line
+            -removed line
+            +added line
+            *** End Patch
+        
+        Args:
+            patch_content: V4A format patch string
+        
+        Returns:
+            PatchResult with changes made
+        """
+        # Import patch parser
+        from tools.patch_parser import parse_v4a_patch, apply_v4a_operations
+        
+        operations, parse_error = parse_v4a_patch(patch_content)
+        if parse_error:
+            return PatchResult(error=f"Failed to parse patch: {parse_error}")
+        
+        # Apply operations
+        result = apply_v4a_operations(operations, self)
+        return result
+    
+    def _check_lint(self, path: str) -> LintResult:
+        """
+        Run syntax check on a file after editing.
+        
+        Args:
+            path: File path to lint
+        
+        Returns:
+            LintResult with status and any errors
+        """
+        ext = os.path.splitext(path)[1].lower()
+        
+        if ext not in LINTERS:
+            return LintResult(skipped=True, message=f"No linter for {ext} files")
+        
+        # Check if linter command is available
+        linter_cmd = LINTERS[ext]
+        # Extract the base command (first word)
+        base_cmd = linter_cmd.split()[0]
+        
+        if not self._has_command(base_cmd):
+            return LintResult(skipped=True, message=f"{base_cmd} not available")
+        
+        # Run linter
+        cmd = linter_cmd.format(file=self._escape_shell_arg(path))
+        result = self._exec(cmd, timeout=30)
+        
+        return LintResult(
+            success=result.exit_code == 0,
+            output=result.stdout.strip() if result.stdout.strip() else ""
+        )
+    
+    # =========================================================================
+    # SEARCH Implementation
+    # =========================================================================
+    
+    def search(self, pattern: str, path: str = ".", target: str = "content",
+               file_glob: Optional[str] = None, limit: int = 50, offset: int = 0,
+               output_mode: str = "content", context: int = 0) -> SearchResult:
+        """
+        Search for content or files.
+        
+        Args:
+            pattern: Regex (for content) or glob pattern (for files)
+            path: Directory/file to search (default: cwd)
+            target: "content" (grep) or "files" (glob)
+            file_glob: File pattern filter for content search (e.g., "*.py")
+            limit: Max results (default 50)
+            offset: Skip first N results
+            output_mode: "content", "files_only", or "count"
+            context: Lines of context around matches
+        
+        Returns:
+            SearchResult with matches or file list
+        """
+        # Expand ~ and other shell paths
+        path = self._expand_path(path)
+        
+        if target == "files":
+            return self._search_files(pattern, path, limit, offset)
+        else:
+            return self._search_content(pattern, path, file_glob, limit, offset, 
+                                        output_mode, context)
+    
+    def _search_files(self, pattern: str, path: str, limit: int, offset: int) -> SearchResult:
+        """Search for files by name pattern (glob-like)."""
+        # Check if find is available (not on Windows without Git Bash/WSL)
+        if not self._has_command('find'):
+            return SearchResult(
+                error="File search requires 'find' command. "
+                      "On Windows, use Git Bash, WSL, or install Unix tools."
+            )
+        
+        # find -name matches basenames only and already recurses, so any directory
+        # part of the glob (e.g. a leading "**/") is dropped and only the final
+        # component is used. NOTE: a prefix such as "src/" is therefore ignored.
+        # A pattern without '/' is passed through unchanged.
+        search_pattern = pattern.split('/')[-1]
+        
+        # Use find with modification time sorting
+        # -printf '%T@ %p\n' outputs: timestamp path
+        # sort -rn sorts by timestamp descending (newest first)
+        cmd = f"find {self._escape_shell_arg(path)} -type f -name {self._escape_shell_arg(search_pattern)} " \
+              f"-printf '%T@ %p\\n' 2>/dev/null | sort -rn | tail -n +{offset + 1} | head -n {limit}"
+        
+        result = self._exec(cmd, timeout=60)
+        
+        if not result.stdout.strip():
+            # Exit status is head's, not find's: on empty output retry without -printf (BSD find)
+            cmd_simple = f"find {self._escape_shell_arg(path)} -type f -name {self._escape_shell_arg(search_pattern)} " \
+                        f"2>/dev/null | head -n {limit + offset} | tail -n +{offset + 1}"
+            result = self._exec(cmd_simple, timeout=60)
+        
+        files = []
+        for line in result.stdout.strip().split('\n'):
+            if not line:
+                continue
+            # Parse "timestamp path" format
+            parts = line.split(' ', 1)
+            if len(parts) == 2 and parts[0].replace('.', '').isdigit():
+                files.append(parts[1])
+            else:
+                files.append(line)
+        
+        return SearchResult(
+            files=files,
+            total_count=len(files)
+        )
+    
+    def _search_content(self, pattern: str, path: str, file_glob: Optional[str],
+                        limit: int, offset: int, output_mode: str, context: int) -> SearchResult:
+        """Search for content inside files (grep-like)."""
+        # Try ripgrep first (fast), fallback to grep (slower but works)
+        if self._has_command('rg'):
+            return self._search_with_rg(pattern, path, file_glob, limit, offset, 
+                                        output_mode, context)
+        elif self._has_command('grep'):
+            return self._search_with_grep(pattern, path, file_glob, limit, offset,
+                                          output_mode, context)
+        else:
+            # Neither rg nor grep available (Windows without Git Bash, etc.)
+            return SearchResult(
+                error="Content search requires ripgrep (rg) or grep. "
+                      "Install ripgrep: https://github.com/BurntSushi/ripgrep#installation"
+            )
+    
+    def _search_with_rg(self, pattern: str, path: str, file_glob: Optional[str],
+                        limit: int, offset: int, output_mode: str, context: int) -> SearchResult:
+        """Search using ripgrep."""
+        cmd_parts = ["rg", "--line-number", "--no-heading"]
+        
+        # Add context if requested
+        if context > 0:
+            cmd_parts.extend(["-C", str(context)])
+        
+        # Add file glob filter (quoted so the shell doesn't expand it against cwd)
+        if file_glob:
+            cmd_parts.extend(["--glob", self._escape_shell_arg(file_glob)])
+        
+        # Output mode handling
+        if output_mode == "files_only":
+            cmd_parts.append("-l")  # Files only
+        elif output_mode == "count":
+            cmd_parts.append("-c")  # Count per file
+        
+        # Add pattern and path ("--" ends options so a pattern starting with '-' is safe)
+        cmd_parts.extend(["--", self._escape_shell_arg(pattern)])
+        cmd_parts.append(self._escape_shell_arg(path))
+        
+        # Limit results
+        cmd_parts.extend(["|", "head", "-n", str(limit + offset)])
+        
+        cmd = " ".join(cmd_parts)
+        result = self._exec(cmd, timeout=60)
+        
+        # Parse results based on output mode
+        if output_mode == "files_only":
+            files = [f for f in result.stdout.strip().split('\n') if f][offset:]
+            return SearchResult(files=files[:limit], total_count=len(files))
+        
+        elif output_mode == "count":
+            counts = {}
+            for line in result.stdout.strip().split('\n'):
+                if ':' in line:
+                    parts = line.rsplit(':', 1)
+                    if len(parts) == 2:
+                        try:
+                            counts[parts[0]] = int(parts[1])
+                        except ValueError:
+                            pass
+            return SearchResult(counts=counts, total_count=sum(counts.values()))
+        
+        else:
+            # Parse content matches
+            matches = []
+            for line in result.stdout.strip().split('\n')[offset:]:
+                if not line:
+                    continue
+                # Format: file:line:content
+                parts = line.split(':', 2)
+                if len(parts) >= 3:
+                    try:
+                        matches.append(SearchMatch(
+                            path=parts[0],
+                            line_number=int(parts[1]),
+                            content=parts[2][:500]  # Truncate long lines
+                        ))
+                    except ValueError:
+                        # Line number not an int, skip
+                        pass
+            
+            return SearchResult(
+                matches=matches[:limit],
+                total_count=len(matches),
+                truncated=len(matches) > limit
+            )
+    
+    def _search_with_grep(self, pattern: str, path: str, file_glob: Optional[str],
+                          limit: int, offset: int, output_mode: str, context: int) -> SearchResult:
+        """Fallback search using grep."""
+        cmd_parts = ["grep", "-rn"]
+        
+        # Add context if requested
+        if context > 0:
+            cmd_parts.extend(["-C", str(context)])
+        
+        # Add file pattern filter (quoted so the shell doesn't expand it against cwd)
+        if file_glob:
+            cmd_parts.extend(["--include", self._escape_shell_arg(file_glob)])
+        
+        # Output mode handling
+        if output_mode == "files_only":
+            cmd_parts.append("-l")
+        elif output_mode == "count":
+            cmd_parts.append("-c")
+        
+        # Add pattern and path ("--" ends options so a pattern starting with '-' is safe)
+        cmd_parts.extend(["--", self._escape_shell_arg(pattern)])
+        cmd_parts.append(self._escape_shell_arg(path))
+        
+        # Limit and offset
+        cmd_parts.extend(["|", "tail", "-n", f"+{offset + 1}", "|", "head", "-n", str(limit)])
+        
+        cmd = " ".join(cmd_parts)
+        result = self._exec(cmd, timeout=60)
+        
+        # Parse results (same format as rg)
+        if output_mode == "files_only":
+            files = [f for f in result.stdout.strip().split('\n') if f]
+            return SearchResult(files=files, total_count=len(files))
+        
+        elif output_mode == "count":
+            counts = {}
+            for line in result.stdout.strip().split('\n'):
+                if ':' in line:
+                    parts = line.rsplit(':', 1)
+                    if len(parts) == 2:
+                        try:
+                            counts[parts[0]] = int(parts[1])
+                        except ValueError:
+                            pass
+            return SearchResult(counts=counts, total_count=sum(counts.values()))
+        
+        else:
+            matches = []
+            for line in result.stdout.strip().split('\n'):
+                if not line:
+                    continue
+                parts = line.split(':', 2)
+                if len(parts) >= 3:
+                    try:
+                        matches.append(SearchMatch(
+                            path=parts[0],
+                            line_number=int(parts[1]),
+                            content=parts[2][:500]
+                        ))
+                    except ValueError:
+                        pass
+            
+            return SearchResult(
+                matches=matches,
+                total_count=len(matches)
+            )
diff --git a/tools/file_tools.py b/tools/file_tools.py
new file mode 100644
index 0000000000..71704fba6b
--- /dev/null
+++ b/tools/file_tools.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+"""File Tools Module - LLM agent file manipulation tools."""
+
+import json
+import threading
+from typing import Optional
+from tools.file_operations import ShellFileOperations
+
+_file_ops_lock = threading.Lock()
+_file_ops_cache: dict = {}
+
+
+def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
+    """Return the ShellFileOperations bound to ``task_id``, creating it on first use.
+
+    Instances are memoised in ``_file_ops_cache``. On a cache miss the wrapper
+    is built around the terminal environment registered under ``task_id``;
+    if none is registered, a ``_LocalEnvironment`` for the current working
+    directory (``timeout=60``) is created and registered first.
+    """
+    import os
+    from tools.terminal_tool import _active_environments, _env_lock, _LocalEnvironment
+
+    # The cache lock is held for the whole lookup-or-create sequence so two
+    # callers cannot build separate wrappers for the same task.
+    with _file_ops_lock:
+        try:
+            return _file_ops_cache[task_id]
+        except KeyError:
+            pass
+
+        with _env_lock:
+            if task_id not in _active_environments:
+                _active_environments[task_id] = _LocalEnvironment(cwd=os.getcwd(), timeout=60)
+            env_for_task = _active_environments[task_id]
+
+        ops = _file_ops_cache[task_id] = ShellFileOperations(env_for_task)
+        return ops
+
+
+def clear_file_ops_cache(task_id: Optional[str] = None):
+    """Drop cached file-operation wrappers.
+
+    Args:
+        task_id: When given, only the entry for that task is removed (a
+            missing entry is ignored). When None, the whole cache is cleared.
+    """
+    with _file_ops_lock:
+        # Compare against None explicitly: an empty-string task id is a
+        # legitimate cache key and must not be mistaken for "clear everything".
+        if task_id is not None:
+            _file_ops_cache.pop(task_id, None)
+        else:
+            _file_ops_cache.clear()
+
+
+def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
+    """Read ``path`` through the task's file operations and return the result as JSON.
+
+    ``offset`` and ``limit`` are forwarded unchanged to ``read_file``. Any
+    exception is reported as a JSON object with a single ``error`` key
+    instead of being raised.
+    """
+    try:
+        ops = _get_file_ops(task_id)
+        return json.dumps(ops.read_file(path, offset, limit).to_dict(), ensure_ascii=False)
+    except Exception as exc:
+        return json.dumps({"error": str(exc)}, ensure_ascii=False)
+
+
+def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
+    """Write ``content`` to ``path`` through the task's file operations.
+
+    Returns the write result serialised as JSON. Any exception is reported
+    as a JSON object with a single ``error`` key instead of being raised.
+    """
+    try:
+        ops = _get_file_ops(task_id)
+        return json.dumps(ops.write_file(path, content).to_dict(), ensure_ascii=False)
+    except Exception as exc:
+        return json.dumps({"error": str(exc)}, ensure_ascii=False)
+
+
+def patch_tool(mode: str = "replace", path: Optional[str] = None, old_string: Optional[str] = None,
+               new_string: Optional[str] = None, replace_all: bool = False, patch: Optional[str] = None,
+               task_id: str = "default") -> str:
+    """Patch a file using replace mode or V4A patch format.
+
+    Args:
+        mode: ``"replace"`` for a find-and-replace edit, ``"patch"`` for a
+            V4A patch document.
+        path: File to edit (required in replace mode).
+        old_string: Text to find (required in replace mode; may be empty
+            but not None).
+        new_string: Replacement text (required in replace mode; may be empty
+            but not None).
+        replace_all: Forwarded to ``patch_replace``.
+        patch: V4A patch text (required in patch mode).
+        task_id: Selects which task's file operations are used.
+
+    Returns:
+        JSON text: the operation result, or an object with an ``error`` key
+        when the arguments are invalid or the operation raised.
+    """
+    # Validate the arguments before touching the environment, so a malformed
+    # call is rejected without creating or looking up any file-ops state.
+    if mode == "replace":
+        if not path:
+            return json.dumps({"error": "path required"})
+        if old_string is None or new_string is None:
+            return json.dumps({"error": "old_string and new_string required"})
+    elif mode == "patch":
+        if not patch:
+            return json.dumps({"error": "patch content required"})
+    else:
+        return json.dumps({"error": f"Unknown mode: {mode}"})
+
+    try:
+        file_ops = _get_file_ops(task_id)
+        if mode == "replace":
+            result = file_ops.patch_replace(path, old_string, new_string, replace_all)
+        else:
+            result = file_ops.patch_v4a(patch)
+        return json.dumps(result.to_dict(), ensure_ascii=False)
+    except Exception as e:
+        return json.dumps({"error": str(e)}, ensure_ascii=False)
+
+
+def search_tool(pattern: str, target: str = "content", path: str = ".",
+                file_glob: str = None, limit: int = 50, offset: int = 0,
+                output_mode: str = "content", context: int = 0,
+                task_id: str = "default") -> str:
+    """Run a search through the task's file operations and return the result as JSON.
+
+    Every search argument is forwarded by keyword to ``search``. Any
+    exception is reported as a JSON object with a single ``error`` key
+    instead of being raised.
+    """
+    try:
+        ops = _get_file_ops(task_id)
+        criteria = dict(
+            pattern=pattern, path=path, target=target, file_glob=file_glob,
+            limit=limit, offset=offset, output_mode=output_mode, context=context,
+        )
+        return json.dumps(ops.search(**criteria).to_dict(), ensure_ascii=False)
+    except Exception as exc:
+        return json.dumps({"error": str(exc)}, ensure_ascii=False)
+
+
+# Registry of the tools defined in this module: each entry pairs a tool name
+# with the function that implements it.
+FILE_TOOLS = [
+    {"name": tool_name, "function": tool_fn}
+    for tool_name, tool_fn in (
+        ("read_file", read_file_tool),
+        ("write_file", write_file_tool),
+        ("patch", patch_tool),
+        ("search", search_tool),
+    )
+]
+
+
+def get_file_tools():
+    """Return the module-level ``FILE_TOOLS`` registry (the list itself, not a copy)."""
+    registry = FILE_TOOLS
+    return registry
diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py
new file mode 100644
index 0000000000..796072ff97
--- /dev/null
+++ b/tools/fuzzy_match.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python3
+"""
+Fuzzy Matching Module for File Operations
+
+Implements a multi-strategy matching chain to robustly find and replace text,
+accommodating variations in whitespace, indentation, and escaping common
+in LLM-generated code.
+
+The 8-strategy chain (inspired by OpenCode), tried in this order:
+1. Exact match - Direct string comparison
+2. Line-trimmed - Strip leading/trailing whitespace per line
+3. Whitespace normalized - Collapse multiple spaces/tabs to single space
+4. Indentation flexible - Ignore indentation differences entirely
+5. Escape normalized - Convert \\n literals to actual newlines
+6. Trimmed boundary - Trim first/last line whitespace only
+7. Block anchor - Match first+last lines, use similarity for middle
+8. Context-aware - 50% line similarity threshold
+
+The first strategy that finds a match wins. Multiple occurrences are only
+replaced when replace_all=True; otherwise a non-unique match is an error.
+
+Usage:
+    from tools.fuzzy_match import fuzzy_find_and_replace
+    
+    new_content, match_count, error = fuzzy_find_and_replace(
+        content="def foo():\\n    pass",
+        old_string="def foo():",
+        new_string="def bar():",
+        replace_all=False
+    )
+"""
+
+import re
+from typing import Tuple, Optional, List, Callable
+from difflib import SequenceMatcher
+
+
+def fuzzy_find_and_replace(content: str, old_string: str, new_string: str,
+                            replace_all: bool = False) -> Tuple[str, int, Optional[str]]:
+    """
+    Replace ``old_string`` in ``content``, falling back to fuzzier matching as needed.
+
+    Strategies are tried from strictest to loosest; the first one that
+    yields any match decides the outcome and later ones are not consulted.
+
+    Args:
+        content: The file content to search in
+        old_string: The text to find (must be non-empty)
+        new_string: The replacement text (must differ from old_string)
+        replace_all: If True, replace every match; if False, more than one
+            match is treated as an error
+
+    Returns:
+        Tuple of (new_content, match_count, error_message)
+        - On success: (modified_content, number_of_replacements, None)
+        - On failure: (original_content, 0, error_description)
+    """
+    if not old_string:
+        return content, 0, "old_string cannot be empty"
+    if old_string == new_string:
+        return content, 0, "old_string and new_string are identical"
+
+    # Ordered from strictest to most permissive.
+    finders = (
+        _strategy_exact,
+        _strategy_line_trimmed,
+        _strategy_whitespace_normalized,
+        _strategy_indentation_flexible,
+        _strategy_escape_normalized,
+        _strategy_trimmed_boundary,
+        _strategy_block_anchor,
+        _strategy_context_aware,
+    )
+
+    for find_spans in finders:
+        spans = find_spans(content, old_string)
+        if not spans:
+            continue
+
+        if len(spans) > 1 and not replace_all:
+            # Ambiguous: refuse rather than guess which occurrence was meant.
+            return content, 0, (
+                f"Found {len(spans)} matches for old_string. "
+                f"Provide more context to make it unique, or use replace_all=True."
+            )
+
+        return _apply_replacements(content, spans, new_string), len(spans), None
+
+    return content, 0, "Could not find a match for old_string in the file"
+
+
+def _apply_replacements(content: str, matches: List[Tuple[int, int]], new_string: str) -> str:
+    """
+    Substitute ``new_string`` for every (start, end) span in ``matches``.
+
+    Args:
+        content: Original content
+        matches: List of (start, end) positions to replace
+        new_string: Replacement text
+
+    Returns:
+        Content with replacements applied
+    """
+    updated = content
+    # Work from the highest start offset downwards: editing the tail first
+    # leaves the offsets of the spans still to be processed untouched.
+    for begin, finish in sorted(matches, key=lambda span: span[0], reverse=True):
+        updated = "".join((updated[:begin], new_string, updated[finish:]))
+    return updated
+
+
+# =============================================================================
+# Matching Strategies
+# =============================================================================
+
+def _strategy_exact(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 1: Exact string match.
+
+    Returns the (start, end) span of every non-overlapping occurrence of
+    ``pattern`` in ``content``, scanning left to right (the same occurrences
+    ``str.replace`` would act on).
+    """
+    matches = []
+    # Resume after the end of each hit so spans never overlap; overlapping
+    # spans would inflate the match count and corrupt replace-all output.
+    # The max() keeps the scan moving forward for an empty pattern.
+    step = max(len(pattern), 1)
+    pos = content.find(pattern)
+    while pos != -1:
+        matches.append((pos, pos + len(pattern)))
+        pos = content.find(pattern, pos + step)
+    return matches
+
+
+def _strategy_line_trimmed(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 2: Match with line-by-line whitespace trimming.
+
+    Both the content and the pattern have leading and trailing whitespace
+    removed from every line before the line blocks are compared.
+    """
+    original_lines = content.split('\n')
+    trimmed_lines = [raw.strip() for raw in original_lines]
+    trimmed_pattern = '\n'.join(piece.strip() for piece in pattern.split('\n'))
+
+    # The helper compares the trimmed lines and reports spans in the
+    # coordinates of the untrimmed content.
+    return _find_normalized_matches(
+        content, original_lines, trimmed_lines,
+        pattern, trimmed_pattern
+    )
+
+
+def _strategy_whitespace_normalized(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 3: Collapse multiple whitespace to single space.
+
+    Runs of spaces/tabs become one space in both strings (newlines are left
+    alone); hits found in the collapsed content are then translated back to
+    offsets in the original content.
+    """
+    space_run = re.compile(r'[ \t]+')
+    squeezed_pattern = space_run.sub(' ', pattern)
+    squeezed_content = space_run.sub(' ', content)
+
+    hits = _strategy_exact(squeezed_content, squeezed_pattern)
+    if not hits:
+        return []
+
+    return _map_normalized_positions(content, squeezed_content, hits)
+
+
+def _strategy_indentation_flexible(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 4: Ignore indentation differences entirely.
+
+    Leading whitespace is stripped from every line of both the content and
+    the pattern before the line blocks are compared; everything after the
+    indentation (including trailing whitespace) must still match exactly.
+
+    Returns:
+        List of (start, end) positions in the original content.
+    """
+    content_lines = content.split('\n')
+    content_stripped_lines = [line.lstrip() for line in content_lines]
+    pattern_stripped = '\n'.join(line.lstrip() for line in pattern.split('\n'))
+
+    return _find_normalized_matches(
+        content, content_lines, content_stripped_lines,
+        pattern, pattern_stripped
+    )
+
+
+def _strategy_escape_normalized(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 5: Convert escape sequences to actual characters.
+
+    Rewrites the two-character sequences \\n, \\t and \\r in the pattern as
+    the newline, tab and carriage-return characters, then searches for the
+    result exactly. Yields nothing when the pattern has no such sequences.
+    """
+    decoded = pattern
+    for literal, actual in (('\\n', '\n'), ('\\t', '\t'), ('\\r', '\r')):
+        decoded = decoded.replace(literal, actual)
+
+    if decoded == pattern:
+        # Nothing was rewritten, so this would only repeat the exact search.
+        return []
+
+    return _strategy_exact(content, decoded)
+
+
+def _strategy_trimmed_boundary(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 6: Trim whitespace from first and last lines only.
+
+    The first and last pattern lines are stripped, and so are the first and
+    last lines of each candidate block; interior lines must match exactly.
+    Each returned span covers the candidate block's complete original lines.
+    """
+    wanted = pattern.split('\n')
+    wanted[0] = wanted[0].strip()
+    if len(wanted) > 1:
+        wanted[-1] = wanted[-1].strip()
+    height = len(wanted)
+
+    source_lines = content.split('\n')
+
+    # line_starts[k] is the offset at which line k begins; the extra final
+    # entry is one past the (virtual) newline after the last line.
+    line_starts = [0]
+    for text in source_lines:
+        line_starts.append(line_starts[-1] + len(text) + 1)
+
+    found = []
+    for top in range(len(source_lines) - height + 1):
+        window = source_lines[top:top + height]
+        window[0] = window[0].strip()
+        if height > 1:
+            window[-1] = window[-1].strip()
+
+        if window != wanted:
+            continue
+
+        # End just before the block's closing newline, never past the content.
+        finish = min(line_starts[top + height] - 1, len(content))
+        found.append((line_starts[top], finish))
+
+    return found
+
+
+def _strategy_block_anchor(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 7: Match by anchoring on first and last lines.
+
+    A candidate block qualifies when its first and last lines equal the
+    pattern's first and last lines after stripping, and the text between
+    them has a SequenceMatcher ratio of at least 0.70 against the pattern's
+    interior. Patterns with fewer than two lines never match.
+    """
+    wanted = pattern.split('\n')
+    height = len(wanted)
+    if height < 2:
+        return []  # Nothing to anchor on
+
+    head_anchor = wanted[0].strip()
+    tail_anchor = wanted[-1].strip()
+    wanted_interior = '\n'.join(wanted[1:-1])
+
+    source_lines = content.split('\n')
+    found = []
+
+    for top in range(len(source_lines) - height + 1):
+        bottom = top + height - 1
+        if source_lines[top].strip() != head_anchor:
+            continue
+        if source_lines[bottom].strip() != tail_anchor:
+            continue
+
+        # A two-line pattern has no interior, so the anchors alone decide.
+        if height > 2:
+            interior = '\n'.join(source_lines[top + 1:bottom])
+            if SequenceMatcher(None, interior, wanted_interior).ratio() < 0.70:
+                continue
+
+        begin = sum(len(text) + 1 for text in source_lines[:top])
+        finish = sum(len(text) + 1 for text in source_lines[:bottom + 1]) - 1
+        found.append((begin, min(finish, len(content))))
+
+    return found
+
+
+def _strategy_context_aware(content: str, pattern: str) -> List[Tuple[int, int]]:
+    """
+    Strategy 8: Line-by-line similarity with 50% threshold.
+
+    Slides a window the height of the pattern over the content. A window
+    matches when at least half of its lines, compared stripped and pairwise
+    with the pattern's lines, reach a SequenceMatcher ratio of 0.80 or more.
+    """
+    wanted = pattern.split('\n')
+    source_lines = content.split('\n')
+    height = len(wanted)
+    needed = height * 0.5
+
+    found = []
+    for top in range(len(source_lines) - height + 1):
+        window = source_lines[top:top + height]
+
+        similar_lines = sum(
+            1
+            for want, have in zip(wanted, window)
+            if SequenceMatcher(None, want.strip(), have.strip()).ratio() >= 0.80
+        )
+        if similar_lines < needed:
+            continue
+
+        begin = sum(len(text) + 1 for text in source_lines[:top])
+        finish = sum(len(text) + 1 for text in source_lines[:top + height]) - 1
+        found.append((begin, min(finish, len(content))))
+
+    return found
+
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+def _find_normalized_matches(content: str, content_lines: List[str],
+                              content_normalized_lines: List[str],
+                              pattern: str, pattern_normalized: str) -> List[Tuple[int, int]]:
+    """
+    Locate line blocks whose normalized form equals the normalized pattern.
+
+    Args:
+        content: Original content string (used for its length)
+        content_lines: Original content split by lines
+        content_normalized_lines: Normalized content lines, parallel to content_lines
+        pattern: Original pattern (not consulted here)
+        pattern_normalized: Normalized pattern
+
+    Returns:
+        List of (start, end) positions in the original content; each span
+        covers the matched block's complete original lines.
+    """
+    height = pattern_normalized.count('\n') + 1
+    content_length = len(content)
+
+    found = []
+    for top in range(len(content_normalized_lines) - height + 1):
+        window = '\n'.join(content_normalized_lines[top:top + height])
+        if window != pattern_normalized:
+            continue
+
+        # Offsets are measured on the original lines, one extra character per
+        # line for its newline; the end stops before the block's last newline
+        # and is capped at the content length.
+        begin = sum(len(text) + 1 for text in content_lines[:top])
+        finish = sum(len(text) + 1 for text in content_lines[:top + height]) - 1
+        found.append((begin, min(finish, content_length)))
+
+    return found
+
+
+def _map_normalized_positions(original: str, normalized: str,
+                               normalized_matches: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
+    """
+    Map (start, end) spans in a whitespace-collapsed string back to ``original``.
+
+    Assumes ``normalized`` is ``original`` with every run of spaces/tabs
+    replaced by a single space, which is how the whitespace-normalized
+    strategy produces it. Under that assumption each normalized character
+    corresponds either to one original character or to one whole whitespace
+    run, so the mapping is exact:
+
+    - a span starts at the first original character of its first normalized
+      character (the beginning of the run when that character is a collapsed
+      space), and
+    - ends after the last original character of its last normalized character
+      (the end of the run when that character is a collapsed space).
+
+    Whitespace next to a match that is not part of the match itself is left
+    out of the span, so replacing the span does not eat adjacent spacing.
+
+    Args:
+        original: The unmodified text.
+        normalized: The collapsed form of ``original`` (not read directly;
+            the correspondence is rebuilt from ``original``).
+        normalized_matches: (start, end) spans in ``normalized``.
+
+    Returns:
+        The corresponding (start, end) spans in ``original``. Offsets beyond
+        the end of the text are clamped to ``len(original)``.
+    """
+    if not normalized_matches:
+        return []
+
+    size = len(original)
+
+    # pieces[k] is the (start, end) slice of `original` that collapses into
+    # the k-th character of the normalized text.
+    pieces = []
+    cursor = 0
+    while cursor < size:
+        stop = cursor + 1
+        if original[cursor] in ' \t':
+            # A whole run of spaces/tabs becomes one normalized character.
+            while stop < size and original[stop] in ' \t':
+                stop += 1
+        pieces.append((cursor, stop))
+        cursor = stop
+
+    original_matches = []
+    for norm_start, norm_end in normalized_matches:
+        orig_start = pieces[norm_start][0] if norm_start < len(pieces) else size
+
+        if norm_end <= norm_start:
+            # Zero-width span: keep it zero-width at the mapped position.
+            orig_end = orig_start
+        elif norm_end - 1 < len(pieces):
+            orig_end = pieces[norm_end - 1][1]
+        else:
+            orig_end = size
+
+        original_matches.append((orig_start, orig_end))
+
+    return original_matches
+
+
+# =============================================================================
+# Utility Functions
+# =============================================================================
+
+def find_best_match(content: str, pattern: str) -> Optional[Tuple[int, int, str]]:
+    """
+    Report where ``pattern`` first matches and which strategy found it.
+
+    Strategies are tried from strictest to loosest; the first match of the
+    first strategy that finds anything is returned.
+
+    Returns:
+        Tuple of (start, end, strategy_name) or None if no match
+    """
+    ordered_finders = (
+        ("exact", _strategy_exact),
+        ("line_trimmed", _strategy_line_trimmed),
+        ("whitespace_normalized", _strategy_whitespace_normalized),
+        ("indentation_flexible", _strategy_indentation_flexible),
+        ("escape_normalized", _strategy_escape_normalized),
+        ("trimmed_boundary", _strategy_trimmed_boundary),
+        ("block_anchor", _strategy_block_anchor),
+        ("context_aware", _strategy_context_aware),
+    )
+
+    for label, finder in ordered_finders:
+        hits = finder(content, pattern)
+        if not hits:
+            continue
+        first_hit = hits[0]
+        return (first_hit[0], first_hit[1], label)
+
+    return None
diff --git a/tools/patch_parser.py b/tools/patch_parser.py
new file mode 100644
index 0000000000..bce7bb6e30
--- /dev/null
+++ b/tools/patch_parser.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python3
+"""
+V4A Patch Format Parser
+
+Parses the V4A patch format used by codex, cline, and other coding agents.
+
+V4A Format:
+    *** Begin Patch
+    *** Update File: path/to/file.py
+    @@ optional context hint @@
+     context line (space prefix)
+    -removed line (minus prefix)
+    +added line (plus prefix)
+    *** Add File: path/to/new.py
+    +new file content
+    +line 2
+    *** Delete File: path/to/old.py
+    *** Move File: old/path.py -> new/path.py
+    *** End Patch
+
+Usage:
+    from tools.patch_parser import parse_v4a_patch, apply_v4a_operations
+    
+    operations, error = parse_v4a_patch(patch_content)
+    if error:
+        print(f"Parse error: {error}")
+    else:
+        result = apply_v4a_operations(operations, file_ops)
+"""
+
+import re
+from dataclasses import dataclass, field
+from typing import List, Optional, Tuple, Any
+from enum import Enum
+
+
+class OperationType(Enum):
+    """Kinds of file operation a V4A patch can contain."""
+    # Each member corresponds to one `*** <Verb> File:` header recognised by the parser.
+    ADD = "add"
+    UPDATE = "update"
+    DELETE = "delete"
+    MOVE = "move"
+
+
+@dataclass
+class HunkLine:
+    """A single line in a patch hunk."""
+    prefix: str  # ' ', '-', or '+'
+    # Line text without the prefix character. Lines that arrived with no
+    # recognised prefix are stored whole, with prefix ' '.
+    content: str
+
+
+@dataclass
+class Hunk:
+    """A group of changes within a file."""
+    # Text captured from an `@@ ... @@` marker line; None when the hunk has no such hint.
+    context_hint: Optional[str] = None
+    # The hunk's lines, in the order they appear in the patch.
+    lines: List[HunkLine] = field(default_factory=list)
+
+
+@dataclass
+class PatchOperation:
+    """A single operation in a V4A patch."""
+    # Which kind of operation this is.
+    operation: OperationType
+    # Path taken from the operation's header line (the source path for a move).
+    file_path: str
+    new_path: Optional[str] = None  # For move operations
+    # Hunks collected for this operation; the parser fills these for update and add operations.
+    hunks: List[Hunk] = field(default_factory=list)
+    # NOTE(review): parse_v4a_patch never sets this field; added-file text is carried in `hunks`.
+    content: Optional[str] = None  # For add file operations
+
+
+def parse_v4a_patch(patch_content: str) -> Tuple[List[PatchOperation], Optional[str]]:
+    """
+    Turn V4A patch text into an ordered list of PatchOperation objects.
+
+    Parsing covers the lines after the last ``Begin Patch`` marker seen
+    before the first ``End Patch`` marker. A missing begin marker means
+    "start at the first line"; a missing end marker means "run to the end".
+    Lines that appear before any file header are ignored, empty lines are
+    skipped, and hunk lines without a ``+``/``-``/space prefix are treated
+    as context lines.
+
+    Args:
+        patch_content: The patch text in V4A format
+
+    Returns:
+        Tuple of (operations, error_message). As written, the error slot is
+        always None; unparseable input simply yields fewer (or no) operations.
+    """
+    all_lines = patch_content.split('\n')
+
+    # Locate the region to parse.
+    begin_at = -1
+    stop_at = len(all_lines)
+    for idx, text in enumerate(all_lines):
+        if '*** Begin Patch' in text or '***Begin Patch' in text:
+            begin_at = idx
+        elif '*** End Patch' in text or '***End Patch' in text:
+            stop_at = idx
+            break
+
+    # File headers, checked in this order for every line.
+    header_patterns = (
+        (OperationType.UPDATE, r'\*\*\*\s*Update\s+File:\s*(.+)'),
+        (OperationType.ADD, r'\*\*\*\s*Add\s+File:\s*(.+)'),
+        (OperationType.DELETE, r'\*\*\*\s*Delete\s+File:\s*(.+)'),
+        (OperationType.MOVE, r'\*\*\*\s*Move\s+File:\s*(.+?)\s*->\s*(.+)'),
+    )
+
+    parsed: List[PatchOperation] = []
+    pending_op: Optional[PatchOperation] = None
+    pending_hunk: Optional[Hunk] = None
+
+    def commit(op: Optional[PatchOperation], hunk: Optional[Hunk]) -> None:
+        """Record ``op`` (if any), first attaching ``hunk`` when it has lines."""
+        if op is None:
+            return
+        if hunk is not None and hunk.lines:
+            op.hunks.append(hunk)
+        parsed.append(op)
+
+    for text in all_lines[begin_at + 1:stop_at]:
+        header = None
+        for kind, regex in header_patterns:
+            found = re.match(regex, text)
+            if found:
+                header = (kind, found)
+                break
+
+        if header is not None:
+            kind, found = header
+            # A new header always closes whatever operation was in progress.
+            commit(pending_op, pending_hunk)
+
+            if kind is OperationType.MOVE:
+                new_op = PatchOperation(
+                    operation=kind,
+                    file_path=found.group(1).strip(),
+                    new_path=found.group(2).strip()
+                )
+            else:
+                new_op = PatchOperation(
+                    operation=kind,
+                    file_path=found.group(1).strip()
+                )
+
+            if kind is OperationType.UPDATE:
+                pending_op, pending_hunk = new_op, None
+            elif kind is OperationType.ADD:
+                # Added files have no @@ markers, so open a hunk right away.
+                pending_op, pending_hunk = new_op, Hunk()
+            else:
+                # Delete and move have no body: record them immediately.
+                parsed.append(new_op)
+                pending_op, pending_hunk = None, None
+            continue
+
+        if text.startswith('@@'):
+            # Hunk marker: close the current hunk and open a new one. Markers
+            # outside any operation are ignored.
+            if pending_op is not None:
+                if pending_hunk is not None and pending_hunk.lines:
+                    pending_op.hunks.append(pending_hunk)
+                hint_found = re.match(r'@@\s*(.+?)\s*@@', text)
+                pending_hunk = Hunk(context_hint=hint_found.group(1) if hint_found else None)
+            continue
+
+        if pending_op is None or not text:
+            continue
+
+        if pending_hunk is None:
+            pending_hunk = Hunk()
+
+        lead = text[0]
+        if lead in ('+', '-', ' '):
+            pending_hunk.lines.append(HunkLine(lead, text[1:]))
+        elif lead != '\\':
+            # No recognised prefix: keep the whole line as context. Lines
+            # starting with a backslash ("\ No newline at end of file") are dropped.
+            pending_hunk.lines.append(HunkLine(' ', text))
+
+    # Close the operation still open when the input ran out.
+    commit(pending_op, pending_hunk)
+
+    return parsed, None
+
+
+def apply_v4a_operations(operations: List[PatchOperation], 
+                          file_ops: Any) -> 'PatchResult':
+    """
+    Apply V4A patch operations using a file operations interface.
+
+    Operations are applied in order and independently: a failure is recorded
+    and the remaining operations are still attempted. Afterwards, if
+    ``file_ops`` provides ``_check_lint``, every file that was updated, moved
+    or created is linted (moved files under their new path).
+
+    Args:
+        operations: List of PatchOperation from parse_v4a_patch
+        file_ops: Object with read_file, write_file methods (plus the private
+            helpers the individual operation handlers rely on)
+
+    Returns:
+        PatchResult with results of all operations. ``success`` is False and
+        ``error`` holds the ';'-joined messages when any operation failed;
+        changes made by the operations that succeeded are still reported.
+    """
+    # Import here to avoid circular imports
+    from tools.file_operations import PatchResult
+
+    files_modified = []
+    files_created = []
+    files_deleted = []
+    all_diffs = []
+    errors = []
+    # Real paths to lint. Kept apart from files_modified because a move is
+    # reported there as the display string "old -> new", which is not a path.
+    lint_modified = []
+    lint_created = []
+
+    handlers = {
+        OperationType.ADD: (_apply_add, "add"),
+        OperationType.DELETE: (_apply_delete, "delete"),
+        OperationType.MOVE: (_apply_move, "move"),
+        OperationType.UPDATE: (_apply_update, "update"),
+    }
+
+    for op in operations:
+        try:
+            entry = handlers.get(op.operation)
+            if entry is None:
+                # Unknown operation kinds are skipped silently.
+                continue
+            handler, verb = entry
+
+            outcome = handler(op, file_ops)
+            if not outcome[0]:
+                errors.append(f"Failed to {verb} {op.file_path}: {outcome[1]}")
+                continue
+
+            if op.operation == OperationType.ADD:
+                files_created.append(op.file_path)
+                lint_created.append(op.file_path)
+            elif op.operation == OperationType.DELETE:
+                files_deleted.append(op.file_path)
+            elif op.operation == OperationType.MOVE:
+                files_modified.append(f"{op.file_path} -> {op.new_path}")
+                lint_modified.append(op.new_path)
+            else:
+                files_modified.append(op.file_path)
+                lint_modified.append(op.file_path)
+            all_diffs.append(outcome[1])
+
+        except Exception as e:
+            errors.append(f"Error processing {op.file_path}: {str(e)}")
+
+    # Run lint on all modified/created files
+    lint_results = {}
+    if hasattr(file_ops, '_check_lint'):
+        for f in lint_modified + lint_created:
+            lint_results[f] = file_ops._check_lint(f).to_dict()
+
+    result_fields = dict(
+        success=not errors,
+        diff='\n'.join(all_diffs),
+        files_modified=files_modified,
+        files_created=files_created,
+        files_deleted=files_deleted,
+        lint=lint_results if lint_results else None,
+    )
+    if errors:
+        # Only set on failure so a successful result keeps PatchResult's default.
+        result_fields["error"] = '; '.join(errors)
+
+    return PatchResult(**result_fields)
+
+
+def _apply_add(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]:
+    """Create op.file_path from the hunks' '+' lines; return (ok, diff text on success or error text on failure)."""
+    # Only '+'-prefixed hunk lines contribute content; lines with any other prefix are ignored
+    content_lines = []
+    for hunk in op.hunks:
+        for line in hunk.lines:
+            if line.prefix == '+':
+                content_lines.append(line.content)
+    
+    content = '\n'.join(content_lines)  # joined without a trailing newline
+    
+    result = file_ops.write_file(op.file_path, content)
+    if result.error:
+        return False, result.error
+    
+    diff = f"--- /dev/null\n+++ b/{op.file_path}\n"  # file headers only; no @@ hunk header is emitted
+    diff += '\n'.join(f"+{line}" for line in content_lines)
+    
+    return True, diff
+
+
+def _apply_delete(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]:
+    """Remove op.file_path with `rm -f`; return (ok, diff-style note on success or command output on failure)."""
+    # Probe the file first so that a missing file can be reported as a no-op success
+    read_result = file_ops.read_file(op.file_path)
+    
+    if read_result.error and "not found" in read_result.error.lower():
+        # Missing file counts as success; detection relies on the error text containing "not found"
+        return True, f"# {op.file_path} already deleted or doesn't exist"
+    
+    # Any other read error falls through to the rm below — NOTE(review): confirm that is intended.
+    # The file is removed with one shell command via file_ops._exec; nothing is written to it first.
+    rm_result = file_ops._exec(f"rm -f {file_ops._escape_shell_arg(op.file_path)}")
+    
+    if rm_result.exit_code != 0:
+        return False, rm_result.stdout  # NOTE(review): only stdout is reported — confirm it carries rm's error text
+    
+    diff = f"--- a/{op.file_path}\n+++ /dev/null\n# File deleted"
+    return True, diff
+
+
+def _apply_move(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]:
+    """Rename op.file_path to op.new_path with `mv`; return (ok, note on success or command output on failure)."""
+    # Both paths pass through file_ops._escape_shell_arg before being placed in the command string
+    mv_result = file_ops._exec(
+        f"mv {file_ops._escape_shell_arg(op.file_path)} {file_ops._escape_shell_arg(op.new_path)}"
+    )
+    
+    if mv_result.exit_code != 0:
+        return False, mv_result.stdout
+    
+    diff = f"# Moved: {op.file_path} -> {op.new_path}"  # a one-line note rather than a real diff
+    return True, diff
+
+
+def _apply_update(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]:
+    """Apply op.hunks to op.file_path by search-and-replace; return (ok, unified diff on success or error text on failure)."""
+    # Read current content. NOTE(review): limit=10000 presumably caps the lines returned; if so, a longer file would be written back truncated — TODO confirm
+    read_result = file_ops.read_file(op.file_path, limit=10000)
+    
+    if read_result.error:
+        return False, f"Cannot read file: {read_result.error}"
+    
+    # Strip the line-number prefix from each line of the read result
+    current_lines = []
+    for line in read_result.content.split('\n'):
+        if '|' in line:
+            # Expected format: "    123|content"; keep what follows the first '|'. NOTE(review): assumes every line containing '|' is numbered
+            parts = line.split('|', 1)
+            if len(parts) == 2:  # always true here, since the line contains '|'; the else branch is defensive
+                current_lines.append(parts[1])
+            else:
+                current_lines.append(line)
+        else:
+            current_lines.append(line)
+    
+    current_content = '\n'.join(current_lines)
+    
+    # Apply each hunk in order; each one operates on the output of the previous one
+    new_content = current_content
+    
+    for hunk in op.hunks:
+        # Context (' ') lines go to both sides, '-' lines only to the search text, '+' lines only to the replacement
+        search_lines = []
+        replace_lines = []
+        
+        for line in hunk.lines:
+            if line.prefix == ' ':
+                search_lines.append(line.content)
+                replace_lines.append(line.content)
+            elif line.prefix == '-':
+                search_lines.append(line.content)
+            elif line.prefix == '+':
+                replace_lines.append(line.content)
+        
+        if search_lines:  # a hunk with no context or removed lines has nothing to search for and is skipped
+            search_pattern = '\n'.join(search_lines)
+            replacement = '\n'.join(replace_lines)
+            
+            # Use fuzzy matching (imported here, inside the loop, rather than at module level)
+            from tools.fuzzy_match import fuzzy_find_and_replace
+            new_content, count, error = fuzzy_find_and_replace(
+                new_content, search_pattern, replacement, replace_all=False
+            )
+            
+            if error and count == 0:
+                # Fallback: retry inside a window around the hunk's context hint, if it has one
+                if hunk.context_hint:
+                    # Locate the first exact occurrence of the hint text
+                    hint_pos = new_content.find(hunk.context_hint)
+                    if hint_pos != -1:
+                        # Window runs from 500 chars before the hint to 2000 chars after its start
+                        window_start = max(0, hint_pos - 500)
+                        window_end = min(len(new_content), hint_pos + 2000)
+                        window = new_content[window_start:window_end]
+                        
+                        window_new, count, error = fuzzy_find_and_replace(
+                            window, search_pattern, replacement, replace_all=False
+                        )
+                        
+                        if count > 0:
+                            new_content = new_content[:window_start] + window_new + new_content[window_end:]
+                            error = None
+                
+                if error:  # first-call error if no retry ran; otherwise the retry's error (cleared on a match)
+                    return False, f"Could not apply hunk: {error}"
+    
+    # Write the result back only after every hunk has been applied
+    write_result = file_ops.write_file(op.file_path, new_content)
+    if write_result.error:
+        return False, write_result.error
+    
+    # Build a unified diff of the stripped original content against the new content
+    import difflib
+    diff_lines = difflib.unified_diff(
+        current_content.splitlines(keepends=True),
+        new_content.splitlines(keepends=True),
+        fromfile=f"a/{op.file_path}",
+        tofile=f"b/{op.file_path}"
+    )
+    diff = ''.join(diff_lines)
+    
+    return True, diff
diff --git a/toolset_distributions.py b/toolset_distributions.py
index 7eb5980a1e..7f829c2784 100644
--- a/toolset_distributions.py
+++ b/toolset_distributions.py
@@ -35,6 +35,7 @@ DISTRIBUTIONS = {
             "vision": 100,
             "image_gen": 100,
             "terminal": 100,
+            "file": 100,
             "moa": 100,
             "browser": 100
         }
@@ -66,10 +67,11 @@ DISTRIBUTIONS = {
 
     # Scientific problem solving focused distribution
     "science": {
-        "description": "Scientific research with web, terminal, and browser capabilities",
+        "description": "Scientific research with web, terminal, file, and browser capabilities",
         "toolsets": {
             "web": 94,       # 94% chance of web tools
             "terminal": 94,  # 94% chance of terminal tools
+            "file": 94,      # 94% chance of file tools
             "vision": 65,    # 65% chance of vision tools
             "browser": 50,   # 50% chance of browser for accessing papers/databases
             "image_gen": 15, # 15% chance of image generation tools
@@ -79,9 +81,10 @@ DISTRIBUTIONS = {
 
     # Development-focused distribution
     "development": {
-        "description": "Terminal and reasoning with occasional web lookup",
+        "description": "Terminal, file tools, and reasoning with occasional web lookup",
         "toolsets": {
             "terminal": 80,  # 80% chance of terminal tools
+            "file": 80,      # 80% chance of file tools (read, write, patch, search)
             "moa": 60,       # 60% chance of reasoning tools
             "web": 30,       # 30% chance of web tools
             "vision": 10     # 10% chance of vision tools
@@ -108,6 +111,7 @@ DISTRIBUTIONS = {
             "vision": 50,
             "image_gen": 50,
             "terminal": 50,
+            "file": 50,
             "moa": 50,
             "browser": 50
         }
@@ -123,17 +127,19 @@ DISTRIBUTIONS = {
     
     # Terminal only
     "terminal_only": {
-        "description": "Only terminal tool for code execution tasks",
+        "description": "Terminal and file tools for code execution tasks",
         "toolsets": {
-            "terminal": 100
+            "terminal": 100,
+            "file": 100
         }
     },
     
     # Terminal + web (common for coding tasks that need docs)
     "terminal_web": {
-        "description": "Terminal with web search for documentation lookup",
+        "description": "Terminal and file tools with web search for documentation lookup",
         "toolsets": {
             "terminal": 100,
+            "file": 100,
             "web": 100
         }
     },
@@ -188,9 +194,10 @@ DISTRIBUTIONS = {
     
     # Terminal-focused tasks distribution (for nous-terminal-tasks.jsonl)
     "terminal_tasks": {
-        "description": "Terminal-focused distribution with high terminal availability, occasional other tools",
+        "description": "Terminal-focused distribution with high terminal/file availability, occasional other tools",
         "toolsets": {
             "terminal": 97,   # 97% - terminal almost always available
+            "file": 97,       # 97% - file tools almost always available
             "web": 15,        # 15% - web search/scrape for documentation
             "browser": 10,    # 10% - browser occasionally for web interaction
             "vision": 8,      # 8% - vision analysis rarely
@@ -200,10 +207,11 @@ DISTRIBUTIONS = {
     
     # Mixed browser+terminal tasks distribution (for mixed-browser-terminal-tasks.jsonl)
     "mixed_tasks": {
-        "description": "Mixed distribution with high browser and terminal availability for complex tasks",
+        "description": "Mixed distribution with high browser, terminal, and file availability for complex tasks",
         "toolsets": {
             "browser": 92,    # 92% - browser tools highly available
-            "terminal": 92,   # 92% - terminal highly available  
+            "terminal": 92,   # 92% - terminal highly available
+            "file": 92,       # 92% - file tools highly available
             "web": 35,        # 35% - web search/scrape fairly common
             "vision": 15,     # 15% - vision analysis occasionally
             "image_gen": 15   # 15% - image generation occasionally
diff --git a/toolsets.py b/toolsets.py
index abd6192a98..7dac5ff144 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -102,12 +102,18 @@ TOOLSETS = {
         "includes": []
     },
     
+    "file": {
+        "description": "File manipulation tools: read, write, patch (with fuzzy matching), and search (content + files)",
+        "tools": ["read_file", "write_file", "patch", "search"],
+        "includes": []
+    },
+    
     # Scenario-specific toolsets
     
     "debugging": {
         "description": "Debugging and troubleshooting toolkit",
         "tools": ["terminal"],
-        "includes": ["web"]  # For searching error messages and solutions
+        "includes": ["web", "file"]  # For searching error messages and solutions, and file operations
     },
     
     "safe": {
@@ -127,6 +133,8 @@ TOOLSETS = {
             "web_search", "web_extract",
             # Terminal
             "terminal",
+            # File manipulation
+            "read_file", "write_file", "patch", "search",
             # Vision
             "vision_analyze",
             # Image generation
@@ -155,6 +163,8 @@ TOOLSETS = {
         "tools": [
             # Terminal - enabled with dangerous command approval system
             "terminal",
+            # File manipulation
+            "read_file", "write_file", "patch", "search",
             # Web tools
             "web_search", "web_extract",
             # Vision - analyze images sent by users
@@ -189,6 +199,8 @@ TOOLSETS = {
             "web_search", "web_extract",
             # Terminal - only for trusted personal accounts
             "terminal",
+            # File manipulation
+            "read_file", "write_file", "patch", "search",
             # Vision
             "vision_analyze",
             # Skills

From ac797259232e91c107381546137a287b2262f9d0 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sat, 7 Feb 2026 00:05:04 +0000
Subject: [PATCH 39/48] Update dependencies and enhance installation scripts

- Added `prompt_toolkit` as a direct dependency for interactive CLI support.
- Updated `modal` optional dependency to require `swe-rex[modal]>=1.4.0` for improved cloud execution capabilities.
- Enhanced `messaging` optional dependencies to include `aiohttp>=3.9.0` for WhatsApp bridge communication.
- Refined installation scripts to check for Python version requirements, emphasizing the need for Python 3.11+ for RL training tools.
- Improved setup scripts to ensure proper installation of submodules and dependencies, enhancing user experience during setup.
---
 .gitignore                |   2 +
 README.md                 | 343 ++++++++++++++++++++++++++++++++++++--
 example-skill/SKILL.md    |  70 --------
 hermes_cli/doctor.py      |  38 ++++-
 hermes_cli/setup.py       |  90 ++++++++--
 pyproject.toml            |   8 +-
 requirements.txt          |  19 +--
 scripts/install.ps1       |  29 +++-
 scripts/install.sh        |  27 ++-
 setup-hermes.sh           |  33 ++++
 tools/rl_training_tool.py |  22 ++-
 11 files changed, 553 insertions(+), 128 deletions(-)
 delete mode 100644 example-skill/SKILL.md

diff --git a/.gitignore b/.gitignore
index 87617b600f..dcbbb56aaa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,8 @@ agent-browser/
 *.pem
 privvy*
 images/
+__pycache__/
+hermes_agent.egg-info/
 
 # CLI config (may contain sensitive SSH paths)
 cli-config.yaml
diff --git a/README.md b/README.md
index a2c4bf269a..aa603d64a0 100644
--- a/README.md
+++ b/README.md
@@ -15,9 +15,9 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins
 ```
 
 The installer will:
-- Clone to `~/.hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
-- Create a virtual environment
-- Install all dependencies
+- Clone to `~/.hermes/hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
+- Create a virtual environment (Python 3.11+ recommended)
+- Install all dependencies and submodule packages
 - Run the interactive setup wizard
 - Add `hermes` to your PATH
 
@@ -179,8 +179,8 @@ hermes config set terminal.singularity_image ~/python.sif
 
 **Modal** (serverless cloud):
 ```bash
-pip install modal boto3
-modal setup  # Authenticate
+pip install "swe-rex[modal]"   # Installs swe-rex + modal + boto3
+modal setup                    # Authenticate with Modal
 hermes config set terminal.backend modal
 ```
 
@@ -275,16 +275,19 @@ See [docs/messaging.md](docs/messaging.md) for WhatsApp and advanced setup.
 
 Train language models with reinforcement learning using the Tinker API and Atropos framework.
 
+> **Note:** RL training tools require **Python 3.11+** (the upstream `tinker` package has this requirement). On Python 3.10, the RL toolset will be automatically disabled — all other features work fine.
+
 #### Requirements
 
-1. **API Keys:** Add to `~/.hermes/.env`:
+1. **Python 3.11+** (check with `python3 --version`)
+2. **API Keys:** Add to `~/.hermes/.env`:
 ```bash
 TINKER_API_KEY=your-tinker-key      # Get from https://tinker-console.thinkingmachines.ai/keys
 WANDB_API_KEY=your-wandb-key        # Get from https://wandb.ai/authorize
 OPENROUTER_API_KEY=your-key         # Optional: for rl_test_inference
 ```
 
-2. **That's it!** tinker-atropos is included as a submodule - no separate installation needed.
+3. **That's it!** tinker-atropos is included as a submodule — the installer handles it automatically.
 
 #### Using RL Tools
 
@@ -425,26 +428,332 @@ skills/
 
 ## Manual Installation
 
-If you prefer not to use the installer:
+If you prefer full control over the installation process (or the quick-install script doesn't suit your environment), follow these steps to set everything up by hand.
+
+### Prerequisites
+
+| Requirement | Minimum Version | Check Command | Notes |
+|-------------|----------------|---------------|-------|
+| **Python** | 3.11+ recommended (3.10 minimum) | `python3 --version` | Required. 3.11+ needed for RL training tools |
+| **Git** | Any recent | `git --version` | Required |
+| **pip** | 21+ | `pip --version` | Comes with Python |
+| **Node.js** | 18+ | `node --version` | Optional — needed for browser automation tools |
+| **ripgrep** | Any | `rg --version` | Optional — faster file search in terminal tool (falls back to grep) |
+
+<details>
+<summary><strong>Installing prerequisites by platform</strong></summary>
+
+**Ubuntu / Debian:**
+```bash
+sudo apt update
+sudo apt install python3.11 python3.11-venv python3-pip git
+# Optional:
+sudo apt install ripgrep nodejs npm
+```
+
+**macOS (Homebrew):**
+```bash
+brew install python@3.11 git
+# Optional:
+brew install ripgrep node
+```
+
+**Windows (WSL recommended):**
+Use the [Windows Subsystem for Linux](https://learn.microsoft.com/en-us/windows/wsl/install) and follow the Ubuntu instructions above. Alternatively, use the PowerShell quick-install script at the top of this README.
+
+</details>
+
+---
+
+### Step 1: Clone the Repository
+
+Clone with `--recurse-submodules` to pull the required submodules ([mini-swe-agent](https://github.com/SWE-agent/mini-swe-agent) for the terminal tool backend and [tinker-atropos](https://github.com/nousresearch/tinker-atropos) for RL training):
 
 ```bash
-# Clone the repository (with submodules)
+git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+```
+
+If you already cloned without `--recurse-submodules`, initialize them manually:
+```bash
+git submodule update --init --recursive
+```
+
+---
+
+### Step 2: Create & Activate a Virtual Environment
+
+A virtual environment keeps Hermes dependencies isolated from your system Python:
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+
+# Upgrade core packaging tools
+pip install --upgrade pip wheel setuptools
+```
+
+> **Tip:** Every time you open a new terminal to use Hermes, activate the venv first:
+> `source /path/to/hermes-agent/venv/bin/activate`
+
+---
+
+### Step 3: Install Python Dependencies
+
+Install the main package in editable mode with all optional extras (messaging, cron, CLI menus):
+
+```bash
+pip install -e ".[all]"
+```
+
+If you only want the core agent (no Telegram/Discord/cron support):
+```bash
+pip install -e "."
+```
+
+<details>
+<summary><strong>Optional extras breakdown</strong></summary>
+
+| Extra | What it adds | Install command |
+|-------|-------------|-----------------|
+| `all` | Everything below | `pip install -e ".[all]"` |
+| `messaging` | Telegram & Discord gateway | `pip install -e ".[messaging]"` |
+| `cron` | Cron expression parsing for scheduled tasks | `pip install -e ".[cron]"` |
+| `cli` | Terminal menu UI for setup wizard | `pip install -e ".[cli]"` |
+| `modal` | Modal cloud execution backend (swe-rex + modal + boto3) | `pip install -e ".[modal]"` |
+| `dev` | pytest & test utilities | `pip install -e ".[dev]"` |
+
+You can combine extras: `pip install -e ".[messaging,cron]"`
+
+</details>
+
+---
+
+### Step 4: Install Submodule Packages
+
+These are local packages checked out as Git submodules. Install them in editable mode:
+
+```bash
+# Terminal tool backend (required for the terminal/command-execution tool)
+pip install -e "./mini-swe-agent"
+
+# RL training backend (requires Python 3.11+)
+pip install -e "./tinker-atropos"
+```
+
+Both are optional — if you skip one, the corresponding toolset (terminal or RL training, respectively) simply won't be available.
+
+> **Note:** `tinker-atropos` requires Python 3.11+ (the upstream `tinker` package has this constraint). On Python 3.10, skip this line — RL tools will be disabled but everything else works.
+
+---
+
+### Step 5: Install Node.js Dependencies (Optional)
+
+Only needed if you plan to use the **browser automation** toolset (Browserbase-powered):
+
+```bash
+npm install
+```
+
+This installs the `agent-browser` package defined in `package.json`. Skip this step if you don't need browser tools.
+
+---
+
+### Step 6: Create the Configuration Directory
+
+Hermes stores all user configuration in `~/.hermes/`:
+
+```bash
+# Create the directory structure
+mkdir -p ~/.hermes/{cron,sessions,logs}
+
+# Copy the example config file
+cp cli-config.yaml.example ~/.hermes/config.yaml
+
+# Create an empty .env file for API keys
+touch ~/.hermes/.env
+```
+
+Your `~/.hermes/` directory should now look like:
+```
+~/.hermes/
+├── config.yaml     # Agent settings (model, terminal, toolsets, compression, etc.)
+├── .env            # API keys and secrets (one per line: KEY=value)
+├── cron/           # Scheduled job data
+├── sessions/       # Messaging gateway sessions
+└── logs/           # Conversation logs
+```
+
+---
+
+### Step 7: Add Your API Keys
+
+Open `~/.hermes/.env` in your editor and add at minimum an LLM provider key:
+
+```bash
+# Required — at least one LLM provider:
+OPENROUTER_API_KEY=sk-or-v1-your-key-here
+
+# Optional — enable additional tools:
+FIRECRAWL_API_KEY=fc-your-key          # Web search & scraping
+BROWSERBASE_API_KEY=bb-your-key        # Browser automation
+BROWSERBASE_PROJECT_ID=your-project-id # Browser automation
+FAL_KEY=your-fal-key                   # Image generation (FLUX)
+TINKER_API_KEY=your-tinker-key         # RL training
+WANDB_API_KEY=your-wandb-key           # RL training metrics
+
+# Optional — messaging gateway:
+TELEGRAM_BOT_TOKEN=123456:ABC-DEF      # From @BotFather
+TELEGRAM_ALLOWED_USERS=your-user-id    # Comma-separated
+DISCORD_BOT_TOKEN=MTIz...              # From Developer Portal
+DISCORD_ALLOWED_USERS=your-user-id     # Comma-separated
+```
+
+Or set them one at a time via the CLI:
+```bash
+hermes config set OPENROUTER_API_KEY sk-or-v1-your-key-here
+```
+
+---
+
+### Step 8: Add `hermes` to Your PATH
+
+The `hermes` command is installed into the virtual environment's `bin/` directory. Add that directory to your shell PATH so you can run `hermes` from anywhere:
+
+**Bash** (`~/.bashrc`):
+```bash
+echo '' >> ~/.bashrc
+echo '# Hermes Agent' >> ~/.bashrc
+echo 'export PATH="$HOME/hermes-agent/venv/bin:$PATH"' >> ~/.bashrc
+source ~/.bashrc
+```
+
+**Zsh** (`~/.zshrc`):
+```bash
+echo '' >> ~/.zshrc
+echo '# Hermes Agent' >> ~/.zshrc
+echo 'export PATH="$HOME/hermes-agent/venv/bin:$PATH"' >> ~/.zshrc
+source ~/.zshrc
+```
+
+**Fish** (`~/.config/fish/config.fish`):
+```fish
+fish_add_path $HOME/hermes-agent/venv/bin
+```
+
+> **Note:** Adjust the path if you cloned to a different location. The key is to add the `venv/bin` directory inside your clone to your PATH.
+
+Alternatively, if you don't want to modify your PATH, you can create a symlink:
+```bash
+mkdir -p ~/.local/bin
+ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
+```
+(Most distributions already have `~/.local/bin` on the PATH.)
+
+---
+
+### Step 9: Run the Setup Wizard (Optional)
+
+The interactive setup wizard walks you through configuring your API keys and preferences:
+
+```bash
+hermes setup
+```
+
+This is optional if you already configured `~/.hermes/.env` and `~/.hermes/config.yaml` manually in the steps above.
+
+---
+
+### Step 10: Verify the Installation
+
+```bash
+# Check that the command is available
+hermes version
+
+# Run diagnostics to verify everything is working
+hermes doctor
+
+# Check your configuration
+hermes status
+
+# Test with a quick query
+hermes chat -q "Hello! What tools do you have available?"
+```
+
+If `hermes doctor` reports issues, it will tell you exactly what's missing and how to fix it.
+
+---
+
+### Quick-Reference: Manual Install (Condensed)
+
+For those who just want the commands without the explanations:
+
+```bash
+# Clone & enter
 git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
 cd hermes-agent
 
-# Run setup script
-./setup-hermes.sh
-
-# Or manually:
+# Virtual environment
 python3 -m venv venv
 source venv/bin/activate
+pip install --upgrade pip wheel setuptools
+
+# Install everything
 pip install -e ".[all]"
+pip install -e "./mini-swe-agent"
+pip install -e "./tinker-atropos"
+npm install  # optional, for browser tools
 
-# Install submodules (required for terminal and RL tools)
-pip install -e "./mini-swe-agent"    # Terminal tool backend
-pip install -e "./tinker-atropos"    # RL training backend
+# Configure
+mkdir -p ~/.hermes/{cron,sessions,logs}
+cp cli-config.yaml.example ~/.hermes/config.yaml
+touch ~/.hermes/.env
+echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
 
-hermes setup
+# Add to PATH (adjust for your shell)
+echo 'export PATH="'$(pwd)'/venv/bin:$PATH"' >> ~/.bashrc
+source ~/.bashrc
+
+# Verify
+hermes doctor
+hermes
+```
+
+---
+
+### Updating a Manual Installation
+
+To update an existing manual install to the latest version:
+
+```bash
+cd /path/to/hermes-agent
+source venv/bin/activate
+
+# Pull latest code and submodules
+git pull origin main
+git submodule update --init --recursive
+
+# Reinstall (picks up new dependencies)
+pip install -e ".[all]"
+pip install -e "./mini-swe-agent"
+pip install -e "./tinker-atropos"
+
+# Check for new config options added since your last update
+hermes config check
+hermes config migrate   # Interactively add any missing options
+```
+
+### Uninstalling a Manual Installation
+
+```bash
+# Remove the cloned repository
+rm -rf /path/to/hermes-agent
+
+# Remove user configuration (optional — keep if you plan to reinstall)
+rm -rf ~/.hermes
+
+# Remove the PATH line from your shell config (~/.bashrc or ~/.zshrc)
+# Look for the "# Hermes Agent" comment and remove that block
 ```
 
 ---
diff --git a/example-skill/SKILL.md b/example-skill/SKILL.md
deleted file mode 100644
index df20ff2097..0000000000
--- a/example-skill/SKILL.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-name: example-skill
-description: An example skill demonstrating the skill file format and structure
----
-
-# Example Skill
-
-This is an example skill file that demonstrates how to create skills for the Hermes Agent.
-
-## Skill File Format
-
-Skills are markdown files with YAML frontmatter at the top:
-
-```yaml
----
-name: your-skill-name
-description: A brief one-line description of what this skill does
----
-```
-
-The frontmatter fields:
-- **name**: The identifier used to reference this skill (lowercase, hyphens for spaces)
-- **description**: A brief description shown when listing skills (keep under 200 chars)
-
-## Writing Effective Skills
-
-### 1. Be Specific and Actionable
-
-Good skills provide clear, actionable instructions:
-
-```
-When reviewing code:
-1. Check for security vulnerabilities first
-2. Verify error handling is comprehensive
-3. Ensure tests cover edge cases
-```
-
-### 2. Include Examples
-
-Show concrete examples of what you want:
-
-```python
-# Good: Descriptive variable names
-user_authentication_token = get_token()
-
-# Bad: Cryptic abbreviations  
-uat = gt()
-```
-
-### 3. Define When to Use
-
-Help the agent understand when this skill applies:
-
-> Use this skill when: reviewing pull requests, auditing security, or checking code quality.
-
-## Skill Categories
-
-Consider organizing skills by purpose:
-
-- **Conventions**: Coding standards, API patterns, naming rules
-- **Workflows**: Step-by-step processes for deployments, reviews, releases
-- **Knowledge**: Domain-specific information, system architecture, gotchas
-- **Templates**: Boilerplate for common tasks, response formats
-
-## Tips
-
-1. Keep the description concise - it's shown in the skills list
-2. Use headers to organize longer skills
-3. Include code examples where helpful
-4. Reference other skills if they're related
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 5e0ee39fa7..7c770cf8ac 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -58,8 +58,11 @@ def run_doctor(args):
     print(color("◆ Python Environment", Colors.CYAN, Colors.BOLD))
     
     py_version = sys.version_info
-    if py_version >= (3, 10):
+    if py_version >= (3, 11):
         check_ok(f"Python {py_version.major}.{py_version.minor}.{py_version.micro}")
+    elif py_version >= (3, 10):
+        check_ok(f"Python {py_version.major}.{py_version.minor}.{py_version.micro}")
+        check_warn("Python 3.11+ recommended for RL Training tools (tinker requires >= 3.11)")
     elif py_version >= (3, 8):
         check_warn(f"Python {py_version.major}.{py_version.minor}.{py_version.micro}", "(3.10+ recommended)")
     else:
@@ -263,6 +266,39 @@ def run_doctor(args):
         except Exception as e:
             check_warn("Anthropic API", f"({e})")
     
+    # =========================================================================
+    # Check: Submodules
+    # =========================================================================
+    print()
+    print(color("◆ Submodules", Colors.CYAN, Colors.BOLD))
+    
+    # mini-swe-agent (terminal tool backend)
+    mini_swe_dir = PROJECT_ROOT / "mini-swe-agent"
+    if mini_swe_dir.exists() and (mini_swe_dir / "pyproject.toml").exists():
+        try:
+            __import__("minisweagent")
+            check_ok("mini-swe-agent", "(terminal backend)")
+        except ImportError:
+            check_warn("mini-swe-agent found but not installed", "(run: pip install -e ./mini-swe-agent)")
+            issues.append("Install mini-swe-agent: pip install -e ./mini-swe-agent")
+    else:
+        check_warn("mini-swe-agent not found", "(run: git submodule update --init --recursive)")
+    
+    # tinker-atropos (RL training backend)
+    tinker_dir = PROJECT_ROOT / "tinker-atropos"
+    if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists():
+        if py_version >= (3, 11):
+            try:
+                __import__("tinker_atropos")
+                check_ok("tinker-atropos", "(RL training backend)")
+            except ImportError:
+                check_warn("tinker-atropos found but not installed", "(run: pip install -e ./tinker-atropos)")
+                issues.append("Install tinker-atropos: pip install -e ./tinker-atropos")
+        else:
+            check_warn("tinker-atropos requires Python 3.11+", f"(current: {py_version.major}.{py_version.minor})")
+    else:
+        check_warn("tinker-atropos not found", "(run: git submodule update --init --recursive)")
+    
     # =========================================================================
     # Check: Tool Availability
     # =========================================================================
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 83f42730c9..5f9f045a3a 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -652,6 +652,23 @@ def run_setup_wizard(args):
         print_info("Modal Cloud Configuration:")
         print_info("Get credentials at: https://modal.com/settings")
         
+        # Check if swe-rex[modal] is installed, install if missing
+        try:
+            from swerex.deployment.modal import ModalDeployment
+            print_info("swe-rex[modal] package: installed ✓")
+        except ImportError:
+            print_info("Installing required package: swe-rex[modal]...")
+            import subprocess
+            result = subprocess.run(
+                [sys.executable, "-m", "pip", "install", "swe-rex[modal]>=1.4.0"],
+                capture_output=True, text=True
+            )
+            if result.returncode == 0:
+                print_success("swe-rex[modal] installed (includes modal + boto3)")
+            else:
+                print_warning("Failed to install swe-rex[modal] — install manually:")
+                print_info('  pip install "swe-rex[modal]>=1.4.0"')
+        
         # Always show current status and allow reconfiguration
         current_token = get_env_value('MODAL_TOKEN_ID')
         if current_token:
@@ -917,6 +934,24 @@ def run_setup_wizard(args):
                 save_env_value("BROWSERBASE_API_KEY", api_key)
             if project_id:
                 save_env_value("BROWSERBASE_PROJECT_ID", project_id)
+            
+            # Ensure the Node.js dependencies for the browser tools are installed (npm runs in PROJECT_ROOT)
+            import shutil
+            node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
+            if not node_modules.exists() and shutil.which("npm"):
+                print_info("    Installing Node.js dependencies for browser tools...")
+                import subprocess
+                result = subprocess.run(
+                    ["npm", "install", "--silent"],
+                    capture_output=True, text=True, cwd=str(PROJECT_ROOT)
+                )
+                if result.returncode == 0:
+                    print_success("    Node.js dependencies installed")
+                else:
+                    print_warning(f"    npm install failed — run manually: cd \"{PROJECT_ROOT}\" && npm install")
+            elif not node_modules.exists():
+                print_warning("    Node.js not found — browser tools require: npm install (in the hermes-agent directory)")
+            
             print_success("    Configured ✓")
     print()
     
@@ -950,6 +985,11 @@ def run_setup_wizard(args):
     tinker_configured = get_env_value('TINKER_API_KEY')
     wandb_configured = get_env_value('WANDB_API_KEY')
     
+    # Check Python version requirement upfront
+    rl_python_ok = sys.version_info >= (3, 11)
+    if not rl_python_ok:
+        print_warning(f"  Requires Python 3.11+ (current: {sys.version_info.major}.{sys.version_info.minor})")
+    
     if tinker_configured and wandb_configured:
         print_success("  Status: Configured ✓")
         if prompt_yes_no("  Update RL training credentials?", False):
@@ -969,18 +1009,46 @@ def run_setup_wizard(args):
             print_warning("  Status: Not configured (tools will be disabled)")
         
         if prompt_yes_no("  Set up RL Training?", False):
-            print_info("    Get Tinker key at: https://tinker-console.thinkingmachines.ai/keys")
-            print_info("    Get WandB key at: https://wandb.ai/authorize")
-            api_key = prompt("    Tinker API key", password=True)
-            if api_key:
-                save_env_value("TINKER_API_KEY", api_key)
-            wandb_key = prompt("    WandB API key", password=True)
-            if wandb_key:
-                save_env_value("WANDB_API_KEY", wandb_key)
-            if api_key and wandb_key:
-                print_success("    Configured ✓")
+            # Check Python version before proceeding
+            if not rl_python_ok:
+                print_error(f"    Python 3.11+ required (current: {sys.version_info.major}.{sys.version_info.minor})")
+                print_info("    Upgrade Python and reinstall to enable RL training tools")
             else:
-                print_warning("    Partially configured (both keys required)")
+                print_info("    Get Tinker key at: https://tinker-console.thinkingmachines.ai/keys")
+                print_info("    Get WandB key at: https://wandb.ai/authorize")
+                api_key = prompt("    Tinker API key", password=True)
+                if api_key:
+                    save_env_value("TINKER_API_KEY", api_key)
+                wandb_key = prompt("    WandB API key", password=True)
+                if wandb_key:
+                    save_env_value("WANDB_API_KEY", wandb_key)
+                
+                # Check if tinker-atropos submodule is installed
+                try:
+                    __import__("tinker_atropos")
+                except ImportError:
+                    tinker_dir = PROJECT_ROOT / "tinker-atropos"
+                    if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists():
+                        print_info("    Installing tinker-atropos submodule...")
+                        import subprocess
+                        result = subprocess.run(
+                            [sys.executable, "-m", "pip", "install", "-e", str(tinker_dir)],
+                            capture_output=True, text=True
+                        )
+                        if result.returncode == 0:
+                            print_success("    tinker-atropos installed")
+                        else:
+                            print_warning("    tinker-atropos install failed — run manually:")
+                            print_info('      pip install -e "./tinker-atropos"')
+                    else:
+                        print_warning("    tinker-atropos submodule not found — run:")
+                        print_info("      git submodule update --init --recursive")
+                        print_info('      pip install -e "./tinker-atropos"')
+                
+                if api_key and wandb_key:
+                    print_success("    Configured ✓")
+                else:
+                    print_warning("    Partially configured (both keys required)")
     
     # =========================================================================
     # Save config and show summary
diff --git a/pyproject.toml b/pyproject.toml
index 0924ceaf68..1af0cb2a76 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,8 @@ dependencies = [
   "requests",
   "jinja2",
   "pydantic>=2.0",
+  # Interactive CLI (prompt_toolkit is used directly by cli.py)
+  "prompt_toolkit",
   # Tools
   "firecrawl-py",
   "fal-client",
@@ -32,12 +34,12 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-modal = ["modal", "boto3"]
+modal = ["swe-rex[modal]>=1.4.0"]
 dev = ["pytest", "pytest-asyncio"]
-messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0"]
+messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0"]
 cron = ["croniter"]
 cli = ["simple-term-menu"]
-all = ["croniter", "python-telegram-bot>=20.0", "discord.py>=2.0", "simple-term-menu"]
+all = ["croniter", "python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "simple-term-menu"]
 
 [project.scripts]
 hermes = "hermes_cli.main:main"
diff --git a/requirements.txt b/requirements.txt
index 98db357c93..d950a50ba0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,10 @@ httpx
 rich
 tenacity
 prompt_toolkit
+pyyaml
+requests
+jinja2
+pydantic>=2.0
 
 # Web tools
 firecrawl-py
@@ -15,10 +19,6 @@ fal-client
 
 # mini-swe-agent dependencies (for terminal tool)
 # Note: Install mini-swe-agent itself with: pip install -e ./mini-swe-agent
-pyyaml
-requests
-jinja2
-pydantic>=2.0
 litellm>=1.75.5
 typer
 platformdirs
@@ -27,18 +27,17 @@ platformdirs
 # Requires Docker installed and user in 'docker' group
 
 # Optional: For Modal backend (cloud execution)
-# modal
-# boto3
+# swe-rex[modal]>=1.4.0  # Includes modal + boto3 + swe-rex runtime
 
 # Optional: For cron expression parsing (cronjob scheduling)
 croniter
 
 # Optional: For messaging platform integrations (gateway)
-# Telegram: pip install python-telegram-bot
+# Telegram
 python-telegram-bot>=20.0
 
-# Discord: pip install discord.py
+# Discord
 discord.py>=2.0
 
-# WhatsApp: Requires Node.js bridge (see docs/messaging.md)
-# aiohttp  # For WhatsApp bridge communication
\ No newline at end of file
+# WhatsApp bridge communication + general async HTTP (used by gateway)
+aiohttp>=3.9.0
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 8170abba69..86b914d3e9 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -69,7 +69,7 @@ function Write-Error {
 function Test-Python {
     Write-Info "Checking Python..."
     
-    # Try different python commands
+    # Try different python commands (prefer 3.11+ for full feature support)
     $pythonCmds = @("python3", "python", "py -3")
     
     foreach ($cmd in $pythonCmds) {
@@ -79,7 +79,15 @@ function Test-Python {
                 $major, $minor = $version.Split('.')
                 if ([int]$major -ge 3 -and [int]$minor -ge 10) {
                     $script:PythonCmd = $cmd
+                    $script:PythonVersion = $version
                     Write-Success "Python $version found"
+                    
+                    # Warn if < 3.11 (RL training tools require 3.11+)
+                    if ([int]$minor -lt 11) {
+                        Write-Warning "Python 3.11+ recommended — RL Training tools (tinker-atropos) require >= 3.11"
+                        Write-Info "Core agent features will work fine on $version"
+                    }
+                    
                     return $true
                 }
             }
@@ -89,7 +97,7 @@ function Test-Python {
     }
     
     Write-Error "Python 3.10+ not found"
-    Write-Info "Please install Python 3.10 or newer from:"
+    Write-Info "Please install Python 3.11 or newer (recommended) from:"
     Write-Info "  https://www.python.org/downloads/"
     Write-Info ""
     Write-Info "Make sure to check 'Add Python to PATH' during installation"
@@ -312,11 +320,18 @@ function Install-Dependencies {
     
     Write-Info "Installing tinker-atropos (RL training backend)..."
     if (Test-Path "tinker-atropos\pyproject.toml") {
-        try {
-            pip install -e ".\tinker-atropos" 2>&1 | Out-Null
-            Write-Success "tinker-atropos installed"
-        } catch {
-            Write-Warning "tinker-atropos install failed (RL tools may not work)"
+        # tinker-atropos depends on the 'tinker' package which requires Python >= 3.11
+        $major, $minor = $PythonVersion.Split('.')
+        if ([int]$minor -ge 11) {
+            try {
+                pip install -e ".\tinker-atropos" 2>&1 | Out-Null
+                Write-Success "tinker-atropos installed"
+            } catch {
+                Write-Warning "tinker-atropos install failed (RL tools may not work)"
+            }
+        } else {
+            Write-Warning "tinker-atropos requires Python 3.11+ (skipping — RL training tools won't be available)"
+            Write-Info "Upgrade to Python 3.11+ to enable RL training features"
         }
     } else {
         Write-Warning "tinker-atropos not found (run: git submodule update --init)"
diff --git a/scripts/install.sh b/scripts/install.sh
index c3ff5a7931..c97cbc8a85 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -149,22 +149,29 @@ detect_os() {
 check_python() {
     log_info "Checking Python..."
     
-    # Try different python commands
+    # Try different python commands (prefer 3.11+ for full feature support)
     for cmd in python3.12 python3.11 python3.10 python3 python; do
         if command -v $cmd &> /dev/null; then
             PYTHON_CMD=$cmd
             PYTHON_VERSION=$($cmd -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
             
-            # Check version
-            if python3 -c "import sys; exit(0 if sys.version_info >= (3, 10) else 1)" 2>/dev/null; then
+            # Check minimum version (3.10)
+            if $cmd -c "import sys; exit(0 if sys.version_info >= (3, 10) else 1)" 2>/dev/null; then
                 log_success "Python $PYTHON_VERSION found"
+                
+                # Warn if < 3.11 (RL training tools require 3.11+)
+                if ! $cmd -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
+                    log_warn "Python 3.11+ recommended — RL Training tools (tinker-atropos) require >= 3.11"
+                    log_info "Core agent features will work fine on $PYTHON_VERSION"
+                fi
+                
                 return 0
             fi
         fi
     done
     
     log_error "Python 3.10+ not found"
-    log_info "Please install Python 3.10 or newer:"
+    log_info "Please install Python 3.11 or newer (recommended):"
     
     case "$OS" in
         linux)
@@ -179,7 +186,7 @@ check_python() {
                     log_info "  sudo pacman -S python"
                     ;;
                 *)
-                    log_info "  Use your package manager to install Python 3.10+"
+                    log_info "  Use your package manager to install Python 3.11+"
                     ;;
             esac
             ;;
@@ -480,8 +487,14 @@ install_deps() {
     
     log_info "Installing tinker-atropos (RL training backend)..."
     if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
-        pip install -e "./tinker-atropos" > /dev/null 2>&1 || log_warn "tinker-atropos install failed (RL tools may not work)"
-        log_success "tinker-atropos installed"
+        # tinker-atropos depends on the 'tinker' package which requires Python >= 3.11
+        if $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
+            # Report success only when pip actually succeeded; otherwise warn (never both)
+            pip install -e "./tinker-atropos" > /dev/null 2>&1 && log_success "tinker-atropos installed" || log_warn "tinker-atropos install failed (RL tools may not work)"
+        else
+            log_warn "tinker-atropos requires Python 3.11+ (skipping — RL training tools won't be available)"
+            log_info "Upgrade to Python 3.11+ to enable RL training features"
+        fi
     else
         log_warn "tinker-atropos not found (run: git submodule update --init)"
     fi
diff --git a/setup-hermes.sh b/setup-hermes.sh
index e22511b393..e1a9dcb447 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -54,6 +54,11 @@ fi
 PYTHON_VERSION=$($PYTHON_CMD -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
 echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION found"
 
+# Warn if < 3.11 (RL training tools require 3.11+)
+if ! $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
+    echo -e "${YELLOW}⚠${NC} Python 3.11+ recommended — RL Training tools (tinker-atropos) require >= 3.11"
+fi
+
 # ============================================================================
 # Virtual environment
 # ============================================================================
@@ -80,6 +85,34 @@ pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
 
 echo -e "${GREEN}✓${NC} Dependencies installed"
 
+# ============================================================================
+# Submodules (terminal backend + RL training)
+# ============================================================================
+
+echo -e "${CYAN}→${NC} Installing submodules..."
+
+# mini-swe-agent (terminal tool backend)
+if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
+    pip install -e "./mini-swe-agent" > /dev/null 2>&1 && \
+        echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \
+        echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (terminal tools may not work)"
+else
+    echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)"
+fi
+
+# tinker-atropos (RL training backend — requires Python 3.11+)
+if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
+    if $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
+        pip install -e "./tinker-atropos" > /dev/null 2>&1 && \
+            echo -e "${GREEN}✓${NC} tinker-atropos installed" || \
+            echo -e "${YELLOW}⚠${NC} tinker-atropos install failed (RL tools may not work)"
+    else
+        echo -e "${YELLOW}⚠${NC} tinker-atropos requires Python 3.11+ (skipping — RL training tools won't be available)"
+    fi
+else
+    echo -e "${YELLOW}⚠${NC} tinker-atropos not found (run: git submodule update --init --recursive)"
+fi
+
 # ============================================================================
 # Optional: ripgrep (for faster file search)
 # ============================================================================
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 770c542c7b..8b901ad5e1 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -1300,10 +1300,26 @@ async def rl_test_inference(
 # Requirements Check
 # ============================================================================
 
+def check_rl_python_version() -> bool:
+    """
+    Check if Python version meets the minimum for RL tools.
+    
+    tinker-atropos depends on the 'tinker' package which requires Python >= 3.11.
+    """
+    return sys.version_info >= (3, 11)
+
+
 def check_rl_api_keys() -> bool:
     """
-    Check if required API keys are available.
+    Check if required API keys and Python version are available.
+    
+    RL training requires:
+    - Python >= 3.11 (tinker package requirement)
+    - TINKER_API_KEY for the Tinker training API
+    - WANDB_API_KEY for Weights & Biases metrics
     """
+    if not check_rl_python_version():
+        return False
     tinker_key = os.getenv("TINKER_API_KEY")
     wandb_key = os.getenv("WANDB_API_KEY")
     return bool(tinker_key) and bool(wandb_key)
@@ -1311,9 +1327,11 @@ def check_rl_api_keys() -> bool:
 
 def get_missing_keys() -> List[str]:
     """
-    Get list of missing required API keys.
+    Get list of missing requirements for RL tools (API keys and Python version).
     """
     missing = []
+    if not check_rl_python_version():
+        missing.append(f"Python >= 3.11 (current: {sys.version_info.major}.{sys.version_info.minor})")
     if not os.getenv("TINKER_API_KEY"):
         missing.append("TINKER_API_KEY")
     if not os.getenv("WANDB_API_KEY"):

From 07b615e96ed4ac9700ec64d02a71969d8ac3edc7 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sat, 7 Feb 2026 09:17:16 +0000
Subject: [PATCH 40/48] Add support for Atropos Agentic RL environments
 (requires branch tool_call_support in Atropos atm)

- Added new environments for reinforcement learning, including `HermesSweEnv` for software engineering tasks and `TerminalTestEnv` for inline testing.
- Introduced `ToolContext` for unrestricted access to tools during reward computation.
- Updated `.gitignore` to exclude `wandb/` directory.
- Enhanced `README.md` with detailed architecture and usage instructions for Atropos environments.
- Added configuration files for SWE and terminal test environments to streamline setup.
- Removed unnecessary compiled Python files from `__pycache__`.
---
 .gitignore                                    |   1 +
 README.md                                     |  88 ++
 __pycache__/model_tools.cpython-310.pyc       | Bin 7132 -> 0 bytes
 __pycache__/web_tools.cpython-310.pyc         | Bin 8167 -> 0 bytes
 environments/__init__.py                      |  28 +
 environments/agent_loop.py                    | 306 ++++++
 environments/configs/swe_default.yaml         |  33 +
 .../configs/terminal_test_default.yaml        |  35 +
 environments/hermes_base_env.py               | 540 +++++++++++
 environments/hermes_swe_env.py                | 229 +++++
 environments/terminal_test_env.py             | 292 ++++++
 environments/tool_call_parsers/__init__.py    | 120 +++
 .../tool_call_parsers/deepseek_v3_1_parser.py |  71 ++
 .../tool_call_parsers/deepseek_v3_parser.py   |  75 ++
 .../tool_call_parsers/glm45_parser.py         | 109 +++
 .../tool_call_parsers/glm47_parser.py         |  35 +
 .../tool_call_parsers/hermes_parser.py        |  73 ++
 .../tool_call_parsers/kimi_k2_parser.py       |  93 ++
 .../tool_call_parsers/llama_parser.py         |  96 ++
 .../tool_call_parsers/longcat_parser.py       |  69 ++
 .../tool_call_parsers/mistral_parser.py       | 130 +++
 .../tool_call_parsers/qwen3_coder_parser.py   | 163 ++++
 environments/tool_call_parsers/qwen_parser.py |  19 +
 environments/tool_context.py                  | 246 +++++
 hermes_agent.egg-info/PKG-INFO                | 868 ------------------
 hermes_agent.egg-info/SOURCES.txt             |  47 -
 hermes_agent.egg-info/dependency_links.txt    |   1 -
 hermes_agent.egg-info/entry_points.txt        |   3 -
 hermes_agent.egg-info/requires.txt            |  35 -
 hermes_agent.egg-info/top_level.txt           |  11 -
 30 files changed, 2851 insertions(+), 965 deletions(-)
 delete mode 100644 __pycache__/model_tools.cpython-310.pyc
 delete mode 100644 __pycache__/web_tools.cpython-310.pyc
 create mode 100644 environments/__init__.py
 create mode 100644 environments/agent_loop.py
 create mode 100644 environments/configs/swe_default.yaml
 create mode 100644 environments/configs/terminal_test_default.yaml
 create mode 100644 environments/hermes_base_env.py
 create mode 100644 environments/hermes_swe_env.py
 create mode 100644 environments/terminal_test_env.py
 create mode 100644 environments/tool_call_parsers/__init__.py
 create mode 100644 environments/tool_call_parsers/deepseek_v3_1_parser.py
 create mode 100644 environments/tool_call_parsers/deepseek_v3_parser.py
 create mode 100644 environments/tool_call_parsers/glm45_parser.py
 create mode 100644 environments/tool_call_parsers/glm47_parser.py
 create mode 100644 environments/tool_call_parsers/hermes_parser.py
 create mode 100644 environments/tool_call_parsers/kimi_k2_parser.py
 create mode 100644 environments/tool_call_parsers/llama_parser.py
 create mode 100644 environments/tool_call_parsers/longcat_parser.py
 create mode 100644 environments/tool_call_parsers/mistral_parser.py
 create mode 100644 environments/tool_call_parsers/qwen3_coder_parser.py
 create mode 100644 environments/tool_call_parsers/qwen_parser.py
 create mode 100644 environments/tool_context.py
 delete mode 100644 hermes_agent.egg-info/PKG-INFO
 delete mode 100644 hermes_agent.egg-info/SOURCES.txt
 delete mode 100644 hermes_agent.egg-info/dependency_links.txt
 delete mode 100644 hermes_agent.egg-info/entry_points.txt
 delete mode 100644 hermes_agent.egg-info/requires.txt
 delete mode 100644 hermes_agent.egg-info/top_level.txt

diff --git a/.gitignore b/.gitignore
index dcbbb56aaa..3c5ca3743b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,6 +41,7 @@ privvy*
 images/
 __pycache__/
 hermes_agent.egg-info/
+wandb/
 
 # CLI config (may contain sensitive SSH paths)
 cli-config.yaml
diff --git a/README.md b/README.md
index aa603d64a0..45340e8f0f 100644
--- a/README.md
+++ b/README.md
@@ -323,6 +323,94 @@ For extended RL workflows with longer timeouts:
 python rl_cli.py --model "anthropic/claude-sonnet-4-20250514"
 ```
 
+### 🧪 Atropos RL Environments
+
+Hermes-Agent integrates with the [Atropos](https://github.com/NousResearch/atropos) RL framework through a layered environment system. This allows training models with reinforcement learning on agentic tasks using hermes-agent's tools.
+
+#### Architecture
+
+The integration has three layers:
+
+| Layer | File | Purpose |
+|-------|------|---------|
+| **Agent Loop** | `environments/agent_loop.py` | Reusable multi-turn tool-calling engine (standard OpenAI spec) |
+| **Base Environment** | `environments/hermes_base_env.py` | Abstract Atropos `BaseEnv` subclass with toolset resolution, ToolContext, scoring |
+| **Concrete Envs** | `environments/terminal_test_env.py`, `environments/hermes_swe_env.py` | Task-specific environments |
+
+#### Two-Phase Operation
+
+- **Phase 1 (OpenAI server type)**: Works with any OpenAI-compatible endpoint (VLLM, SGLang, OpenRouter, OpenAI API). The server handles tool call parsing natively. Good for **SFT data generation**, **verifier testing**, and **evaluation**.
+- **Phase 2 (VLLM server type)**: Uses ManagedServer for exact token IDs + logprobs via `/generate`. A client-side tool call parser registry reconstructs structured `tool_calls` from the raw output. Required for **full RL training**.
+
+#### Quick Start
+
+```bash
+# 1. Launch VLLM with tool parser
+vllm serve YourModel --tool-parser hermes
+
+# 2. Start the Atropos API server
+run-api
+
+# 3. Run an environment
+python environments/terminal_test_env.py serve \
+    --openai.base_url http://localhost:8000/v1 \
+    --openai.model_name YourModel \
+    --openai.server_type openai
+```
+
+#### ToolContext (Reward Functions)
+
+Reward functions receive a `ToolContext` with unrestricted access to all hermes-agent tools, scoped to the rollout's sandbox:
+
+```python
+async def compute_reward(self, item, result, ctx: ToolContext) -> float:
+    # Run tests in the model's terminal sandbox
+    test = ctx.terminal("pytest -v")
+    if test["exit_code"] == 0:
+        return 1.0
+    # Or check a file, search the web, navigate a browser...
+    return 0.0
+```
+
+#### Creating Custom Environments
+
+Subclass `HermesAgentBaseEnv` and implement 5 methods:
+
+```python
+from environments.hermes_base_env import HermesAgentBaseEnv
+
+class MyEnv(HermesAgentBaseEnv):
+    name = "my-env"
+    async def setup(self): ...            # Load data
+    async def get_next_item(self): ...    # Return next item
+    def format_prompt(self, item): ...    # Item -> prompt string
+    async def compute_reward(self, item, result, ctx): ...  # Score with ToolContext
+    async def evaluate(self, *args, **kwargs): ...          # Periodic eval
+
+if __name__ == "__main__":
+    MyEnv.cli()
+```
+
+#### Toolset Distributions
+
+Configure which tools are available per group, either explicitly or probabilistically:
+
+```bash
+# Explicit toolsets
+--env.enabled_toolsets '["terminal","file","web"]'
+
+# Probabilistic distribution (sampled per group)
+--env.distribution development
+```
+
+#### Tool Call Parsers (Phase 2)
+
+For VLLM server type, a parser registry extracts structured `tool_calls` from raw model output. Supported parsers: `hermes`, `mistral`, `llama3_json`, `qwen`, `deepseek_v3`, `deepseek_v3_1`, `kimi_k2`, `longcat`, `glm45`, `glm47`, `qwen3_coder`.
+
+```bash
+--env.tool_call_parser hermes  # Match your VLLM --tool-parser flag
+```
+
 ### ⏰ Scheduled Tasks (Cron)
 
 Schedule tasks to run automatically:
diff --git a/__pycache__/model_tools.cpython-310.pyc b/__pycache__/model_tools.cpython-310.pyc
deleted file mode 100644
index 519e30120efd8d130760c2cd9b98a49c3ea6fb0d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 7132
zcmc&(O>i8=6`q-$U9DC>mW8k-8H=`LEG@_qkRpK%kQm#5i7^!Br`Uwr8f~|B2hGkd
zJu{ZI$SJB6DiuN%6y$~->>NU<3YVO6$}!g*Qn}0_2a<~~gyfP6fspULnb}?8pGu`t
zm6f-*XS(~n*RS9E-h1u#*jUNH=lJ2*n!hO<#-He6@H32uZ{r(0VHt)rs)jVB<(X9z
zzd6rp<*K=sUA0^JYTl$V+bgt&sza?}wb&Z24!26x65i)kUggw?ve&H{W5!swWI+!7
zz^;y7XQU{HF)}7gYDA9oS7GMJdO?oLF^rDiV02vW!RQ{$C2l>A#1%Xz<X((Uq@#P&
zx0CV~yq#3H$f*tMZBs7{8){14x?`I3yN`b-cZ}+-c-qfTw_e_N#W-QSbl`hdb$@kQ
zP3yPhv~S9p>I~-0s+so;c|h$)dmF|s$%7c1d(XI3pw+MZP>q4wEN0%W4yf536Z6ES
z98;bJ)g#s0)NS(6j3MV|jOxL(zkvQ6)*My`neLpl<Q=bLU+)>9GPh&OJ2q_ndwB%B
z7Qrjeyndx7!0UFbTVndRr;;y2a#1~`4$1P2p+C8f^n$q@lHi+3p3HvR3?@MDPI=em
zdAwPkF|J?-mn`P@XN(+WPE&Fv{asCe@1|dAo#k)&9_KU+s`O-ADo;GyZhL`1Kk`(m
z^lZ}&M2jCpz3m5~j_P3`LQE1ty{TGGARJ$cCK^xaK&-d5IChGbfY!06Pl>wYdBIBQ
zR4Ck5r>)b)n5f$=an8|hJ4)yUDoi;gBs;o&&Xo!eBJ$mJS4rXeq0;M4U6qK9RO_zq
z`VE#QS)<OYdKA*T^~kS>ZrjiJmySnTD?jwML=bgYkV@&OctNeD%a;P>=z4QmsPmzA
z>fy4eYiH9dCxULOwHj?YSm|t)O3wvOLsd!wAKnJ<HW^QMDH<x|7qvu65+ztp)l|!b
ztC4b;3^xnnG3bDGJFtpX+S4abWJ$zB*JZL)9whiUpUYRYF(5S0)hdX*5cHB$+%4YH
zJncPE4)17wS-d*`N~HAGd_~MZy%jdwKHklHZp#hv>T4IurK<#6XUe(QKH=8GICsq7
zitW>G5SHzD&sXTh6Z9h0I&+A}=?$wtd7gr$)i-LPbI$d)YEH+kZK$odpj8-YzfQac
zKVx_}j&JZwG=M=%=A<q21alWH2}qMAIYQudF)zpQW>3SE6Bl!GFTRs_HYMN(D{hth
zWZpI9e$1b~NEBw|Y{QZV<ZYOF5HshlS$Vq@%<B;I%FFrmcR?P;$~)wOJOWCKSiiJ;
zZ6kNqD3`k*@PXXW44{|PTwPy0)lvSjQ;PvCwxjAH`~zOma>A9q0X)MR3KCN#bEPx*
zE}h9s74D1ny2yy84#lPlXK!_!&|UMC*mT2Y%8WdksmI04hIJlV?tXNZy+?#i%!iQV
zpRS+CLtcn~j=u%jJ8peNq+$uJJax|TLv+XjgM?M+hMt16Jon6Lv_PE@ek%gs?Q~R#
zH-tBiA8Z0{R>X4wA%p~#IuwwYDK^r9$Rq=8L#w9p1NWTD*sR2b_S(x(SzG|10WG>m
zo`u}$Vm56^D!g`kLqr{tJ>6E>itTW#qv8?ZQeC?p(p5Y}u+&hx`@oaVdAAj{gderm
zlqTI~8w0t-TE!Av+refl;=X0ka?V%8J$IL{k`k|))KCgg&TBGaJ&q<GN*BaKEx;Yf
zqm%G~IC+*f&a?CBeY7^mYvU5|v!itAsvs_E^-APwCCMXwr==3grH-TV089<aR>G0E
z>jIkY8z&PSWOZxhNLIpn<axa#VeAWFdqC$pAA1M5$mhlfgUNgdz(U4ksY1u~7{~jT
z!9-v(w3Na}gwqY1x*au|>4~hwc}Ht!izVz<$b?C?WFB-(DRS(VY6&V5_a$?n1&#vc
zvYn_V?}n-sBswT!;wgHJ(bF5Uz3InxGi-U?ub)r>=}fevuoH!}6F3)dC}oMU%sQ9a
zYzvN8&h&39SON1OtBJH1#D#=}o+l*_k&-_=PB8`%4hje$RpJ0WmAYxbrQktZMZ6=^
zm%TlQj)rpka-{x%0F2pWmY(M}lBnkcJR1&J46g4v;GnPai8yuL4#1TB0)nk{zd<gt
z<^+7cM4s_kEY%$!oa$aAm4L&k^GUZEhMl0Yx?1l3^lWA!yS78#x!Ht5Gg7PBK}G=u
zaO|PKfw~e)YNfHVEEXRn9dxu?r%3~1FJg>vQ?~0-3tI+w!ExEDXehI0Ixs|cKm^%h
zuvm`s*pyq3iy4S?*G@2ez{FCCh|p27$nzH20D_3qknH$DKU<xQOQp!_9N+P_0%TBq
zr)MIWCF(<D3FPJa7BukA91JBM9aL1DXEf7CFyTW(qb?X|KD&3d*>0)TP;K~b)LMO7
zw+YvR)yI`?so>}_I{ejv^aSZkJPnhn!QigRPY!~1pwU>vmSvgv<}B0xmz77K|LlL_
zZ_NIy{RQ66WOEA1*yq*|^)U92Yv*Kih*~z^{<}S4e?qkE-|4%+c2^`PFwEi0k$eiB
zhH=5zHZB{Xxq~#<L?02;bneH*UeH83C7g3OScd}F%`>SiGo-M{1UT2t!kTaz4Xwyw
zlI(O>a7;Yp8eYg?k?9h>ezKXm20~jFx|hWo(1hG(O_7BuU&4XeUsErF(`gpIPmrnJ
zG@2hrr1O9U!b|b-=DJ{lK&C<>0qehP#(7RH2dwN)Xg;1Ii=mOhY&O71K%v4ejppnj
zy+Sj`GX2#k%nUpNsDe7GjNXTeS&DjmzQ09h8@bwsr4MeKmr<6;+!gDB#eHO6=;uOH
z7D7u7ZRd8Z9eU1fThu>GD~nk9FwY)t7?^E`_O>17cPxGHwvGHPPvx0!!iH>~HFCwg
zkPi#n_BQ4)g;7v=k;lfEcY*ukm^Z||J#qqci{a3AVS8x1xPyExEM6X#d*$Rdm1UM|
zUNK>jw{-u;y1hZ>O{kYy!a$;S4C?7;X}gptmXM^BE8+|kN^vjEMfy2C0cci7kn#0a
zlg`BwWqVi-4vdRQiAF@Sc`WYr4Gf$s;?b;>O6vrad{3Nx>WoNoCtk=&Ag3R(q4(o3
z^#nD0shOmP?1baq8NEPVihc}fdXaid)Eq_==atrN-QD+`zu_Ymv$8TlE23PAbFjrY
zhp3Il^_g*8auG6ukaED-Ci_qL5tG<{IY5q<mr<(|l!x^yak`rtN|q8?IUciE1hNAX
z^}&|iU^=aj2s<;GxP*HH%L0L>I5KLEniE#ZELx-HEPj(-32#TushqB0(k?f!@q&^r
zc7u1&>CTZOoHO)D2*chscdQoxbka(U?1BY1fZ=CVerR1rakI_FNAqkWw6`8NjSD#w
zlw_V(!EN$z6uKF(Z@#`@ylBXw3%T#ZxRc*abJHjnok!??fNbkY2ZcwrcQ!jA-2jyW
zDl}jg#dD-?1e#p;6RbHM%0#tGxd)?54<(u6Zq!9sM&xpR5ZG+G^=7t0IDM{b%4_?u
zY@&@^pvOO8dSuzen}(@B$UZqgy&{rp4QB&v+)yYdpwa|ogcQEo%y=gBsQST<L>-G8
zlyeZ#7O8=_BuPkmEAij^b5Em?;{WZ-ti@*>7ll<Hk<PSzRq54j?4x2g4j>l`LMjue
z>cMK#43a^lQk1<i(GPP8PB0j7{yUkud8UAhh{z*8zVxQh_mVeEoFqH|rR-woGYe2y
zF0d!XdDxX7>iaQMKY%74KAEwMa|D}q-{Iq77Q5C88nLOa32~`!XM(U_e*}`jG#YD?
z{JAtdZcdov)|5GJ&YH#7tS|H<7_8&l`%v0K`(uBeLr0pSv4iAAT0ev5N*iT={-W)g
zl*}Mp6uQ4)-B9W}kj)Bbjg0Qv7)~hxn?20X)xcmUjMAhdIk>M@`~?3MacZCzPe?0g
zz`K$xg^n!Eo?Oojw-P>d>&2ww;Vfwk^f$4yZsi3;vZQM6*XjM7IjPjKhASH`7ubJ@
zV0Yo!W6z&D{bCIl3bpT?d{HR>oU7YDW$t_{NC_M+i-vpIoIVPcNzFEzma{ZD_|Viv
zj39R@X6W+Ov)xDOOOFsIGNi^oU|B$M$|~9>r>>^`x&5VG(vRciu2aRcLCFiBD#3Mf
zsxAe5tNSBX&vCk=67D)5+MtA7G4$B?&_y^JK%#DXU6UN%Sw3WPtieOW#m^uYmO+z%
z`cZ27v5R{gz-H5XG&%vOWN`UatO>~V3Pt-%eF~kpSgR4v)oRfJJbwEAdzb(6>$eaB
zuDK@eE<N-gexg~X^zM(7Yi8K|Gs(>aA7v%VVff>VuP48k-t1aJd}a#$Ac-@4EKBUW
zQ%|}91I*wKBFZ~TpT7T-4@7TfdYiq}Ekf-fs`HAb)6*md+fqa@B*drhzx$@R`ZW68
z+{(&|z6U*)74+Wyc~95@FRJJhG<%YQ;;uc2uC*-mNg_a13kbY(1-DdmF>>;}Lpge-
zd*Xj_Z`=iEw)h)f1a}`3=>^n{?sgZ+qZ1Eiw?L)*>LOn$Emp*0a<{v9p<G^whiWxc
z*0maAZCt=D7<^fiPiQ(cO{SArA0_qu{EMAae~aExiN{p}V>d%5ds=L-!617!M}-6U
zJVdiejhf2RcqnylRy*C>5$-W2#5qsl&W6V0VS?|Z^sxDg?lB@jrybjbuE+-4NE)x_
z8^s4vCgcA+9wr12PNT8S;?$Hmotw?gK0mjAZeea|-&ASTGK;yQS-^J!Dcxc8rpzLz
YclV}mC(Jpdebc7(TG1-<nEl6p1AWmU{{R30

diff --git a/__pycache__/web_tools.cpython-310.pyc b/__pycache__/web_tools.cpython-310.pyc
deleted file mode 100644
index d20f5fb5084052b61d72168f359c930e6644f8e3..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8167
zcmds6TW{RP6(+gdi&oNFmM`%&9m`3y8|#ubPTFkhqQ1zfQ`<3YCqd+>3u<RpqQxaQ
zLsFORDoBy^rHIi2Nr3{XVf!&f``rG6KJ*{(F?}dp6sTYNVzfZNGbC4(EMJf&MNuwx
zNDk+6=FB<gJLilyJY3dr{ch}oMfX`v`!l^{uK~P#AAkS2u4!DG)ws?LN1xTz-Iz7h
zJvW=fJ?EJ1{A^yQHl|Z(7iWvQCW^wiUE=vWIxpPNX9svulzB;%TP7di<&VtSLH;}+
zLfx<!QuPI1;X6Lc&kplrd_?3$dB4VYCVdKgl<&f~3ZLM6P`4NVr;=|azK`$6w;dQ|
zq-F2}{2)f(DMtCBB}4vdKoh(8;SY6Cw2MEj?mIu!{4U<Uk<_x}enj1O^ReXpXmWoh
zxj&oSpG)rJ$-SD~Yjo$v74@%8T7SxETKP&~x!iI*SFpFmJiF?7j?XT5Jak04e09<G
zSzEoZj`WsoE_}8iTp{fyTS4<cH4hf8fUS6PiLKbdB5Q|EV0RqB=B?(EaCzEQ`cvi8
zp_IZ6oHZ8u80)IFY~$JTS+Lk|Kf&h1KvmdoAY{vG3f8uk(A*A~u>3Vl%ls9qQ^wsx
zN4YTrbL=I-&e~EmrM2QHf;^1YT(sN;+g(t@wnDcV*q+OLd%?AWPzq31KDlh!jx`Uu
z)vD^{Nm^pV7nW=;HmI)7uBaC^2ra+mNoKn!+ZK(N^bo6ow3<QEWhQ=OO%IE41J;sW
zoB18lv|9-iJJy2mlfH`Tr03~)1!CT}11w(gw%^fq{Xm9l8GgBZD&cg~Yj;4(yzSV*
z8m%B^aGfm%LC3F8Pjj#7PX&tWQ)oPmCVggYY1K-ZIK{>CbW6mHnvVHVF2Cu6?0T8u
zQcEIU#o}PMJD!9<Y-Zkss>t#tuGv_z8cP*Nw=h~&VwWITK0C2><m%X^wO|q3*|wUC
zHU!zhF1fVXjy*3eX$yaB0?%zbIPqdFJIl6v^XBFAK4Yp`x@Yl}S=y`D#!{AwRjUnY
zCw7j_O^{4L8cvM0ExE+K6?bgYjPagYv;^AKcoy2{2Dbt$S=$Cns$<#FC)n6o8~Q|K
zLBO)2hgt5DeY{xHBlEoN2e=Q#n}6D|A$L*b>dCjxoqwls^3u7+4`<%N_oCIY8%ts>
zG6O6vDo6oUaGNlw{maBt9{<z$`&ATNUx%SxHv;Xp-h*lFX?Jq#IrV1XEk|#?+!d{6
zS`h|P>$@!4-ZE*oFJ3;*gxmDE;LL85*vGO%^OI6b!9yr};K6KQl_Asx)`>NpFn}SL
z+Soc}nEGnm0#kg|QmU!^hs<TWY#HL<g_u_Dk{`?OB&!nl>x!i5j=S(`OzXtd)Kt8C
zn_FD(h{rYYmluw$ww-MT%FJ{vSnG(nOdD1uB`15;vB*p=3K+?Z{G5~v3CW~(#I-uR
z3d5=RmkKNR*n|eSid)n!*#z3$Oz+dw2K%7;D%+B5P_i*nbR1Y-9@`!Dyy!VxNMDWD
zJ@#&vY1e-A>fEt(CD%?)&RUb*7baghHhp65*t@mvcy`r~XxiO-?bOLDGcO-+oV#%H
zwVB4{nG0{cHFIXJW<-V*QO*zNYo;tf5TgSzxeX${q1ff)MsngtmE>FcKBO2Tc!L)E
zxh97(!2K7d7rnNa4#bjchwbT0(z`C2fj|8NA=|>AJPDi&ru$ZHs<RgDQo}To!DBwv
zNHfS+F-J|alB?uQL+-%qT~eDU|6VBMT9jADr)i)ny=V>8O=;F|@X(zMpj|!fGr)6C
zN13~sC%_K$>qbw5u9-Z4BS)IY3qh`D+%|b}LAzsoNNr0ubE<9rdZA~K19QWGRu+4x
zeOtStnOaa<yQ*vJrml4l^1)!BXHvhL#(Ey(4fXQ$9FK6jz=wNzTfc*z1??S;SJut<
zO*jqezM`*awH;Q6TnCb^M*|{2p3Jb-N7fiT7;F$!R5=F^RqDRKVf8(Z4R?Tm7hA)U
z@stD7v|QzcD7utDZ*@fcDYm`;LH-7HpBLm`kViu9MMv-qb(?ih?JKJSgKM!0CX~eW
zP(jD@mRQ(P#1NyD1h6WWfDRX|Rl6OwnH#p};k|e*SiTtClo2FNg-H@E3xHQ=FJ&yi
zWo4r?Y77eM?DZ>eTx576W-D;875pVMrDH3eke}7^9LHOs#<33p2Pig#M5wxc+^0LO
zVPk4(V|D5?vCZdX7Bsl_;8z-Dn=#YD#{xTW#6!pKHw&fn_(q8xe0)Q{=}DTj<#<-G
zJ<aRetdyt$Fg(~{ZgbVTdR4_`%Z4G};PK3AQ*_8<tg~~iRES$}KYa$lChY($Gx*-F
z7vOVDTzD8(;@+)KZzM`b`$xM;dH{W>v+-_uoK=;e)!BW$7L_c%Y`L)EwVd3E&e34o
zT5Tj76%EDW(cmCuF(g~`#jk(L&H{Bgi;79BZYdSbyJWpS*P{Z!7M8KQXGTg88xKOX
zfQ~x*2YC!%-29kDc|qSV*2+=71HlZU;n?kLG-H3H5ciC7j&P&gf(Rn>y6?GBo`>y@
zuWV>E&}S4(KUA%tmLW%JEhSLc@mya-gE6fQ)m<0SfSS3{_7{|)kBbWR*+J$!SvL8q
zucOl6hr&3#S0Bk8GAer6sGzLq2lUbh#=qqA)Xjw4`B5SE88%sQ3PXu=%8LIL58Vr7
z#RJ%JWtWY_E*sb3DUrVfyG$0`L<_Ru`Mw1R^!&uDW8$(JHK&r5fVb?QTJu<m5_
zmE|7nnXvxKe)HiNFW!4Dh*1FHDrPGH1qvAy2!H|ze}MucK!IX{0wvJ2vsc=L0;9cB
z1_gGl7vC=e1=L-kz;5ejDGJOyS&Z`l8~}|eQle1oD}#j84a$s7@SyZCg^1Xv`5NJY
z1cp_~55ESvorB4zXEO9Ex`35W$lDYa_U%V3X(_y%5k5jO&fphz095J6jR=E`2EviH
zgD}TacExhOEM_EhVY-x&{~>H7h6Hx@OW?v28T=;%4@6mwC&q?N4g33&0+G7^SFj-?
z#ZQP2pM1n-617Ua0a)ue%<2|N{HlJfX2@eOR&oMGcn}3y{-jrC=T%%UpThV11w<qW
z*P<MD<o@8B?vjhpmPUj1-Qa7ag+;Vx?B1<UZnFCZtKJ`?)C(y&0*9I;TPydY=pKmS
zBVFGHsdXlaB6$b}&_fdHNk+xfD57H0Lmr{3F)EHyK|Z&9h6<z$wc5bO0_Ah4py-x7
zEICClm1KJxz@CL*QI1nT3U4L3^IK74gis@yR;4&r(a2{gj6=Ki3SeU-H>8i~`}B&j
z1wOuwMo}ICgWZg09?IrzfOUkuO_<UCHHfD^qQG6(y0bl94j?ihKOcDF0!1j|<|f4o
zy&P<I?E9-Yf#-XA=OCvjpm@UsW*{~|v|ti$^s%O#U`@YPVNJf5N2@_T1gxP5gWiUD
zPGL<E<5YSDP_Uy{L^+aR&5H_aOumzk0&7aZno<U9egdo+P*}4ISTm4dO&MeD?v*!T
z&7NL4gEf2C2i_k5)~LI}ny0KkrdYEjgYt;9*aL8*@+#!2e05+$shijlLkcv77iBNL
z?g#{mm;wZ)04OLWeWWysS|_U#Ikt=B2^WMAVl{qD^HwoJkSWW`WIE=#On#9dhpkYO
zHywOBQHO-Z=g(ijAc1fk8&NYdIMvNCM3X2{Pe(sPa!+uW>OVYy>X_1#F@<KS=L$sY
z$3e;WFN<u7D;ewd|AK4(D^S(fuEzmYcTcReS+PU(a$g^p--PuGDO9<4>zBX(`yYPJ
zPNxcXcQ<Og`(G1C2|7+DrI0KiX(2N0DmIX>;3*nTO?DjM455x-PK^jLP`D;1sgLS7
zO?`^VXnh#__!mrmi`u+M#Rh!si{Y!kW<UhxbGHRAg<E;mGzPLqL)TzT53NDgiKv}f
zMyV8*f;9OOeJl|!H((6p%hch~NLC>vQ-mlS`z?C-<O2|njp}C3&<(vn|9>7TJ`~eV
zp+gh@O&5iIr}6hG&I*vUx{VXKJNkz@Qo%;|Q)LlPFN)?8vs)@^jdRo4Sa%~!ggmWq
zmu=~}ZSq|xXG`ZYIHVy%qKXIdAoJ>c=Bf+@&5#M95k60_vz7z%sLqO35Q?iIB+*>U
zMJ9EP3=a_=qEJkn!GB5=N?KC1yGhhHk{CA0oc{%1e2R-JMR}H5l^Tt<WxI_=$nbja
z7R2Wf2W;Kyi6^{5l1>V%Hz)D8WmB3o=zi}KQoL9;IKbAtu%B3{!|JLNn$3fq7^g;)
z$UIEqNKK4)kCL*boV1XLN|7UGf;vCC3lBvLNiRsZwrpyPatR?x`@LJA-6nNbXN>VF
zkrWnRQ^<moz$4uq<(c>#1&{)Xu6NIUW#>xWX_h82wmCc+PY#cQFz_H}jz3=OzQp>`
z&vr*Y<0&IDhcn}IweBf&BYX9bvz+m4^9eSt&T?=d7m}2#1H|!KO_w9!S53cIGcL;S
z(Bqz{*l3_xqY=BiXHY3iD53(+58!-Al0dwuQ$0z}q@1CG?2ht=<77$>2(9uNlDtw$
zYAi`*`;zNgZGlP1;f_qPY6sgYLg~Blf$;Ze=Y29zrd~Q+(o6afej~Zj+`c1+rVbsi
Xyjm(7dMQ`Z3-~8LqyL1q=HLDe`A_ZV

diff --git a/environments/__init__.py b/environments/__init__.py
new file mode 100644
index 0000000000..5f2fb6c776
--- /dev/null
+++ b/environments/__init__.py
@@ -0,0 +1,28 @@
+"""
+Hermes-Agent Atropos Environments
+
+Provides a layered integration between hermes-agent's tool-calling capabilities
+and the Atropos RL training framework.
+
+Layers:
+    - agent_loop: Reusable multi-turn agent loop with standard OpenAI-spec tool calling
+    - tool_context: Per-rollout tool access handle for reward/verification functions
+    - hermes_base_env: Abstract base environment (BaseEnv subclass) for Atropos
+    - tool_call_parsers: Client-side tool call parser registry for Phase 2 (VLLM /generate)
+
+Concrete environments:
+    - terminal_test_env: Simple file-creation tasks for testing the stack
+    - hermes_swe_env: SWE-bench style tasks with Modal sandboxes
+"""
+
+from environments.agent_loop import AgentResult, HermesAgentLoop
+from environments.tool_context import ToolContext
+from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
+
+__all__ = [
+    "AgentResult",
+    "HermesAgentLoop",
+    "ToolContext",
+    "HermesAgentBaseEnv",
+    "HermesAgentEnvConfig",
+]
diff --git a/environments/agent_loop.py b/environments/agent_loop.py
new file mode 100644
index 0000000000..7e9453b606
--- /dev/null
+++ b/environments/agent_loop.py
@@ -0,0 +1,306 @@
+"""
+HermesAgentLoop -- Reusable Multi-Turn Agent Engine
+
+Runs the hermes-agent tool-calling loop using standard OpenAI-spec tool calling.
+Works with any server that returns ChatCompletion objects with tool_calls:
+    - Phase 1: OpenAI server type (VLLM, SGLang, OpenRouter, OpenAI API)
+    - Phase 2: ManagedServer with client-side tool call parser
+
+The loop passes tools= and checks response.choices[0].message.tool_calls,
+identical to hermes-agent's run_agent.py. Tool execution is dispatched via
+handle_function_call() from model_tools.py.
+"""
+
+import json
+import logging
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Set
+
+from model_tools import handle_function_call
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class AgentResult:
+    """Result of running the agent loop."""
+
+    # Conversation in OpenAI message format (the list given to run(), extended in place)
+    messages: List[Dict[str, Any]]
+    # Return value of server.get_state(), or None if the server has no get_state()
+    managed_state: Optional[Dict[str, Any]] = None
+    # Loop iterations started: turn + 1 on any early return, max_turns at the limit
+    turns_used: int = 0
+    # True only if the model replied without tool calls (not max_turns/API error/empty)
+    finished_naturally: bool = False
+    # One entry per received response: extracted reasoning text, or None if absent
+    reasoning_per_turn: List[Optional[str]] = field(default_factory=list)
+
+
+def _extract_reasoning_from_message(message) -> Optional[str]:
+    """
+    Pull the model's reasoning text out of a ChatCompletion assistant message.
+
+    Providers expose reasoning under different attributes; they are probed
+    in this order and the first non-empty value wins:
+    1. message.reasoning_content
+    2. message.reasoning
+    3. message.reasoning_details[].text (OpenRouter style; entries may be
+       objects with a .text attribute or plain dicts with a "text" key)
+
+    <think> blocks inside message.content are deliberately not parsed here.
+
+    Args:
+        message: The assistant message from a ChatCompletion response
+
+    Returns:
+        The first non-empty reasoning text found, or None
+    """
+    # Scalar attributes first, in priority order; a missing attribute and
+    # an empty value are treated the same way
+    for attr in ("reasoning_content", "reasoning"):
+        value = getattr(message, attr, None)
+        if value:
+            return value
+
+    # OpenRouter-style list of detail entries
+    for detail in getattr(message, "reasoning_details", None) or ():
+        text = getattr(detail, "text", None)
+        if text:
+            return text
+        # Entries may also be plain dicts
+        if isinstance(detail, dict) and detail.get("text"):
+            return detail["text"]
+
+    return None
+
+
+class HermesAgentLoop:
+    """
+    Runs hermes-agent's tool-calling loop using standard OpenAI-spec tool calling.
+
+    Same pattern as run_agent.py:
+    - Pass tools= to the API
+    - Check response.choices[0].message.tool_calls
+    - Dispatch via handle_function_call()
+
+    Works identically with any server type -- OpenAI, VLLM, SGLang, OpenRouter,
+    or ManagedServer with a parser. The server determines how tool_calls get
+    populated on the response.
+    """
+
+    def __init__(
+        self,
+        server,
+        tool_schemas: List[Dict[str, Any]],
+        valid_tool_names: Set[str],
+        max_turns: int = 30,
+        task_id: Optional[str] = None,
+        temperature: float = 1.0,
+        max_tokens: Optional[int] = None,
+    ):
+        """
+        Initialize the agent loop.
+
+        Args:
+            server: Server object with chat_completion() method (OpenAIServer,
+                    ManagedServer, ServerManager, etc.)
+            tool_schemas: OpenAI-format tool definitions from get_tool_definitions()
+            valid_tool_names: Set of tool names the model is allowed to call
+            max_turns: Maximum number of LLM calls before stopping
+            task_id: ID for terminal/browser session isolation (random UUID4 if omitted)
+            temperature: Sampling temperature for generation
+            max_tokens: Max tokens per generation (None for server default)
+        """
+        self.server = server
+        self.tool_schemas = tool_schemas
+        self.valid_tool_names = valid_tool_names
+        self.max_turns = max_turns
+        self.task_id = task_id or str(uuid.uuid4())
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+
+    async def run(self, messages: List[Dict[str, Any]]) -> AgentResult:
+        """
+        Execute the full agent loop using standard OpenAI tool calling.
+
+        Args:
+            messages: Initial conversation messages (system + user).
+                      Modified in-place as the conversation progresses.
+
+        Returns:
+            AgentResult. API errors and empty responses return early instead of raising.
+        """
+        reasoning_per_turn = []
+
+        for turn in range(self.max_turns):
+            # Build the chat_completion kwargs (the full history is resent every turn)
+            chat_kwargs = {
+                "messages": messages,
+                "n": 1,
+                "temperature": self.temperature,
+            }
+
+            # Only pass tools if we have them
+            if self.tool_schemas:
+                chat_kwargs["tools"] = self.tool_schemas
+
+            # Only pass max_tokens if explicitly set
+            if self.max_tokens is not None:
+                chat_kwargs["max_tokens"] = self.max_tokens
+
+            # Make the API call; any exception ends the rollout with a partial result
+            try:
+                response = await self.server.chat_completion(**chat_kwargs)
+            except Exception as e:
+                logger.error("API call failed on turn %d: %s", turn + 1, e)
+                return AgentResult(
+                    messages=messages,
+                    managed_state=self._get_managed_state(),
+                    turns_used=turn + 1,
+                    finished_naturally=False,
+                    reasoning_per_turn=reasoning_per_turn,
+                )
+
+            if not response or not response.choices:
+                logger.warning("Empty response on turn %d", turn + 1)
+                return AgentResult(
+                    messages=messages,
+                    managed_state=self._get_managed_state(),
+                    turns_used=turn + 1,
+                    finished_naturally=False,
+                    reasoning_per_turn=reasoning_per_turn,
+                )
+
+            assistant_msg = response.choices[0].message
+
+            # Extract reasoning (None if absent) so there is one entry per response
+            reasoning = _extract_reasoning_from_message(assistant_msg)
+            reasoning_per_turn.append(reasoning)
+
+            # Check for tool calls -- standard OpenAI spec
+            if assistant_msg.tool_calls:
+                # Build the assistant message dict for conversation history
+                msg_dict: Dict[str, Any] = {
+                    "role": "assistant",
+                    "content": assistant_msg.content or "",
+                    "tool_calls": [
+                        {
+                            "id": tc.id,
+                            "type": "function",
+                            "function": {
+                                "name": tc.function.name,
+                                "arguments": tc.function.arguments,
+                            },
+                        }
+                        for tc in assistant_msg.tool_calls
+                    ],
+                }
+
+                # Preserve reasoning_content for multi-turn chat template handling
+                # (e.g., Kimi-K2's template renders <think> blocks differently
+                # for history vs. the latest turn based on this field)
+                if reasoning:
+                    msg_dict["reasoning_content"] = reasoning
+
+                messages.append(msg_dict)
+
+                # Execute each tool call via hermes-agent's dispatch
+                for tc in assistant_msg.tool_calls:
+                    tool_name = tc.function.name
+
+                    # Unknown tools are not executed; the error is returned to the model
+                    if tool_name not in self.valid_tool_names:
+                        tool_result = json.dumps(
+                            {
+                                "error": f"Unknown tool '{tool_name}'. "
+                                f"Available tools: {sorted(self.valid_tool_names)}"
+                            }
+                        )
+                        logger.warning(
+                            "Model called unknown tool '%s' on turn %d",
+                            tool_name,
+                            turn + 1,
+                        )
+                    else:
+                        # Parse arguments (invalid JSON -> {}), then dispatch synchronously
+                        try:
+                            args = json.loads(tc.function.arguments)
+                        except json.JSONDecodeError:
+                            args = {}
+                            logger.warning(
+                                "Invalid JSON in tool call arguments for '%s': %s",
+                                tool_name,
+                                tc.function.arguments[:200],
+                            )
+
+                        try:
+                            tool_result = handle_function_call(
+                                tool_name, args, task_id=self.task_id
+                            )
+                        except Exception as e:
+                            tool_result = json.dumps(
+                                {"error": f"Tool execution failed: {str(e)}"}
+                            )
+                            logger.error(
+                                "Tool '%s' execution failed: %s", tool_name, e
+                            )
+
+                    # Every tool call gets a matching tool message keyed by tool_call_id
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tc.id,
+                            "content": tool_result,
+                        }
+                    )
+
+                logger.debug(
+                    "Turn %d: %d tool calls executed",
+                    turn + 1,
+                    len(assistant_msg.tool_calls),
+                )
+
+            else:
+                # No tool calls -- model is done
+                msg_dict = {
+                    "role": "assistant",
+                    "content": assistant_msg.content or "",
+                }
+                if reasoning:
+                    msg_dict["reasoning_content"] = reasoning
+                messages.append(msg_dict)
+
+                logger.debug(
+                    "Turn %d: model finished naturally (no tool calls)", turn + 1
+                )
+
+                return AgentResult(
+                    messages=messages,
+                    managed_state=self._get_managed_state(),
+                    turns_used=turn + 1,
+                    finished_naturally=True,
+                    reasoning_per_turn=reasoning_per_turn,
+                )
+
+        # Hit max turns without the model stopping
+        logger.info("Agent hit max_turns (%d) without finishing", self.max_turns)
+        return AgentResult(
+            messages=messages,
+            managed_state=self._get_managed_state(),
+            turns_used=self.max_turns,
+            finished_naturally=False,
+            reasoning_per_turn=reasoning_per_turn,
+        )
+
+    def _get_managed_state(self) -> Optional[Dict[str, Any]]:
+        """
+        Get ManagedServer state if the server supports it.
+
+        Returns state dict with SequenceNodes containing tokens/logprobs/masks,
+        or None if the server doesn't support get_state() (e.g., regular OpenAI server).
+        """
+        if hasattr(self.server, "get_state"):
+            return self.server.get_state()
+        return None
diff --git a/environments/configs/swe_default.yaml b/environments/configs/swe_default.yaml
new file mode 100644
index 0000000000..3477e4b330
--- /dev/null
+++ b/environments/configs/swe_default.yaml
@@ -0,0 +1,33 @@
+# SWE Environment -- Default Configuration
+#
+# SWE-bench style tasks with Modal sandboxes for cloud isolation.
+# Uses terminal + file + web toolsets.
+#
+# Usage:
+#   python environments/hermes_swe_env.py serve --config environments/configs/swe_default.yaml
+
+env:
+  enabled_toolsets: ["terminal", "file", "web"]
+  max_agent_turns: 30
+  max_token_length: 4096
+  group_size: 4
+  terminal_backend: "modal"
+  tool_call_parser: "hermes"
+  tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview"
+  dataset_name: "bigcode/humanevalpack"
+  dataset_split: "test"
+  prompt_field: "prompt"
+  steps_per_eval: 50
+  total_steps: 500
+  use_wandb: true
+  wandb_name: "hermes-swe"
+  system_prompt: >
+    You are a skilled software engineer. You have access to a terminal,
+    file tools, and web search. Use these tools to complete the coding task.
+    Write clean, working code and verify it runs correctly before finishing.
+
+openai:
+  base_url: "http://localhost:8000/v1"
+  model_name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview"
+  server_type: "openai"
+  api_key: ""
diff --git a/environments/configs/terminal_test_default.yaml b/environments/configs/terminal_test_default.yaml
new file mode 100644
index 0000000000..19b38e3340
--- /dev/null
+++ b/environments/configs/terminal_test_default.yaml
@@ -0,0 +1,35 @@
+# Terminal Test Environment -- Default Configuration
+#
+# Simple file-creation tasks for validating the full Atropos + hermes-agent stack.
+# Uses Modal terminal backend and OpenRouter (Claude) for inference.
+# API keys loaded from ~/hermes-agent/.env
+#
+# Usage:
+#   run-api
+#   python environments/terminal_test_env.py serve
+#   # Or with config file:
+#   python environments/terminal_test_env.py serve --config environments/configs/terminal_test_default.yaml
+
+env:
+  enabled_toolsets: ["terminal", "file"]
+  max_agent_turns: 10
+  max_token_length: 2048
+  group_size: 3
+  total_steps: 3
+  steps_per_eval: 3
+  terminal_backend: "modal"
+  tool_call_parser: "hermes"
+  tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview"
+  ensure_scores_are_not_same: false
+  use_wandb: false
+  system_prompt: >
+    You are a helpful assistant with access to a terminal and file tools.
+    Complete the user's request by using the available tools.
+    Be precise and follow instructions exactly.
+
+openai:
+  base_url: "https://openrouter.ai/api/v1"
+  model_name: "anthropic/claude-opus-4.6"
+  server_type: "openai"
+  health_check: false
+  # api_key loaded from OPENROUTER_API_KEY in .env
diff --git a/environments/hermes_base_env.py b/environments/hermes_base_env.py
new file mode 100644
index 0000000000..d17fcd6ab7
--- /dev/null
+++ b/environments/hermes_base_env.py
@@ -0,0 +1,540 @@
+"""
+HermesAgentBaseEnv -- Abstract Base Environment for Hermes-Agent + Atropos
+
+Provides the Atropos integration plumbing that all hermes-agent environments share:
+- Two-mode operation (OpenAI server for Phase 1, VLLM ManagedServer for Phase 2)
+- Per-group toolset/distribution resolution
+- Agent loop orchestration via HermesAgentLoop
+- ToolContext creation for reward functions
+- ScoredDataGroup construction from ManagedServer state
+
+Subclasses only need to implement:
+    setup()           -- Load dataset, initialize state
+    get_next_item()   -- Return the next item from the dataset
+    format_prompt()   -- Convert a dataset item into the user message
+    compute_reward()  -- Score the rollout (has full ToolContext access)
+    evaluate()        -- Periodic evaluation
+"""
+
+import asyncio
+import json
+import logging
+import os
+import sys
+import uuid
+from abc import abstractmethod
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
+
+# Ensure the hermes-agent repo root is on sys.path so that imports like
+# `from model_tools import ...` and `from environments.X import ...` work
+# regardless of where the script is invoked from.
+_repo_root = Path(__file__).resolve().parent.parent
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from dotenv import load_dotenv
+from pydantic import Field
+
+# Load API keys from hermes-agent/.env so all environments can access them
+_env_path = _repo_root / ".env"
+if _env_path.exists():
+    load_dotenv(dotenv_path=_env_path)
+
+from atroposlib.envs.base import (
+    BaseEnv,
+    BaseEnvConfig,
+    ScoredDataGroup,
+    ScoredDataItem,
+)
+from atroposlib.envs.server_handling.server_manager import (
+    APIServerConfig,
+    ServerBaseline,
+    ServerManager,
+)
+from atroposlib.type_definitions import Item
+
+from environments.agent_loop import AgentResult, HermesAgentLoop
+from environments.tool_context import ToolContext
+
+# Import hermes-agent toolset infrastructure
+from model_tools import get_tool_definitions
+from toolset_distributions import sample_toolsets_from_distribution
+
+logger = logging.getLogger(__name__)
+
+
+class HermesAgentEnvConfig(BaseEnvConfig):
+    """
+    Configuration for hermes-agent Atropos environments.
+
+    Extends BaseEnvConfig with agent-specific settings for toolsets,
+    terminal backend, dataset loading, and tool call parsing.
+    """
+
+    # --- Toolset configuration ---
+    # Meant to be mutually exclusive; if both are set, distribution wins (see _resolve_tools_for_group)
+    enabled_toolsets: Optional[List[str]] = Field(
+        default=None,
+        description="Explicit list of hermes toolsets to enable (e.g., ['terminal', 'file', 'web']). "
+        "If None and distribution is also None, all available toolsets are enabled.",
+    )
+    disabled_toolsets: Optional[List[str]] = Field(
+        default=None,
+        description="Toolsets to disable. Applied as a filter on top of enabled_toolsets or distribution.",
+    )
+    distribution: Optional[str] = Field(
+        default=None,
+        description="Name of a toolset distribution from toolset_distributions.py "
+        "(e.g., 'development', 'terminal_tasks'). Sampled once per group. "
+        "Mutually exclusive with enabled_toolsets.",
+    )
+
+    # --- Agent loop configuration (forwarded to HermesAgentLoop per rollout) ---
+    max_agent_turns: int = Field(
+        default=30,
+        description="Maximum number of LLM calls (tool-calling iterations) per rollout.",
+    )
+    system_prompt: Optional[str] = Field(
+        default=None,
+        description="System prompt for the agent. Tools are handled via the tools= parameter, "
+        "not embedded in the prompt text.",
+    )
+    agent_temperature: float = Field(
+        default=1.0,
+        description="Sampling temperature for agent generation during rollouts.",
+    )
+
+    # --- Terminal backend (exported as TERMINAL_ENV by HermesAgentBaseEnv.__init__) ---
+    terminal_backend: str = Field(
+        default="local",
+        description="Terminal backend: 'local', 'docker', 'modal', 'ssh', 'singularity'. "
+        "Modal recommended for production RL (cloud isolation per rollout).",
+    )
+
+    # --- Dataset ---
+    dataset_name: Optional[str] = Field(
+        default=None,
+        description="HuggingFace dataset name. Optional if tasks are defined inline.",
+    )
+    dataset_split: str = Field(
+        default="train",
+        description="Dataset split to use.",
+    )
+    prompt_field: str = Field(
+        default="prompt",
+        description="Which field in the dataset contains the prompt.",
+    )
+
+    # --- Phase 2: Tool call parsing ---
+    tool_call_parser: str = Field(
+        default="hermes",
+        description="Tool call parser name for Phase 2 (VLLM server type). "
+        "Ignored in Phase 1 (OpenAI server type where VLLM parses natively). "
+        "Options: hermes, mistral, llama3_json, qwen, deepseek_v3, etc.",
+    )
+
+
+class HermesAgentBaseEnv(BaseEnv):
+    """
+    Abstract base environment for hermes-agent Atropos integration.
+
+    Handles two modes of operation:
+    - Phase 1 (OpenAI server type): Uses server.chat_completion() directly.
+      The server (VLLM, SGLang, OpenRouter, OpenAI) handles tool call parsing
+      and reasoning extraction natively. DummyManagedServer provides placeholder
+      tokens. Good for SFT data gen, verifier testing, evaluation.
+
+    - Phase 2 (VLLM server type): Uses ManagedServer for exact token IDs + logprobs
+      via /generate. Client-side tool call parser reconstructs structured tool_calls
+      from raw output. Full RL training capability.
+
+    Subclasses must implement:
+        setup()           -- Load dataset, initialize state
+        get_next_item()   -- Return the next item to roll out
+        format_prompt()   -- Convert a dataset item into the user message string
+        compute_reward()  -- Score the rollout using ToolContext
+        evaluate()        -- Periodic evaluation
+    """
+
+    name: Optional[str] = "hermes-agent"
+    env_config_cls = HermesAgentEnvConfig
+
+    def __init__(
+        self,
+        config: HermesAgentEnvConfig,
+        server_configs: Union[ServerBaseline, List[APIServerConfig]],
+        slurm=False,
+        testing=False,
+    ):
+        super().__init__(config, server_configs, slurm, testing)
+
+        # Export the chosen backend via os.environ (process-wide) for hermes tools
+        backend = config.terminal_backend
+        if backend:
+            os.environ["TERMINAL_ENV"] = backend
+        # Cache of (tool_schemas, valid_tool_names); filled in collect_trajectories
+        self._current_group_tools: Optional[Tuple[List[Dict], Set[str]]] = None
+
+    # =========================================================================
+    # Toolset resolution (per-group)
+    # =========================================================================
+
+    def _resolve_tools_for_group(self) -> Tuple[List[Dict[str, Any]], Set[str]]:
+        """
+        Work out which tools the current group of rollouts may use.
+
+        Invoked once per group from collect_trajectories(); collect_trajectory()
+        falls back to calling it directly when no group result is cached.
+        Toolset source: config.distribution (sampled) if set, otherwise
+        config.enabled_toolsets; config.disabled_toolsets is always passed on.
+
+        Returns:
+            (tool_schemas, valid_tool_names) tuple
+        """
+        cfg = self.config
+        dist_name = cfg.distribution
+        # A named distribution takes precedence over the explicit list
+        if not dist_name:
+            chosen = cfg.enabled_toolsets  # None means "all available"
+        else:
+            chosen = sample_toolsets_from_distribution(dist_name)
+            logger.info("Sampled toolsets from '%s': %s", dist_name, chosen)
+
+        schemas = get_tool_definitions(
+            enabled_toolsets=chosen,
+            disabled_toolsets=cfg.disabled_toolsets,
+            quiet_mode=True,
+        )
+        names = {s["function"]["name"] for s in schemas} if schemas else set()
+        logger.info("Resolved %d tools for group: %s", len(names), sorted(names))
+        return schemas, names
+
+    # =========================================================================
+    # Server mode detection
+    # =========================================================================
+
+    def _use_managed_server(self) -> bool:
+        """
+        Decide between ManagedServer (Phase 2) and direct server (Phase 1) mode.
+
+        Returns False when no servers are configured or when the first server
+        is an OpenAIServer instance (Phase 1: /v1/chat/completions with native
+        tool call parsing). Returns True for any other server class, e.g. the
+        'vllm' or 'sglang' server types that use the /generate endpoint for
+        exact token tracking (Phase 2).
+        """
+        servers = self.server.servers
+        if not servers:
+            return False
+
+        # Imported lazily; only the first configured server is inspected
+        from atroposlib.envs.server_handling.openai_server import OpenAIServer
+        return not isinstance(servers[0], OpenAIServer)
+
+    # =========================================================================
+    # Core Atropos integration
+    # =========================================================================
+
+    async def collect_trajectories(
+        self, item: Item
+    ) -> Tuple[
+        Union[Optional[ScoredDataGroup], List[Optional[ScoredDataGroup]]],
+        List[Item],
+    ]:
+        """
+        Resolve the group's toolsets, then run the stock group collection.
+
+        The resolved (tool_schemas, valid_tool_names) pair is stored on the
+        instance before delegating to BaseEnv.collect_trajectories(), which
+        calls collect_trajectory() group_size times in parallel; each of
+        those calls picks the stored pair up instead of resolving again.
+        """
+        # One resolution per group; every rollout in the group reads this cache.
+        # NOTE(review): instance-level state -- if groups can run concurrently
+        # they would overwrite each other's tools; confirm against BaseEnv.
+        resolved = self._resolve_tools_for_group()
+        self._current_group_tools = resolved
+        return await super().collect_trajectories(item)
+
+    # =========================================================================
+    # Wandb rollout display -- format trajectories nicely
+    # =========================================================================
+
+    @staticmethod
+    def _format_trajectory_for_display(messages: List[Dict[str, Any]]) -> str:
+        """
+        Render OpenAI-format messages as a readable transcript for wandb
+        rollout tables, showing reasoning, tool calls and tool results in
+        labelled sections instead of raw decoded tokens.
+
+        Missing or None "content" / "tool_calls" values are treated as empty.
+        Reasoning, tool arguments and tool results are truncated to 300, 200
+        and 500 characters respectively; sections are joined by blank lines.
+        """
+        parts = []
+        for msg in messages:
+            role = msg.get("role", "unknown")
+            # "content" can be present but None; len()/formatting need a str
+            content = msg.get("content") or ""
+
+            if role == "system":
+                parts.append(f"[SYSTEM]\n{content}")
+            elif role == "user":
+                parts.append(f"[USER]\n{content}")
+            elif role == "assistant":
+                # Show reasoning if present
+                reasoning = msg.get("reasoning_content", "")
+                if reasoning:
+                    # Truncate long reasoning for display
+                    if len(reasoning) > 300:
+                        reasoning = reasoning[:300] + "..."
+                    parts.append(f"[ASSISTANT thinking]\n{reasoning}")
+
+                # Show content
+                if content:
+                    parts.append(f"[ASSISTANT]\n{content}")
+
+                # Show tool calls ("tool_calls" may be absent or None)
+                tool_calls = msg.get("tool_calls") or []
+                for tc in tool_calls:
+                    func = tc.get("function") or {}
+                    name = func.get("name", "?")
+                    args = func.get("arguments", "{}") or ""
+                    # Truncate long arguments for display
+                    if len(args) > 200:
+                        args = args[:200] + "..."
+                    parts.append(f"[TOOL CALL] {name}({args})")
+
+            elif role == "tool":
+                # Truncate long tool results for display
+                result = content[:500] + "..." if len(content) > 500 else content
+                parts.append(f"[TOOL RESULT] {result}")
+
+        return "\n\n".join(parts)
+
+    async def add_rollouts_for_wandb(
+        self,
+        scored_data,
+        item=None,
+    ):
+        """
+        Record this group's rollouts for wandb as (text, score) pairs, using
+        the structured trajectory view when messages are available.
+        """
+        limit = self.config.num_rollouts_per_group_for_logging
+        if limit == -1:
+            limit = self.config.group_size
+
+        # Read the per-rollout columns once; missing ones are handled below
+        scores = scored_data.get("scores", [])
+        all_messages = scored_data.get("messages")
+        all_tokens = scored_data.get("tokens")
+        rows = []
+        for idx in range(min(limit, len(scores))):
+            # Prefer the structured messages; fall back to decoded tokens
+            convo = all_messages[idx] if all_messages and idx < len(all_messages) else None
+            if convo:
+                rendered = self._format_trajectory_for_display(convo)
+            elif all_tokens and idx < len(all_tokens):
+                rendered = self.tokenizer.decode(all_tokens[idx])
+            else:
+                rendered = "(no data)"
+            rows.append((rendered, scores[idx]))
+
+        # Bounded history: drop the oldest group once over the limit
+        history = self.rollouts_for_wandb
+        history.append(rows)
+        if len(history) > self.config.num_rollouts_to_keep:
+            history.pop(0)
+
+    async def collect_trajectory(
+        self, item: Item
+    ) -> Tuple[Optional[Union[ScoredDataItem, Any]], List[Item]]:
+        """
+        Run a single rollout: agent loop + reward computation.
+
+        This is called group_size times in parallel by collect_trajectories().
+        Each call gets its own task_id for terminal/browser session isolation.
+
+        Args:
+            item: Dataset item to roll out; rendered with format_prompt().
+
+        Returns:
+            (scored_item, []) -- scored_item is a dict holding "tokens",
+            "masks", "scores" and "messages"; the second element is always
+            an empty list.
+        """
+        task_id = str(uuid.uuid4())
+
+        # Get group-level tools (resolved once in collect_trajectories)
+        if self._current_group_tools is None:
+            # Fallback: resolve per-trajectory if called outside collect_trajectories
+            tools, valid_names = self._resolve_tools_for_group()
+        else:
+            tools, valid_names = self._current_group_tools
+
+        # Build initial messages
+        messages: List[Dict[str, Any]] = []
+        if self.config.system_prompt:
+            messages.append({"role": "system", "content": self.config.system_prompt})
+        messages.append({"role": "user", "content": self.format_prompt(item)})
+
+        # All three code paths below run the same agent loop; only the server
+        # differs, so the shared constructor arguments are built exactly once.
+        loop_kwargs: Dict[str, Any] = {
+            "tool_schemas": tools,
+            "valid_tool_names": valid_names,
+            "max_turns": self.config.max_agent_turns,
+            "task_id": task_id,
+            "temperature": self.config.agent_temperature,
+            "max_tokens": self.config.max_token_length,
+        }
+
+        # Run the agent loop
+        result: AgentResult
+        if self._use_managed_server():
+            # Phase 2: ManagedServer with parser -- exact tokens + logprobs
+            try:
+                async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
+                    agent = HermesAgentLoop(server=managed, **loop_kwargs)
+                    result = await agent.run(messages)
+            except NotImplementedError:
+                # DummyManagedServer not allowed -- fall back to Phase 1
+                logger.warning(
+                    "ManagedServer not available (OpenAI server?). "
+                    "Falling back to direct server mode."
+                )
+                agent = HermesAgentLoop(server=self.server, **loop_kwargs)
+                result = await agent.run(messages)
+        else:
+            # Phase 1: OpenAI server -- native tool_calls, placeholder tokens
+            agent = HermesAgentLoop(server=self.server, **loop_kwargs)
+            result = await agent.run(messages)
+
+        # Compute reward using ToolContext (gives verifier full tool access)
+        ctx = ToolContext(task_id)
+        try:
+            reward = await self.compute_reward(item, result, ctx)
+        except Exception as e:
+            # Best-effort: a failing verifier scores the rollout 0.0 instead of
+            # killing it. logger.exception keeps the traceback for debugging.
+            logger.exception("compute_reward failed: %s", e)
+            reward = 0.0
+        finally:
+            ctx.cleanup()
+
+        # Build ScoredDataItem from ManagedServer state
+        # Phase 2: real tokens/masks/logprobs from SequenceNodes
+        # Phase 1: placeholder tokens (still need a valid ScoredDataItem for the pipeline)
+        nodes = (result.managed_state or {}).get("nodes", [])
+
+        if nodes:
+            # Phase 2 (or DummyManagedServer): use actual node data
+            node = nodes[-1]  # Final sequence node = full trajectory
+            scored_item: Dict[str, Any] = {
+                "tokens": node.tokens,
+                "masks": node.masked_tokens,
+                "scores": reward,
+            }
+
+            # When the node carries logprobs (Phase 2), add None placeholders;
+            # the logprobs themselves are not copied into the item here.
+            if getattr(node, "logprobs", None):
+                scored_item["advantages"] = None  # Computed by trainer
+                scored_item["ref_logprobs"] = None
+        else:
+            # Phase 1 with no managed state: create placeholder tokens
+            # so the data pipeline doesn't break. These are NOT suitable
+            # for training but allow process mode (SFT data gen) to work.
+            # Tokenize the full conversation to get approximate tokens.
+            full_text = "\n".join(
+                msg.get("content", "") for msg in result.messages if msg.get("content")
+            )
+            if self.tokenizer:
+                tokens = self.tokenizer.encode(full_text, add_special_tokens=True)
+            else:
+                tokens = list(range(min(len(full_text) // 4, 128)))
+
+            scored_item = {
+                "tokens": tokens,
+                # Mask first token as prompt; an empty token list gets empty
+                # masks so both sequences always have the same length.
+                "masks": ([-100] + tokens[1:]) if tokens else [],
+                "scores": reward,
+            }
+
+        # Always include messages for wandb rollout display and data logging
+        scored_item["messages"] = result.messages
+
+        return scored_item, []
+
+    # =========================================================================
+    # Abstract methods -- subclasses must implement
+    # =========================================================================
+
+    @abstractmethod
+    async def setup(self) -> None:
+        """
+        Load the dataset and initialize per-environment state.
+
+        Called once when the environment starts. Typical implementation:
+            self.dataset = load_dataset(self.config.dataset_name, split=self.config.dataset_split)
+            self.iter = 0
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def get_next_item(self) -> Item:
+        """
+        Return the next item from the dataset for rollout.
+
+        Called by the base env's main loop to get items for workers.
+        Should cycle through the dataset (wrap around once it is exhausted).
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def format_prompt(self, item: Item) -> str:
+        """
+        Convert a dataset item into the user message for the agent.
+
+        Args:
+            item: Dataset item (dict, tuple, etc.)
+
+        Returns:
+            The prompt string; collect_trajectory() sends it as the user message
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def compute_reward(
+        self, item: Item, result: AgentResult, ctx: ToolContext
+    ) -> float:
+        """
+        Score the rollout. Has full access to:
+        - item: the original dataset item (ground truth, test commands, etc.)
+        - result: AgentResult with full messages, turn count, reasoning, etc.
+        - ctx: ToolContext -- call ANY hermes-agent tool (terminal, file, web,
+               browser, vision...) scoped to this rollout's sandbox.
+        If this raises, collect_trajectory() logs it and scores the rollout 0.0.
+
+        Args:
+            item: The dataset item that was rolled out
+            result: The agent's rollout result
+            ctx: ToolContext with full tool access for verification
+
+        Returns:
+            Reward float (typically 0.0 to 1.0, but any float is valid)
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def evaluate(self, *args, **kwargs):
+        """
+        Periodic evaluation. Called every steps_per_eval steps.
+
+        Typical implementation runs the agent on a held-out eval set
+        and reports metrics with `await self.evaluate_log(metrics=..., ...)`.
+        """
+        raise NotImplementedError
diff --git a/environments/hermes_swe_env.py b/environments/hermes_swe_env.py
new file mode 100644
index 0000000000..23b3e8f028
--- /dev/null
+++ b/environments/hermes_swe_env.py
@@ -0,0 +1,229 @@
+"""
+HermesSweEnv -- SWE-Bench Style Environment with Modal Sandboxes
+
+A concrete environment for software engineering tasks where the model writes code
+and the reward function runs tests to verify correctness. Uses Modal terminal
+backend for cloud-isolated sandboxes per rollout.
+
+The reward function uses ToolContext.terminal() to run test commands in the same
+Modal sandbox the model used during its agentic loop. All filesystem state from
+the model's tool calls is preserved for verification.
+
+Usage:
+    # Phase 1: OpenAI server type
+    vllm serve YourModel --tool-parser hermes
+    run-api
+    python environments/hermes_swe_env.py serve \\
+        --openai.base_url http://localhost:8000/v1 \\
+        --openai.model_name YourModel \\
+        --openai.server_type openai \\
+        --env.dataset_name bigcode/humanevalpack \\
+        --env.terminal_backend modal
+
+    # Phase 2: VLLM server type (full RL training)
+    python environments/hermes_swe_env.py serve \\
+        --openai.base_url http://localhost:8000/v1 \\
+        --openai.model_name YourModel \\
+        --openai.server_type vllm \\
+        --env.tool_call_parser hermes \\
+        --env.terminal_backend modal
+"""
+
+import logging
+import sys
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+# Ensure repo root is on sys.path for imports
+_repo_root = Path(__file__).resolve().parent.parent
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from datasets import load_dataset
+
+from atroposlib.envs.base import ScoredDataGroup
+from atroposlib.envs.server_handling.server_manager import APIServerConfig
+from atroposlib.type_definitions import Item
+
+from environments.agent_loop import AgentResult
+from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
+from environments.tool_context import ToolContext
+
+logger = logging.getLogger(__name__)
+
+
+class HermesSweEnvConfig(HermesAgentEnvConfig):
+    """Config type for SWE-bench style tasks; adds no fields of its own."""
+
+    pass  # SWE-specific default values are supplied by HermesSweEnv.config_init()
+
+
+class HermesSweEnv(HermesAgentBaseEnv):
+    """
+    SWE-bench style environment using Modal terminal backend.
+
+    The model gets a coding task, uses terminal + file + web tools to solve it,
+    and the reward function runs tests in the same Modal sandbox to verify.
+
+    Subclass this for specific SWE datasets (HumanEval, SWE-bench, etc.)
+    and customize format_prompt() and compute_reward() as needed.
+    """
+
+    name = "hermes-swe"
+    env_config_cls = HermesSweEnvConfig
+
+    @classmethod
+    def config_init(cls) -> Tuple[HermesSweEnvConfig, List[APIServerConfig]]:
+        """
+        Default configuration for the SWE environment.
+
+        Uses Modal terminal backend for cloud isolation and terminal + file + web toolsets.
+        """
+        env_config = HermesSweEnvConfig(
+            # Toolsets: terminal for running code, file for reading/writing, web for docs
+            enabled_toolsets=["terminal", "file", "web"],
+            disabled_toolsets=None,
+            distribution=None,
+            # Agent settings -- SWE tasks need more turns
+            max_agent_turns=30,
+            max_token_length=4096,
+            agent_temperature=1.0,
+            system_prompt=(
+                "You are a skilled software engineer. You have access to a terminal, "
+                "file tools, and web search. Use these tools to complete the coding task. "
+                "Write clean, working code and verify it runs correctly before finishing."
+            ),
+            # Modal backend for cloud-isolated sandboxes
+            terminal_backend="modal",
+            # Dataset -- override via CLI for your specific SWE dataset
+            dataset_name="bigcode/humanevalpack",
+            dataset_split="test",
+            prompt_field="prompt",
+            # Atropos settings
+            group_size=4,
+            tokenizer_name="NousResearch/DeepHermes-3-Llama-3-3B-Preview",
+            tool_call_parser="hermes",
+            steps_per_eval=50,
+            total_steps=500,
+            use_wandb=True,
+            wandb_name="hermes-swe",
+        )
+
+        server_configs = [
+            APIServerConfig(
+                base_url="http://localhost:8000/v1",
+                model_name="NousResearch/DeepHermes-3-Llama-3-3B-Preview",
+                server_type="openai",  # Phase 1; switch to "vllm" for Phase 2
+                api_key="",
+            )
+        ]
+
+        return env_config, server_configs
+
+    async def setup(self):
+        """Load the SWE dataset."""
+        if self.config.dataset_name:
+            self.dataset = load_dataset(
+                self.config.dataset_name, split=self.config.dataset_split
+            )
+        else:
+            # Placeholder if no dataset specified
+            self.dataset = []
+        self.iter = 0
+        self.reward_buffer: List[float] = []
+
+    async def get_next_item(self) -> Dict[str, Any]:
+        """Cycle through the SWE dataset."""
+        if not self.dataset:
+            raise ValueError("No dataset loaded. Set dataset_name in config.")
+        item = self.dataset[self.iter % len(self.dataset)]
+        self.iter += 1
+        return item
+
+    def format_prompt(self, item: Dict[str, Any]) -> str:
+        """
+        Format the SWE task prompt.
+
+        Override this in subclasses for different dataset formats.
+        Default assumes the dataset has a 'prompt' field and optionally a 'test' field.
+        """
+        prompt = item.get(self.config.prompt_field, "")
+
+        # If the dataset has test information, include it in the prompt
+        test_info = item.get("test", item.get("test_code", item.get("tests", "")))
+        if test_info:
+            prompt += f"\n\nTests to pass:\n{test_info}"
+
+        return prompt
+
+    async def compute_reward(
+        self, item: Dict[str, Any], result: AgentResult, ctx: ToolContext
+    ) -> float:
+        """
+        Score by running tests in the model's Modal sandbox.
+
+        Default implementation:
+        - If the dataset item has a 'test' or 'test_code' field, run it
+        - Check exit code: 0 = pass, non-zero = fail
+        - Partial credit for file creation
+
+        Override this in subclasses for more sophisticated reward logic.
+        """
+        import shlex  # local import keeps this fix self-contained
+
+        # Find the test command from the dataset item
+        test_code = item.get("test", item.get("test_code", item.get("tests", "")))
+        if test_code:
+            # shlex.quote() hands the test source to `python3 -c` as one literal
+            # shell word, so quotes, `$` or backticks in it cannot break the command.
+            test_cmd = f"cd /workspace && python3 -c {shlex.quote(str(test_code))}"
+            test_result = ctx.terminal(test_cmd, timeout=60)
+            if test_result["exit_code"] == 0:
+                self.reward_buffer.append(1.0)
+                return 1.0
+
+        # Partial credit: check if the model created any Python files
+        file_check = ctx.terminal("find /workspace -name '*.py' -newer /tmp/.start_marker 2>/dev/null | head -5")
+        if file_check["exit_code"] == 0 and file_check.get("output", "").strip():
+            self.reward_buffer.append(0.1)
+            return 0.1
+
+        self.reward_buffer.append(0.0)
+        return 0.0
+
+    async def evaluate(self, *args, **kwargs):
+        """
+        Run evaluation on a held-out set.
+
+        Override for dataset-specific evaluation logic.
+        """
+        start_time = time.time()
+        end_time = time.time()
+
+        eval_metrics = {"eval/placeholder": 0.0}
+        await self.evaluate_log(
+            metrics=eval_metrics,
+            start_time=start_time,
+            end_time=end_time,
+        )
+
+    async def wandb_log(self, wandb_metrics: Optional[Dict] = None):
+        """Log SWE-specific metrics."""
+        if wandb_metrics is None:
+            wandb_metrics = {}
+
+        if self.reward_buffer:
+            wandb_metrics["train/avg_reward"] = sum(self.reward_buffer) / len(
+                self.reward_buffer
+            )
+            wandb_metrics["train/pass_rate"] = sum(
+                1 for r in self.reward_buffer if r == 1.0
+            ) / len(self.reward_buffer)
+            self.reward_buffer = []
+
+        await super().wandb_log(wandb_metrics)
+
+
+if __name__ == "__main__":
+    HermesSweEnv.cli()
diff --git a/environments/terminal_test_env.py b/environments/terminal_test_env.py
new file mode 100644
index 0000000000..e30d56695e
--- /dev/null
+++ b/environments/terminal_test_env.py
@@ -0,0 +1,292 @@
+"""
+TerminalTestEnv -- Simple Test Environment for Validating the Stack
+
+A self-contained environment with inline tasks (no external dataset needed).
+Each task asks the model to create a file at a known path with specific content.
+The reward verifier cats the file and checks if the content matches.
+
+Enables only terminal + file toolsets. Uses Modal terminal backend with
+OpenRouter (Claude) by default.
+
+Training tasks (3):
+    1. Create ~/greeting.txt with "Hello from Hermes Agent"
+    2. Create ~/count.txt with numbers 1-5, one per line
+    3. Create ~/answer.txt with the result of 123 + 456
+
+Eval task (1):
+    1. Create ~/result.txt with the result of 6 * 7
+
+Usage:
+    # Start Atropos API server
+    run-api
+
+    # Run environment (uses OpenRouter + Modal by default)
+    python environments/terminal_test_env.py serve
+
+    # Process mode (no run-api needed, saves to JSONL)
+    python environments/terminal_test_env.py process \\
+        --env.data_path_to_save_groups terminal_test_output.jsonl
+"""
+
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+# Ensure repo root is on sys.path for imports
+_repo_root = Path(__file__).resolve().parent.parent
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from atroposlib.envs.base import ScoredDataGroup
+from atroposlib.envs.server_handling.server_manager import APIServerConfig
+from atroposlib.type_definitions import Item
+
+from environments.agent_loop import AgentResult
+from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
+from environments.tool_context import ToolContext
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Inline task definitions -- no external dataset needed
+# =============================================================================
+
+TRAIN_TASKS = [  # keys: "prompt" (user message), "verify_path" (file the verifier cats), "expected_content"
+    {
+        "prompt": "Create a file at ~/greeting.txt containing exactly the text: Hello from Hermes Agent",
+        "verify_path": "~/greeting.txt",
+        "expected_content": "Hello from Hermes Agent",
+    },
+    {
+        "prompt": "Create a file at ~/count.txt containing the numbers 1 through 5, one per line",
+        "verify_path": "~/count.txt",
+        "expected_content": "1\n2\n3\n4\n5",
+    },
+    {
+        "prompt": "Create a file at ~/answer.txt containing the result of 123 + 456",
+        "verify_path": "~/answer.txt",
+        "expected_content": "579",
+    },
+]
+
+EVAL_TASKS = [  # same keys as the training tasks; evaluate() reads "prompt" and "expected_content"
+    {
+        "prompt": "Create a file at ~/result.txt containing the result of 6 * 7",
+        "verify_path": "~/result.txt",
+        "expected_content": "42",
+    },
+]
+
+
+class TerminalTestEnvConfig(HermesAgentEnvConfig):
+    """Config type for terminal testing; adds no fields of its own."""
+
+    pass  # Test-specific default values are supplied by TerminalTestEnv.config_init()
+
+
+class TerminalTestEnv(HermesAgentBaseEnv):
+    """
+    Simple test environment with inline file-creation tasks.
+
+    All tasks follow the same pattern: "create a file at ~/X.txt with content Y".
+    The verifier runs `cat ~/X.txt` in the rollout's terminal and checks the output
+    against the expected string. Same verifier logic for all tasks.
+
+    This environment is designed to validate the full stack end-to-end:
+    - Agent loop executes tool calls (terminal/file)
+    - ToolContext provides terminal access to the reward function
+    - Reward function verifies file content via cat
+    - Scored data flows through the Atropos pipeline
+    """
+
+    name = "terminal-test"
+    env_config_cls = TerminalTestEnvConfig
+
+    @classmethod
+    def config_init(cls) -> Tuple[TerminalTestEnvConfig, List[APIServerConfig]]:
+        """
+        Default configuration for the terminal test environment.
+
+        Uses Modal terminal backend for cloud isolation and OpenRouter with
+        Claude for inference. API keys loaded from ~/hermes-agent/.env.
+        """
+        env_config = TerminalTestEnvConfig(
+            # Terminal + file tools only
+            enabled_toolsets=["terminal", "file"],
+            disabled_toolsets=None,
+            distribution=None,
+            # Agent settings
+            max_agent_turns=10,  # Simple tasks, don't need many turns
+            max_token_length=2048,
+            agent_temperature=1.0,
+            system_prompt=(
+                "You are a helpful assistant with access to a terminal and file tools. "
+                "Complete the user's request by using the available tools. "
+                "Be precise and follow instructions exactly."
+            ),
+            # Modal terminal backend for cloud-isolated sandboxes per rollout
+            terminal_backend="modal",
+            # Atropos settings
+            group_size=3,              # 3 rollouts per group
+            tokenizer_name="NousResearch/DeepHermes-3-Llama-3-3B-Preview",
+            tool_call_parser="hermes",
+            steps_per_eval=3,          # Eval after all 3 steps
+            total_steps=3,             # 3 groups total (1 group per step)
+            use_wandb=True,
+            wandb_name="terminal-test",
+            ensure_scores_are_not_same=False,  # Allow all-same scores for simple tasks
+            # No external dataset
+            dataset_name=None,
+        )
+
+        # OpenRouter with Claude -- API key loaded from .env (OPENROUTER_API_KEY)
+        server_configs = [
+            APIServerConfig(
+                base_url="https://openrouter.ai/api/v1",
+                model_name="anthropic/claude-opus-4.6",
+                server_type="openai",
+                api_key=os.getenv("OPENROUTER_API_KEY", ""),
+                health_check=False,  # OpenRouter doesn't have a /health endpoint
+            )
+        ]
+
+        return env_config, server_configs
+
+    async def setup(self):
+        """Initialize inline task lists."""
+        self.train_tasks = list(TRAIN_TASKS)
+        self.eval_tasks = list(EVAL_TASKS)
+        self.iter = 0
+        # Track reward stats for wandb logging
+        self.reward_buffer: List[float] = []
+
+    async def get_next_item(self) -> Dict[str, str]:
+        """Cycle through training tasks."""
+        item = self.train_tasks[self.iter % len(self.train_tasks)]
+        self.iter += 1
+        return item
+
+    def format_prompt(self, item: Dict[str, str]) -> str:
+        """The prompt is directly in the task item."""
+        return item["prompt"]
+
+    async def compute_reward(
+        self, item: Dict[str, str], result: AgentResult, ctx: ToolContext
+    ) -> float:
+        """
+        Verify by cat-ing the expected file path and checking content matches.
+        Same verifier for all tasks -- they all write a file at a known path.
+
+        Scoring:
+            1.0 = exact match
+            0.5 = expected content is present but has extra stuff
+            0.0 = file doesn't exist or content doesn't match
+        """
+        verify_result = ctx.terminal(f"cat {item['verify_path']}")
+
+        # File doesn't exist or can't be read
+        if verify_result["exit_code"] != 0:
+            self.reward_buffer.append(0.0)
+            return 0.0
+
+        actual = verify_result.get("output", "").strip()
+        expected = item["expected_content"].strip()
+
+        # Exact match
+        if actual == expected:
+            self.reward_buffer.append(1.0)
+            return 1.0
+
+        # Partial credit: expected content is present but has extra stuff
+        if expected in actual:
+            self.reward_buffer.append(0.5)
+            return 0.5
+
+        self.reward_buffer.append(0.0)
+        return 0.0
+
+    async def evaluate(self, *args, **kwargs):
+        """
+        Run each eval task as a single-turn completion and log the raw samples.
+        Responses are not verified here; only eval/num_samples is reported.
+        """
+        start_time = time.time()
+        # No correctness counter: this eval path never scores the responses.
+        total = len(self.eval_tasks)
+        samples = []
+
+        for eval_item in self.eval_tasks:
+            try:
+                # For eval, we do a simple single-turn completion (not full agent loop)
+                # to keep eval fast. The agent loop is tested via training.
+                completion = await self.server.chat_completion(
+                    messages=[
+                        {"role": "system", "content": self.config.system_prompt or ""},
+                        {"role": "user", "content": eval_item["prompt"]},
+                    ],
+                    n=1,
+                    max_tokens=self.config.max_token_length,
+                    temperature=0.0,
+                    split="eval",
+                )
+
+                response_content = (
+                    completion.choices[0].message.content if completion.choices else ""
+                )
+
+                samples.append(
+                    {
+                        "prompt": eval_item["prompt"],
+                        "response": response_content,
+                        "expected": eval_item["expected_content"],
+                    }
+                )
+
+            except Exception as e:
+                logger.exception("Eval failed for item: %s", e)
+                samples.append(
+                    {
+                        "prompt": eval_item["prompt"],
+                        "response": f"ERROR: {e}",
+                        "expected": eval_item["expected_content"],
+                    }
+                )
+
+        end_time = time.time()
+
+        eval_metrics = {
+            "eval/num_samples": total,
+        }
+
+        await self.evaluate_log(
+            metrics=eval_metrics,
+            samples=samples,
+            start_time=start_time,
+            end_time=end_time,
+        )
+
+    async def wandb_log(self, wandb_metrics: Optional[Dict] = None):
+        """Log training metrics including reward stats and accuracy."""
+        if wandb_metrics is None:
+            wandb_metrics = {}
+
+        if self.reward_buffer:
+            total = len(self.reward_buffer)
+            correct = sum(1 for r in self.reward_buffer if r == 1.0)
+            partial = sum(1 for r in self.reward_buffer if r == 0.5)
+
+            wandb_metrics["train/avg_reward"] = sum(self.reward_buffer) / total
+            wandb_metrics["train/accuracy"] = correct / total
+            wandb_metrics["train/partial_match_rate"] = partial / total
+            wandb_metrics["train/total_rollouts"] = total
+            self.reward_buffer = []
+
+        await super().wandb_log(wandb_metrics)
+
+
+if __name__ == "__main__":
+    TerminalTestEnv.cli()
diff --git a/environments/tool_call_parsers/__init__.py b/environments/tool_call_parsers/__init__.py
new file mode 100644
index 0000000000..8bff3f9d1f
--- /dev/null
+++ b/environments/tool_call_parsers/__init__.py
@@ -0,0 +1,120 @@
+"""
+Tool Call Parser Registry
+
+Client-side parsers that extract structured tool_calls from raw model output text.
+Used in Phase 2 (VLLM server type) where ManagedServer's /generate endpoint returns
+raw text without tool call parsing.
+
+Each parser is a standalone reimplementation of the corresponding VLLM parser's
+non-streaming extract_tool_calls() logic. No VLLM dependency -- only standard library
+(re, json, uuid) and openai types.
+
+Usage:
+    from environments.tool_call_parsers import get_parser
+
+    parser = get_parser("hermes")
+    content, tool_calls = parser.parse(raw_model_output)
+    # content = text with tool call markup stripped
+    # tool_calls = list of ChatCompletionMessageToolCall objects, or None
+"""
+
+import logging
+from abc import ABC, abstractmethod
+from typing import Dict, List, Optional, Tuple, Type
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+)
+
+logger = logging.getLogger(__name__)
+
+# Type alias for parser return value
+ParseResult = Tuple[Optional[str], Optional[List[ChatCompletionMessageToolCall]]]
+
+
+class ToolCallParser(ABC):
+    """
+    Base class for tool call parsers.
+
+    Each subclass extracts structured tool_calls from one model family's raw
+    output format; get_parser() instantiates subclasses with no arguments.
+    """
+
+    @abstractmethod
+    def parse(self, text: str) -> ParseResult:
+        """
+        Parse raw model output text for tool calls.
+
+        Args:
+            text: Raw decoded text from the model's completion
+
+        Returns:
+            Tuple of (content, tool_calls) where:
+            - content: text with tool call markup stripped (the message 'content' field),
+                       or None if the entire output was tool calls
+            - tool_calls: list of ChatCompletionMessageToolCall objects,
+                          or None if no tool calls were found
+        """
+        raise NotImplementedError
+
+
+# Global parser registry: name -> parser class
+PARSER_REGISTRY: Dict[str, Type[ToolCallParser]] = {}
+
+
+def register_parser(name: str):
+    """
+    Build a class decorator that files a parser class under `name`.
+
+    Usage:
+        @register_parser("hermes")
+        class HermesToolCallParser(ToolCallParser):
+            ...
+    """
+
+    def _add_to_registry(parser_cls: Type[ToolCallParser]) -> Type[ToolCallParser]:
+        PARSER_REGISTRY[name] = parser_cls
+        return parser_cls
+
+    return _add_to_registry
+
+
+def get_parser(name: str) -> ToolCallParser:
+    """
+    Look up a registered parser class and return a fresh instance of it.
+
+    Args:
+        name: Parser name (e.g., "hermes", "mistral", "llama3_json")
+
+    Returns:
+        New parser instance, constructed with no arguments
+
+    Raises:
+        KeyError: If no parser is registered under `name`
+    """
+    parser_cls = PARSER_REGISTRY.get(name)
+    if parser_cls is None:
+        known_names = sorted(PARSER_REGISTRY)
+        message = f"Tool call parser '{name}' not found. Available parsers: {known_names}"
+        raise KeyError(message)
+    return parser_cls()
+
+
+def list_parsers() -> List[str]:
+    """Return the registered parser names in sorted order."""
+    return sorted(PARSER_REGISTRY)
+
+
+# Import all parser modules to trigger registration via @register_parser decorators
+# Each module registers itself when imported
+from environments.tool_call_parsers.hermes_parser import HermesToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.longcat_parser import LongcatToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.mistral_parser import MistralToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.llama_parser import LlamaToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.qwen_parser import QwenToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.deepseek_v3_parser import DeepSeekV3ToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.deepseek_v3_1_parser import DeepSeekV31ToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.kimi_k2_parser import KimiK2ToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.glm45_parser import Glm45ToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.glm47_parser import Glm47ToolCallParser  # noqa: E402, F401
+from environments.tool_call_parsers.qwen3_coder_parser import Qwen3CoderToolCallParser  # noqa: E402, F401
diff --git a/environments/tool_call_parsers/deepseek_v3_1_parser.py b/environments/tool_call_parsers/deepseek_v3_1_parser.py
new file mode 100644
index 0000000000..f0124c3893
--- /dev/null
+++ b/environments/tool_call_parsers/deepseek_v3_1_parser.py
@@ -0,0 +1,71 @@
+"""
+DeepSeek V3.1 tool call parser.
+
+Similar to V3 but with a slightly different format:
+    <｜tool▁call▁begin｜>function_name<｜tool▁sep｜>arguments<｜tool▁call▁end｜>
+
+Note: V3 has type+name before the separator, V3.1 has name before and args after.
+
+Based on VLLM's DeepSeekV31ToolParser.extract_tool_calls()
+"""
+
+import re
+import uuid
+from typing import List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+@register_parser("deepseek_v3_1")
+@register_parser("deepseek_v31")
+class DeepSeekV31ToolCallParser(ToolCallParser):
+    """
+    Parser for DeepSeek V3.1 tool calls.
+
+    Slightly different regex than V3: function_name comes before the separator,
+    arguments come after (no type field, no json code block wrapper).
+    """
+
+    START_TOKEN = "<｜tool▁calls▁begin｜>"
+
+    # Regex captures: function_name, function_arguments.
+    # (?s:...) lets only the arguments span newlines, so pretty-printed
+    # multi-line JSON still parses while the function name stays on one line.
+    PATTERN = re.compile(
+        r"<｜tool▁call▁begin｜>(?P<function_name>.*?)<｜tool▁sep｜>(?P<function_arguments>(?s:.*?))<｜tool▁call▁end｜>"
+    )
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Split *text* into leading content and DeepSeek V3.1 tool calls.
+
+        Returns (text, None) unchanged when no complete tool call is found;
+        otherwise (content before START_TOKEN or None, list of tool calls).
+        """
+        if self.START_TOKEN not in text:
+            return text, None
+
+        try:
+            matches = self.PATTERN.findall(text)
+            if not matches:
+                return text, None
+
+            tool_calls: List[ChatCompletionMessageToolCall] = [
+                ChatCompletionMessageToolCall(
+                    id=f"call_{uuid.uuid4().hex[:8]}",
+                    type="function",
+                    function=Function(name=func_name.strip(), arguments=func_args.strip()),
+                )
+                for func_name, func_args in matches
+            ]
+
+            content = text[: text.find(self.START_TOKEN)].strip()
+            return content if content else None, tool_calls
+        except Exception:
+            # Best-effort parser: any failure leaves the raw text untouched.
+            return text, None
diff --git a/environments/tool_call_parsers/deepseek_v3_parser.py b/environments/tool_call_parsers/deepseek_v3_parser.py
new file mode 100644
index 0000000000..5356b1a67d
--- /dev/null
+++ b/environments/tool_call_parsers/deepseek_v3_parser.py
@@ -0,0 +1,75 @@
+"""
+DeepSeek V3 tool call parser.
+
+Format uses special unicode tokens:
+    <｜tool▁calls▁begin｜>
+    <｜tool▁call▁begin｜>type<｜tool▁sep｜>function_name
+    ```json
+    {"arg": "value"}
+    ```
+    <｜tool▁call▁end｜>
+    <｜tool▁calls▁end｜>
+
+Based on VLLM's DeepSeekV3ToolParser.extract_tool_calls()
+"""
+
+import re
+import uuid
+from typing import List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+@register_parser("deepseek_v3")
+class DeepSeekV3ToolCallParser(ToolCallParser):
+    """
+    Parser for DeepSeek V3 tool calls.
+
+    Uses special unicode tokens with fullwidth angle brackets and block elements.
+    Extracts the function name and JSON arguments; the captured type is not used.
+    """
+
+    START_TOKEN = "<｜tool▁calls▁begin｜>"
+
+    # Regex captures: type, function_name, function_arguments. The scoped (?s:...)
+    # flag lets only the arguments span newlines (multi-line JSON in the fence).
+    PATTERN = re.compile(
+        r"<｜tool▁call▁begin｜>(?P<type>.*)<｜tool▁sep｜>(?P<function_name>.*)\n```json\n(?P<function_arguments>(?s:.*?))\n```<｜tool▁call▁end｜>"
+    )
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Split text into leading content and DeepSeek V3 tool calls.
+
+        Returns (content_or_None, tool_calls), or (text, None) when no tool
+        call is found or parsing fails.
+        """
+        if self.START_TOKEN not in text:
+            return text, None
+
+        try:
+            tool_calls: List[ChatCompletionMessageToolCall] = [
+                ChatCompletionMessageToolCall(
+                    id=f"call_{uuid.uuid4().hex[:8]}",
+                    type="function",
+                    function=Function(
+                        name=match.group("function_name").strip(),
+                        arguments=match.group("function_arguments").strip(),
+                    ),
+                )
+                for match in self.PATTERN.finditer(text)
+            ]
+            if not tool_calls:
+                return text, None
+
+            # Content is everything before the tool calls section
+            content = text[: text.find(self.START_TOKEN)].strip()
+            return content if content else None, tool_calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/glm45_parser.py b/environments/tool_call_parsers/glm45_parser.py
new file mode 100644
index 0000000000..e92e29881f
--- /dev/null
+++ b/environments/tool_call_parsers/glm45_parser.py
@@ -0,0 +1,109 @@
+"""
+GLM 4.5 (GLM-4-MoE) tool call parser.
+
+Format uses custom arg_key/arg_value tags rather than standard JSON:
+    <tool_call>function_name
+    <arg_key>param1</arg_key><arg_value>value1</arg_value>
+    <arg_key>param2</arg_key><arg_value>value2</arg_value>
+    </tool_call>
+
+Values are deserialized using json.loads -> ast.literal_eval -> raw string fallback.
+
+Based on VLLM's Glm4MoeModelToolParser.extract_tool_calls()
+"""
+
+import ast
+import json
+import re
+import uuid
+from typing import Any, Dict, List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+def _deserialize_value(value: str) -> Any:
+    """
+    Convert a raw argument string into a native Python value when possible.
+    Tries json.loads first, then ast.literal_eval; otherwise returns the string as-is.
+    """
+    attempts = (
+        (json.loads, (json.JSONDecodeError, TypeError)),
+        (ast.literal_eval, (ValueError, SyntaxError, TypeError)),
+    )
+    for decode, expected_errors in attempts:
+        try:
+            return decode(value)
+        except expected_errors:
+            continue
+
+    return value
+
+
+@register_parser("glm45")
+class Glm45ToolCallParser(ToolCallParser):
+    """
+    Tool call parser for GLM 4.5 (GLM-4-MoE).
+
+    Each call is a <tool_call>...</tool_call> block holding the function name
+    followed by <arg_key>/<arg_value> pairs rather than a JSON arguments object.
+    """
+
+    FUNC_CALL_REGEX = re.compile(r"<tool_call>.*?</tool_call>", re.DOTALL)
+    FUNC_DETAIL_REGEX = re.compile(r"<tool_call>([^\n]*)\n(.*)</tool_call>", re.DOTALL)
+    FUNC_ARG_REGEX = re.compile(
+        r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>", re.DOTALL
+    )
+
+    START_TOKEN = "<tool_call>"
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Extract GLM tool calls from text.
+
+        Returns (content_or_None, tool_calls) where content is the stripped text
+        before the first <tool_call>, or (text, None) if nothing could be parsed.
+        """
+        if self.START_TOKEN not in text:
+            return text, None
+
+        try:
+            parsed_calls: List[ChatCompletionMessageToolCall] = []
+
+            for block in self.FUNC_CALL_REGEX.findall(text):
+                detail = self.FUNC_DETAIL_REGEX.search(block)
+                if detail is None:
+                    continue
+
+                name_part, args_part = detail.group(1), detail.group(2)
+
+                # Build the arguments dict from the arg_key/arg_value pairs
+                raw_pairs = self.FUNC_ARG_REGEX.findall(args_part) if args_part else []
+                arguments: Dict[str, Any] = {
+                    raw_key.strip(): _deserialize_value(raw_value.strip())
+                    for raw_key, raw_value in raw_pairs
+                }
+
+                parsed_calls.append(
+                    ChatCompletionMessageToolCall(
+                        id=f"call_{uuid.uuid4().hex[:8]}",
+                        type="function",
+                        function=Function(
+                            name=name_part.strip(),
+                            arguments=json.dumps(arguments, ensure_ascii=False),
+                        ),
+                    )
+                )
+
+            if not parsed_calls:
+                return text, None
+
+            leading = text[: text.find(self.START_TOKEN)].strip()
+            return (leading or None), parsed_calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/glm47_parser.py b/environments/tool_call_parsers/glm47_parser.py
new file mode 100644
index 0000000000..6631cf842c
--- /dev/null
+++ b/environments/tool_call_parsers/glm47_parser.py
@@ -0,0 +1,35 @@
+"""
+GLM 4.7 tool call parser.
+
+Same as GLM 4.5 but with slightly different regex patterns.
+The tool_call tags may wrap differently and arg parsing handles
+newlines between key/value pairs.
+
+Based on VLLM's Glm47MoeModelToolParser (extends Glm4MoeModelToolParser).
+"""
+
+import re
+
+from environments.tool_call_parsers import ParseResult, register_parser
+from environments.tool_call_parsers.glm45_parser import Glm45ToolCallParser
+
+
+@register_parser("glm47")
+class Glm47ToolCallParser(Glm45ToolCallParser):
+    """
+    Parser for GLM 4.7 tool calls.
+
+    Reuses Glm45ToolCallParser.parse() and only overrides two patterns. They are
+    class attributes, like the parent's, so they are compiled once at import.
+    """
+
+    # The function name runs up to the first <arg_key> (or the closing tag); no
+    # newline is required after it and the argument section is optional.
+    FUNC_DETAIL_REGEX = re.compile(
+        r"<tool_call>(.*?)(<arg_key>.*?)?</tool_call>", re.DOTALL
+    )
+    # Allows whitespace or literal "\n" sequences between key and value tags.
+    FUNC_ARG_REGEX = re.compile(
+        r"<arg_key>(.*?)</arg_key>(?:\\n|\s)*<arg_value>(.*?)</arg_value>",
+        re.DOTALL,
+    )
diff --git a/environments/tool_call_parsers/hermes_parser.py b/environments/tool_call_parsers/hermes_parser.py
new file mode 100644
index 0000000000..c1902fd623
--- /dev/null
+++ b/environments/tool_call_parsers/hermes_parser.py
@@ -0,0 +1,73 @@
+"""
+Hermes tool call parser.
+
+Format: <tool_call>{"name": "func", "arguments": {...}}</tool_call>
+Based on VLLM's Hermes2ProToolParser.extract_tool_calls()
+"""
+
+import json
+import re
+import uuid
+from typing import List, Optional, Tuple
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+@register_parser("hermes")
+class HermesToolCallParser(ToolCallParser):
+    """
+    Tool call parser for the Hermes format.
+
+    Looks for <tool_call>...</tool_call> blocks whose body is JSON with "name" and
+    "arguments"; an unclosed <tool_call> running to end-of-string is also captured.
+    """
+
+    # First group: body of a closed tag; second group: body of an unclosed tag
+    PATTERN = re.compile(
+        r"<tool_call>\s*(.*?)\s*</tool_call>|<tool_call>\s*(.*)", re.DOTALL
+    )
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Split text into leading content and Hermes tool calls.
+
+        Returns (content_or_None, tool_calls); falls back to (text, None) when no
+        tool call is present or a non-empty captured body fails to parse.
+        """
+        if "<tool_call>" not in text:
+            return text, None
+
+        try:
+            calls: List[ChatCompletionMessageToolCall] = []
+            for closed_body, unclosed_body in self.PATTERN.findall(text):
+                payload = closed_body or unclosed_body
+                if not payload.strip():
+                    continue
+
+                spec = json.loads(payload)
+                func_name = spec["name"]
+                func_args = json.dumps(spec.get("arguments", {}), ensure_ascii=False)
+
+                calls.append(
+                    ChatCompletionMessageToolCall(
+                        id=f"call_{uuid.uuid4().hex[:8]}",
+                        type="function",
+                        function=Function(name=func_name, arguments=func_args),
+                    )
+                )
+
+            if not calls:
+                return text, None
+
+            # Content is whatever precedes the first <tool_call> tag
+            before_calls = text[: text.find("<tool_call>")].strip()
+            return (before_calls or None), calls
+
+        except Exception:
+            # Best effort: one malformed call turns the whole text into plain content
+            return text, None
diff --git a/environments/tool_call_parsers/kimi_k2_parser.py b/environments/tool_call_parsers/kimi_k2_parser.py
new file mode 100644
index 0000000000..29f40fc243
--- /dev/null
+++ b/environments/tool_call_parsers/kimi_k2_parser.py
@@ -0,0 +1,93 @@
+"""
+Kimi K2 tool call parser.
+
+Format:
+    <|tool_calls_section_begin|>
+    <|tool_call_begin|>function_id:0<|tool_call_argument_begin|>{"arg": "val"}<|tool_call_end|>
+    <|tool_calls_section_end|>
+
+The function_id format is typically "functions.func_name:index" or "func_name:index".
+
+Based on VLLM's KimiK2ToolParser.extract_tool_calls()
+"""
+
+import re
+import uuid
+from typing import List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+@register_parser("kimi_k2")
+class KimiK2ToolCallParser(ToolCallParser):
+    """
+    Tool call parser for Kimi K2.
+
+    Calls sit inside a section begin/end token pair, each wrapped in its own
+    begin/end tokens. The function name is read from the tool call ID: the part
+    before the first colon, after its last dot.
+    """
+
+    # Both the plural and the singular spelling of the section token are accepted
+    START_TOKENS = [
+        "<|tool_calls_section_begin|>",
+        "<|tool_call_section_begin|>",
+    ]
+
+    # Groups: tool_call_id (e.g., "functions.get_weather:0") and function_arguments
+    PATTERN = re.compile(
+        r"<\|tool_call_begin\|>\s*(?P<tool_call_id>[^<]+:\d+)\s*"
+        r"<\|tool_call_argument_begin\|>\s*"
+        r"(?P<function_arguments>(?:(?!<\|tool_call_begin\|>).)*?)\s*"
+        r"<\|tool_call_end\|>",
+        re.DOTALL,
+    )
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Extract Kimi K2 tool calls from text.
+
+        Returns (content_or_None, tool_calls), where content is the stripped text
+        before the earliest section start token, or (text, None) when no start
+        token or no complete tool call is found, or building a call fails.
+        """
+        # Offsets of whichever start-token variants occur in the text
+        section_starts = [
+            text.find(token)
+            for token in self.START_TOKENS
+            if token in text
+        ]
+        if not section_starts:
+            return text, None
+
+        try:
+            tool_calls: List[ChatCompletionMessageToolCall] = []
+            for call_id, raw_args in self.PATTERN.findall(text):
+                # "functions.get_weather:0" -> "get_weather"
+                id_head = call_id.split(":")[0]
+
+                tool_calls.append(
+                    ChatCompletionMessageToolCall(
+                        id=call_id,  # Keep the ID exactly as the model wrote it
+                        type="function",
+                        function=Function(
+                            name=id_head.split(".")[-1],
+                            arguments=raw_args.strip(),
+                        ),
+                    )
+                )
+
+            if not tool_calls:
+                return text, None
+
+            # Content is everything before the earliest section start token
+            content = text[: min(section_starts)].strip()
+            return (content or None), tool_calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/llama_parser.py b/environments/tool_call_parsers/llama_parser.py
new file mode 100644
index 0000000000..8eb2136a11
--- /dev/null
+++ b/environments/tool_call_parsers/llama_parser.py
@@ -0,0 +1,96 @@
+"""
+Llama 3.x / 4 tool call parser.
+
+Format: The model outputs JSON objects with "name" and "arguments" (or "parameters") keys.
+May be preceded by <|python_tag|> token. Supports multiple JSON objects separated
+by content or semicolons.
+
+Based on VLLM's Llama3JsonToolParser.extract_tool_calls()
+"""
+
+import json
+import re
+import uuid
+from typing import List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+@register_parser("llama3_json")
+@register_parser("llama4_json")
+class LlamaToolCallParser(ToolCallParser):
+    """
+    Parser for Llama 3.x and 4 JSON-format tool calls.
+
+    Finds JSON objects containing "name" + ("arguments" or "parameters") keys,
+    pulled out of mixed text with json.JSONDecoder.raw_decode.
+    """
+
+    BOT_TOKEN = "<|python_tag|>"
+
+    # Regex to find the start of potential JSON objects
+    JSON_START = re.compile(r"\{")
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Return (content_or_None, tool_calls), or (text, None) if no tool call is
+        found. Content is the text before the first call (or an earlier BOT_TOKEN).
+        """
+        # Quick check: need either the bot token or a JSON brace
+        if self.BOT_TOKEN not in text and "{" not in text:
+            return text, None
+
+        try:
+            decoder = json.JSONDecoder()
+            tool_calls: List[ChatCompletionMessageToolCall] = []
+            first_call_start = -1  # Offset of the first accepted tool call
+            decoded_end = 0  # Exclusive end offset of the last decoded object
+
+            for match in self.JSON_START.finditer(text):
+                start = match.start()
+                # Skip braces inside a decoded object. The end offset is exclusive,
+                # so a brace exactly at it opens a new object ("{...}{...}").
+                if start < decoded_end:
+                    continue
+
+                try:
+                    obj, decoded_end = decoder.raw_decode(text, start)
+                    # Must have "name" and either "arguments" or "parameters"
+                    name = obj.get("name")
+                    args = obj.get("arguments", obj.get("parameters"))
+                    if not name or args is None:
+                        continue
+
+                    # Normalize arguments to a JSON string
+                    if not isinstance(args, str):
+                        args = json.dumps(args, ensure_ascii=False)
+
+                    tool_calls.append(
+                        ChatCompletionMessageToolCall(
+                            id=f"call_{uuid.uuid4().hex[:8]}",
+                            type="function",
+                            function=Function(name=name, arguments=args),
+                        )
+                    )
+                    if first_call_start < 0:
+                        first_call_start = start
+                except (json.JSONDecodeError, KeyError, ValueError):
+                    continue
+
+            if not tool_calls:
+                return text, None
+
+            # Content ends at the first tool call, or at BOT_TOKEN if earlier
+            content_end = first_call_start
+            bot_index = text.find(self.BOT_TOKEN)
+            if 0 <= bot_index < content_end:
+                content_end = bot_index
+            return text[:content_end].strip() or None, tool_calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/longcat_parser.py b/environments/tool_call_parsers/longcat_parser.py
new file mode 100644
index 0000000000..afecdb8629
--- /dev/null
+++ b/environments/tool_call_parsers/longcat_parser.py
@@ -0,0 +1,69 @@
+"""
+Longcat Flash Chat tool call parser.
+
+Same as Hermes but uses <longcat_tool_call> tags instead of <tool_call>.
+Based on VLLM's LongcatFlashToolParser (extends Hermes2ProToolParser).
+"""
+
+import json
+import re
+import uuid
+from typing import List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+@register_parser("longcat")
+class LongcatToolCallParser(ToolCallParser):
+    """
+    Tool call parser for Longcat Flash Chat.
+    Works like the Hermes format, with <longcat_tool_call> as the tag name.
+    """
+
+    PATTERN = re.compile(
+        r"<longcat_tool_call>\s*(.*?)\s*</longcat_tool_call>|<longcat_tool_call>\s*(.*)",
+        re.DOTALL,
+    )
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Split text into leading content and Longcat tool calls.
+
+        Returns (content_or_None, tool_calls); falls back to (text, None) when no
+        tool call is present or a non-empty captured body fails to parse.
+        """
+        if "<longcat_tool_call>" not in text:
+            return text, None
+
+        try:
+            calls: List[ChatCompletionMessageToolCall] = []
+            for closed_body, unclosed_body in self.PATTERN.findall(text):
+                payload = closed_body or unclosed_body
+                if not payload.strip():
+                    continue
+
+                spec = json.loads(payload)
+                func_name = spec["name"]
+                func_args = json.dumps(spec.get("arguments", {}), ensure_ascii=False)
+
+                calls.append(
+                    ChatCompletionMessageToolCall(
+                        id=f"call_{uuid.uuid4().hex[:8]}",
+                        type="function",
+                        function=Function(name=func_name, arguments=func_args),
+                    )
+                )
+
+            if not calls:
+                return text, None
+
+            before_calls = text[: text.find("<longcat_tool_call>")].strip()
+            return (before_calls or None), calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/mistral_parser.py b/environments/tool_call_parsers/mistral_parser.py
new file mode 100644
index 0000000000..5526bdd010
--- /dev/null
+++ b/environments/tool_call_parsers/mistral_parser.py
@@ -0,0 +1,130 @@
+"""
+Mistral tool call parser.
+
+Supports two formats depending on tokenizer version:
+- Pre-v11: content[TOOL_CALLS] [{"name": ..., "arguments": {...}}, ...]
+- v11+:    content[TOOL_CALLS]tool_name1{"arg": "val"}[TOOL_CALLS]tool_name2{"arg": "val"}
+
+Based on VLLM's MistralToolParser.extract_tool_calls()
+The [TOOL_CALLS] token is the bot_token used by Mistral models.
+"""
+
+import json
+import re
+import uuid
+from typing import List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+def _generate_mistral_id() -> str:
+    """Return a random 9-character alphanumeric string used as a Mistral tool call ID."""
+    import random
+    import string
+    alphabet = string.ascii_letters + string.digits
+    return "".join(random.choices(alphabet, k=9))
+
+
+@register_parser("mistral")
+class MistralToolCallParser(ToolCallParser):
+    """
+    Parser for Mistral-format tool calls.
+
+    Detects format by checking if the content after [TOOL_CALLS] starts with '['
+    or '{' (pre-v11 JSON payload) or with a tool name (v11+ format).
+    """
+
+    # The [TOOL_CALLS] token -- may appear as different strings depending on tokenizer
+    BOT_TOKEN = "[TOOL_CALLS]"
+
+    # Last-resort regex for the pre-v11 format. It stops at the first "}", so it
+    # only recovers objects that contain no nested braces.
+    TOOL_CALL_REGEX = re.compile(r"\[?\s*(\{.*?\})\s*\]?", re.DOTALL)
+
+    @staticmethod
+    def _build_call(name: str, args: object) -> ChatCompletionMessageToolCall:
+        """Build one tool call, serializing dict arguments to a JSON string."""
+        if isinstance(args, dict):
+            args = json.dumps(args, ensure_ascii=False)
+        return ChatCompletionMessageToolCall(
+            id=_generate_mistral_id(),
+            type="function",
+            function=Function(name=name, arguments=args),
+        )
+
+    def _decode_pre_v11(self, raw: str) -> List[dict]:
+        """
+        Decode a pre-v11 payload into a list of tool call dicts.
+
+        Tries strict JSON first, then only the leading JSON value (so trailing
+        text after the array/object is tolerated), then TOOL_CALL_REGEX.
+        """
+        try:
+            parsed = json.loads(raw)
+        except json.JSONDecodeError:
+            try:
+                parsed, _ = json.JSONDecoder().raw_decode(raw)
+            except json.JSONDecodeError:
+                parsed = []
+                for raw_json in self.TOOL_CALL_REGEX.findall(raw):
+                    try:
+                        candidate = json.loads(raw_json)
+                    except json.JSONDecodeError:
+                        continue
+                    # Entries without a name are skipped on this path
+                    if "name" in candidate:
+                        parsed.append(candidate)
+        return [parsed] if isinstance(parsed, dict) else parsed
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Split text into leading content and Mistral tool calls.
+
+        Returns (content_or_None, tool_calls), or (text, None) when BOT_TOKEN is
+        absent, no tool call can be built, or parsing raises.
+        """
+        if self.BOT_TOKEN not in text:
+            return text, None
+
+        try:
+            parts = text.split(self.BOT_TOKEN)
+            content = parts[0].strip()
+            raw_tool_calls = parts[1:]
+
+            # Detect format: a leading '[' or '{' after the first token is pre-v11
+            first_raw = raw_tool_calls[0].strip() if raw_tool_calls else ""
+            is_pre_v11 = first_raw.startswith(("[", "{"))
+
+            tool_calls: List[ChatCompletionMessageToolCall] = []
+
+            if is_pre_v11:
+                # Pre-v11 format: [TOOL_CALLS] [{"name": ..., "arguments": {...}}]
+                for tc in self._decode_pre_v11(first_raw):
+                    tool_calls.append(
+                        self._build_call(tc["name"], tc.get("arguments", {}))
+                    )
+            else:
+                # v11+ format: [TOOL_CALLS]tool_name{args}[TOOL_CALLS]tool_name2{args2}
+                for raw in raw_tool_calls:
+                    raw = raw.strip()
+                    if "{" not in raw:
+                        continue
+
+                    # Tool name is the text before the first brace; the rest is args
+                    brace_idx = raw.find("{")
+                    tool_calls.append(
+                        self._build_call(raw[:brace_idx].strip(), raw[brace_idx:])
+                    )
+
+            if not tool_calls:
+                return text, None
+
+            return content if content else None, tool_calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/qwen3_coder_parser.py b/environments/tool_call_parsers/qwen3_coder_parser.py
new file mode 100644
index 0000000000..042e46f7bf
--- /dev/null
+++ b/environments/tool_call_parsers/qwen3_coder_parser.py
@@ -0,0 +1,163 @@
+"""
+Qwen3-Coder tool call parser.
+
+Format uses XML-style nested tags:
+    <tool_call>
+    <function=function_name>
+    <parameter=param_name>value</parameter>
+    <parameter=param_name2>value2</parameter>
+    </function>
+    </tool_call>
+
+Parameters are extracted from <parameter=name>value</parameter> tags and
+type-converted using the schema if available, otherwise treated as strings.
+
+Based on VLLM's Qwen3CoderToolParser.extract_tool_calls()
+"""
+
+import ast
+import json
+import re
+import uuid
+from typing import Any, Dict, List, Optional
+
+from openai.types.chat.chat_completion_message_tool_call import (
+    ChatCompletionMessageToolCall,
+    Function,
+)
+
+from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser
+
+
+def _try_convert_value(value: str) -> Any:
+    """
+    Best-effort conversion of a parameter value string to a native Python type.
+
+    "null" (any case) becomes None; otherwise the stripped text is tried as JSON,
+    then as a Python literal, and is returned as a plain string if both fail.
+    """
+    text = value.strip()
+
+    # Handle null
+    if text.lower() == "null":
+        return None
+
+    # JSON first (objects, arrays, numbers, ...), then Python literals (tuples, etc.)
+    decoders = (
+        (json.loads, (json.JSONDecodeError, TypeError)),
+        (ast.literal_eval, (ValueError, SyntaxError, TypeError)),
+    )
+    for decode, expected_errors in decoders:
+        try:
+            return decode(text)
+        except expected_errors:
+            continue
+
+    return text
+
+
+@register_parser("qwen3_coder")
+class Qwen3CoderToolCallParser(ToolCallParser):
+    """
+    Parser for Qwen3-Coder XML-format tool calls.
+
+    Uses nested XML tags: <tool_call><function=name><parameter=key>val</parameter></function></tool_call>
+    """
+
+    START_TOKEN = "<tool_call>"
+    FUNCTION_PREFIX = "<function="
+
+    # Find complete tool_call blocks (or unclosed at end)
+    TOOL_CALL_REGEX = re.compile(
+        r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL
+    )
+
+    # Find function blocks within a tool_call
+    FUNCTION_REGEX = re.compile(
+        r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL
+    )
+
+    # Find parameter blocks within a function
+    PARAMETER_REGEX = re.compile(
+        r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)",
+        re.DOTALL,
+    )
+
+    def _parse_function_call(self, function_str: str) -> Optional[ChatCompletionMessageToolCall]:
+        """Parse the text after "<function=" into a tool call; returns None when
+        a ValueError or IndexError occurs (e.g. no '>' ends the function name)."""
+        try:
+            # Extract function name: everything before the first '>'
+            gt_idx = function_str.index(">")
+            func_name = function_str[:gt_idx].strip()
+            params_str = function_str[gt_idx + 1:]
+
+            # Extract parameters
+            param_dict: Dict[str, Any] = {}
+            for match_text in self.PARAMETER_REGEX.findall(params_str):
+                # match_text looks like "name>value"; skip fragments with no '>'
+                name_part, sep, param_value = match_text.partition(">")
+                if not sep:
+                    continue
+
+                # Drop a single leading and trailing newline around the value
+                if param_value.startswith("\n"):
+                    param_value = param_value[1:]
+                if param_value.endswith("\n"):
+                    param_value = param_value[:-1]
+
+                param_dict[name_part.strip()] = _try_convert_value(param_value)
+
+            return ChatCompletionMessageToolCall(
+                id=f"call_{uuid.uuid4().hex[:24]}",
+                type="function",
+                function=Function(
+                    name=func_name,
+                    arguments=json.dumps(param_dict, ensure_ascii=False),
+                ),
+            )
+        except (ValueError, IndexError):
+            return None
+
+    def parse(self, text: str) -> ParseResult:
+        """
+        Extract Qwen3-Coder tool calls from text.
+
+        Returns (content_or_None, tool_calls), or (text, None) when no function
+        block can be parsed. Content is the stripped text before the first
+        <tool_call> (or "<function=" without one); empty content becomes None.
+        """
+        if self.FUNCTION_PREFIX not in text:
+            return text, None
+
+        try:
+            # Find all tool_call blocks
+            tc_matches = self.TOOL_CALL_REGEX.findall(text)
+            raw_blocks = [closed or unclosed for closed, unclosed in tc_matches]
+
+            # Fallback: if no tool_call tags, try the whole text
+            if not raw_blocks:
+                raw_blocks = [text]
+
+            # Parse every function block found in each tool_call
+            tool_calls: List[ChatCompletionMessageToolCall] = []
+            for block in raw_blocks:
+                for closed, unclosed in self.FUNCTION_REGEX.findall(block):
+                    tc = self._parse_function_call(closed or unclosed)
+                    if tc is not None:
+                        tool_calls.append(tc)
+
+            if not tool_calls:
+                return text, None
+
+            # Content before tool calls
+            first_tc = text.find(self.START_TOKEN)
+            if first_tc < 0:
+                first_tc = text.find(self.FUNCTION_PREFIX)
+            content = text[:first_tc].strip()
+
+            # Whitespace-only content is reported as None, same as no content
+            return content or None, tool_calls
+
+        except Exception:
+            return text, None
diff --git a/environments/tool_call_parsers/qwen_parser.py b/environments/tool_call_parsers/qwen_parser.py
new file mode 100644
index 0000000000..9c8a814199
--- /dev/null
+++ b/environments/tool_call_parsers/qwen_parser.py
@@ -0,0 +1,19 @@
+"""
+Qwen 2.5 tool call parser.
+
+Uses the same <tool_call> format as Hermes.
+Registered as a separate parser name for clarity when using --tool-parser=qwen.
+"""
+
+from environments.tool_call_parsers import register_parser
+from environments.tool_call_parsers.hermes_parser import HermesToolCallParser
+
+
+@register_parser("qwen")
+class QwenToolCallParser(HermesToolCallParser):
+    """
+    Tool call parser for Qwen 2.5.
+    Qwen emits the Hermes <tool_call>{"name": ..., "arguments": ...}</tool_call> format.
+    """
+
+    # Nothing to override: all parsing is inherited from HermesToolCallParser.
diff --git a/environments/tool_context.py b/environments/tool_context.py
new file mode 100644
index 0000000000..4c9f0d3632
--- /dev/null
+++ b/environments/tool_context.py
@@ -0,0 +1,246 @@
+"""
+ToolContext -- Unrestricted Tool Access for Reward Functions
+
+A per-rollout handle that gives reward/verification functions direct access to
+ALL hermes-agent tools, scoped to the rollout's task_id. The same task_id means
+the terminal/browser session is the SAME one the model used during its rollout --
+all state (files, processes, browser tabs) is preserved.
+
+The verifier author decides which tools to use. Nothing is hardcoded or gated.
+
+Example usage in a compute_reward():
+    async def compute_reward(self, item, result, ctx):
+        # Run tests in the model's terminal sandbox
+        test = ctx.terminal("pytest -v")
+        if test["exit_code"] == 0:
+            return 1.0
+
+        # Check if a file was created
+        content = ctx.read_file("/workspace/solution.py")
+        if content.get("content"):
+            return 0.5
+
+        return 0.0
+"""
+
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+from model_tools import handle_function_call
+from tools.terminal_tool import cleanup_vm
+from tools.browser_tool import cleanup_browser
+
+logger = logging.getLogger(__name__)
+
+
+class ToolContext:
+    """
+    Per-rollout handle exposing every hermes-agent tool to reward code.
+
+    An instance is passed to compute_reward() so a verifier can run terminal
+    commands, read/write files, search the web, drive the browser, and so on.
+    Terminal, file, browser and call_tool() calls carry the rollout's task_id.
+    """
+
+    def __init__(self, task_id: str):
+        self.task_id = task_id
+
+    # -------------------------------------------------------------------------
+    # Terminal tools
+    # -------------------------------------------------------------------------
+
+    def terminal(self, command: str, timeout: int = 180) -> Dict[str, Any]:
+        """
+        Execute a shell command in the rollout's terminal session.
+
+        Args:
+            command: Shell command to execute
+            timeout: Command timeout in seconds
+
+        Returns:
+            Dict with 'exit_code' (int) and 'output' (str). A reply that is
+            not valid JSON is returned as exit_code -1 with the raw reply
+            as the output.
+        """
+        arguments = {"command": command, "timeout": timeout}
+        raw = handle_function_call("terminal", arguments, task_id=self.task_id)
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"exit_code": -1, "output": raw}
+        return decoded
+
+    # -------------------------------------------------------------------------
+    # File tools
+    # -------------------------------------------------------------------------
+
+    def read_file(self, path: str) -> Dict[str, Any]:
+        """
+        Read a file from the rollout's filesystem via the read_file tool.
+
+        Args:
+            path: File path to read
+
+        Returns:
+            Decoded reply dict; a non-JSON reply becomes {"error": <raw reply>}
+        """
+        arguments = {"path": path}
+        raw = handle_function_call("read_file", arguments, task_id=self.task_id)
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    def write_file(self, path: str, content: str) -> Dict[str, Any]:
+        """
+        Write a file in the rollout's filesystem via the write_file tool.
+
+        Args:
+            path: File path to write
+            content: Content to write
+
+        Returns:
+            Decoded reply dict; a non-JSON reply becomes {"error": <raw reply>}
+        """
+        arguments = {"path": path, "content": content}
+        raw = handle_function_call("write_file", arguments, task_id=self.task_id)
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    def search(self, query: str, path: str = ".") -> Dict[str, Any]:
+        """
+        Search for text in the rollout's filesystem via the search tool.
+
+        Args:
+            query: Search query
+            path: Directory to search in
+
+        Returns:
+            Decoded reply dict; a non-JSON reply becomes {"error": <raw reply>}
+        """
+        arguments = {"query": query, "path": path}
+        raw = handle_function_call("search", arguments, task_id=self.task_id)
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    # -------------------------------------------------------------------------
+    # Web tools
+    # -------------------------------------------------------------------------
+
+    def web_search(self, query: str) -> Dict[str, Any]:
+        """
+        Search the web; returns the decoded reply, or {"error": raw} if not JSON.
+
+        Unlike the terminal/file/browser wrappers, no task_id is passed along.
+
+        Args:
+            query: Search query
+        """
+        raw = handle_function_call("web_search", {"query": query})
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    def web_extract(self, urls: List[str]) -> Dict[str, Any]:
+        """
+        Extract URL content; returns the decoded reply, or {"error": raw} if not JSON.
+
+        Unlike the terminal/file/browser wrappers, no task_id is passed along.
+
+        Args:
+            urls: List of URLs to extract content from
+        """
+        raw = handle_function_call("web_extract", {"urls": urls})
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    # -------------------------------------------------------------------------
+    # Browser tools
+    # -------------------------------------------------------------------------
+
+    def browser_navigate(self, url: str) -> Dict[str, Any]:
+        """
+        Point the rollout's browser session at a URL.
+
+        Args:
+            url: URL to navigate to
+
+        Returns:
+            Decoded reply dict; a non-JSON reply becomes {"error": <raw reply>}
+        """
+        arguments = {"url": url}
+        raw = handle_function_call("browser_navigate", arguments, task_id=self.task_id)
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    def browser_snapshot(self) -> Dict[str, Any]:
+        """
+        Capture a snapshot of the page currently open in the browser session.
+
+        Returns:
+            Decoded reply dict (page content/accessibility snapshot); a
+            non-JSON reply becomes {"error": <raw reply>}
+        """
+        raw = handle_function_call("browser_snapshot", {}, task_id=self.task_id)
+        try:
+            decoded = json.loads(raw)
+        except json.JSONDecodeError:
+            decoded = {"error": raw}
+        return decoded
+
+    # -------------------------------------------------------------------------
+    # Generic tool access
+    # -------------------------------------------------------------------------
+
+    def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str:
+        """
+        Invoke an arbitrary hermes-agent tool by name with the rollout's task_id.
+
+        Generic escape hatch for tools that have no convenience wrapper on this
+        class. Unlike the wrappers, the reply is handed back without decoding.
+
+        Args:
+            tool_name: Name of the tool (e.g., "vision_analyze", "skills_list")
+            arguments: Dict of arguments for the tool
+
+        Returns:
+            Raw JSON string result from the tool
+        """
+        return handle_function_call(tool_name, arguments, task_id=self.task_id)
+
+    # -------------------------------------------------------------------------
+    # Cleanup
+    # -------------------------------------------------------------------------
+
+    def cleanup(self):
+        """
+        Release the rollout's terminal VM and browser session (best effort).
+
+        Each release is attempted independently; a failure is only logged at
+        debug level. Per the original notes, the base environment calls this.
+        """
+        releasers = (
+            (cleanup_vm, "VM cleanup for task %s: %s"),
+            (cleanup_browser, "Browser cleanup for task %s: %s"),
+        )
+        for release, failure_msg in releasers:
+            try:
+                release(self.task_id)
+            except Exception as exc:
+                logger.debug(failure_msg, self.task_id, exc)
diff --git a/hermes_agent.egg-info/PKG-INFO b/hermes_agent.egg-info/PKG-INFO
deleted file mode 100644
index 159a406927..0000000000
--- a/hermes_agent.egg-info/PKG-INFO
+++ /dev/null
@@ -1,868 +0,0 @@
-Metadata-Version: 2.4
-Name: hermes-agent
-Version: 0.1.0
-Summary: AI agent with advanced tool-calling and toolsets
-Author: Nous Research
-License: MIT
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-Requires-Dist: openai
-Requires-Dist: python-dotenv
-Requires-Dist: fire
-Requires-Dist: httpx
-Requires-Dist: rich
-Requires-Dist: tenacity
-Requires-Dist: pyyaml
-Requires-Dist: requests
-Requires-Dist: jinja2
-Requires-Dist: pydantic>=2.0
-Requires-Dist: firecrawl-py
-Requires-Dist: fal-client
-Requires-Dist: litellm>=1.75.5
-Requires-Dist: typer
-Requires-Dist: platformdirs
-Provides-Extra: modal
-Requires-Dist: modal; extra == "modal"
-Requires-Dist: boto3; extra == "modal"
-Provides-Extra: dev
-Requires-Dist: pytest; extra == "dev"
-Requires-Dist: pytest-asyncio; extra == "dev"
-Provides-Extra: messaging
-Requires-Dist: python-telegram-bot>=20.0; extra == "messaging"
-Requires-Dist: discord.py>=2.0; extra == "messaging"
-Provides-Extra: cron
-Requires-Dist: croniter; extra == "cron"
-Provides-Extra: all
-Requires-Dist: croniter; extra == "all"
-Requires-Dist: python-telegram-bot>=20.0; extra == "all"
-Requires-Dist: discord.py>=2.0; extra == "all"
-
-# Hermes Agent
-
-An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.
-
-## Features
-
-- **Interactive CLI**: Beautiful terminal interface with animated feedback, personalities, and session management
-- **Messaging Gateway**: Connect to Telegram, Discord, and WhatsApp for conversational AI anywhere
-- **Web Tools**: Search, extract content, and crawl websites
-- **Terminal Tools**: Execute commands via local, Docker, Singularity, Modal, or SSH backends
-- **Browser Tools**: Automate web browsers to navigate, click, type, and extract content
-- **Vision Tools**: Analyze images from URLs
-- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
-- **Creative Tools**: Generate images from text prompts
-- **Skills Tools**: On-demand knowledge documents with progressive disclosure
-- **Toolsets System**: Organize tools into logical groups for different scenarios
-- **Scheduled Tasks**: Cron jobs for automated agent tasks with delivery to platforms
-- **Context Compression**: Automatic summarization when approaching context limits
-- **Batch Processing**: Process datasets in parallel with checkpointing and statistics tracking
-- **Ephemeral System Prompts**: Guide model behavior without polluting training datasets
-
-## Installation
-
-### Quick Install (Recommended)
-
-**Linux/macOS:**
-```bash
-curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
-```
-
-**Windows (PowerShell):**
-```powershell
-irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex
-```
-
-This installer will:
-- Clone the repository to `~/.hermes-agent`
-- Create a virtual environment and install dependencies
-- Set up the `hermes` command in your PATH
-- Run an interactive setup wizard to configure API keys
-
-### Manual Installation
-
-If you prefer to install manually:
-
-```bash
-# Clone with submodules
-git clone --recurse-submodules https://github.com/NousResearch/Hermes-Agent.git
-cd Hermes-Agent
-
-# Run the setup script
-./setup-hermes.sh
-```
-
-Or step-by-step:
-
-```bash
-# Create and activate virtual environment
-python3 -m venv venv
-source venv/bin/activate  # Windows: venv\Scripts\activate
-
-# Install in editable mode with all extras
-pip install -e ".[all]"
-
-# Or install dependencies manually
-pip install -r requirements.txt
-pip install -e ./mini-swe-agent
-
-# Copy and configure environment
-cp .env.example .env
-# Edit .env with your API keys
-
-# Run the setup wizard
-hermes setup
-```
-
-## Quick Start
-
-Once installed, the `hermes` command is your main entry point:
-
-```bash
-hermes                    # Interactive chat (default)
-hermes chat               # Same as above
-hermes chat -q "Hello"    # Single query, then exit
-hermes setup              # Configure API keys and settings
-hermes status             # Show configuration status
-hermes doctor             # Diagnose issues
-hermes gateway            # Start messaging gateway (Telegram/Discord/WhatsApp)
-hermes cron daemon        # Run cron job scheduler
-hermes version            # Show version info
-```
-
-**Legacy `./hermes` script:**
-```bash
-# The old CLI script still works:
-./hermes
-
-# Or with options:
-./hermes --model "anthropic/claude-sonnet-4" --toolsets "web,terminal"
-```
-
-The CLI provides:
-- Animated spinners during thinking and tool execution
-- Kawaii-style feedback messages
-- `/commands` for configuration, history, and session management
-- Customizable personalities (`/personality kawaii`, `/personality pirate`, etc.)
-- Persistent configuration via `cli-config.yaml`
-
-## Configuration
-
-### Environment Variables
-```bash
-# Copy the example environment file
-cp .env.example .env
-
-# Edit .env and add your API keys
-nano .env  # or use your preferred editor
-```
-
-**Required API Keys:**
-- `OPENROUTER_API_KEY` - LLM access via OpenRouter (get at: https://openrouter.ai/keys)
-- `FIRECRAWL_API_KEY` - Web tools (get at: https://firecrawl.dev/)
-- `NOUS_API_KEY` - Vision & reasoning tools (get at: https://inference-api.nousresearch.com/)
-- `FAL_KEY` - Image generation (get at: https://fal.ai/)
-
-**Optional API Keys (for specific features):**
-- `BROWSERBASE_API_KEY` - Browser automation (get at: https://browserbase.com/)
-- `BROWSERBASE_PROJECT_ID` - From Browserbase dashboard
-- `MORPH_API_KEY` - For legacy Hecate terminal backend (get at: https://morph.so/)
-
-### 4. Configure Terminal Backend
-
-The terminal tool uses **mini-swe-agent** environments. Configure in `.env` or `cli-config.yaml`:
-
-```bash
-# Backend: "local", "docker", "singularity", "modal", or "ssh"
-TERMINAL_ENV=local          # Default: runs on host machine (no isolation)
-TERMINAL_ENV=ssh            # Remote execution via SSH (agent code stays local)
-TERMINAL_ENV=singularity    # Recommended for HPC: Apptainer/Singularity containers
-TERMINAL_ENV=docker         # Isolated Docker containers
-TERMINAL_ENV=modal          # Cloud execution via Modal
-
-# Container image (for docker/singularity/modal backends)
-TERMINAL_DOCKER_IMAGE=python:3.11-slim
-TERMINAL_SINGULARITY_IMAGE=docker://python:3.11-slim
-TERMINAL_TIMEOUT=60
-
-# SSH backend (for ssh)
-TERMINAL_SSH_HOST=my-server.example.com
-TERMINAL_SSH_USER=myuser
-TERMINAL_SSH_KEY=~/.ssh/id_rsa  # Optional, uses ssh-agent if not set
-```
-
-**Backend Requirements:**
-- **local**: No extra setup (runs directly on your machine, no isolation)
-- **ssh**: SSH access to remote machine (great for sandboxing - agent can't touch its own code)
-- **singularity**: Requires Apptainer or Singularity installed (common on HPC clusters, no root needed)
-- **docker**: Requires Docker installed and user in `docker` group
-- **modal**: Requires Modal account (see setup below)
-
-### Singularity/Apptainer Setup (Recommended for HPC)
-
-Singularity/Apptainer provides rootless container execution, ideal for HPC clusters:
-
-```bash
-# 1. Verify Apptainer is installed
-apptainer --version  # or: singularity --version
-
-# 2. Set up cache directories (important for parallel workers)
-# Use /scratch if available (HPC), otherwise /tmp
-export APPTAINER_CACHEDIR=/scratch/$USER/.apptainer
-export APPTAINER_TMPDIR=/scratch/$USER/.apptainer/tmp
-mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
-
-# 3. Pre-build SIF image (recommended for parallel batch processing)
-# This avoids race conditions when multiple workers start simultaneously
-apptainer build $APPTAINER_CACHEDIR/python-nodejs.sif docker://nikolaik/python-nodejs:python3.11-nodejs20
-
-# 4. Configure .env to use the local SIF
-TERMINAL_ENV=singularity
-TERMINAL_SINGULARITY_IMAGE=/scratch/$USER/.apptainer/python-nodejs.sif
-```
-
-**Tip:** The batch scripts in `configs/` automatically handle SIF pre-building if `/scratch` is available.
-
-### Modal Cloud Backend Setup
-
-[Modal](https://modal.com) provides serverless cloud compute for running sandboxed environments at scale.
-
-```bash
-# 1. Install Modal and dependencies
-pip install modal boto3
-
-# 2. Authenticate with Modal (opens browser)
-modal setup
-
-# 3. Set terminal backend to modal in .env
-TERMINAL_ENV=modal
-```
-
-Modal uses CLI-based authentication (stored in `~/.modal/`), so no API key is needed in `.env`. After running `modal setup`, commands will automatically execute in Modal's cloud sandboxes.
-
-### Browser Tools Setup
-
-Browser tools enable the agent to navigate websites, fill forms, click buttons, and extract content. They use [agent-browser](https://github.com/vercel-labs/agent-browser) CLI with [Browserbase](https://browserbase.com) cloud execution.
-
-```bash
-# 1. Install Node.js (if not already installed)
-# Use nvm (recommended) or your package manager
-
-# 2. Install agent-browser CLI (choose one option):
-npm install -g agent-browser     # Option A: Global install (recommended)
-npm install                      # Option B: Local install (uses npx fallback)
-
-# 3. Get Browserbase credentials
-# Sign up at https://browserbase.com/ and get your:
-# - API Key (from Settings → API Keys)
-# - Project ID (from your project dashboard)
-
-# 4. Add to your .env file:
-BROWSERBASE_API_KEY=your_api_key_here
-BROWSERBASE_PROJECT_ID=your_project_id_here
-```
-
-**Available Browser Tools:**
-
-| Tool | Description |
-|------|-------------|
-| `browser_navigate` | Navigate to a URL |
-| `browser_snapshot` | Get text-based page snapshot with element refs |
-| `browser_click` | Click an element by ref (e.g., `@e5`) |
-| `browser_type` | Type text into an input field |
-| `browser_scroll` | Scroll up or down |
-| `browser_back` | Go back in browser history |
-| `browser_press` | Press a keyboard key (Enter, Tab, etc.) |
-| `browser_close` | Close the browser session |
-| `browser_get_images` | Get list of images on the page |
-
-**Example Usage:**
-```bash
-# Use browser tools with web search and vision
-python run_agent.py \
-  --query "Go to amazon.com and find the price of the latest Kindle" \
-  --enabled_toolsets=browser,web,vision
-
-# Use browser-focused distribution
-python batch_runner.py \
-  --dataset_file=browser_tasks.jsonl \
-  --distribution=browser_use \
-  --run_name=browser_run
-```
-
-See `.env.example` for all available configuration options including debug settings.
-
-### Skills Tools
-
-Skills are on-demand knowledge documents the agent can load when needed. They follow a **progressive disclosure** pattern to minimize token usage:
-
-```
-skills/
-├── mlops/                    # Category folder
-│   ├── axolotl/             # Skill folder
-│   │   ├── SKILL.md         # Main instructions (required)
-│   │   ├── references/      # Additional docs, API specs
-│   │   └── templates/       # Output formats, configs
-│   └── vllm/
-│       └── SKILL.md
-```
-
-**Available Skills Tools:**
-
-| Tool | Description |
-|------|-------------|
-| `skills_categories` | List available skill categories (~50 tokens) |
-| `skills_list` | List skills with name + description (~3k tokens for 40 skills) |
-| `skill_view` | Load full skill content, tags, and linked files |
-
-**Example Usage:**
-```bash
-# Use skills tools
-python run_agent.py \
-  --query "What skills do you have for fine-tuning? Show me the axolotl skill." \
-  --enabled_toolsets=skills
-```
-
-**Creating Skills:**
-
-Skills use YAML frontmatter for metadata:
-```yaml
----
-name: my-skill
-description: Brief description shown in skills_list
-tags: [tag1, tag2]
-related_skills: [other-skill]
-version: 1.0.0
----
-# Skill Content
-
-Instructions, examples, and guidelines here...
-```
-
-Skills can include:
-- `references/` - Additional documentation, API specs, examples
-- `templates/` - Output formats, config files, boilerplate code
-- `scripts/` - Executable helpers (Python, shell scripts)
-
-## Session Logging
-
-Every conversation is automatically logged to `logs/` for debugging and inspection:
-
-```
-logs/
-├── session_20260201_143052_a1b2c3.json
-├── session_20260201_150217_d4e5f6.json
-└── ...
-```
-
-**Log Format:**
-```json
-{
-  "session_id": "20260201_143052_a1b2c3",
-  "model": "anthropic/claude-sonnet-4",
-  "session_start": "2026-02-01T14:30:52.123456",
-  "last_updated": "2026-02-01T14:35:12.789012",
-  "message_count": 8,
-  "conversations": [
-    {"from": "system", "value": "..."},
-    {"from": "human", "value": "..."},
-    {"from": "gpt", "value": "..."},
-    {"from": "tool", "value": "..."}
-  ]
-}
-```
-
-- **Automatic**: Logs are created and updated automatically after each conversation turn
-- **Session ID in Banner**: The CLI displays the session ID in the welcome banner
-- **Trajectory Format**: Uses the same format as batch processing for consistency
-- **Git Ignored**: `logs/` is in `.gitignore` so logs aren't committed
-
-## Context Compression
-
-Long conversations can exceed the model's context limit. Hermes Agent automatically compresses context when approaching the limit:
-
-**How it works:**
-1. Tracks actual token usage from API responses (`usage.prompt_tokens`)
-2. When tokens reach 85% of model's context limit, triggers compression
-3. Protects first 3 turns (system prompt, initial request, first response)
-4. Protects last 4 turns (recent context is most relevant)
-5. Summarizes middle turns using a fast/cheap model (Gemini Flash)
-6. Inserts summary as a user message, conversation continues seamlessly
-
-**Configuration (`cli-config.yaml`):**
-```yaml
-compression:
-  enabled: true                    # Enable auto-compression (default)
-  threshold: 0.85                  # Compress at 85% of context limit
-  summary_model: "google/gemini-2.0-flash-001"
-```
-
-**Or via environment variables:**
-```bash
-CONTEXT_COMPRESSION_ENABLED=true
-CONTEXT_COMPRESSION_THRESHOLD=0.85
-CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001
-```
-
-**When compression triggers, you'll see:**
-```
-📦 Context compression triggered (170,000 tokens ≥ 170,000 threshold)
-   📊 Model context limit: 200,000 tokens (85% = 170,000)
-   🗜️  Summarizing turns 4-15 (12 turns)
-   ✅ Compressed: 20 → 9 messages (~45,000 tokens saved)
-```
-
-## Scheduled Tasks (Cron Jobs)
-
-Hermes Agent can schedule automated tasks to run in the future - either one-time reminders or recurring jobs.
-
-### CLI Commands
-
-```bash
-# List scheduled jobs
-/cron
-
-# Add a one-shot reminder (runs once in 30 minutes)
-/cron add 30m Remind me to check the build status
-
-# Add a recurring job (every 2 hours)
-/cron add "every 2h" Check server status at 192.168.1.100 and report any issues
-
-# Add a cron expression (daily at 9am)
-/cron add "0 9 * * *" Generate a morning briefing summarizing GitHub notifications
-
-# Remove a job
-/cron remove abc123def456
-```
-
-### Agent Self-Scheduling
-
-The agent can also schedule its own follow-up tasks using tools:
-
-```python
-# Available when using hermes-cli toolset (default for CLI)
-schedule_cronjob(prompt="...", schedule="30m", repeat=1)  # One-shot
-schedule_cronjob(prompt="...", schedule="every 2h")       # Recurring
-list_cronjobs()                                            # View all jobs
-remove_cronjob(job_id="...")                              # Cancel a job
-```
-
-**⚠️ Important:** Cronjobs run in **isolated sessions with NO prior context**. The prompt must be completely self-contained with all necessary information (file paths, URLs, server addresses, etc.). The future agent will not remember anything from the current conversation.
-
-### Schedule Formats
-
-| Format | Example | Description |
-|--------|---------|-------------|
-| Duration | `30m`, `2h`, `1d` | One-shot delay from now |
-| Interval | `every 30m`, `every 2h` | Recurring at fixed intervals |
-| Cron | `0 9 * * *` | Cron expression (requires `croniter`) |
-| Timestamp | `2026-02-03T14:00` | One-shot at specific time |
-
-### Repeat Options
-
-| repeat | Behavior |
-|--------|----------|
-| (omitted) | One-shot schedules run once; intervals/cron run forever |
-| `1` | Run once then auto-delete |
-| `N` | Run N times then auto-delete |
-
-### Running the Cron Daemon
-
-Jobs are stored in `~/.hermes/cron/jobs.json` and executed by a scheduler:
-
-```bash
-# Option 1: Built-in daemon (checks every 60 seconds)
-python cli.py --cron-daemon
-
-# Option 2: System cron integration (run once per minute)
-# Add to crontab: crontab -e
-*/1 * * * * cd ~/hermes-agent && python cli.py --cron-tick-once >> ~/.hermes/cron/cron.log 2>&1
-```
-
-### Job Output
-
-Job outputs are saved to `~/.hermes/cron/output/{job_id}/{timestamp}.md` for review.
-
-## Messaging Gateway (Telegram, Discord, WhatsApp)
-
-Connect Hermes Agent to messaging platforms so you can chat from anywhere.
-
-### Quick Start
-
-```bash
-# 1. Add your bot token to .env
-echo 'TELEGRAM_BOT_TOKEN="your_token"' >> .env
-
-# 2. Test the gateway (foreground)
-./scripts/hermes-gateway run
-
-# 3. Install as a background service
-./scripts/hermes-gateway install
-
-# 4. Manage the service
-./scripts/hermes-gateway start   # Start
-./scripts/hermes-gateway stop    # Stop
-./scripts/hermes-gateway status  # Check status
-```
-
-### Supported Platforms
-
-| Platform | Setup | Toolset |
-|----------|-------|---------|
-| Telegram | Bot via @BotFather | `hermes-telegram` |
-| Discord | Bot via Developer Portal | `hermes-discord` |
-| WhatsApp | Node.js bridge | `hermes-whatsapp` |
-
-### Session Management
-
-- Sessions persist across messages (agent remembers context)
-- Reset policies: daily (4am), idle (2 hours), or both
-- Manual reset: send `/new` or `/reset`
-
-### Cron Job Delivery
-
-Schedule tasks that deliver to specific platforms:
-
-```python
-schedule_cronjob(
-    prompt="Check server status...",
-    schedule="every 1h",
-    deliver="telegram"  # or "origin", "discord", etc.
-)
-```
-
-### CLI Commands
-
-| Command | Description |
-|---------|-------------|
-| `/platforms` | Show gateway configuration status |
-| `--gateway` | Start the gateway (CLI flag) |
-
-See [docs/messaging.md](docs/messaging.md) for full setup instructions.
-
-## Interactive CLI
-
-The CLI provides a rich interactive experience for working with the agent.
-
-### Running the CLI
-
-```bash
-# Basic usage
-./hermes
-
-# With specific model
-./hermes --model "anthropic/claude-sonnet-4"
-
-# With specific toolsets
-./hermes --toolsets "web,terminal,skills"
-```
-
-### CLI Commands
-
-| Command | Description |
-|---------|-------------|
-| `/help` | Show available commands |
-| `/tools` | List available tools by toolset |
-| `/toolsets` | List available toolsets |
-| `/model [name]` | Show or change the current model |
-| `/prompt [text]` | View/set custom system prompt |
-| `/personality [name]` | Set a predefined personality |
-| `/clear` | Clear screen and reset conversation |
-| `/reset` | Reset conversation only |
-| `/history` | Show conversation history |
-| `/save` | Save current conversation to file |
-| `/config` | Show current configuration |
-| `/cron` | Manage scheduled tasks (list, add, remove) |
-| `/platforms` | Show gateway/messaging platform status |
-| `/quit` | Exit the CLI |
-
-### Configuration
-
-Copy `cli-config.yaml.example` to `cli-config.yaml` and customize:
-
-```yaml
-# Model settings
-model:
-  default: "anthropic/claude-sonnet-4"
-
-# Terminal backend (local, docker, singularity, modal, or ssh)
-terminal:
-  env_type: "local"
-  cwd: "."  # Use current directory
-
-# Or use SSH for remote execution (keeps agent code isolated)
-# terminal:
-#   env_type: "ssh"
-#   ssh_host: "my-server.example.com"
-#   ssh_user: "myuser"
-#   ssh_key: "~/.ssh/id_rsa"
-#   cwd: "/home/myuser/project"
-
-# Enable specific toolsets
-toolsets:
-  - all  # or: web, terminal, browser, vision, etc.
-
-# Custom personalities (use with /personality command)
-agent:
-  personalities:
-    helpful: "You are a helpful assistant."
-    kawaii: "You are a kawaii assistant! Use cute expressions..."
-```
-
-### Personalities
-
-Built-in personalities available via `/personality`:
-- `helpful`, `concise`, `technical`, `creative`, `teacher`
-- `kawaii`, `catgirl`, `pirate`, `shakespeare`, `surfer`
-- `noir`, `uwu`, `philosopher`, `hype`
-
-## Toolsets System
-
-The agent uses a toolsets system for organizing and managing tools. All tools must be part of a toolset to be accessible - individual tool selection is not supported. This ensures consistent and logical grouping of capabilities.
-
-### Key Concepts
-
-- **Toolsets**: Logical groups of tools for specific use cases (e.g., "research", "development", "debugging")
-- **Composition**: Toolsets can include other toolsets for powerful combinations
-- **Custom Toolsets**: Create your own toolsets at runtime or by editing `toolsets.py`
-- **Toolset-Only Access**: Tools are only accessible through toolsets, not individually
-
-### Available Toolsets
-
-See `toolsets.py` for the complete list of predefined toolsets including:
-- Basic toolsets (web, terminal, vision, creative, reasoning)
-- Composite toolsets (research, development, analysis, etc.)
-- Scenario-specific toolsets (debugging, documentation, API testing, etc.)
-- Special toolsets (safe mode without terminal, minimal, offline)
-
-### Using Toolsets
-
-```bash
-# Use a predefined toolset
-python run_agent.py --enabled_toolsets=research --query "Find latest AI papers"
-
-# Combine multiple toolsets
-python run_agent.py --enabled_toolsets=web,vision --query "Analyze this website"
-
-# Enable all toolsets explicitly (same as omitting the flag)
-python run_agent.py --enabled_toolsets=all --query "Do web research and run commands if helpful"
-
-# Safe mode (no terminal access)
-python run_agent.py --enabled_toolsets=safe --query "Help without running commands"
-
-# List all available toolsets and tools
-python run_agent.py --list_tools
-```
-
-See `toolsets.py` for the complete list of available toolsets and how to create custom ones.
-
-## Basic Usage
-
-### Default (all tools enabled)
-```bash
-# Uses OpenRouter by default - just set OPENROUTER_API_KEY in .env
-python run_agent.py \
-  --query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \
-  --max_turns 20 \
-  --model anthropic/claude-sonnet-4-20250514
-```
-
-### With specific toolset
-```bash
-python run_agent.py \
-  --query "Debug this Python error" \
-  --enabled_toolsets=debugging \
-  --model anthropic/claude-sonnet-4-20250514
-```
-
-### Python API
-```python
-from run_agent import AIAgent
-
-# Uses OpenRouter by default (reads OPENROUTER_API_KEY from .env)
-agent = AIAgent(
-    model="anthropic/claude-sonnet-4-20250514",
-    enabled_toolsets=["research"]
-)
-response = agent.chat("Find information about quantum computing")
-
-# Create custom toolset at runtime
-from toolsets import create_custom_toolset
-
-create_custom_toolset(
-    name="my_tools",
-    description="My custom toolkit",
-    tools=["web_search"],
-    includes=["terminal", "vision"]
-)
-
-agent = AIAgent(enabled_toolsets=["my_tools"])
-```
-
-## Batch Processing
-
-Process multiple prompts from a dataset in parallel with automatic checkpointing and statistics tracking:
-
-```bash
-# Basic batch processing
-python batch_runner.py \
-  --dataset_file=prompts.jsonl \
-  --batch_size=20 \
-  --run_name=my_run
-
-# With specific distribution
-python batch_runner.py \
-  --dataset_file=prompts.jsonl \
-  --batch_size=20 \
-  --run_name=image_run \
-  --distribution=image_gen \
-  --num_workers=4
-```
-
-**Key Features:**
-- Parallel processing with configurable workers
-- Toolset distributions for varied data generation
-- Automatic checkpointing and resume capability
-- Combined output in `data/<run_name>/trajectories.jsonl`
-- Tool usage statistics and success rates
-
-Use `--list_distributions` to see available toolset distributions for varied data generation.
-
-### Trajectory Compression
-
-Post-process trajectories to fit within token budgets for training:
-
-```bash
-# Compress a directory of JSONL files
-python trajectory_compressor.py --input=data/my_run
-
-# Compress a single JSONL file
-python trajectory_compressor.py --input=data/trajectories.jsonl
-
-# Compress a 15% sample (useful for creating smaller training sets)
-python trajectory_compressor.py --input=data/trajectories.jsonl --sample_percent=15
-
-# Custom output and token target
-python trajectory_compressor.py \
-  --input=data/trajectories.jsonl \
-  --output=data/compressed.jsonl \
-  --target_max_tokens=16000
-```
-
-**Features:**
-- Protects first turns (system, human, first GPT response, first tool call)
-- Protects last N turns (configurable)
-- Summarizes middle turns using LLM to fit target token budget
-- Supports both directory and single file input
-- Optional random sampling with `--sample_percent`
-- Configurable via `configs/trajectory_compression.yaml`
-
-### Ephemeral System Prompts
-
-The ephemeral system prompt feature allows you to guide the model's behavior during batch processing **without** saving that prompt to the training dataset trajectories. This is useful for:
-
-- Guiding model behavior during data collection
-- Adding task-specific instructions 
-- Keeping saved trajectories clean and focused on tool-calling format
-
-**Example:**
-```bash
-python batch_runner.py \
-  --dataset_file=prompts.jsonl \
-  --batch_size=10 \
-  --run_name=my_run \
-  --ephemeral_system_prompt="You are a helpful assistant focused on image generation."
-```
-
-The ephemeral prompt will influence the model's behavior during execution, but **only the standard tool-calling system prompt** will be saved in the trajectory files.
-
-The ephemeral prompt influences model behavior during execution, but **only the standard tool-calling system prompt** is saved in trajectory files.
-
-## Command Line Arguments
-
-**Single Agent (`run_agent.py`):**
-- `--query`: The question or task for the agent
-- `--model`: Model to use (default: claude-opus-4-20250514)
-- `--api_key`: API key for authentication
-- `--base_url`: API endpoint URL
-- `--max_turns`: Maximum number of tool-calling iterations
-- `--enabled_toolsets`: Comma-separated list of toolsets to enable. Use `all` (or `*`) to enable everything. If omitted, all toolsets are enabled by default.
-- `--disabled_toolsets`: Comma-separated list of toolsets to disable
-- `--list_tools`: List all available toolsets and tools
-- `--save_trajectories`: Save conversation trajectories to JSONL files
-
-**Batch Processing (`batch_runner.py`):**
-- `--dataset_file`: Path to JSONL file with prompts
-- `--batch_size`: Number of prompts per batch
-- `--run_name`: Name for this run (for output/checkpointing)
-- `--distribution`: Toolset distribution to use (default: "default")
-- `--num_workers`: Number of parallel workers (default: 4)
-- `--resume`: Resume from checkpoint if interrupted
-- `--ephemeral_system_prompt`: System prompt used during execution but NOT saved to trajectories
-- `--list_distributions`: List available toolset distributions
-
-## Environment Variables
-
-All environment variables can be configured in the `.env` file (copy from `.env.example`).
-
-**LLM Provider (OpenRouter):**
-- `OPENROUTER_API_KEY`: Primary LLM access via OpenRouter (supports Claude, GPT-4, Gemini, etc.)
-- `LLM_MODEL`: Default model (e.g., `anthropic/claude-sonnet-4`, `openai/gpt-4o`)
-
-**Tool API Keys:**
-- `FIRECRAWL_API_KEY`: Web tools (search, extract, crawl)
-- `NOUS_API_KEY`: Vision and reasoning tools
-- `FAL_KEY`: Image generation tools
-
-**Terminal Tool Configuration (mini-swe-agent backend):**
-- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, `modal`, or `ssh` (default: `local`)
-- `TERMINAL_DOCKER_IMAGE`: Docker image for docker backend (default: `python:3.11-slim`)
-- `TERMINAL_SINGULARITY_IMAGE`: Singularity/Apptainer image (can be `docker://...` URL or local `.sif` path)
-- `TERMINAL_TIMEOUT`: Command timeout in seconds (default: `60`)
-- `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
-- `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
-- `TERMINAL_SCRATCH_DIR`: Custom scratch directory for sandbox storage (optional, auto-detects `/scratch`)
-- `SUDO_PASSWORD`: Enable sudo commands by piping password via `sudo -S` (works with all backends)
-  - If unset in CLI mode, you'll be prompted interactively when sudo is needed (45s timeout)
-
-**SSH Backend Configuration (for remote execution):**
-- `TERMINAL_SSH_HOST`: Remote server hostname or IP
-- `TERMINAL_SSH_USER`: SSH username
-- `TERMINAL_SSH_PORT`: SSH port (default: `22`)
-- `TERMINAL_SSH_KEY`: Path to SSH private key (optional, uses ssh-agent if not set)
-
-**Context Compression (auto-shrinks long conversations):**
-- `CONTEXT_COMPRESSION_ENABLED`: Enable auto-compression (default: `true`)
-- `CONTEXT_COMPRESSION_THRESHOLD`: Compress at this % of context limit (default: `0.85`)
-- `CONTEXT_COMPRESSION_MODEL`: Model for generating summaries (default: `google/gemini-2.0-flash-001`)
-
-**Browser Tool Configuration (agent-browser + Browserbase):**
-- `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
-- `BROWSERBASE_PROJECT_ID`: Browserbase project ID
-- `BROWSER_SESSION_TIMEOUT`: Session timeout in seconds (default: `300`)
-
-**Legacy Hecate Terminal Backend (optional):**
-- `MORPH_API_KEY`: For Hecate/MorphCloud terminal backend
-- `HECATE_VM_LIFETIME_SECONDS`: VM lifetime (default: 300)
-- `HECATE_DEFAULT_SNAPSHOT_ID`: Default snapshot (default: snapshot_p5294qxt)
-
-**Debug Options:**
-- `WEB_TOOLS_DEBUG`, `VISION_TOOLS_DEBUG`, `MOA_TOOLS_DEBUG`, `IMAGE_TOOLS_DEBUG`: Enable debug logging
-
-## Key Files
-
-| File | Purpose |
-|------|---------|
-| `hermes` | CLI launcher script (run with `./hermes`) |
-| `cli.py` | Interactive CLI implementation |
-| `cli-config.yaml` | CLI configuration (copy from `.example`) |
-| `run_agent.py` | Main agent runner - single query execution |
-| `batch_runner.py` | Parallel batch processing with checkpointing |
-| `model_tools.py` | Core tool definitions and handlers |
-| `toolsets.py` | Toolset definitions and composition |
-| `toolset_distributions.py` | Probability distributions for data generation |
-| `trajectory_compressor.py` | Post-process trajectories for training |
-| `tools/` | Individual tool implementations |
-| `tools/skills_tool.py` | Skills system with progressive disclosure |
-| `skills/` | On-demand knowledge documents |
-| `docs/` | Documentation |
-| `configs/` | Example batch run scripts |
diff --git a/hermes_agent.egg-info/SOURCES.txt b/hermes_agent.egg-info/SOURCES.txt
deleted file mode 100644
index d94b105e0a..0000000000
--- a/hermes_agent.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-README.md
-batch_runner.py
-cli.py
-model_tools.py
-pyproject.toml
-run_agent.py
-toolset_distributions.py
-toolsets.py
-trajectory_compressor.py
-cron/__init__.py
-cron/jobs.py
-cron/scheduler.py
-gateway/__init__.py
-gateway/config.py
-gateway/delivery.py
-gateway/run.py
-gateway/session.py
-hermes_agent.egg-info/PKG-INFO
-hermes_agent.egg-info/SOURCES.txt
-hermes_agent.egg-info/dependency_links.txt
-hermes_agent.egg-info/entry_points.txt
-hermes_agent.egg-info/requires.txt
-hermes_agent.egg-info/top_level.txt
-hermes_cli/__init__.py
-hermes_cli/cron.py
-hermes_cli/doctor.py
-hermes_cli/gateway.py
-hermes_cli/main.py
-hermes_cli/setup.py
-hermes_cli/status.py
-tests/test_batch_runner.py
-tests/test_checkpoint_resumption.py
-tests/test_modal_terminal.py
-tests/test_nous_api_limits.py
-tests/test_nous_api_pattern.py
-tests/test_temperature_fix.py
-tests/test_web_tools.py
-tools/__init__.py
-tools/browser_tool.py
-tools/cronjob_tools.py
-tools/image_generation_tool.py
-tools/mixture_of_agents_tool.py
-tools/skills_tool.py
-tools/terminal_hecate.py
-tools/terminal_tool.py
-tools/vision_tools.py
-tools/web_tools.py
\ No newline at end of file
diff --git a/hermes_agent.egg-info/dependency_links.txt b/hermes_agent.egg-info/dependency_links.txt
deleted file mode 100644
index 8b13789179..0000000000
--- a/hermes_agent.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/hermes_agent.egg-info/entry_points.txt b/hermes_agent.egg-info/entry_points.txt
deleted file mode 100644
index 504de51c24..0000000000
--- a/hermes_agent.egg-info/entry_points.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-[console_scripts]
-hermes = hermes_cli.main:main
-hermes-agent = run_agent:main
diff --git a/hermes_agent.egg-info/requires.txt b/hermes_agent.egg-info/requires.txt
deleted file mode 100644
index 91036b7cda..0000000000
--- a/hermes_agent.egg-info/requires.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-openai
-python-dotenv
-fire
-httpx
-rich
-tenacity
-pyyaml
-requests
-jinja2
-pydantic>=2.0
-firecrawl-py
-fal-client
-litellm>=1.75.5
-typer
-platformdirs
-
-[all]
-croniter
-python-telegram-bot>=20.0
-discord.py>=2.0
-
-[cron]
-croniter
-
-[dev]
-pytest
-pytest-asyncio
-
-[messaging]
-python-telegram-bot>=20.0
-discord.py>=2.0
-
-[modal]
-modal
-boto3
diff --git a/hermes_agent.egg-info/top_level.txt b/hermes_agent.egg-info/top_level.txt
deleted file mode 100644
index a804090242..0000000000
--- a/hermes_agent.egg-info/top_level.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-batch_runner
-cli
-cron
-gateway
-hermes_cli
-model_tools
-run_agent
-tools
-toolset_distributions
-toolsets
-trajectory_compressor

From c0494b3558df6cda7ea11196d807366539daa643 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sat, 7 Feb 2026 21:11:01 +0000
Subject: [PATCH 41/48] Update pyproject.toml to refine dependency management

- Reorganized the 'all' dependencies to include specific optional groups for better modularity.
- Replaced the duplicated package list in 'all' with self-referential extras: hermes-agent[modal], hermes-agent[messaging], hermes-agent[cron], hermes-agent[cli], and hermes-agent[dev].
---
 pyproject.toml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1af0cb2a76..30565e088a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,13 @@ dev = ["pytest", "pytest-asyncio"]
 messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0"]
 cron = ["croniter"]
 cli = ["simple-term-menu"]
-all = ["croniter", "python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "simple-term-menu"]
+all = [
+  "hermes-agent[modal]",
+  "hermes-agent[messaging]",
+  "hermes-agent[cron]",
+  "hermes-agent[cli]",
+  "hermes-agent[dev]",
+]
 
 [project.scripts]
 hermes = "hermes_cli.main:main"

From a478e4458567f26f47fbbf2a44ef2b27f7ab6ff7 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sat, 7 Feb 2026 21:11:07 +0000
Subject: [PATCH 42/48] Increase max_token_length in TerminalTestEnv to 16000
 for enhanced processing capacity

---
 environments/terminal_test_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environments/terminal_test_env.py b/environments/terminal_test_env.py
index e30d56695e..9a5bdc2cbe 100644
--- a/environments/terminal_test_env.py
+++ b/environments/terminal_test_env.py
@@ -121,7 +121,7 @@ class TerminalTestEnv(HermesAgentBaseEnv):
             distribution=None,
             # Agent settings
             max_agent_turns=10,  # Simple tasks, don't need many turns
-            max_token_length=2048,
+            max_token_length=16000,
             agent_temperature=1.0,
             system_prompt=(
                 "You are a helpful assistant with access to a terminal and file tools. "

From a8809bbd3e4ba9671ce524525ae8a7ad10df1870 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sat, 7 Feb 2026 23:54:53 +0000
Subject: [PATCH 43/48] Transition installation to uv for faster installs and
 automatic Python version provisioning

- Integrated `uv` as a fast Python package manager for automatic Python provisioning and dependency management.
- Updated installation scripts (`setup-hermes.sh`, `install.sh`, `install.ps1`) to utilize `uv` for installing Python and packages, streamlining the setup process.
- Revised `README.md` to reflect changes in installation steps, including symlinking `hermes` for global access and clarifying Python version requirements.
- Adjusted commands in `doctor.py` and other scripts to recommend `uv` for package installations, ensuring consistency across the project.
---
 README.md            | 124 ++++++++++-----------
 hermes_cli/doctor.py |  10 +-
 hermes_cli/main.py   |  19 +++-
 hermes_cli/setup.py  |  40 +++++--
 scripts/install.ps1  | 220 ++++++++++++++++++++++---------------
 scripts/install.sh   | 253 +++++++++++++++++++++++--------------------
 setup-hermes.sh      | 143 +++++++++++++++---------
 7 files changed, 471 insertions(+), 338 deletions(-)

diff --git a/README.md b/README.md
index 45340e8f0f..18b016c4b8 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,13 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins
 ```
 
 The installer will:
+- Install [uv](https://docs.astral.sh/uv/) (fast Python package manager) if not present
+- Install Python 3.11 via uv if not already available (no sudo needed)
 - Clone to `~/.hermes/hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
-- Create a virtual environment (Python 3.11+ recommended)
+- Create a virtual environment with Python 3.11
 - Install all dependencies and submodule packages
+- Symlink `hermes` into `~/.local/bin` so it works globally (no venv activation needed)
 - Run the interactive setup wizard
-- Add `hermes` to your PATH
 
 After installation, reload your shell and run:
 ```bash
@@ -179,7 +181,7 @@ hermes config set terminal.singularity_image ~/python.sif
 
 **Modal** (serverless cloud):
 ```bash
-pip install "swe-rex[modal]"   # Installs swe-rex + modal + boto3
+uv pip install "swe-rex[modal]"   # Installs swe-rex + modal + boto3
 modal setup                    # Authenticate with Modal
 hermes config set terminal.backend modal
 ```
@@ -522,26 +524,25 @@ If you prefer full control over the installation process (or the quick-install s
 
 | Requirement | Minimum Version | Check Command | Notes |
 |-------------|----------------|---------------|-------|
-| **Python** | 3.11+ recommended (3.10 minimum) | `python3 --version` | Required. 3.11+ needed for RL training tools |
 | **Git** | Any recent | `git --version` | Required |
-| **pip** | 21+ | `pip --version` | Comes with Python |
 | **Node.js** | 18+ | `node --version` | Optional — needed for browser automation tools |
 | **ripgrep** | Any | `rg --version` | Optional — faster file search in terminal tool (falls back to grep) |
 
+> **Note:** Python and pip are **not** prerequisites. The installer uses [uv](https://docs.astral.sh/uv/) to provision Python 3.11 automatically (no sudo needed). If a Python 3.11 interpreter is already installed, uv will use it instead of downloading one.
+
 <details>
 <summary><strong>Installing prerequisites by platform</strong></summary>
 
 **Ubuntu / Debian:**
 ```bash
-sudo apt update
-sudo apt install python3.11 python3.11-venv python3-pip git
+sudo apt update && sudo apt install git
 # Optional:
 sudo apt install ripgrep nodejs npm
 ```
 
 **macOS (Homebrew):**
 ```bash
-brew install python@3.11 git
+brew install git
 # Optional:
 brew install ripgrep node
 ```
@@ -569,34 +570,37 @@ git submodule update --init --recursive
 
 ---
 
-### Step 2: Create & Activate a Virtual Environment
+### Step 2: Install uv & Create Virtual Environment
 
-A virtual environment keeps Hermes dependencies isolated from your system Python:
+[uv](https://docs.astral.sh/uv/) is a fast Python package manager that can also provision Python itself. Install it and create the venv in one go:
 
 ```bash
-python3 -m venv venv
-source venv/bin/activate
+# Install uv (if not already installed)
+curl -LsSf https://astral.sh/uv/install.sh | sh
 
-# Upgrade core packaging tools
-pip install --upgrade pip wheel setuptools
+# Create venv with Python 3.11 (uv downloads it if not present — no sudo needed)
+uv venv venv --python 3.11
 ```
 
-> **Tip:** Every time you open a new terminal to use Hermes, activate the venv first:
-> `source /path/to/hermes-agent/venv/bin/activate`
+> **Tip:** You do **not** need to activate the venv to use `hermes`. The entry point has a hardcoded shebang pointing to the venv Python, so it works globally once symlinked (see Step 8). For installing packages, uv can target the venv directly via `VIRTUAL_ENV`.
 
 ---
 
 ### Step 3: Install Python Dependencies
 
-Install the main package in editable mode with all optional extras (messaging, cron, CLI menus):
+Install the main package in editable mode with all optional extras (messaging, cron, CLI menus, modal, and dev/test tools):
 
 ```bash
-pip install -e ".[all]"
+# Tell uv which venv to install into
+export VIRTUAL_ENV="$(pwd)/venv"
+
+# Install with all extras
+uv pip install -e ".[all]"
 ```
 
 If you only want the core agent (no Telegram/Discord/cron support):
 ```bash
-pip install -e "."
+uv pip install -e "."
 ```
 
 <details>
@@ -604,14 +608,14 @@ pip install -e "."
 
 | Extra | What it adds | Install command |
 |-------|-------------|-----------------|
-| `all` | Everything below | `pip install -e ".[all]"` |
-| `messaging` | Telegram & Discord gateway | `pip install -e ".[messaging]"` |
-| `cron` | Cron expression parsing for scheduled tasks | `pip install -e ".[cron]"` |
-| `cli` | Terminal menu UI for setup wizard | `pip install -e ".[cli]"` |
-| `modal` | Modal cloud execution backend (swe-rex + modal + boto3) | `pip install -e ".[modal]"` |
-| `dev` | pytest & test utilities | `pip install -e ".[dev]"` |
+| `all` | Everything below | `uv pip install -e ".[all]"` |
+| `messaging` | Telegram & Discord gateway | `uv pip install -e ".[messaging]"` |
+| `cron` | Cron expression parsing for scheduled tasks | `uv pip install -e ".[cron]"` |
+| `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` |
+| `modal` | Modal cloud execution backend (swe-rex + modal + boto3) | `uv pip install -e ".[modal]"` |
+| `dev` | pytest & test utilities | `uv pip install -e ".[dev]"` |
 
-You can combine extras: `pip install -e ".[messaging,cron]"`
+You can combine extras: `uv pip install -e ".[messaging,cron]"`
 
 </details>
 
@@ -623,16 +627,14 @@ These are local packages checked out as Git submodules. Install them in editable
 
 ```bash
 # Terminal tool backend (required for the terminal/command-execution tool)
-pip install -e "./mini-swe-agent"
+uv pip install -e "./mini-swe-agent"
 
-# RL training backend (requires Python 3.11+)
-pip install -e "./tinker-atropos"
+# RL training backend
+uv pip install -e "./tinker-atropos"
 ```
 
 Both are optional — if you skip them, the corresponding toolsets simply won't be available.
 
-> **Note:** `tinker-atropos` requires Python 3.11+ (the upstream `tinker` package has this constraint). On Python 3.10, skip this line — RL tools will be disabled but everything else works.
-
 ---
 
 ### Step 5: Install Node.js Dependencies (Optional)
@@ -706,13 +708,20 @@ hermes config set OPENROUTER_API_KEY sk-or-v1-your-key-here
 
 ### Step 8: Add `hermes` to Your PATH
 
-The `hermes` command is installed into the virtual environment's `bin/` directory. Add it to your shell PATH so you can run `hermes` from anywhere:
+The `hermes` entry point at `venv/bin/hermes` has a hardcoded shebang pointing to the venv's Python, so it works **without activating the venv**. The recommended approach is a symlink into `~/.local/bin` (most distributions already have this on PATH):
+
+```bash
+mkdir -p ~/.local/bin
+ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
+```
+
+If `~/.local/bin` isn't on your PATH yet, add it:
 
 **Bash** (`~/.bashrc`):
 ```bash
 echo '' >> ~/.bashrc
 echo '# Hermes Agent' >> ~/.bashrc
-echo 'export PATH="$HOME/hermes-agent/venv/bin:$PATH"' >> ~/.bashrc
+echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
 source ~/.bashrc
 ```
 
@@ -720,24 +729,15 @@ source ~/.bashrc
 ```bash
 echo '' >> ~/.zshrc
 echo '# Hermes Agent' >> ~/.zshrc
-echo 'export PATH="$HOME/hermes-agent/venv/bin:$PATH"' >> ~/.zshrc
+echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc
 source ~/.zshrc
 ```
 
 **Fish** (`~/.config/fish/config.fish`):
 ```fish
-fish_add_path $HOME/hermes-agent/venv/bin
+fish_add_path $HOME/.local/bin
 ```
 
-> **Note:** Adjust the path if you cloned to a different location. The key is to add the `venv/bin` directory inside your clone to your PATH.
-
-Alternatively, if you don't want to modify your PATH, you can create a symlink:
-```bash
-mkdir -p ~/.local/bin
-ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
-```
-(Most distributions already have `~/.local/bin` on the PATH.)
-
 ---
 
 ### Step 9: Run the Setup Wizard (Optional)
@@ -777,19 +777,21 @@ If `hermes doctor` reports issues, it will tell you exactly what's missing and h
 For those who just want the commands without the explanations:
 
 ```bash
+# Install uv (if not already installed)
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
 # Clone & enter
 git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
 cd hermes-agent
 
-# Virtual environment
-python3 -m venv venv
-source venv/bin/activate
-pip install --upgrade pip wheel setuptools
+# Create venv with Python 3.11 (uv downloads it if needed)
+uv venv venv --python 3.11
+export VIRTUAL_ENV="$(pwd)/venv"
 
 # Install everything
-pip install -e ".[all]"
-pip install -e "./mini-swe-agent"
-pip install -e "./tinker-atropos"
+uv pip install -e ".[all]"
+uv pip install -e "./mini-swe-agent"
+uv pip install -e "./tinker-atropos"
 npm install  # optional, for browser tools
 
 # Configure
@@ -798,9 +800,9 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env
 echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
 
-# Add to PATH (adjust for your shell)
-echo 'export PATH="'$(pwd)'/venv/bin:$PATH"' >> ~/.bashrc
-source ~/.bashrc
+# Make hermes available globally (no venv activation needed)
+mkdir -p ~/.local/bin
+ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes
 
 # Verify
 hermes doctor
@@ -815,16 +817,16 @@ To update an existing manual install to the latest version:
 
 ```bash
 cd /path/to/hermes-agent
-source venv/bin/activate
+export VIRTUAL_ENV="$(pwd)/venv"
 
 # Pull latest code and submodules
 git pull origin main
 git submodule update --init --recursive
 
 # Reinstall (picks up new dependencies)
-pip install -e ".[all]"
-pip install -e "./mini-swe-agent"
-pip install -e "./tinker-atropos"
+uv pip install -e ".[all]"
+uv pip install -e "./mini-swe-agent"
+uv pip install -e "./tinker-atropos"
 
 # Check for new config options added since your last update
 hermes config check
@@ -834,14 +836,14 @@ hermes config migrate   # Interactively add any missing options
 ### Uninstalling a Manual Installation
 
 ```bash
+# Remove the hermes symlink
+rm -f ~/.local/bin/hermes
+
 # Remove the cloned repository
 rm -rf /path/to/hermes-agent
 
 # Remove user configuration (optional — keep if you plan to reinstall)
 rm -rf ~/.hermes
-
-# Remove the PATH line from your shell config (~/.bashrc or ~/.zshrc)
-# Look for the "# Hermes Agent" comment and remove that block
 ```
 
 ---
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 7c770cf8ac..de9b721e89 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -103,7 +103,7 @@ def run_doctor(args):
             check_ok(name)
         except ImportError:
             check_fail(name, "(missing)")
-            issues.append(f"Install {name}: pip install {module}")
+            issues.append(f"Install {name}: uv pip install {module}")
     
     for module, name in optional_packages:
         try:
@@ -279,8 +279,8 @@ def run_doctor(args):
             __import__("minisweagent")
             check_ok("mini-swe-agent", "(terminal backend)")
         except ImportError:
-            check_warn("mini-swe-agent found but not installed", "(run: pip install -e ./mini-swe-agent)")
-            issues.append("Install mini-swe-agent: pip install -e ./mini-swe-agent")
+            check_warn("mini-swe-agent found but not installed", "(run: uv pip install -e ./mini-swe-agent)")
+            issues.append("Install mini-swe-agent: uv pip install -e ./mini-swe-agent")
     else:
         check_warn("mini-swe-agent not found", "(run: git submodule update --init --recursive)")
     
@@ -292,8 +292,8 @@ def run_doctor(args):
                 __import__("tinker_atropos")
                 check_ok("tinker-atropos", "(RL training backend)")
             except ImportError:
-                check_warn("tinker-atropos found but not installed", "(run: pip install -e ./tinker-atropos)")
-                issues.append("Install tinker-atropos: pip install -e ./tinker-atropos")
+                check_warn("tinker-atropos found but not installed", "(run: uv pip install -e ./tinker-atropos)")
+                issues.append("Install tinker-atropos: uv pip install -e ./tinker-atropos")
         else:
             check_warn("tinker-atropos requires Python 3.11+", f"(current: {py_version.major}.{py_version.minor})")
     else:
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index a3100279a0..a4c4f644bf 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -119,6 +119,7 @@ def cmd_uninstall(args):
 def cmd_update(args):
     """Update Hermes Agent to the latest version."""
     import subprocess
+    import shutil
     
     print("🦋 Updating Hermes Agent...")
     print()
@@ -163,13 +164,21 @@ def cmd_update(args):
         print("→ Pulling updates...")
         subprocess.run(["git", "pull", "origin", branch], cwd=PROJECT_ROOT, check=True)
         
-        # Reinstall Python dependencies
+        # Reinstall Python dependencies (prefer uv for speed, fall back to pip)
         print("→ Updating Python dependencies...")
-        venv_pip = PROJECT_ROOT / "venv" / "bin" / "pip"
-        if venv_pip.exists():
-            subprocess.run([str(venv_pip), "install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
+        uv_bin = shutil.which("uv")
+        if uv_bin:
+            subprocess.run(
+                [uv_bin, "pip", "install", "-e", ".", "--quiet"],
+                cwd=PROJECT_ROOT, check=True,
+                env={**os.environ, "VIRTUAL_ENV": str(PROJECT_ROOT / "venv")}
+            )
         else:
-            subprocess.run(["pip", "install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
+            venv_pip = PROJECT_ROOT / "venv" / "bin" / "pip"
+            if venv_pip.exists():
+                subprocess.run([str(venv_pip), "install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
+            else:
+                subprocess.run(["pip", "install", "-e", ".", "--quiet"], cwd=PROJECT_ROOT, check=True)
         
         # Check for Node.js deps
         if (PROJECT_ROOT / "package.json").exists():
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 5f9f045a3a..75e019d9eb 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -659,15 +659,24 @@ def run_setup_wizard(args):
         except ImportError:
             print_info("Installing required package: swe-rex[modal]...")
             import subprocess
-            result = subprocess.run(
-                [sys.executable, "-m", "pip", "install", "swe-rex[modal]>=1.4.0"],
-                capture_output=True, text=True
-            )
+            import shutil
+            # Prefer uv for speed, fall back to pip
+            uv_bin = shutil.which("uv")
+            if uv_bin:
+                result = subprocess.run(
+                    [uv_bin, "pip", "install", "swe-rex[modal]>=1.4.0"],
+                    capture_output=True, text=True
+                )
+            else:
+                result = subprocess.run(
+                    [sys.executable, "-m", "pip", "install", "swe-rex[modal]>=1.4.0"],
+                    capture_output=True, text=True
+                )
             if result.returncode == 0:
                 print_success("swe-rex[modal] installed (includes modal + boto3)")
             else:
                 print_warning("Failed to install swe-rex[modal] — install manually:")
-                print_info('  pip install "swe-rex[modal]>=1.4.0"')
+                print_info('  uv pip install "swe-rex[modal]>=1.4.0"')
         
         # Always show current status and allow reconfiguration
         current_token = get_env_value('MODAL_TOKEN_ID')
@@ -1031,19 +1040,28 @@ def run_setup_wizard(args):
                     if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists():
                         print_info("    Installing tinker-atropos submodule...")
                         import subprocess
-                        result = subprocess.run(
-                            [sys.executable, "-m", "pip", "install", "-e", str(tinker_dir)],
-                            capture_output=True, text=True
-                        )
+                        import shutil
+                        # Prefer uv for speed, fall back to pip
+                        uv_bin = shutil.which("uv")
+                        if uv_bin:
+                            result = subprocess.run(
+                                [uv_bin, "pip", "install", "-e", str(tinker_dir)],
+                                capture_output=True, text=True
+                            )
+                        else:
+                            result = subprocess.run(
+                                [sys.executable, "-m", "pip", "install", "-e", str(tinker_dir)],
+                                capture_output=True, text=True
+                            )
                         if result.returncode == 0:
                             print_success("    tinker-atropos installed")
                         else:
                             print_warning("    tinker-atropos install failed — run manually:")
-                            print_info('      pip install -e "./tinker-atropos"')
+                            print_info('      uv pip install -e "./tinker-atropos"')
                     else:
                         print_warning("    tinker-atropos submodule not found — run:")
                         print_info("      git submodule update --init --recursive")
-                        print_info('      pip install -e "./tinker-atropos"')
+                        print_info('      uv pip install -e "./tinker-atropos"')
                 
                 if api_key and wandb_key:
                     print_success("    Configured ✓")
diff --git a/scripts/install.ps1 b/scripts/install.ps1
index 86b914d3e9..25a4159e21 100644
--- a/scripts/install.ps1
+++ b/scripts/install.ps1
@@ -2,6 +2,7 @@
 # Hermes Agent Installer for Windows
 # ============================================================================
 # Installation script for Windows (PowerShell).
+# Uses uv for fast Python provisioning and package management.
 #
 # Usage:
 #   irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex
@@ -27,6 +28,7 @@ $ErrorActionPreference = "Stop"
 
 $RepoUrlSsh = "git@github.com:NousResearch/hermes-agent.git"
 $RepoUrlHttps = "https://github.com/NousResearch/hermes-agent.git"
+$PythonVersion = "3.11"
 
 # ============================================================================
 # Helper functions
@@ -52,12 +54,12 @@ function Write-Success {
     Write-Host "✓ $Message" -ForegroundColor Green
 }
 
-function Write-Warning {
+function Write-Warn {
     param([string]$Message)
     Write-Host "⚠ $Message" -ForegroundColor Yellow
 }
 
-function Write-Error {
+function Write-Err {
     param([string]$Message)
     Write-Host "✗ $Message" -ForegroundColor Red
 }
@@ -66,41 +68,93 @@ function Write-Error {
 # Dependency checks
 # ============================================================================
 
-function Test-Python {
-    Write-Info "Checking Python..."
+function Install-Uv {
+    Write-Info "Checking for uv package manager..."
     
-    # Try different python commands (prefer 3.11+ for full feature support)
-    $pythonCmds = @("python3", "python", "py -3")
+    # Check if uv is already available
+    if (Get-Command uv -ErrorAction SilentlyContinue) {
+        $version = uv --version
+        $script:UvCmd = "uv"
+        Write-Success "uv found ($version)"
+        return $true
+    }
     
-    foreach ($cmd in $pythonCmds) {
-        try {
-            $version = & $cmd.Split()[0] $cmd.Split()[1..99] -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>$null
-            if ($version) {
-                $major, $minor = $version.Split('.')
-                if ([int]$major -ge 3 -and [int]$minor -ge 10) {
-                    $script:PythonCmd = $cmd
-                    $script:PythonVersion = $version
-                    Write-Success "Python $version found"
-                    
-                    # Warn if < 3.11 (RL training tools require 3.11+)
-                    if ([int]$minor -lt 11) {
-                        Write-Warning "Python 3.11+ recommended — RL Training tools (tinker-atropos) require >= 3.11"
-                        Write-Info "Core agent features will work fine on $version"
-                    }
-                    
-                    return $true
-                }
-            }
-        } catch {
-            # Try next command
+    # Check common install locations
+    $uvPaths = @(
+        "$env:USERPROFILE\.local\bin\uv.exe",
+        "$env:USERPROFILE\.cargo\bin\uv.exe"
+    )
+    foreach ($uvPath in $uvPaths) {
+        if (Test-Path $uvPath) {
+            $script:UvCmd = $uvPath
+            $version = & $uvPath --version
+            Write-Success "uv found at $uvPath ($version)"
+            return $true
         }
     }
     
-    Write-Error "Python 3.10+ not found"
-    Write-Info "Please install Python 3.11 or newer (recommended) from:"
-    Write-Info "  https://www.python.org/downloads/"
-    Write-Info ""
-    Write-Info "Make sure to check 'Add Python to PATH' during installation"
+    # Install uv
+    Write-Info "Installing uv (fast Python package manager)..."
+    try {
+        powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 2>&1 | Out-Null
+        
+        # Find the installed binary
+        $uvExe = "$env:USERPROFILE\.local\bin\uv.exe"
+        if (-not (Test-Path $uvExe)) {
+            $uvExe = "$env:USERPROFILE\.cargo\bin\uv.exe"
+        }
+        if (-not (Test-Path $uvExe)) {
+            # Refresh PATH and try again
+            $env:Path = [Environment]::GetEnvironmentVariable("Path", "User") + ";" + [Environment]::GetEnvironmentVariable("Path", "Machine")
+            if (Get-Command uv -ErrorAction SilentlyContinue) {
+                $uvExe = (Get-Command uv).Source
+            }
+        }
+        
+        if (Test-Path $uvExe) {
+            $script:UvCmd = $uvExe
+            $version = & $uvExe --version
+            Write-Success "uv installed ($version)"
+            return $true
+        }
+        
+        Write-Err "uv installed but not found on PATH"
+        Write-Info "Try restarting your terminal and re-running"
+        return $false
+    } catch {
+        Write-Err "Failed to install uv"
+        Write-Info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
+        return $false
+    }
+}
+
+function Test-Python {
+    Write-Info "Checking Python $PythonVersion..."
+    
+    # Ask uv to locate an existing Python $PythonVersion interpreter
+    try {
+        $pythonPath = & $UvCmd python find $PythonVersion 2>$null
+        if ($pythonPath) {
+            $ver = & $pythonPath --version 2>$null
+            Write-Success "Python found: $ver"
+            return $true
+        }
+    } catch { }
+    
+    # Python not found — use uv to install it (no admin needed!)
+    Write-Info "Python $PythonVersion not found, installing via uv..."
+    try {
+        & $UvCmd python install $PythonVersion 2>&1 | Out-Null
+        $pythonPath = & $UvCmd python find $PythonVersion 2>$null
+        if ($pythonPath) {
+            $ver = & $pythonPath --version 2>$null
+            Write-Success "Python installed: $ver"
+            return $true
+        }
+    } catch { }
+    
+    Write-Err "Failed to install Python $PythonVersion"
+    Write-Info "Install Python $PythonVersion manually, then re-run this script"
     return $false
 }
 
@@ -113,7 +167,7 @@ function Test-Git {
         return $true
     }
     
-    Write-Error "Git not found"
+    Write-Err "Git not found"
     Write-Info "Please install Git from:"
     Write-Info "  https://git-scm.com/download/win"
     return $false
@@ -129,7 +183,7 @@ function Test-Node {
         return $true
     }
     
-    Write-Warning "Node.js not found (browser tools will be limited)"
+    Write-Warn "Node.js not found (browser tools will be limited)"
     Write-Info "To install Node.js (optional):"
     Write-Info "  https://nodejs.org/en/download/"
     $script:HasNode = $false
@@ -146,7 +200,7 @@ function Test-Ripgrep {
         return $true
     }
     
-    Write-Warning "ripgrep not found (file search will use findstr fallback)"
+    Write-Warn "ripgrep not found (file search will use findstr fallback)"
     
     # Check what package managers are available
     $hasWinget = Get-Command winget -ErrorAction SilentlyContinue
@@ -193,7 +247,7 @@ function Test-Ripgrep {
             } catch { }
         }
         
-        Write-Warning "Auto-install failed. You can install manually:"
+        Write-Warn "Auto-install failed. You can install manually:"
     } else {
         Write-Info "Skipping ripgrep installation. To install manually:"
     }
@@ -224,13 +278,12 @@ function Install-Repository {
             git pull origin $Branch
             Pop-Location
         } else {
-            Write-Error "Directory exists but is not a git repository: $InstallDir"
+            Write-Err "Directory exists but is not a git repository: $InstallDir"
             Write-Info "Remove it or choose a different directory with -InstallDir"
             exit 1
         }
     } else {
         # Try SSH first (for private repo access), fall back to HTTPS
-        # Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
         Write-Info "Trying SSH clone..."
         $sshResult = git clone --branch $Branch --recurse-submodules $RepoUrlSsh $InstallDir 2>&1
         
@@ -243,7 +296,7 @@ function Install-Repository {
             if ($LASTEXITCODE -eq 0) {
                 Write-Success "Cloned via HTTPS"
             } else {
-                Write-Error "Failed to clone repository"
+                Write-Err "Failed to clone repository"
                 Write-Info "For private repo access, ensure your SSH key is added to GitHub:"
                 Write-Info "  ssh-add ~/.ssh/id_rsa"
                 Write-Info "  ssh -T git@github.com  # Test connection"
@@ -252,7 +305,7 @@ function Install-Repository {
         }
     }
     
-    # Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
+    # Ensure submodules are initialized and updated
     Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
     Push-Location $InstallDir
     git submodule update --init --recursive
@@ -268,23 +321,21 @@ function Install-Venv {
         return
     }
     
-    Write-Info "Creating virtual environment..."
+    Write-Info "Creating virtual environment with Python $PythonVersion..."
     
     Push-Location $InstallDir
     
-    if (-not (Test-Path "venv")) {
-        & $PythonCmd -m venv venv
+    if (Test-Path "venv") {
+        Write-Info "Virtual environment already exists, recreating..."
+        Remove-Item -Recurse -Force "venv"
     }
     
-    # Activate
-    & .\venv\Scripts\Activate.ps1
-    
-    # Upgrade pip
-    pip install --upgrade pip wheel setuptools | Out-Null
+    # uv creates the venv and pins the Python version in one step
+    & $UvCmd venv venv --python $PythonVersion
     
     Pop-Location
     
-    Write-Success "Virtual environment ready"
+    Write-Success "Virtual environment ready (Python $PythonVersion)"
 }
 
 function Install-Dependencies {
@@ -293,14 +344,15 @@ function Install-Dependencies {
     Push-Location $InstallDir
     
     if (-not $NoVenv) {
-        & .\venv\Scripts\Activate.ps1
+        # Tell uv to install into our venv (no activation needed)
+        $env:VIRTUAL_ENV = "$InstallDir\venv"
     }
     
-    # Install main package
+    # Install main package with all extras
     try {
-        pip install -e ".[all]" 2>&1 | Out-Null
+        & $UvCmd pip install -e ".[all]" 2>&1 | Out-Null
     } catch {
-        pip install -e "." | Out-Null
+        & $UvCmd pip install -e "." | Out-Null
     }
     
     Write-Success "Main package installed"
@@ -309,32 +361,25 @@ function Install-Dependencies {
     Write-Info "Installing mini-swe-agent (terminal tool backend)..."
     if (Test-Path "mini-swe-agent\pyproject.toml") {
         try {
-            pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
+            & $UvCmd pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
             Write-Success "mini-swe-agent installed"
         } catch {
-            Write-Warning "mini-swe-agent install failed (terminal tools may not work)"
+            Write-Warn "mini-swe-agent install failed (terminal tools may not work)"
         }
     } else {
-        Write-Warning "mini-swe-agent not found (run: git submodule update --init)"
+        Write-Warn "mini-swe-agent not found (run: git submodule update --init)"
     }
     
     Write-Info "Installing tinker-atropos (RL training backend)..."
     if (Test-Path "tinker-atropos\pyproject.toml") {
-        # tinker-atropos depends on the 'tinker' package which requires Python >= 3.11
-        $major, $minor = $PythonVersion.Split('.')
-        if ([int]$minor -ge 11) {
-            try {
-                pip install -e ".\tinker-atropos" 2>&1 | Out-Null
-                Write-Success "tinker-atropos installed"
-            } catch {
-                Write-Warning "tinker-atropos install failed (RL tools may not work)"
-            }
-        } else {
-            Write-Warning "tinker-atropos requires Python 3.11+ (skipping — RL training tools won't be available)"
-            Write-Info "Upgrade to Python 3.11+ to enable RL training features"
+        try {
+            & $UvCmd pip install -e ".\tinker-atropos" 2>&1 | Out-Null
+            Write-Success "tinker-atropos installed"
+        } catch {
+            Write-Warn "tinker-atropos install failed (RL tools may not work)"
         }
     } else {
-        Write-Warning "tinker-atropos not found (run: git submodule update --init)"
+        Write-Warn "tinker-atropos not found (run: git submodule update --init)"
     }
     
     Pop-Location
@@ -343,41 +388,44 @@ function Install-Dependencies {
 }
 
 function Set-PathVariable {
-    Write-Info "Setting up PATH..."
+    Write-Info "Setting up hermes command..."
     
     if ($NoVenv) {
-        $binDir = "$InstallDir"
+        $hermesBin = "$InstallDir"
     } else {
-        $binDir = "$InstallDir\venv\Scripts"
+        $hermesBin = "$InstallDir\venv\Scripts"
     }
     
-    # Add to user PATH
+    # Add the hermes dir (venv\Scripts, or $InstallDir when $NoVenv is set) to user PATH so hermes is globally available
+    # On Windows, the hermes.exe in venv\Scripts\ has the venv Python baked in
     $currentPath = [Environment]::GetEnvironmentVariable("Path", "User")
     
-    if ($currentPath -notlike "*$binDir*") {
+    if ($currentPath -notlike "*$hermesBin*") {
         [Environment]::SetEnvironmentVariable(
             "Path",
-            "$binDir;$currentPath",
+            "$hermesBin;$currentPath",
             "User"
         )
-        Write-Success "Added to user PATH"
+        Write-Success "Added to user PATH: $hermesBin"
     } else {
         Write-Info "PATH already configured"
     }
     
     # Update current session
-    $env:Path = "$binDir;$env:Path"
+    $env:Path = "$hermesBin;$env:Path"
+    
+    Write-Success "hermes command ready"
 }
 
 function Copy-ConfigTemplates {
     Write-Info "Setting up configuration files..."
     
-    # Create ~/.hermes directory structure (config at top level, code in subdir)
+    # Create ~/.hermes directory structure
     New-Item -ItemType Directory -Force -Path "$HermesHome\cron" | Out-Null
     New-Item -ItemType Directory -Force -Path "$HermesHome\sessions" | Out-Null
     New-Item -ItemType Directory -Force -Path "$HermesHome\logs" | Out-Null
     
-    # Create .env at ~/.hermes/.env (top level, easy to find)
+    # Create .env
     $envPath = "$HermesHome\.env"
     if (-not (Test-Path $envPath)) {
         $examplePath = "$InstallDir\.env.example"
@@ -385,7 +433,6 @@ function Copy-ConfigTemplates {
             Copy-Item $examplePath $envPath
             Write-Success "Created ~/.hermes/.env from template"
         } else {
-            # Create empty .env if no example exists
             New-Item -ItemType File -Force -Path $envPath | Out-Null
             Write-Success "Created ~/.hermes/.env"
         }
@@ -393,7 +440,7 @@ function Copy-ConfigTemplates {
         Write-Info "~/.hermes/.env already exists, keeping it"
     }
     
-    # Create config.yaml at ~/.hermes/config.yaml (top level, easy to find)
+    # Create config.yaml
     $configPath = "$HermesHome\config.yaml"
     if (-not (Test-Path $configPath)) {
         $examplePath = "$InstallDir\cli-config.yaml.example"
@@ -422,7 +469,7 @@ function Install-NodeDeps {
             npm install --silent 2>&1 | Out-Null
             Write-Success "Node.js dependencies installed"
         } catch {
-            Write-Warning "npm install failed (browser tools may not work)"
+            Write-Warn "npm install failed (browser tools may not work)"
         }
     }
     
@@ -441,12 +488,13 @@ function Invoke-SetupWizard {
     
     Push-Location $InstallDir
     
+    # Run hermes setup using the venv Python directly (no activation needed)
     if (-not $NoVenv) {
-        & .\venv\Scripts\Activate.ps1
+        & ".\venv\Scripts\python.exe" -m hermes_cli.main setup
+    } else {
+        python -m hermes_cli.main setup
     }
     
-    python -m hermes_cli.main setup
-    
     Pop-Location
 }
 
@@ -493,7 +541,6 @@ function Write-Completion {
     Write-Host "⚡ Restart your terminal for PATH changes to take effect" -ForegroundColor Yellow
     Write-Host ""
     
-    # Show notes about optional tools
     if (-not $HasNode) {
         Write-Host "Note: Node.js was not found. Browser automation tools" -ForegroundColor Yellow
         Write-Host "will have limited functionality." -ForegroundColor Yellow
@@ -515,6 +562,7 @@ function Write-Completion {
 function Main {
     Write-Banner
     
+    if (-not (Install-Uv)) { exit 1 }
     if (-not (Test-Python)) { exit 1 }
     if (-not (Test-Git)) { exit 1 }
     Test-Node      # Optional, doesn't fail
diff --git a/scripts/install.sh b/scripts/install.sh
index c97cbc8a85..09f93cb768 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -3,6 +3,7 @@
 # Hermes Agent Installer
 # ============================================================================
 # Installation script for Linux and macOS.
+# Uses uv for fast Python provisioning and package management.
 #
 # Usage:
 #   curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
@@ -29,7 +30,7 @@ REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
 REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
 HERMES_HOME="$HOME/.hermes"
 INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
-PYTHON_MIN_VERSION="3.10"
+PYTHON_VERSION="3.11"
 
 # Options
 USE_VENV=true
@@ -64,7 +65,7 @@ while [[ $# -gt 0 ]]; do
             echo "  --no-venv      Don't create virtual environment"
             echo "  --skip-setup   Skip interactive setup wizard"
             echo "  --branch NAME  Git branch to install (default: main)"
-            echo "  --dir PATH     Installation directory (default: ~/.hermes-agent)"
+            echo "  --dir PATH     Installation directory (default: ~/.hermes/hermes-agent)"
             echo "  -h, --help     Show this help"
             exit 0
             ;;
@@ -146,57 +147,80 @@ detect_os() {
 # Dependency checks
 # ============================================================================
 
-check_python() {
-    log_info "Checking Python..."
+install_uv() {
+    log_info "Checking for uv package manager..."
     
-    # Try different python commands (prefer 3.11+ for full feature support)
-    for cmd in python3.12 python3.11 python3.10 python3 python; do
-        if command -v $cmd &> /dev/null; then
-            PYTHON_CMD=$cmd
-            PYTHON_VERSION=$($cmd -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
-            
-            # Check minimum version (3.10)
-            if $cmd -c "import sys; exit(0 if sys.version_info >= (3, 10) else 1)" 2>/dev/null; then
-                log_success "Python $PYTHON_VERSION found"
-                
-                # Warn if < 3.11 (RL training tools require 3.11+)
-                if ! $cmd -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
-                    log_warn "Python 3.11+ recommended — RL Training tools (tinker-atropos) require >= 3.11"
-                    log_info "Core agent features will work fine on $PYTHON_VERSION"
-                fi
-                
-                return 0
-            fi
+    # Check whether uv is already on PATH
+    if command -v uv &> /dev/null; then
+        UV_CMD="uv"
+        UV_VERSION=$($UV_CMD --version 2>/dev/null)
+        log_success "uv found ($UV_VERSION)"
+        return 0
+    fi
+    
+    # Check ~/.local/bin (default uv install location) even if not on PATH yet
+    if [ -x "$HOME/.local/bin/uv" ]; then
+        UV_CMD="$HOME/.local/bin/uv"
+        UV_VERSION=$($UV_CMD --version 2>/dev/null)
+        log_success "uv found at ~/.local/bin ($UV_VERSION)"
+        return 0
+    fi
+    
+    # Check ~/.cargo/bin (alternative uv install location)
+    if [ -x "$HOME/.cargo/bin/uv" ]; then
+        UV_CMD="$HOME/.cargo/bin/uv"
+        UV_VERSION=$($UV_CMD --version 2>/dev/null)
+        log_success "uv found at ~/.cargo/bin ($UV_VERSION)"
+        return 0
+    fi
+    
+    # Install uv
+    log_info "Installing uv (fast Python package manager)..."
+    if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
+        # uv installs to ~/.local/bin by default
+        if [ -x "$HOME/.local/bin/uv" ]; then
+            UV_CMD="$HOME/.local/bin/uv"
+        elif [ -x "$HOME/.cargo/bin/uv" ]; then
+            UV_CMD="$HOME/.cargo/bin/uv"
+        elif command -v uv &> /dev/null; then
+            UV_CMD="uv"
+        else
+            log_error "uv installed but not found on PATH"
+            log_info "Try adding ~/.local/bin to your PATH and re-running"
+            exit 1
         fi
-    done
+        UV_VERSION=$($UV_CMD --version 2>/dev/null)
+        log_success "uv installed ($UV_VERSION)"
+    else
+        log_error "Failed to install uv"
+        log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
+        exit 1
+    fi
+}
+
+check_python() {
+    log_info "Checking Python $PYTHON_VERSION..."
     
-    log_error "Python 3.10+ not found"
-    log_info "Please install Python 3.11 or newer (recommended):"
+    # Let uv handle Python — it can download and manage Python versions
+    # First check if a suitable Python is already available
+    if $UV_CMD python find "$PYTHON_VERSION" &> /dev/null; then
+        PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION")
+        PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+        log_success "Python found: $PYTHON_FOUND_VERSION"
+        return 0
+    fi
     
-    case "$OS" in
-        linux)
-            case "$DISTRO" in
-                ubuntu|debian)
-                    log_info "  sudo apt update && sudo apt install python3.11 python3.11-venv"
-                    ;;
-                fedora)
-                    log_info "  sudo dnf install python3.11"
-                    ;;
-                arch)
-                    log_info "  sudo pacman -S python"
-                    ;;
-                *)
-                    log_info "  Use your package manager to install Python 3.11+"
-                    ;;
-            esac
-            ;;
-        macos)
-            log_info "  brew install python@3.11"
-            log_info "  Or download from https://www.python.org/downloads/"
-            ;;
-    esac
-    
-    exit 1
+    # Python not found — use uv to install it (no sudo needed!)
+    log_info "Python $PYTHON_VERSION not found, installing via uv..."
+    if $UV_CMD python install "$PYTHON_VERSION"; then
+        PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION")
+        PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+        log_success "Python installed: $PYTHON_FOUND_VERSION"
+    else
+        log_error "Failed to install Python $PYTHON_VERSION"
+        log_info "Install Python $PYTHON_VERSION manually, then re-run this script"
+        exit 1
+    fi
 }
 
 check_git() {
@@ -301,7 +325,6 @@ check_ripgrep() {
         # Check if we can use sudo
         CAN_SUDO=false
         if command -v sudo &> /dev/null; then
-            # Check if user has sudo access (without actually running sudo)
             if sudo -n true 2>/dev/null || sudo -v 2>/dev/null; then
                 CAN_SUDO=true
             fi
@@ -335,7 +358,6 @@ check_ripgrep() {
                     esac
                 else
                     log_warn "sudo not available - cannot auto-install system packages"
-                    # Try cargo as fallback if available
                     if command -v cargo &> /dev/null; then
                         log_info "Trying cargo install (no sudo required)..."
                         if cargo install ripgrep 2>/dev/null; then
@@ -378,7 +400,6 @@ check_ripgrep() {
                     log_info "  https://github.com/BurntSushi/ripgrep#installation"
                     ;;
             esac
-            # Show cargo alternative for users without sudo
             if command -v cargo &> /dev/null; then
                 log_info "  Or without sudo: cargo install ripgrep"
             fi
@@ -447,39 +468,36 @@ setup_venv() {
         return 0
     fi
     
-    log_info "Creating virtual environment..."
+    log_info "Creating virtual environment with Python $PYTHON_VERSION..."
     
     if [ -d "venv" ]; then
-        log_info "Virtual environment already exists"
-    else
-        $PYTHON_CMD -m venv venv
+        log_info "Virtual environment already exists, recreating..."
+        rm -rf venv
     fi
     
-    # Activate
-    source venv/bin/activate
+    # uv creates the venv and pins the Python version in one step
+    $UV_CMD venv venv --python "$PYTHON_VERSION"
     
-    # Upgrade pip
-    pip install --upgrade pip wheel setuptools > /dev/null
-    
-    log_success "Virtual environment ready"
+    log_success "Virtual environment ready (Python $PYTHON_VERSION)"
 }
 
 install_deps() {
     log_info "Installing dependencies..."
     
     if [ "$USE_VENV" = true ]; then
-        source venv/bin/activate
+        # Tell uv to install into our venv (no need to activate)
+        export VIRTUAL_ENV="$INSTALL_DIR/venv"
     fi
     
     # Install the main package in editable mode with all extras
-    pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
+    $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
     
     log_success "Main package installed"
     
     # Install submodules
     log_info "Installing mini-swe-agent (terminal tool backend)..."
     if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
-        pip install -e "./mini-swe-agent" > /dev/null 2>&1 || log_warn "mini-swe-agent install failed (terminal tools may not work)"
+        $UV_CMD pip install -e "./mini-swe-agent" || log_warn "mini-swe-agent install failed (terminal tools may not work)"
         log_success "mini-swe-agent installed"
     else
         log_warn "mini-swe-agent not found (run: git submodule update --init)"
@@ -487,14 +505,8 @@ install_deps() {
     
     log_info "Installing tinker-atropos (RL training backend)..."
     if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
-        # tinker-atropos depends on the 'tinker' package which requires Python >= 3.11
-        if $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
-            pip install -e "./tinker-atropos" > /dev/null 2>&1 || log_warn "tinker-atropos install failed (RL tools may not work)"
-            log_success "tinker-atropos installed"
-        else
-            log_warn "tinker-atropos requires Python 3.11+ (skipping — RL training tools won't be available)"
-            log_info "Upgrade to Python 3.11+ to enable RL training features"
-        fi
+        $UV_CMD pip install -e "./tinker-atropos" || log_warn "tinker-atropos install failed (RL tools may not work)"
+        log_success "tinker-atropos installed"
     else
         log_warn "tinker-atropos not found (run: git submodule update --init)"
     fi
@@ -503,53 +515,56 @@ install_deps() {
 }
 
 setup_path() {
-    log_info "Setting up PATH..."
+    log_info "Setting up hermes command..."
     
-    # Determine the bin directory
     if [ "$USE_VENV" = true ]; then
-        BIN_DIR="$INSTALL_DIR/venv/bin"
+        HERMES_BIN="$INSTALL_DIR/venv/bin/hermes"
     else
-        BIN_DIR="$HOME/.local/bin"
-        mkdir -p "$BIN_DIR"
+        HERMES_BIN="$(which hermes 2>/dev/null || echo "")"
+        if [ -z "$HERMES_BIN" ]; then
+            log_warn "hermes not found on PATH after install"
+            return 0
+        fi
+    fi
+    
+    # Create symlink in ~/.local/bin (standard user binary location, usually on PATH)
+    mkdir -p "$HOME/.local/bin"
+    ln -sf "$HERMES_BIN" "$HOME/.local/bin/hermes"
+    log_success "Symlinked hermes → ~/.local/bin/hermes"
+    
+    # Check if ~/.local/bin is on PATH; if not, add it to shell config
+    if ! echo "$PATH" | tr ':' '\n' | grep -q "^$HOME/.local/bin$"; then
+        SHELL_CONFIG=""
+        if [ -n "$BASH_VERSION" ]; then
+            if [ -f "$HOME/.bashrc" ]; then
+                SHELL_CONFIG="$HOME/.bashrc"
+            elif [ -f "$HOME/.bash_profile" ]; then
+                SHELL_CONFIG="$HOME/.bash_profile"
+            fi
+        elif [ -n "$ZSH_VERSION" ] || [ -f "$HOME/.zshrc" ]; then
+            SHELL_CONFIG="$HOME/.zshrc"
+        fi
         
-        # Create a wrapper script
-        cat > "$BIN_DIR/hermes" << EOF
-#!/bin/bash
-cd "$INSTALL_DIR"
-exec python -m hermes_cli.main "\$@"
-EOF
-        chmod +x "$BIN_DIR/hermes"
-    fi
-    
-    # Add to PATH in shell config
-    SHELL_CONFIG=""
-    if [ -n "$BASH_VERSION" ]; then
-        if [ -f "$HOME/.bashrc" ]; then
-            SHELL_CONFIG="$HOME/.bashrc"
-        elif [ -f "$HOME/.bash_profile" ]; then
-            SHELL_CONFIG="$HOME/.bash_profile"
+        PATH_LINE='export PATH="$HOME/.local/bin:$PATH"'
+        
+        if [ -n "$SHELL_CONFIG" ]; then
+            if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then
+                echo "" >> "$SHELL_CONFIG"
+                echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG"
+                echo "$PATH_LINE" >> "$SHELL_CONFIG"
+                log_success "Added ~/.local/bin to PATH in $SHELL_CONFIG"
+            else
+                log_info "~/.local/bin already referenced in $SHELL_CONFIG"
+            fi
         fi
-    elif [ -n "$ZSH_VERSION" ] || [ -f "$HOME/.zshrc" ]; then
-        SHELL_CONFIG="$HOME/.zshrc"
+    else
+        log_info "~/.local/bin already on PATH"
     fi
     
-    PATH_LINE="export PATH=\"$BIN_DIR:\$PATH\""
+    # Export for current session so hermes works immediately
+    export PATH="$HOME/.local/bin:$PATH"
     
-    if [ -n "$SHELL_CONFIG" ]; then
-        if ! grep -q "hermes-agent" "$SHELL_CONFIG" 2>/dev/null; then
-            echo "" >> "$SHELL_CONFIG"
-            echo "# Hermes Agent" >> "$SHELL_CONFIG"
-            echo "$PATH_LINE" >> "$SHELL_CONFIG"
-            log_success "Added to $SHELL_CONFIG"
-        else
-            log_info "PATH already configured in $SHELL_CONFIG"
-        fi
-    fi
-    
-    # Also export for current session
-    export PATH="$BIN_DIR:$PATH"
-    
-    log_success "PATH configured"
+    log_success "hermes command ready"
 }
 
 copy_config_templates() {
@@ -566,7 +581,6 @@ copy_config_templates() {
             cp "$INSTALL_DIR/.env.example" "$HERMES_HOME/.env"
             log_success "Created ~/.hermes/.env from template"
         else
-            # Create empty .env if no example exists
             touch "$HERMES_HOME/.env"
             log_success "Created ~/.hermes/.env"
         fi
@@ -614,12 +628,14 @@ run_setup_wizard() {
     log_info "Starting setup wizard..."
     echo ""
     
-    if [ "$USE_VENV" = true ]; then
-        source "$INSTALL_DIR/venv/bin/activate"
-    fi
-    
     cd "$INSTALL_DIR"
-    python -m hermes_cli.main setup
+    
+    # Run hermes setup using the venv Python directly (no activation needed)
+    if [ "$USE_VENV" = true ]; then
+        "$INSTALL_DIR/venv/bin/python" -m hermes_cli.main setup
+    else
+        python -m hermes_cli.main setup
+    fi
 }
 
 print_success() {
@@ -686,6 +702,7 @@ main() {
     print_banner
     
     detect_os
+    install_uv
     check_python
     check_git
     check_node
diff --git a/setup-hermes.sh b/setup-hermes.sh
index e1a9dcb447..a1c9b72393 100755
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -3,16 +3,18 @@
 # Hermes Agent Setup Script
 # ============================================================================
 # Quick setup for developers who cloned the repo manually.
+# Uses uv for fast Python provisioning and package management.
 #
 # Usage:
 #   ./setup-hermes.sh
 #
 # This script:
-# 1. Creates a virtual environment (if not exists)
-# 2. Installs dependencies
-# 3. Creates .env from template (if not exists)
-# 4. Installs the 'hermes' CLI command
-# 5. Runs the setup wizard (optional)
+# 1. Installs uv if not present
+# 2. Creates a virtual environment with Python 3.11 via uv
+# 3. Installs all dependencies (main package + submodules)
+# 4. Creates .env from template (if not exists)
+# 5. Symlinks the 'hermes' CLI command into ~/.local/bin
+# 6. Runs the setup wizard (optional)
 # ============================================================================
 
 set -e
@@ -21,42 +23,74 @@ set -e
 GREEN='\033[0;32m'
 YELLOW='\033[0;33m'
 CYAN='\033[0;36m'
+RED='\033[0;31m'
 NC='\033[0m'
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR"
 
+PYTHON_VERSION="3.11"
+
 echo ""
 echo -e "${CYAN}🦋 Hermes Agent Setup${NC}"
 echo ""
 
 # ============================================================================
-# Python check
+# Install / locate uv
 # ============================================================================
 
-echo -e "${CYAN}→${NC} Checking Python..."
+echo -e "${CYAN}→${NC} Checking for uv..."
 
-PYTHON_CMD=""
-for cmd in python3.12 python3.11 python3.10 python3 python; do
-    if command -v $cmd &> /dev/null; then
-        if $cmd -c "import sys; exit(0 if sys.version_info >= (3, 10) else 1)" 2>/dev/null; then
-            PYTHON_CMD=$cmd
-            break
-        fi
-    fi
-done
-
-if [ -z "$PYTHON_CMD" ]; then
-    echo -e "${YELLOW}✗${NC} Python 3.10+ required"
-    exit 1
+UV_CMD=""
+if command -v uv &> /dev/null; then
+    UV_CMD="uv"
+elif [ -x "$HOME/.local/bin/uv" ]; then
+    UV_CMD="$HOME/.local/bin/uv"
+elif [ -x "$HOME/.cargo/bin/uv" ]; then
+    UV_CMD="$HOME/.cargo/bin/uv"
 fi
 
-PYTHON_VERSION=$($PYTHON_CMD -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
-echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION found"
+if [ -n "$UV_CMD" ]; then
+    UV_VERSION=$($UV_CMD --version 2>/dev/null)
+    echo -e "${GREEN}✓${NC} uv found ($UV_VERSION)"
+else
+    echo -e "${CYAN}→${NC} Installing uv..."
+    if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
+        if [ -x "$HOME/.local/bin/uv" ]; then
+            UV_CMD="$HOME/.local/bin/uv"
+        elif [ -x "$HOME/.cargo/bin/uv" ]; then
+            UV_CMD="$HOME/.cargo/bin/uv"
+        fi
+        
+        if [ -n "$UV_CMD" ]; then
+            UV_VERSION=$($UV_CMD --version 2>/dev/null)
+            echo -e "${GREEN}✓${NC} uv installed ($UV_VERSION)"
+        else
+            echo -e "${RED}✗${NC} uv installed but not found. Add ~/.local/bin to PATH and retry."
+            exit 1
+        fi
+    else
+        echo -e "${RED}✗${NC} Failed to install uv. Visit https://docs.astral.sh/uv/"
+        exit 1
+    fi
+fi
 
-# Warn if < 3.11 (RL training tools require 3.11+)
-if ! $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
-    echo -e "${YELLOW}⚠${NC} Python 3.11+ recommended — RL Training tools (tinker-atropos) require >= 3.11"
+# ============================================================================
+# Python check (uv can provision it automatically)
+# ============================================================================
+
+echo -e "${CYAN}→${NC} Checking Python $PYTHON_VERSION..."
+
+if $UV_CMD python find "$PYTHON_VERSION" &> /dev/null; then
+    PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION")
+    PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+    echo -e "${GREEN}✓${NC} $PYTHON_FOUND_VERSION found"
+else
+    echo -e "${CYAN}→${NC} Python $PYTHON_VERSION not found, installing via uv..."
+    $UV_CMD python install "$PYTHON_VERSION"
+    PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION")
+    PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+    echo -e "${GREEN}✓${NC} $PYTHON_FOUND_VERSION installed"
 fi
 
 # ============================================================================
@@ -65,15 +99,16 @@ fi
 
 echo -e "${CYAN}→${NC} Setting up virtual environment..."
 
-if [ ! -d "venv" ]; then
-    $PYTHON_CMD -m venv venv
-    echo -e "${GREEN}✓${NC} Created venv"
-else
-    echo -e "${GREEN}✓${NC} venv exists"
+if [ -d "venv" ]; then
+    echo -e "${CYAN}→${NC} Removing old venv..."
+    rm -rf venv
 fi
 
-source venv/bin/activate
-pip install --upgrade pip wheel setuptools > /dev/null
+$UV_CMD venv venv --python "$PYTHON_VERSION"
+echo -e "${GREEN}✓${NC} venv created (Python $PYTHON_VERSION)"
+
+# Tell uv to install into this venv (no activation needed for uv)
+export VIRTUAL_ENV="$SCRIPT_DIR/venv"
 
 # ============================================================================
 # Dependencies
@@ -81,7 +116,7 @@ pip install --upgrade pip wheel setuptools > /dev/null
 
 echo -e "${CYAN}→${NC} Installing dependencies..."
 
-pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
+$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
 
 echo -e "${GREEN}✓${NC} Dependencies installed"
 
@@ -93,22 +128,18 @@ echo -e "${CYAN}→${NC} Installing submodules..."
 
 # mini-swe-agent (terminal tool backend)
 if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
-    pip install -e "./mini-swe-agent" > /dev/null 2>&1 && \
+    $UV_CMD pip install -e "./mini-swe-agent" && \
         echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \
         echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (terminal tools may not work)"
 else
     echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)"
 fi
 
-# tinker-atropos (RL training backend — requires Python 3.11+)
+# tinker-atropos (RL training backend)
 if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
-    if $PYTHON_CMD -c "import sys; exit(0 if sys.version_info >= (3, 11) else 1)" 2>/dev/null; then
-        pip install -e "./tinker-atropos" > /dev/null 2>&1 && \
-            echo -e "${GREEN}✓${NC} tinker-atropos installed" || \
-            echo -e "${YELLOW}⚠${NC} tinker-atropos install failed (RL tools may not work)"
-    else
-        echo -e "${YELLOW}⚠${NC} tinker-atropos requires Python 3.11+ (skipping — RL training tools won't be available)"
-    fi
+    $UV_CMD pip install -e "./tinker-atropos" && \
+        echo -e "${GREEN}✓${NC} tinker-atropos installed" || \
+        echo -e "${YELLOW}⚠${NC} tinker-atropos install failed (RL tools may not work)"
 else
     echo -e "${YELLOW}⚠${NC} tinker-atropos not found (run: git submodule update --init --recursive)"
 fi
@@ -174,14 +205,17 @@ else
 fi
 
 # ============================================================================
-# PATH setup
+# PATH setup — symlink hermes into ~/.local/bin
 # ============================================================================
 
 echo -e "${CYAN}→${NC} Setting up hermes command..."
 
-BIN_DIR="$SCRIPT_DIR/venv/bin"
+HERMES_BIN="$SCRIPT_DIR/venv/bin/hermes"
+mkdir -p "$HOME/.local/bin"
+ln -sf "$HERMES_BIN" "$HOME/.local/bin/hermes"
+echo -e "${GREEN}✓${NC} Symlinked hermes → ~/.local/bin/hermes"
 
-# Add to shell config if not already there
+# Ensure ~/.local/bin is on PATH in shell config
 SHELL_CONFIG=""
 if [ -f "$HOME/.zshrc" ]; then
     SHELL_CONFIG="$HOME/.zshrc"
@@ -192,13 +226,17 @@ elif [ -f "$HOME/.bash_profile" ]; then
 fi
 
 if [ -n "$SHELL_CONFIG" ]; then
-    if ! grep -q "hermes-agent" "$SHELL_CONFIG" 2>/dev/null; then
-        echo "" >> "$SHELL_CONFIG"
-        echo "# Hermes Agent" >> "$SHELL_CONFIG"
-        echo "export PATH=\"$BIN_DIR:\$PATH\"" >> "$SHELL_CONFIG"
-        echo -e "${GREEN}✓${NC} Added to $SHELL_CONFIG"
+    if ! echo "$PATH" | tr ':' '\n' | grep -q "^$HOME/.local/bin$"; then
+        if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then
+            echo "" >> "$SHELL_CONFIG"
+            echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG"
+            echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$SHELL_CONFIG"
+            echo -e "${GREEN}✓${NC} Added ~/.local/bin to PATH in $SHELL_CONFIG"
+        else
+            echo -e "${GREEN}✓${NC} ~/.local/bin already in $SHELL_CONFIG"
+        fi
     else
-        echo -e "${GREEN}✓${NC} PATH already in $SHELL_CONFIG"
+        echo -e "${GREEN}✓${NC} ~/.local/bin already on PATH"
     fi
 fi
 
@@ -232,5 +270,6 @@ read -p "Would you like to run the setup wizard now? [Y/n] " -n 1 -r
 echo
 if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
     echo ""
-    python -m hermes_cli.main setup
+    # Run directly with venv Python (no activation needed)
+    "$SCRIPT_DIR/venv/bin/python" -m hermes_cli.main setup
 fi

From d999d9876d9bb2e091dcfdb585e1de9592bf96e9 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sun, 8 Feb 2026 05:00:47 +0000
Subject: [PATCH 44/48] Enhance async tool execution and error handling in
 Hermes agent for Atropos integration

- Updated `.gitignore` to exclude `testlogs` directory.
- Refactored `handle_web_function_call` in `model_tools.py` to support running async functions in existing event loops, improving compatibility with Atropos.
- Introduced a thread pool executor in `agent_loop.py` for running synchronous tool calls that internally use `asyncio.run()`, preventing deadlocks.
- Added `ToolError` class to track tool execution errors, enhancing error reporting during agent loops.
- Updated `wandb_log` method in `hermes_base_env.py` to log tool error statistics for better monitoring.
- Implemented patches in `patches.py` to ensure async-safe operation of tools within Atropos's event loop.
- Enhanced `ToolContext` and `terminal_tool.py` to utilize the new async handling, improving overall tool execution reliability.
---
 .gitignore                        |   1 +
 environments/agent_loop.py        |  84 +++++++++++--
 environments/hermes_base_env.py   |  93 +++++++++++++--
 environments/patches.py           | 188 ++++++++++++++++++++++++++++++
 environments/terminal_test_env.py |   2 +-
 environments/tool_context.py      |  49 +++++++-
 model_tools.py                    |  15 ++-
 tools/file_tools.py               |  80 +++++++++++--
 tools/terminal_tool.py            |  92 ++++++++++-----
 9 files changed, 540 insertions(+), 64 deletions(-)
 create mode 100644 environments/patches.py

diff --git a/.gitignore b/.gitignore
index 3c5ca3743b..d36c78d3af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ images/
 __pycache__/
 hermes_agent.egg-info/
 wandb/
+testlogs
 
 # CLI config (may contain sensitive SSH paths)
 cli-config.yaml
diff --git a/environments/agent_loop.py b/environments/agent_loop.py
index 7e9453b606..c7b311d7ae 100644
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -11,6 +11,8 @@ identical to hermes-agent's run_agent.py. Tool execution is dispatched via
 handle_function_call() from model_tools.py.
 """
 
+import asyncio
+import concurrent.futures
 import json
 import logging
 import uuid
@@ -19,9 +21,25 @@ from typing import Any, Dict, List, Optional, Set
 
 from model_tools import handle_function_call
 
+# Thread pool for running sync tool calls that internally use asyncio.run()
+# (e.g., mini-swe-agent's modal/docker backends). Running them in a separate
+# thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
+_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)
+
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class ToolError:
+    """Record of a tool execution error during the agent loop."""
+
+    turn: int                  # Which turn the error occurred on
+    tool_name: str             # Which tool was called
+    arguments: str             # The arguments passed (truncated)
+    error: str                 # The error message
+    tool_result: str           # The raw result returned to the model
+
+
 @dataclass
 class AgentResult:
     """Result of running the agent loop."""
@@ -36,6 +54,8 @@ class AgentResult:
     finished_naturally: bool = False
     # Extracted reasoning content per turn (from PR #297 helpers)
     reasoning_per_turn: List[Optional[str]] = field(default_factory=list)
+    # Tool errors encountered during the loop
+    tool_errors: List[ToolError] = field(default_factory=list)
 
 
 def _extract_reasoning_from_message(message) -> Optional[str]:
@@ -133,6 +153,7 @@ class HermesAgentLoop:
             AgentResult with full conversation history, managed state, and metadata
         """
         reasoning_per_turn = []
+        tool_errors: List[ToolError] = []
 
         for turn in range(self.max_turns):
             # Build the chat_completion kwargs
@@ -161,6 +182,7 @@ class HermesAgentLoop:
                     turns_used=turn + 1,
                     finished_naturally=False,
                     reasoning_per_turn=reasoning_per_turn,
+                    tool_errors=tool_errors,
                 )
 
             if not response or not response.choices:
@@ -171,6 +193,7 @@ class HermesAgentLoop:
                     turns_used=turn + 1,
                     finished_naturally=False,
                     reasoning_per_turn=reasoning_per_turn,
+                    tool_errors=tool_errors,
                 )
 
             assistant_msg = response.choices[0].message
@@ -209,6 +232,7 @@ class HermesAgentLoop:
                 # Execute each tool call via hermes-agent's dispatch
                 for tc in assistant_msg.tool_calls:
                     tool_name = tc.function.name
+                    tool_args_raw = tc.function.arguments
 
                     # Validate tool name
                     if tool_name not in self.valid_tool_names:
@@ -218,35 +242,75 @@ class HermesAgentLoop:
                                 f"Available tools: {sorted(self.valid_tool_names)}"
                             }
                         )
+                        tool_errors.append(ToolError(
+                            turn=turn + 1, tool_name=tool_name,
+                            arguments=tool_args_raw[:200],
+                            error=f"Unknown tool '{tool_name}'",
+                            tool_result=tool_result,
+                        ))
                         logger.warning(
                             "Model called unknown tool '%s' on turn %d",
-                            tool_name,
-                            turn + 1,
+                            tool_name, turn + 1,
                         )
                     else:
                         # Parse arguments and dispatch
                         try:
-                            args = json.loads(tc.function.arguments)
+                            args = json.loads(tool_args_raw)
                         except json.JSONDecodeError:
                             args = {}
                             logger.warning(
                                 "Invalid JSON in tool call arguments for '%s': %s",
-                                tool_name,
-                                tc.function.arguments[:200],
+                                tool_name, tool_args_raw[:200],
                             )
 
                         try:
-                            tool_result = handle_function_call(
-                                tool_name, args, task_id=self.task_id
+                            if tool_name == "terminal":
+                                import os
+                                backend = os.getenv("TERMINAL_ENV", "local")
+                                cmd_preview = args.get("command", "")[:80]
+                                print(f"  🖥️  [{backend}] $ {cmd_preview}")
+
+                            # Run tool calls in a thread pool so backends that use
+                            # asyncio.run() internally (modal, docker) get a clean
+                            # event loop instead of deadlocking inside Atropos's loop.
+                            loop = asyncio.get_event_loop()
+                            tool_result = await loop.run_in_executor(
+                                _tool_executor,
+                                lambda: handle_function_call(
+                                    tool_name, args, task_id=self.task_id
+                                ),
                             )
                         except Exception as e:
                             tool_result = json.dumps(
-                                {"error": f"Tool execution failed: {str(e)}"}
+                                {"error": f"Tool execution failed: {type(e).__name__}: {str(e)}"}
                             )
+                            tool_errors.append(ToolError(
+                                turn=turn + 1, tool_name=tool_name,
+                                arguments=tool_args_raw[:200],
+                                error=f"{type(e).__name__}: {str(e)}",
+                                tool_result=tool_result,
+                            ))
                             logger.error(
-                                "Tool '%s' execution failed: %s", tool_name, e
+                                "Tool '%s' execution failed on turn %d: %s",
+                                tool_name, turn + 1, e,
                             )
 
+                        # Also record errors the tool itself reported in its JSON result ("error" set and a negative "exit_code")
+                        try:
+                            result_data = json.loads(tool_result)
+                            if isinstance(result_data, dict):
+                                err = result_data.get("error")
+                                exit_code = result_data.get("exit_code")
+                                if err and exit_code and exit_code < 0:
+                                    tool_errors.append(ToolError(
+                                        turn=turn + 1, tool_name=tool_name,
+                                        arguments=tool_args_raw[:200],
+                                        error=str(err),
+                                        tool_result=tool_result[:500],
+                                    ))
+                        except (json.JSONDecodeError, TypeError):
+                            pass
+
                     # Add tool response to conversation
                     messages.append(
                         {
@@ -282,6 +346,7 @@ class HermesAgentLoop:
                     turns_used=turn + 1,
                     finished_naturally=True,
                     reasoning_per_turn=reasoning_per_turn,
+                    tool_errors=tool_errors,
                 )
 
         # Hit max turns without the model stopping
@@ -292,6 +357,7 @@ class HermesAgentLoop:
             turns_used=self.max_turns,
             finished_naturally=False,
             reasoning_per_turn=reasoning_per_turn,
+            tool_errors=tool_errors,
         )
 
     def _get_managed_state(self) -> Optional[Dict[str, Any]]:
diff --git a/environments/hermes_base_env.py b/environments/hermes_base_env.py
index d17fcd6ab7..861d88af91 100644
--- a/environments/hermes_base_env.py
+++ b/environments/hermes_base_env.py
@@ -41,6 +41,12 @@ _env_path = _repo_root / ".env"
 if _env_path.exists():
     load_dotenv(dotenv_path=_env_path)
 
+# Apply monkey patches for async-safe tool operation inside Atropos's event loop.
+# This patches SwerexModalEnvironment to use a background thread instead of
+# asyncio.run(), which would deadlock inside Atropos. Safe for normal CLI too.
+from environments.patches import apply_patches
+apply_patches()
+
 from atroposlib.envs.base import (
     BaseEnv,
     BaseEnvConfig,
@@ -172,10 +178,14 @@ class HermesAgentBaseEnv(BaseEnv):
         # Set terminal backend environment variable so hermes tools pick it up
         if config.terminal_backend:
             os.environ["TERMINAL_ENV"] = config.terminal_backend
+            print(f"🖥️  Terminal backend: {config.terminal_backend}")
 
         # Current group's resolved tools (set in collect_trajectories)
         self._current_group_tools: Optional[Tuple[List[Dict], Set[str]]] = None
 
+        # Tool error tracking for wandb logging
+        self._tool_error_buffer: List[Dict[str, Any]] = []
+
     # =========================================================================
     # Toolset resolution (per-group)
     # =========================================================================
@@ -348,6 +358,33 @@ class HermesAgentBaseEnv(BaseEnv):
         if len(self.rollouts_for_wandb) > self.config.num_rollouts_to_keep:
             self.rollouts_for_wandb.pop(0)
 
+    async def wandb_log(self, wandb_metrics: Optional[Dict] = None):
+        """Log base metrics including tool errors to wandb."""
+        if wandb_metrics is None:
+            wandb_metrics = {}
+
+        # Log tool error stats
+        if self._tool_error_buffer:
+            wandb_metrics["train/tool_errors_count"] = len(self._tool_error_buffer)
+
+            # Log error details as a summary string (tables can crash wandb on tmp cleanup)
+            error_summaries = []
+            for err in self._tool_error_buffer:
+                error_summaries.append(
+                    f"[turn {err['turn']}] {err['tool']}({err['args'][:80]}) -> {err['error'][:150]}"
+                )
+            wandb_metrics["train/tool_error_details"] = "\n".join(error_summaries)
+
+            # Also print to stdout for immediate visibility
+            for summary in error_summaries:
+                print(f"  Tool Error: {summary}")
+
+            self._tool_error_buffer = []
+        else:
+            wandb_metrics["train/tool_errors_count"] = 0
+
+        await super().wandb_log(wandb_metrics)
+
     async def collect_trajectory(
         self, item: Item
     ) -> Tuple[Optional[Union[ScoredDataItem, Any]], List[Item]]:
@@ -376,8 +413,22 @@ class HermesAgentBaseEnv(BaseEnv):
         result: AgentResult
         if self._use_managed_server():
             # Phase 2: ManagedServer with parser -- exact tokens + logprobs
+            # Load the tool call parser from registry based on config
+            from environments.tool_call_parsers import get_parser
             try:
-                async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
+                tc_parser = get_parser(self.config.tool_call_parser)
+            except KeyError:
+                logger.warning(
+                    "Tool call parser '%s' not found, falling back to 'hermes'",
+                    self.config.tool_call_parser,
+                )
+                tc_parser = get_parser("hermes")
+
+            try:
+                async with self.server.managed_server(
+                    tokenizer=self.tokenizer,
+                    tool_call_parser=tc_parser,
+                ) as managed:
                     agent = HermesAgentLoop(
                         server=managed,
                         tool_schemas=tools,
@@ -417,15 +468,39 @@ class HermesAgentBaseEnv(BaseEnv):
             )
             result = await agent.run(messages)
 
-        # Compute reward using ToolContext (gives verifier full tool access)
-        ctx = ToolContext(task_id)
-        try:
-            reward = await self.compute_reward(item, result, ctx)
-        except Exception as e:
-            logger.error("compute_reward failed: %s", e)
+        # Skip reward computation if the agent loop produced no meaningful work
+        # (e.g., API call failed on turn 1). No point spinning up a Modal sandbox
+        # just to verify files that were never created.
+        only_system_and_user = all(
+            msg.get("role") in ("system", "user") for msg in result.messages
+        )
+        if result.turns_used == 0 or only_system_and_user:
+            logger.warning(
+                "Agent loop produced no output (turns=%d, msgs=%d). Skipping reward.",
+                result.turns_used, len(result.messages),
+            )
             reward = 0.0
-        finally:
-            ctx.cleanup()
+        else:
+            # Compute reward using ToolContext (gives verifier full tool access)
+            ctx = ToolContext(task_id)
+            try:
+                reward = await self.compute_reward(item, result, ctx)
+            except Exception as e:
+                logger.error("compute_reward failed: %s", e)
+                reward = 0.0
+            finally:
+                ctx.cleanup()
+
+        # Track tool errors for wandb logging
+        if result.tool_errors:
+            for err in result.tool_errors:
+                self._tool_error_buffer.append({
+                    "turn": err.turn,
+                    "tool": err.tool_name,
+                    "args": err.arguments[:150],
+                    "error": err.error[:300],
+                    "result": err.tool_result[:300],
+                })
 
         # Build ScoredDataItem from ManagedServer state
         # Phase 2: real tokens/masks/logprobs from SequenceNodes
diff --git a/environments/patches.py b/environments/patches.py
new file mode 100644
index 0000000000..f6cfaeb458
--- /dev/null
+++ b/environments/patches.py
@@ -0,0 +1,188 @@
+"""
+Monkey patches for making hermes-agent tools work inside async frameworks (Atropos).
+
+Problem:
+    Some tools use asyncio.run() internally (e.g., mini-swe-agent's Modal backend,
+    web_extract). This crashes when called from inside Atropos's event loop because
+    asyncio.run() can't be nested.
+
+Solution:
+    Replace the problematic methods with versions that use a dedicated background
+    thread with its own event loop. The calling code sees the same sync interface --
+    call a function, get a result -- but internally the async work happens on a
+    separate thread that doesn't conflict with Atropos's loop.
+
+    These patches are safe for normal CLI use too: when there's no running event
+    loop, the behavior is identical (the background thread approach works regardless).
+
+What gets patched:
+    - SwerexModalEnvironment.__init__ -- creates Modal deployment on a background thread
+    - SwerexModalEnvironment.execute -- runs commands on the same background thread
+    - SwerexModalEnvironment.stop -- stops deployment on the background thread
+
+Usage:
+    Call apply_patches() once at import time (done automatically by hermes_base_env.py).
+    This is idempotent -- calling it multiple times is safe.
+"""
+
+import asyncio
+import logging
+import threading
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+_patches_applied = False
+
+
+class _AsyncWorker:
+    """
+    A dedicated background thread with its own event loop.
+
+    Allows sync code to submit async coroutines and block for results,
+    even when called from inside another running event loop. Used to
+    bridge sync tool interfaces with async backends (Modal, SWE-ReX).
+    """
+
+    def __init__(self):
+        self._loop: asyncio.AbstractEventLoop = None
+        self._thread: threading.Thread = None
+        self._started = threading.Event()
+
+    def start(self):
+        """Start the background event loop thread."""
+        self._thread = threading.Thread(target=self._run_loop, daemon=True)
+        self._thread.start()
+        self._started.wait(timeout=30)
+
+    def _run_loop(self):
+        """Background thread entry point -- runs the event loop forever."""
+        self._loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(self._loop)
+        self._started.set()
+        self._loop.run_forever()
+
+    def run_coroutine(self, coro, timeout=600):
+        """
+        Submit a coroutine to the background loop and block until it completes.
+
+        Safe to call from any thread, including threads that already have
+        a running event loop.
+        """
+        if self._loop is None or self._loop.is_closed():
+            raise RuntimeError("AsyncWorker loop is not running")
+        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        return future.result(timeout=timeout)
+
+    def stop(self):
+        """Stop the background event loop and join the thread."""
+        if self._loop and self._loop.is_running():
+            self._loop.call_soon_threadsafe(self._loop.stop)
+        if self._thread:
+            self._thread.join(timeout=10)
+
+
+def _patch_swerex_modal():
+    """
+    Monkey patch SwerexModalEnvironment to use a background thread event loop
+    instead of asyncio.run(). This makes it safe to call from inside Atropos's
+    async event loop.
+
+    The patched methods have the exact same interface and behavior -- the only
+    difference is HOW the async work is executed internally.
+    """
+    try:
+        from minisweagent.environments.extra.swerex_modal import (
+            SwerexModalEnvironment,
+            SwerexModalEnvironmentConfig,
+        )
+        from swerex.deployment.modal import ModalDeployment
+        from swerex.runtime.abstract import Command as RexCommand
+    except ImportError:
+        # mini-swe-agent or swe-rex not installed -- nothing to patch
+        logger.debug("mini-swe-agent Modal backend not available, skipping patch")
+        return
+
+    # Keep a reference to the original __init__ (not used by the patched methods below)
+    _original_init = SwerexModalEnvironment.__init__
+
+    def _patched_init(self, **kwargs):
+        """Patched __init__: creates Modal deployment on a background thread."""
+        self.config = SwerexModalEnvironmentConfig(**kwargs)
+
+        # Start a dedicated event loop thread for all Modal async operations
+        self._worker = _AsyncWorker()
+        self._worker.start()
+
+        # Create AND start the deployment entirely on the worker's loop/thread
+        # so all gRPC channels and async state are bound to that loop
+        async def _create_and_start():
+            deployment = ModalDeployment(
+                image=self.config.image,
+                startup_timeout=self.config.startup_timeout,
+                runtime_timeout=self.config.runtime_timeout,
+                deployment_timeout=self.config.deployment_timeout,
+                install_pipx=self.config.install_pipx,
+                modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
+            )
+            await deployment.start()
+            return deployment
+
+        self.deployment = self._worker.run_coroutine(_create_and_start())
+
+    def _patched_execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
+        """Patched execute: runs commands on the background thread's loop."""
+        async def _do_execute():
+            return await self.deployment.runtime.execute(
+                RexCommand(
+                    command=command,
+                    shell=True,
+                    check=False,
+                    cwd=cwd or self.config.cwd,
+                    timeout=timeout or self.config.timeout,
+                    merge_output_streams=True,
+                    env=self.config.env if self.config.env else None,
+                )
+            )
+
+        output = self._worker.run_coroutine(_do_execute())
+        return {
+            "output": output.stdout,
+            "returncode": output.exit_code,
+        }
+
+    def _patched_stop(self):
+        """Patched stop: stops deployment on the background thread, then stops the thread."""
+        try:
+            self._worker.run_coroutine(
+                asyncio.wait_for(self.deployment.stop(), timeout=10),
+                timeout=15,
+            )
+        except Exception:
+            pass
+        finally:
+            self._worker.stop()
+
+    # Apply the patches
+    SwerexModalEnvironment.__init__ = _patched_init
+    SwerexModalEnvironment.execute = _patched_execute
+    SwerexModalEnvironment.stop = _patched_stop
+
+    logger.debug("Patched SwerexModalEnvironment for async-safe operation")
+
+
+def apply_patches():
+    """
+    Apply all monkey patches needed for Atropos compatibility.
+
+    Safe to call multiple times -- patches are only applied once.
+    Safe for normal CLI use -- patched code works identically when
+    there is no running event loop.
+    """
+    global _patches_applied
+    if _patches_applied:
+        return
+
+    _patch_swerex_modal()
+
+    _patches_applied = True
diff --git a/environments/terminal_test_env.py b/environments/terminal_test_env.py
index 9a5bdc2cbe..eb9414965a 100644
--- a/environments/terminal_test_env.py
+++ b/environments/terminal_test_env.py
@@ -132,7 +132,7 @@ class TerminalTestEnv(HermesAgentBaseEnv):
             terminal_backend="modal",
             # Atropos settings
             group_size=3,              # 3 rollouts per group
-            tokenizer_name="NousResearch/DeepHermes-3-Llama-3-3B-Preview",
+            tokenizer_name="NousResearch/q-30b-t-h45-e1",
             tool_call_parser="hermes",
             steps_per_eval=3,          # Eval after all 3 steps
             total_steps=3,             # 3 groups total (1 group per step)
diff --git a/environments/tool_context.py b/environments/tool_context.py
index 4c9f0d3632..03a49c11a9 100644
--- a/environments/tool_context.py
+++ b/environments/tool_context.py
@@ -25,14 +25,43 @@ Example usage in a compute_reward():
 
 import json
 import logging
+import os
 from typing import Any, Dict, List, Optional
 
+import asyncio
+import concurrent.futures
+
 from model_tools import handle_function_call
 from tools.terminal_tool import cleanup_vm
 from tools.browser_tool import cleanup_browser
 
 logger = logging.getLogger(__name__)
 
+# Thread pool for running sync tool calls that internally use asyncio.run()
+_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
+
+
+def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) -> str:
+    """
+    Run a tool call in a thread pool executor so backends that use asyncio.run()
+    internally (modal, docker) get a clean event loop.
+
+    If we're already in an async context, runs the call on a short-lived worker thread and blocks until it returns.
+    If not (e.g., called from sync code), runs directly.
+    """
+    try:
+        loop = asyncio.get_running_loop()
+        # We're in an async context -- need to run in thread
+        import concurrent.futures
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            future = pool.submit(
+                handle_function_call, tool_name, arguments, task_id
+            )
+            return future.result(timeout=300)
+    except RuntimeError:
+        # No running event loop -- safe to call directly
+        return handle_function_call(tool_name, arguments, task_id)
+
 
 class ToolContext:
     """
@@ -61,10 +90,15 @@ class ToolContext:
         Returns:
             Dict with 'exit_code' (int) and 'output' (str)
         """
-        result = handle_function_call(
+        import os
+        backend = os.getenv("TERMINAL_ENV", "local")
+        logger.debug("ToolContext.terminal [%s backend] task=%s: %s", backend, self.task_id[:8], command[:100])
+
+        # Run in thread pool so modal/docker backends' asyncio.run() doesn't deadlock
+        result = _run_tool_in_thread(
             "terminal",
             {"command": command, "timeout": timeout},
-            task_id=self.task_id,
+            self.task_id,
         )
         try:
             return json.loads(result)
@@ -222,7 +256,7 @@ class ToolContext:
         Returns:
             Raw JSON string result from the tool
         """
-        return handle_function_call(tool_name, arguments, task_id=self.task_id)
+        return _run_tool_in_thread(tool_name, arguments, self.task_id)
 
     # -------------------------------------------------------------------------
     # Cleanup
@@ -240,7 +274,16 @@ class ToolContext:
         except Exception as e:
             logger.debug("VM cleanup for task %s: %s", self.task_id, e)
 
+        # Suppress browser_tool's noisy debug prints during cleanup.
+        # The cleanup still runs (safe), it just doesn't spam the console.
+        _prev_quiet = os.environ.get("HERMES_QUIET")
+        os.environ["HERMES_QUIET"] = "1"
         try:
             cleanup_browser(self.task_id)
         except Exception as e:
             logger.debug("Browser cleanup for task %s: %s", self.task_id, e)
+        finally:
+            if _prev_quiet is None:
+                os.environ.pop("HERMES_QUIET", None)
+            else:
+                os.environ["HERMES_QUIET"] = _prev_quiet
diff --git a/model_tools.py b/model_tools.py
index 203a6669d5..7106150995 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -1191,8 +1191,19 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
         urls = function_args.get("urls", [])
         # Limit URLs to prevent abuse
         urls = urls[:5] if isinstance(urls, list) else []
-        # Run async function in event loop
-        return asyncio.run(web_extract_tool(urls, "markdown"))
+        # Run async function -- use existing loop if available (Atropos),
+        # otherwise create one (normal CLI)
+        try:
+            loop = asyncio.get_running_loop()
+            # Already in an async context (Atropos) -- run in a thread
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                return pool.submit(
+                    lambda: asyncio.run(web_extract_tool(urls, "markdown"))
+                ).result(timeout=120)
+        except RuntimeError:
+            # No running loop (normal CLI) -- use asyncio.run directly
+            return asyncio.run(web_extract_tool(urls, "markdown"))
     
     else:
         return json.dumps({"error": f"Unknown web function: {function_name}"}, ensure_ascii=False)
diff --git a/tools/file_tools.py b/tools/file_tools.py
index 71704fba6b..955bdbd18b 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -2,6 +2,7 @@
 """File Tools Module - LLM agent file manipulation tools."""
 
 import json
+import os
 import threading
 from typing import Optional
 from tools.file_operations import ShellFileOperations
@@ -11,23 +12,85 @@ _file_ops_cache: dict = {}
 
 
 def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
-    """Get or create ShellFileOperations for a terminal environment."""
-    from tools.terminal_tool import _active_environments, _env_lock, _LocalEnvironment
+    """Get or create ShellFileOperations for a terminal environment.
     
+    Respects the TERMINAL_ENV setting -- if the task_id doesn't have an
+    environment yet, creates one using the configured backend (local, docker,
+    modal, etc.) rather than always defaulting to local.
+    """
+    from tools.terminal_tool import (
+        _active_environments, _env_lock, _create_environment,
+        _get_env_config, _last_activity, _start_cleanup_thread,
+        _check_disk_usage_warning,
+    )
+    import time
+    
+    # Fast path: check cache without heavy locks
     with _file_ops_lock:
         if task_id in _file_ops_cache:
             return _file_ops_cache[task_id]
+    
+    # Check if we need to create a new environment
+    needs_creation = False
+    with _env_lock:
+        if task_id not in _active_environments:
+            needs_creation = True
+    
+    # Create environment OUTSIDE locks so we don't block other rollouts
+    # during slow Modal/Docker startup (~10s)
+    if needs_creation:
+        config = _get_env_config()
+        env_type = config["env_type"]
         
+        if env_type == "docker":
+            image = config["docker_image"]
+        elif env_type == "singularity":
+            image = config["singularity_image"]
+        elif env_type == "modal":
+            image = config["modal_image"]
+        else:
+            image = ""
+        
+        cwd = config["cwd"]
+        _check_disk_usage_warning()
+        if not os.getenv("HERMES_QUIET"):
+            print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
+        
+        new_env = _create_environment(
+            env_type=env_type,
+            image=image,
+            cwd=cwd,
+            timeout=config["timeout"],
+        )
+        
+        # Store under lock (brief) -- do NOT call _start_cleanup_thread inside
+        # the lock because it also acquires _env_lock (non-reentrant = deadlock)
+        created = False
         with _env_lock:
             if task_id not in _active_environments:
-                import os
-                env = _LocalEnvironment(cwd=os.getcwd(), timeout=60)
-                _active_environments[task_id] = env
-            terminal_env = _active_environments[task_id]
+                _active_environments[task_id] = new_env
+                created = True
+            else:
+                try:
+                    if hasattr(new_env, 'stop'):
+                        new_env.stop()
+                except Exception:
+                    pass
         
-        file_ops = ShellFileOperations(terminal_env)
+        if created:
+            _start_cleanup_thread()
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
+    
+    # Now get the environment and build file_ops
+    with _env_lock:
+        _last_activity[task_id] = time.time()
+        terminal_env = _active_environments[task_id]
+    
+    file_ops = ShellFileOperations(terminal_env)
+    with _file_ops_lock:
         _file_ops_cache[task_id] = file_ops
-        return file_ops
+    return file_ops
 
 
 def clear_file_ops_cache(task_id: str = None):
@@ -56,6 +119,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
         result = file_ops.write_file(path, content)
         return json.dumps(result.to_dict(), ensure_ascii=False)
     except Exception as e:
+        print(f"[FileTools] write_file error: {type(e).__name__}: {e}", flush=True)  
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
 
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 72301ed199..9fe1162ac6 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -1347,40 +1347,61 @@ def terminal_tool(
         _start_cleanup_thread()
 
         # Get or create environment
+        # Check under lock, but create OUTSIDE lock so we don't block
+        # other concurrent rollouts during slow Modal/Docker startup
+        needs_creation = False
         with _env_lock:
             if effective_task_id not in _active_environments:
-                # Check disk usage before creating new environment
-                _check_disk_usage_warning()
-                
-                try:
-                    # Build SSH config if using SSH environment
-                    ssh_config = None
-                    if env_type == "ssh":
-                        ssh_config = {
-                            "host": config.get("ssh_host", ""),
-                            "user": config.get("ssh_user", ""),
-                            "port": config.get("ssh_port", 22),
-                            "key": config.get("ssh_key", ""),
-                        }
-                    
-                    _active_environments[effective_task_id] = _create_environment(
-                        env_type=env_type,
-                        image=image,
-                        cwd=cwd,
-                        timeout=effective_timeout,
-                        ssh_config=ssh_config
-                    )
-                except ImportError as e:
-                    return json.dumps({
-                        "output": "",
-                        "exit_code": -1,
-                        "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
-                        "status": "disabled"
-                    }, ensure_ascii=False)
+                needs_creation = True
+            else:
+                _last_activity[effective_task_id] = time.time()
+                env = _active_environments[effective_task_id]
 
-            # Update last activity time
-            _last_activity[effective_task_id] = time.time()
-            env = _active_environments[effective_task_id]
+        if needs_creation:
+            _check_disk_usage_warning()
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
+            try:
+                ssh_config = None
+                if env_type == "ssh":
+                    ssh_config = {
+                        "host": config.get("ssh_host", ""),
+                        "user": config.get("ssh_user", ""),
+                        "port": config.get("ssh_port", 22),
+                        "key": config.get("ssh_key", ""),
+                    }
+
+                new_env = _create_environment(
+                    env_type=env_type,
+                    image=image,
+                    cwd=cwd,
+                    timeout=effective_timeout,
+                    ssh_config=ssh_config
+                )
+            except ImportError as e:
+                return json.dumps({
+                    "output": "",
+                    "exit_code": -1,
+                    "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
+                    "status": "disabled"
+                }, ensure_ascii=False)
+
+            # Store under lock (brief)
+            with _env_lock:
+                if effective_task_id not in _active_environments:
+                    _active_environments[effective_task_id] = new_env
+                else:
+                    # Another thread created it while we were building -- clean up ours
+                    try:
+                        if hasattr(new_env, 'stop'):
+                            new_env.stop()
+                    except Exception:
+                        pass
+
+                _last_activity[effective_task_id] = time.time()
+                env = _active_environments[effective_task_id]
+                if not os.getenv("HERMES_QUIET"):
+                    print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)
 
         # Check for dangerous commands (only for local/ssh in interactive modes)
         # Skip check if force=True (user has confirmed they want to run it)
@@ -1435,13 +1456,20 @@ def terminal_tool(
                         retry_count += 1
                         wait_time = 2 ** retry_count
                         print(f"⚠️  Terminal: execution error, retrying in {wait_time}s (attempt {retry_count}/{max_retries})")
+                        print(f"   Command: {command[:200]}")
+                        print(f"   Error: {type(e).__name__}: {e}")
+                        print(f"   Task ID: {effective_task_id}, Backend: {env_type}")
                         time.sleep(wait_time)
                         continue
                     
+                    print(f"❌ Terminal: execution failed after {max_retries} retries")
+                    print(f"   Command: {command[:200]}")
+                    print(f"   Error: {type(e).__name__}: {e}")
+                    print(f"   Task ID: {effective_task_id}, Backend: {env_type}")
                     return json.dumps({
                         "output": "",
                         "exit_code": -1,
-                        "error": f"Command execution failed: {str(e)}"
+                        "error": f"Command execution failed: {type(e).__name__}: {str(e)}"
                     }, ensure_ascii=False)
                 
                 # Got a result

From f12ea1bc027b001591cd3a5df8dd8bbf6c7581cc Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sun, 8 Feb 2026 10:49:24 +0000
Subject: [PATCH 45/48] Enhance BatchRunner and AIAgent with new configuration
 options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.
---
 batch_runner.py         |  64 +++++++++-
 cli-config.yaml.example |   4 +-
 cli.py                  |  31 ++++-
 cron/scheduler.py       |   2 +-
 gateway/run.py          |   2 +-
 hermes_cli/config.py    |   6 +-
 run_agent.py            | 255 +++++++++++++++++++++++++++++++++++-----
 7 files changed, 324 insertions(+), 40 deletions(-)

diff --git a/batch_runner.py b/batch_runner.py
index 80f5cabff9..c3679cdc35 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -244,6 +244,9 @@ def _process_single_prompt(
             providers_ignored=config.get("providers_ignored"),
             providers_order=config.get("providers_order"),
             provider_sort=config.get("provider_sort"),
+            max_tokens=config.get("max_tokens"),
+            reasoning_config=config.get("reasoning_config"),
+            prefill_messages=config.get("prefill_messages"),
         )
 
         # Run the agent with task_id to ensure each task gets its own isolated VM
@@ -428,6 +431,9 @@ class BatchRunner:
         providers_ignored: List[str] = None,
         providers_order: List[str] = None,
         provider_sort: str = None,
+        max_tokens: int = None,
+        reasoning_config: Dict[str, Any] = None,
+        prefill_messages: List[Dict[str, Any]] = None,
     ):
         """
         Initialize the batch runner.
@@ -449,6 +455,9 @@ class BatchRunner:
             providers_ignored (List[str]): OpenRouter providers to ignore (optional)
             providers_order (List[str]): OpenRouter providers to try in order (optional)
             provider_sort (str): Sort providers by price/throughput/latency (optional)
+            max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
+            reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
+            prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
         """
         self.dataset_file = Path(dataset_file)
         self.batch_size = batch_size
@@ -466,6 +475,9 @@ class BatchRunner:
         self.providers_ignored = providers_ignored
         self.providers_order = providers_order
         self.provider_sort = provider_sort
+        self.max_tokens = max_tokens
+        self.reasoning_config = reasoning_config
+        self.prefill_messages = prefill_messages
         
         # Validate distribution
         if not validate_distribution(distribution):
@@ -735,6 +747,9 @@ class BatchRunner:
             "providers_ignored": self.providers_ignored,
             "providers_order": self.providers_order,
             "provider_sort": self.provider_sort,
+            "max_tokens": self.max_tokens,
+            "reasoning_config": self.reasoning_config,
+            "prefill_messages": self.prefill_messages,
         }
         
         # For backward compatibility, still track by index (but this is secondary to content matching)
@@ -956,6 +971,10 @@ def main(
     providers_ignored: str = None,
     providers_order: str = None,
     provider_sort: str = None,
+    max_tokens: int = None,
+    reasoning_effort: str = None,
+    reasoning_disabled: bool = False,
+    prefill_messages_file: str = None,
 ):
     """
     Run batch processing of agent prompts from a dataset.
@@ -979,6 +998,10 @@ def main(
         providers_ignored (str): Comma-separated list of OpenRouter providers to ignore (e.g. "together,deepinfra")
         providers_order (str): Comma-separated list of OpenRouter providers to try in order (e.g. "anthropic,openai,google")
         provider_sort (str): Sort providers by "price", "throughput", or "latency" (OpenRouter only)
+        max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
+        reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "xhigh")
+        reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False)
+        prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts)
         
     Examples:
         # Basic usage
@@ -990,9 +1013,13 @@ def main(
         # Use specific distribution
         python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=image_test --distribution=image_gen
         
-        # With ephemeral system prompt (not saved to dataset)
+        # With disabled reasoning and max tokens
         python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run \\
-                               --ephemeral_system_prompt="You are a helpful assistant focused on image generation."
+                               --reasoning_disabled --max_tokens=128000
+        
+        # With prefill messages from file
+        python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run \\
+                               --prefill_messages_file=configs/prefill_opus.json
         
         # List available distributions
         python batch_runner.py --list_distributions
@@ -1031,6 +1058,36 @@ def main(
     providers_ignored_list = [p.strip() for p in providers_ignored.split(",")] if providers_ignored else None
     providers_order_list = [p.strip() for p in providers_order.split(",")] if providers_order else None
     
+    # Build reasoning_config from CLI flags
+    # --reasoning_disabled takes priority, then --reasoning_effort, then default (xhigh)
+    reasoning_config = None
+    if reasoning_disabled:
+        # Completely disable reasoning/thinking tokens
+        reasoning_config = {"effort": "none"}
+        print("🧠 Reasoning: DISABLED (effort=none)")
+    elif reasoning_effort:
+        # Use specified effort level
+        valid_efforts = ["xhigh", "high", "medium", "low", "minimal", "none"]
+        if reasoning_effort not in valid_efforts:
+            print(f"❌ Error: --reasoning_effort must be one of: {', '.join(valid_efforts)}")
+            return
+        reasoning_config = {"enabled": True, "effort": reasoning_effort}
+        print(f"🧠 Reasoning effort: {reasoning_effort}")
+    
+    # Load prefill messages from JSON file if provided
+    prefill_messages = None
+    if prefill_messages_file:
+        try:
+            with open(prefill_messages_file, 'r', encoding='utf-8') as f:
+                prefill_messages = json.load(f)
+            if not isinstance(prefill_messages, list):
+                print(f"❌ Error: prefill_messages_file must contain a JSON array of messages")
+                return
+            print(f"💬 Loaded {len(prefill_messages)} prefill messages from {prefill_messages_file}")
+        except Exception as e:
+            print(f"❌ Error loading prefill messages: {e}")
+            return
+    
     # Initialize and run batch runner
     try:
         runner = BatchRunner(
@@ -1050,6 +1107,9 @@ def main(
             providers_ignored=providers_ignored_list,
             providers_order=providers_order_list,
             provider_sort=provider_sort,
+            max_tokens=max_tokens,
+            reasoning_config=reasoning_config,
+            prefill_messages=prefill_messages,
         )
 
         runner.run(resume=resume)
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 63e4f75556..0e9099533f 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -7,7 +7,7 @@
 # =============================================================================
 model:
   # Default model to use (can be overridden with --model flag)
-  default: "anthropic/claude-sonnet-4"
+  default: "anthropic/claude-opus-4.6"
   
   # API configuration (falls back to OPENROUTER_API_KEY env var)
   # api_key: "your-key-here"  # Uncomment to set here instead of .env
@@ -140,7 +140,7 @@ compression:
   
   # Model to use for generating summaries (fast/cheap recommended)
   # This model compresses the middle turns into a concise summary
-  summary_model: "google/gemini-2.0-flash-001"
+  summary_model: "google/gemini-3-flash-preview"
 
 # =============================================================================
 # Agent Behavior
diff --git a/cli.py b/cli.py
index 795c880bee..66f0eafe42 100755
--- a/cli.py
+++ b/cli.py
@@ -83,7 +83,7 @@ def load_cli_config() -> Dict[str, Any]:
     # Default configuration
     defaults = {
         "model": {
-            "default": "anthropic/claude-opus-4-20250514",
+            "default": "anthropic/claude-opus-4.6",
             "base_url": "https://openrouter.ai/api/v1",
         },
         "terminal": {
@@ -101,7 +101,7 @@ def load_cli_config() -> Dict[str, Any]:
         "compression": {
             "enabled": True,      # Auto-compress when approaching context limit
             "threshold": 0.85,    # Compress at 85% of model's context limit
-            "summary_model": "google/gemini-2.0-flash-001",  # Fast/cheap model for summaries
+            "summary_model": "google/gemini-3-flash-preview",  # Fast/cheap model for summaries
         },
         "agent": {
             "max_turns": 60,  # Default max tool-calling iterations
@@ -1332,6 +1332,11 @@ class HermesCLI:
             # Get the final response
             response = result.get("final_response", "") if result else ""
             
+            # Handle failed results (e.g., non-retryable errors like invalid model)
+            if result and result.get("failed") and not response:
+                error_detail = result.get("error", "Unknown error")
+                response = f"Error: {error_detail}"
+            
             # Handle interrupt - check if we were interrupted
             pending_message = None
             if result and result.get("interrupted"):
@@ -1403,6 +1408,7 @@ class HermesCLI:
         self._agent_running = False
         self._pending_input = queue.Queue()
         self._should_exit = False
+        self._last_ctrl_c_time = 0  # Track double Ctrl+C for force exit
         
         # Create a persistent input area using prompt_toolkit Application
         input_buffer = Buffer()
@@ -1422,11 +1428,28 @@ class HermesCLI:
         
         @kb.add('c-c')
         def handle_ctrl_c(event):
-            """Handle Ctrl+C - interrupt or exit."""
+            """Handle Ctrl+C - interrupt agent or force exit on double press.
+            
+            First Ctrl+C: interrupt the running agent gracefully.
+            Second Ctrl+C within 2 seconds: force exit. Ctrl+C while the agent is idle exits immediately.
+            """
+            import time as _time
+            now = _time.time()
+            
             if self._agent_running and self.agent:
-                print("\n⚡ Interrupting agent...")
+                # Check for double Ctrl+C (second press within 2 seconds)
+                if now - self._last_ctrl_c_time < 2.0:
+                    print("\n⚡ Force exiting...")
+                    self._should_exit = True
+                    event.app.exit()
+                    return
+                
+                # First Ctrl+C: try graceful interrupt
+                self._last_ctrl_c_time = now
+                print("\n⚡ Interrupting agent... (press Ctrl+C again to force exit)")
                 self.agent.interrupt()
             else:
+                # Agent not running, exit immediately
                 self._should_exit = True
                 event.app.exit()
         
diff --git a/cron/scheduler.py b/cron/scheduler.py
index ea8f1c40e6..cdb8b7e0ab 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -40,7 +40,7 @@ def run_job(job: dict) -> tuple[bool, str, Optional[str]]:
         # Create agent with default settings
         # Jobs run in isolated sessions (no prior context)
         agent = AIAgent(
-            model=os.getenv("HERMES_MODEL", "anthropic/claude-sonnet-4"),
+            model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"),
             quiet_mode=True,
             session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
         )
diff --git a/gateway/run.py b/gateway/run.py
index 08de429c43..2e126d59fc 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -481,7 +481,7 @@ class GatewayRunner:
             max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))
             
             agent = AIAgent(
-                model=os.getenv("HERMES_MODEL", "anthropic/claude-sonnet-4"),
+                model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"),
                 max_iterations=max_iterations,
                 quiet_mode=True,
                 enabled_toolsets=[toolset],
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 82ce6ae7ec..f31cc040d8 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -71,7 +71,7 @@ def ensure_hermes_home():
 # =============================================================================
 
 DEFAULT_CONFIG = {
-    "model": "anthropic/claude-sonnet-4.5",
+    "model": "anthropic/claude-opus-4.6",
     "toolsets": ["hermes-cli"],
     "max_turns": 100,
     
@@ -91,7 +91,7 @@ DEFAULT_CONFIG = {
     "compression": {
         "enabled": True,
         "threshold": 0.85,
-        "summary_model": "google/gemini-2.0-flash-001",
+        "summary_model": "google/gemini-3-flash-preview",
     },
     
     "display": {
@@ -555,7 +555,7 @@ def show_config():
     print(f"  Enabled:      {'yes' if enabled else 'no'}")
     if enabled:
         print(f"  Threshold:    {compression.get('threshold', 0.85) * 100:.0f}%")
-        print(f"  Model:        {compression.get('summary_model', 'google/gemini-2.0-flash-001')}")
+        print(f"  Model:        {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
     
     # Messaging
     print()
diff --git a/run_agent.py b/run_agent.py
index 1aceb5b589..eeb24dd8f5 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -66,6 +66,7 @@ _MODEL_CACHE_TTL = 3600  # 1 hour cache TTL
 DEFAULT_CONTEXT_LENGTHS = {
     "anthropic/claude-opus-4": 200000,
     "anthropic/claude-opus-4.5": 200000,
+    "anthropic/claude-opus-4.6": 200000,
     "anthropic/claude-sonnet-4": 200000,
     "anthropic/claude-sonnet-4-20250514": 200000,
     "anthropic/claude-haiku-4.5": 200000,
@@ -206,7 +207,7 @@ class ContextCompressor:
         self,
         model: str,
         threshold_percent: float = 0.85,
-        summary_model: str = "google/gemini-2.0-flash-001",
+        summary_model: str = "google/gemini-3-flash-preview",
         protect_first_n: int = 3,
         protect_last_n: int = 4,
         summary_target_tokens: int = 500,
@@ -584,7 +585,7 @@ class AIAgent:
         self,
         base_url: str = None,
         api_key: str = None,
-        model: str = "anthropic/claude-sonnet-4-20250514",  # OpenRouter format
+        model: str = "anthropic/claude-opus-4.6",  # OpenRouter format
         max_iterations: int = 60,  # Default tool-calling iterations
         tool_delay: float = 1.0,
         enabled_toolsets: List[str] = None,
@@ -601,6 +602,9 @@ class AIAgent:
         provider_sort: str = None,
         session_id: str = None,
         tool_progress_callback: callable = None,
+        max_tokens: int = None,
+        reasoning_config: Dict[str, Any] = None,
+        prefill_messages: List[Dict[str, Any]] = None,
     ):
         """
         Initialize the AI Agent.
@@ -625,6 +629,12 @@ class AIAgent:
             provider_sort (str): Sort providers by price/throughput/latency (optional)
             session_id (str): Pre-generated session ID for logging (optional, auto-generated if not provided)
             tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
+            max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
+            reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
+                If None, defaults to {"enabled": True, "effort": "xhigh"} for OpenRouter. Set to disable/customize reasoning.
+            prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context.
+                Useful for injecting a few-shot example or priming the model's response style.
+                Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}]
         """
         self.model = model
         self.max_iterations = max_iterations
@@ -653,6 +663,11 @@ class AIAgent:
         self.enabled_toolsets = enabled_toolsets
         self.disabled_toolsets = disabled_toolsets
         
+        # Model response configuration
+        self.max_tokens = max_tokens  # None = use model default
+        self.reasoning_config = reasoning_config  # None = use default (xhigh for OpenRouter)
+        self.prefill_messages = prefill_messages or []  # Prefilled conversation turns
+        
         # Configure logging
         if self.verbose_logging:
             logging.basicConfig(
@@ -781,7 +796,7 @@ class AIAgent:
         # Compresses conversation when approaching model's context limit
         # Configuration via environment variables (can be set in .env or cli-config.yaml)
         compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
-        compression_model = os.getenv("CONTEXT_COMPRESSION_MODEL", "google/gemini-2.0-flash-001")
+        compression_model = os.getenv("CONTEXT_COMPRESSION_MODEL", "google/gemini-3-flash-preview")
         compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
         
         self.context_compressor = ContextCompressor(
@@ -1086,6 +1101,25 @@ class AIAgent:
         
         return json.dumps(formatted_tools, ensure_ascii=False)
     
+    @staticmethod
+    def _convert_scratchpad_to_think(content: str) -> str:
+        """
+        Convert <REASONING_SCRATCHPAD> tags to <think> tags in content.
+        
+        When native thinking/reasoning is disabled and the model is prompted to
+        reason inside <REASONING_SCRATCHPAD> XML tags instead, this converts those
+        to the standard <think> format used in our trajectory storage.
+        
+        Args:
+            content: Assistant message content that may contain scratchpad tags
+            
+        Returns:
+            Content with scratchpad tags replaced by think tags
+        """
+        if not content or "<REASONING_SCRATCHPAD>" not in content:
+            return content
+        return content.replace("<REASONING_SCRATCHPAD>", "<think>").replace("</REASONING_SCRATCHPAD>", "</think>")
+    
     def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
         """
         Convert internal message format to trajectory format for saving.
@@ -1120,14 +1154,19 @@ class AIAgent:
             "value": system_msg
         })
         
-        # Add the initial user message
+        # Add the actual user prompt (from the dataset) as the first human message
         trajectory.append({
             "from": "human",
             "value": user_query
         })
         
-        # Process remaining messages
-        i = 1  # Skip the first user message as we already added it
+        # Calculate where agent responses start in the messages list.
+        # Prefill messages are ephemeral (only used to prime model response style)
+        # so we skip them entirely in the saved trajectory.
+        # Layout: [*prefill_msgs, actual_user_msg, ...agent_responses...]
+        num_prefill = len(self.prefill_messages) if self.prefill_messages else 0
+        i = num_prefill + 1  # Skip prefill messages + the actual user message (already added above)
+        
         while i < len(messages):
             msg = messages[i]
             
@@ -1138,12 +1177,14 @@ class AIAgent:
                     # Add <think> tags around reasoning for trajectory storage
                     content = ""
                     
-                    # Prepend reasoning in <think> tags if available
+                    # Prepend reasoning in <think> tags if available (native thinking tokens)
                     if msg.get("reasoning") and msg["reasoning"].strip():
                         content = f"<think>\n{msg['reasoning']}\n</think>\n"
                     
                     if msg.get("content") and msg["content"].strip():
-                        content += msg["content"] + "\n"
+                        # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
+                        # (used when native thinking is disabled and model reasons via XML)
+                        content += self._convert_scratchpad_to_think(msg["content"]) + "\n"
                     
                     # Add tool calls wrapped in XML tags
                     for tool_call in msg["tool_calls"]:
@@ -1206,11 +1247,14 @@ class AIAgent:
                     # Add <think> tags around reasoning for trajectory storage
                     content = ""
                     
-                    # Prepend reasoning in <think> tags if available
+                    # Prepend reasoning in <think> tags if available (native thinking tokens)
                     if msg.get("reasoning") and msg["reasoning"].strip():
                         content = f"<think>\n{msg['reasoning']}\n</think>\n"
                     
-                    content += msg["content"] or ""
+                    # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
+                    # (used when native thinking is disabled and model reasons via XML)
+                    raw_content = msg["content"] or ""
+                    content += self._convert_scratchpad_to_think(raw_content)
                     
                     trajectory.append({
                         "from": "gpt",
@@ -1261,6 +1305,66 @@ class AIAgent:
         except Exception as e:
             print(f"⚠️ Failed to save trajectory: {e}")
     
+    def _log_api_payload(self, turn_number: int, api_kwargs: Dict[str, Any], response=None):
+        """
+        [TEMPORARY DEBUG] Log the full API payload and response token metrics
+        for each agent turn to a per-session JSONL file for inspection.
+        
+        Writes one JSON line per turn to logs/payload_<session_id>.jsonl.
+        Tool schemas are summarized (just names) to keep logs readable.
+        
+        Args:
+            turn_number: Which API call this is (1-indexed)
+            api_kwargs: The full kwargs dict being passed to chat.completions.create
+            response: The API response object (optional, added after the call completes)
+        """
+        try:
+            payload_log_file = self.logs_dir / f"payload_{self.session_id}.jsonl"
+            
+            # Build a serializable copy of the request payload
+            payload = {
+                "turn": turn_number,
+                "timestamp": datetime.now().isoformat(),
+                "model": api_kwargs.get("model"),
+                "max_tokens": api_kwargs.get("max_tokens"),
+                "extra_body": api_kwargs.get("extra_body"),
+                "num_tools": len(api_kwargs.get("tools") or []),
+                "tool_names": [t["function"]["name"] for t in (api_kwargs.get("tools") or [])],
+                "messages": api_kwargs.get("messages", []),
+            }
+            
+            # Add response token metrics if available
+            if response is not None:
+                try:
+                    usage_raw = response.usage.model_dump() if hasattr(response.usage, 'model_dump') else {}
+                    payload["response"] = {
+                        # Core token counts
+                        "prompt_tokens": usage_raw.get("prompt_tokens"),
+                        "completion_tokens": usage_raw.get("completion_tokens"),
+                        "total_tokens": usage_raw.get("total_tokens"),
+                        # Completion breakdown (reasoning tokens, etc.)
+                        "completion_tokens_details": usage_raw.get("completion_tokens_details"),
+                        # Prompt breakdown (cached tokens, etc.)
+                        "prompt_tokens_details": usage_raw.get("prompt_tokens_details"),
+                        # Cost tracking
+                        "cost": usage_raw.get("cost"),
+                        "is_byok": usage_raw.get("is_byok"),
+                        "cost_details": usage_raw.get("cost_details"),
+                        # Provider info (top-level field from OpenRouter)
+                        "provider": getattr(response, 'provider', None),
+                        "response_model": getattr(response, 'model', None),
+                    }
+                except Exception:
+                    payload["response"] = {"error": "failed to extract usage"}
+            
+            with open(payload_log_file, "a", encoding="utf-8") as f:
+                f.write(json.dumps(payload, ensure_ascii=False, default=str) + "\n")
+                
+        except Exception as e:
+            # Silent fail - don't interrupt the agent for debug logging
+            if self.verbose_logging:
+                logging.warning(f"Failed to log API payload: {e}")
+    
     def _save_session_log(self, messages: List[Dict[str, Any]] = None):
         """
         Save the current session trajectory to the logs directory.
@@ -1276,10 +1380,12 @@ class AIAgent:
             return
         
         try:
-            # Extract the first user message for the trajectory format
-            # The first message should be the user's initial query
+            # Extract the actual user query for the trajectory format.
+            # Skip prefill messages (they're ephemeral and shouldn't appear in trajectories)
+            # so the first user message we find is the real task prompt.
             first_user_query = ""
-            for msg in messages:
+            start_idx = len(self.prefill_messages) if self.prefill_messages else 0
+            for msg in messages[start_idx:]:
                 if msg.get("role") == "user":
                     first_user_query = msg.get("content", "")
                     break
@@ -1373,6 +1479,12 @@ class AIAgent:
         # Initialize conversation
         messages = conversation_history or []
         
+        # Inject prefill messages at the start of conversation (before user's actual prompt)
+        # This is used for few-shot priming, e.g., a greeting exchange to set response style
+        if self.prefill_messages and not conversation_history:
+            for prefill_msg in self.prefill_messages:
+                messages.append(prefill_msg.copy())
+        
         # Add user message
         messages.append({
             "role": "user",
@@ -1493,6 +1605,10 @@ class AIAgent:
                         "timeout": 600.0  # 10 minute timeout for very long responses
                     }
                     
+                    # Add max_tokens if configured (overrides model default)
+                    if self.max_tokens is not None:
+                        api_kwargs["max_tokens"] = self.max_tokens
+                    
                     # Add extra_body for OpenRouter (provider preferences + reasoning)
                     extra_body = {}
                     
@@ -1500,12 +1616,17 @@ class AIAgent:
                     if provider_preferences:
                         extra_body["provider"] = provider_preferences
                     
-                    # Enable reasoning with xhigh effort for OpenRouter
+                    # Configure reasoning for OpenRouter
+                    # If reasoning_config is explicitly provided, use it (allows disabling/customizing)
+                    # Otherwise, default to xhigh effort for OpenRouter models
                     if "openrouter" in self.base_url.lower():
-                        extra_body["reasoning"] = {
-                            "enabled": True,
-                            "effort": "xhigh"
-                        }
+                        if self.reasoning_config is not None:
+                            extra_body["reasoning"] = self.reasoning_config
+                        else:
+                            extra_body["reasoning"] = {
+                                "enabled": True,
+                                "effort": "xhigh"
+                            }
                     
                     if extra_body:
                         api_kwargs["extra_body"] = extra_body
@@ -1527,6 +1648,9 @@ class AIAgent:
                         # Log response with provider info if available
                         resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
                         logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
+                    
+                    # [DEBUG] Log the full API payload + response token metrics
+                    self._log_api_payload(api_call_count, api_kwargs, response=response)
 
                     # Validate response has valid choices before proceeding
                     if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
@@ -1589,7 +1713,20 @@ class AIAgent:
                         wait_time = min(5 * (2 ** (retry_count - 1)), 120)  # 5s, 10s, 20s, 40s, 80s, 120s
                         print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
                         logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
-                        time.sleep(wait_time)
+                        
+                        # Sleep in small increments to stay responsive to interrupts
+                        sleep_end = time.time() + wait_time
+                        while time.time() < sleep_end:
+                            if self._interrupt_requested:
+                                print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
+                                return {
+                                    "final_response": "Operation interrupted.",
+                                    "messages": messages,
+                                    "api_calls": api_call_count,
+                                    "completed": False,
+                                    "interrupted": True,
+                                }
+                            time.sleep(0.2)
                         continue  # Retry the API call
 
                     # Check finish_reason before proceeding
@@ -1668,6 +1805,41 @@ class AIAgent:
                     print(f"{self.log_prefix}   📝 Error: {str(api_error)[:200]}")
                     print(f"{self.log_prefix}   📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
                     
+                    # Check for interrupt before deciding to retry
+                    if self._interrupt_requested:
+                        print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
+                        return {
+                            "final_response": "Operation interrupted.",
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "interrupted": True,
+                        }
+                    
+                    # Check for non-retryable client errors (4xx HTTP status codes).
+                    # These indicate a problem with the request itself (bad model ID,
+                    # invalid API key, forbidden, etc.) and will never succeed on retry.
+                    is_client_error = any(phrase in error_msg for phrase in [
+                        'error code: 400', 'error code: 401', 'error code: 403',
+                        'error code: 404', 'error code: 422',
+                        'is not a valid model', 'invalid model', 'model not found',
+                        'invalid api key', 'invalid_api_key', 'authentication',
+                        'unauthorized', 'forbidden', 'not found',
+                    ])
+                    
+                    if is_client_error:
+                        print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
+                        print(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.")
+                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
+                        return {
+                            "final_response": None,
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "failed": True,
+                            "error": str(api_error),
+                        }
+                    
                     # Check for non-retryable errors (context length exceeded)
                     is_context_length_error = any(phrase in error_msg for phrase in [
                         'context length', 'maximum context', 'token limit', 
@@ -1708,7 +1880,21 @@ class AIAgent:
                     print(f"⚠️  OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
                     print(f"⏳ Retrying in {wait_time}s...")
                     logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
-                    time.sleep(wait_time)
+                    
+                    # Sleep in small increments so we can respond to interrupts quickly
+                    # instead of blocking the entire wait_time in one sleep() call
+                    sleep_end = time.time() + wait_time
+                    while time.time() < sleep_end:
+                        if self._interrupt_requested:
+                            print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
+                            return {
+                                "final_response": "Operation interrupted.",
+                                "messages": messages,
+                                "api_calls": api_call_count,
+                                "completed": False,
+                                "interrupted": True,
+                            }
+                        time.sleep(0.2)  # Check interrupt every 200ms
             
             try:
                 assistant_message = response.choices[0].message
@@ -2069,13 +2255,28 @@ class AIAgent:
                 if self.ephemeral_system_prompt:
                     api_messages = [{"role": "system", "content": self.ephemeral_system_prompt}] + api_messages
                 
-                summary_response = self.client.chat.completions.create(
-                    model=self.model,
-                    messages=api_messages,
+                # Build extra_body for summary call (same reasoning config as main loop)
+                summary_extra_body = {}
+                if "openrouter" in self.base_url.lower():
+                    if self.reasoning_config is not None:
+                        summary_extra_body["reasoning"] = self.reasoning_config
+                    else:
+                        summary_extra_body["reasoning"] = {
+                            "enabled": True,
+                            "effort": "xhigh"
+                        }
+                
+                summary_kwargs = {
+                    "model": self.model,
+                    "messages": api_messages,
                     # No tools parameter - forces text response
-                    extra_headers=self.extra_headers,
-                    extra_body=self.extra_body,
-                )
+                }
+                if self.max_tokens is not None:
+                    summary_kwargs["max_tokens"] = self.max_tokens
+                if summary_extra_body:
+                    summary_kwargs["extra_body"] = summary_extra_body
+                
+                summary_response = self.client.chat.completions.create(**summary_kwargs)
                 
                 if summary_response.choices and summary_response.choices[0].message.content:
                     final_response = summary_response.choices[0].message.content
@@ -2151,7 +2352,7 @@ class AIAgent:
 
 def main(
     query: str = None,
-    model: str = "anthropic/claude-sonnet-4-20250514",
+    model: str = "anthropic/claude-opus-4.6",
     api_key: str = None,
     base_url: str = "https://openrouter.ai/api/v1",
     max_turns: int = 10,

From dd70d57b9bc3229e1b1ae32252bcd01d502b6e9b Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Sun, 8 Feb 2026 20:19:14 +0000
Subject: [PATCH 46/48] Refactor BatchRunner and AIAgent for enhanced reasoning
 and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.
---
 batch_runner.py          | 125 +++++++++++++++++++++++------
 model_tools.py           | 165 ++++++++++++++++++++++++---------------
 run_agent.py             |  69 ++++++++++++++++
 toolset_distributions.py |   8 +-
 4 files changed, 277 insertions(+), 90 deletions(-)

diff --git a/batch_runner.py b/batch_runner.py
index c3679cdc35..9a3f3e3b45 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -41,24 +41,17 @@ from toolset_distributions import (
     sample_toolsets_from_distribution,
     validate_distribution
 )
+from model_tools import TOOL_TO_TOOLSET_MAP
 
 
 # Global configuration for worker processes
 _WORKER_CONFIG = {}
 
-# All possible tools - used to ensure consistent schema across all trajectory entries
-# This is required because Arrow/Parquet (used by HuggingFace datasets) needs identical schemas
-ALL_POSSIBLE_TOOLS = {
-    'terminal', 'web_search', 'web_extract',
-    'vision_analyze', 'image_generate', 'mixture_of_agents',
-    # Skills tools
-    'skills_categories', 'skills_list', 'skill_view',
-    # Browser automation tools
-    'browser_navigate', 'browser_snapshot', 'browser_click',
-    'browser_type', 'browser_scroll', 'browser_back',
-    'browser_press', 'browser_close', 'browser_get_images',
-    'browser_vision'
-}
+# All possible tools - auto-derived from the master mapping in model_tools.py.
+# This stays in sync automatically when new tools are added to TOOL_TO_TOOLSET_MAP.
+# Used for consistent schema in Arrow/Parquet (HuggingFace datasets) and for
+# filtering corrupted entries during trajectory combination.
+ALL_POSSIBLE_TOOLS = set(TOOL_TO_TOOLSET_MAP.keys())
 
 # Default stats for tools that weren't used
 DEFAULT_TOOL_STATS = {'count': 0, 'success': 0, 'failure': 0}
@@ -200,6 +193,42 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
     return tool_stats
 
 
+def _extract_reasoning_stats(messages: List[Dict[str, Any]]) -> Dict[str, int]:
+    """
+    Count how many assistant turns have reasoning vs no reasoning.
+    
+    Checks for <REASONING_SCRATCHPAD> in content or a non-empty 'reasoning' field
+    (native thinking tokens). Returns counts for tracking reasoning coverage.
+    
+    Args:
+        messages: Message history
+        
+    Returns:
+        Dict with int counts 'total_assistant_turns', 'turns_with_reasoning', 'turns_without_reasoning' and bool 'has_any_reasoning'
+    """
+    total = 0
+    with_reasoning = 0
+    
+    for msg in messages:
+        if msg.get("role") != "assistant":
+            continue
+        total += 1
+        
+        content = msg.get("content", "") or ""
+        has_scratchpad = "<REASONING_SCRATCHPAD>" in content
+        has_native_reasoning = bool(msg.get("reasoning", "").strip()) if msg.get("reasoning") else False
+        
+        if has_scratchpad or has_native_reasoning:
+            with_reasoning += 1
+    
+    return {
+        "total_assistant_turns": total,
+        "turns_with_reasoning": with_reasoning,
+        "turns_without_reasoning": total - with_reasoning,
+        "has_any_reasoning": with_reasoning > 0,
+    }
+
+
 def _process_single_prompt(
     prompt_index: int,
     prompt_data: Dict[str, Any],
@@ -255,6 +284,9 @@ def _process_single_prompt(
         # Extract tool usage statistics
         tool_stats = _extract_tool_stats(result["messages"])
         
+        # Extract reasoning coverage stats
+        reasoning_stats = _extract_reasoning_stats(result["messages"])
+        
         # Convert to trajectory format (using existing method)
         trajectory = agent._convert_to_trajectory_format(
             result["messages"],
@@ -267,6 +299,7 @@ def _process_single_prompt(
             "prompt_index": prompt_index,
             "trajectory": trajectory,
             "tool_stats": tool_stats,
+            "reasoning_stats": reasoning_stats,
             "completed": result["completed"],
             "partial": result.get("partial", False),
             "api_calls": result["api_calls"],
@@ -335,7 +368,9 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
     
     # Initialize aggregated stats for this batch
     batch_tool_stats = {}
+    batch_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
     completed_in_batch = []
+    discarded_no_reasoning = 0
     
     # Process each prompt sequentially in this batch
     for prompt_index, prompt_data in prompts_to_process:
@@ -349,6 +384,13 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
         
         # Save trajectory if successful
         if result["success"] and result["trajectory"]:
+            # Discard samples with zero reasoning across all turns
+            reasoning = result.get("reasoning_stats", {})
+            if not reasoning.get("has_any_reasoning", True):
+                print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
+                discarded_no_reasoning += 1
+                continue
+            
             # Get and normalize tool stats for consistent schema across all entries
             raw_tool_stats = result.get("tool_stats", {})
             tool_stats = _normalize_tool_stats(raw_tool_stats)
@@ -389,6 +431,10 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
             batch_tool_stats[tool_name]["success"] += stats["success"]
             batch_tool_stats[tool_name]["failure"] += stats["failure"]
         
+        # Aggregate reasoning stats
+        for key in batch_reasoning_stats:
+            batch_reasoning_stats[key] += result.get("reasoning_stats", {}).get(key, 0)
+        
         # Only mark as completed if successfully saved (failed prompts can be retried on resume)
         if result["success"] and result["trajectory"]:
             completed_in_batch.append(prompt_index)
@@ -404,6 +450,8 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
         "processed": len(prompts_to_process),
         "skipped": len(batch_data) - len(prompts_to_process),
         "tool_stats": batch_tool_stats,
+        "reasoning_stats": batch_reasoning_stats,
+        "discarded_no_reasoning": discarded_no_reasoning,
         "completed_prompts": completed_in_batch
     }
 
@@ -434,6 +482,7 @@ class BatchRunner:
         max_tokens: int = None,
         reasoning_config: Dict[str, Any] = None,
         prefill_messages: List[Dict[str, Any]] = None,
+        max_samples: int = None,
     ):
         """
         Initialize the batch runner.
@@ -458,6 +507,7 @@ class BatchRunner:
             max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
             reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
             prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
+            max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
         """
         self.dataset_file = Path(dataset_file)
         self.batch_size = batch_size
@@ -478,6 +528,7 @@ class BatchRunner:
         self.max_tokens = max_tokens
         self.reasoning_config = reasoning_config
         self.prefill_messages = prefill_messages
+        self.max_samples = max_samples
         
         # Validate distribution
         if not validate_distribution(distribution):
@@ -493,8 +544,12 @@ class BatchRunner:
         # Statistics file
         self.stats_file = self.output_dir / "statistics.json"
         
-        # Load dataset
+        # Load dataset (and optionally truncate to max_samples)
         self.dataset = self._load_dataset()
+        if self.max_samples and self.max_samples < len(self.dataset):
+            full_count = len(self.dataset)
+            self.dataset = self.dataset[:self.max_samples]
+            print(f"✂️  Truncated dataset from {full_count} to {self.max_samples} samples (--max_samples)")
         
         # Create batches
         self.batches = self._create_batches()
@@ -812,6 +867,8 @@ class BatchRunner:
         
         # Aggregate all batch statistics and update checkpoint
         all_completed_prompts = list(completed_prompts_set)
+        total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
+        
         for batch_result in results:
             # Add newly completed prompts
             all_completed_prompts.extend(batch_result.get("completed_prompts", []))
@@ -828,6 +885,10 @@ class BatchRunner:
                 total_tool_stats[tool_name]["count"] += stats["count"]
                 total_tool_stats[tool_name]["success"] += stats["success"]
                 total_tool_stats[tool_name]["failure"] += stats["failure"]
+            
+            # Aggregate reasoning stats
+            for key in total_reasoning_stats:
+                total_reasoning_stats[key] += batch_result.get("reasoning_stats", {}).get(key, 0)
         
         # Save final checkpoint
         checkpoint_data["completed_prompts"] = all_completed_prompts
@@ -850,15 +911,8 @@ class BatchRunner:
         combined_file = self.output_dir / "trajectories.jsonl"
         print(f"\n📦 Combining ALL batch files into {combined_file.name}...")
         
-        VALID_TOOLS = {'web_search', 'web_extract', 'terminal', 'vision_analyze', 
-                       'image_generate', 'mixture_of_agents',
-                       # Skills tools
-                       'skills_categories', 'skills_list', 'skill_view',
-                       # Browser automation tools
-                       'browser_navigate', 'browser_snapshot', 'browser_click',
-                       'browser_type', 'browser_scroll', 'browser_back',
-                       'browser_press', 'browser_close', 'browser_get_images',
-                       'browser_vision'}
+        # Valid tools auto-derived from model_tools.py — no manual updates needed
+        VALID_TOOLS = ALL_POSSIBLE_TOOLS
         
         total_entries = 0
         filtered_entries = 0
@@ -907,7 +961,8 @@ class BatchRunner:
             "model": self.model,
             "completed_at": datetime.now().isoformat(),
             "duration_seconds": round(time.time() - start_time, 2),
-            "tool_statistics": total_tool_stats
+            "tool_statistics": total_tool_stats,
+            "reasoning_statistics": total_reasoning_stats,
         }
         
         with open(self.stats_file, 'w', encoding='utf-8') as f:
@@ -945,6 +1000,25 @@ class BatchRunner:
         else:
             print("No tool calls were made during this run.")
         
+        # Print reasoning coverage stats
+        total_discarded = sum(r.get("discarded_no_reasoning", 0) for r in results)
+        
+        print(f"\n🧠 Reasoning Coverage:")
+        print("-" * 70)
+        total_turns = total_reasoning_stats["total_assistant_turns"]
+        with_reasoning = total_reasoning_stats["turns_with_reasoning"]
+        without_reasoning = total_reasoning_stats["turns_without_reasoning"]
+        if total_turns > 0:
+            pct_with = round(with_reasoning / total_turns * 100, 1)
+            pct_without = round(without_reasoning / total_turns * 100, 1)
+            print(f"   Total assistant turns:    {total_turns:,}")
+            print(f"   With reasoning:           {with_reasoning:,} ({pct_with}%)")
+            print(f"   Without reasoning:        {without_reasoning:,} ({pct_without}%)")
+        else:
+            print("   No assistant turns recorded.")
+        if total_discarded > 0:
+            print(f"   🚫 Samples discarded (zero reasoning): {total_discarded:,}")
+        
         print(f"\n💾 Results saved to: {self.output_dir}")
         print(f"   - Trajectories: trajectories.jsonl (combined)")
         print(f"   - Individual batches: batch_*.jsonl (for debugging)")
@@ -975,6 +1049,7 @@ def main(
     reasoning_effort: str = None,
     reasoning_disabled: bool = False,
     prefill_messages_file: str = None,
+    max_samples: int = None,
 ):
     """
     Run batch processing of agent prompts from a dataset.
@@ -1002,6 +1077,7 @@ def main(
         reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "xhigh")
         reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False)
         prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts)
+        max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
         
     Examples:
         # Basic usage
@@ -1110,6 +1186,7 @@ def main(
             max_tokens=max_tokens,
             reasoning_config=reasoning_config,
             prefill_messages=prefill_messages,
+            max_samples=max_samples,
         )
 
         runner.run(resume=resume)
diff --git a/model_tools.py b/model_tools.py
index 7106150995..9210e732e4 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -700,13 +700,21 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "read_file",
-                "description": "Read a file with pagination support. Returns content with line numbers in 'LINE_NUM|CONTENT' format. For binary files (images), returns base64-encoded data. If file not found, suggests similar filenames.",
+                "description": (
+                    "Read a file with pagination support. Preferred over 'cat' in the terminal because it "
+                    "provides line numbers, handles binary/image files, and suggests similar filenames if "
+                    "the file is not found.\n\n"
+                    "**Output format:** Each line is returned as 'LINE_NUM|CONTENT' for easy reference.\n"
+                    "**Binary files:** Detected automatically; images (png/jpg/gif/webp) are returned as base64 with MIME type and dimensions.\n"
+                    "**Large files:** Use offset and limit to paginate. The response includes total line count and a hint for the next page.\n"
+                    "**Paths:** Supports absolute paths, relative paths (from working directory), and ~ expansion."
+                ),
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "path": {
                             "type": "string",
-                            "description": "Path to the file to read (absolute or relative)"
+                            "description": "Path to the file to read (absolute, relative, or ~/path)"
                         },
                         "offset": {
                             "type": "integer",
@@ -729,17 +737,25 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "write_file",
-                "description": "Write content to a file. Creates parent directories automatically. Returns bytes written and lint check results for supported languages.",
+                "description": (
+                    "Write content to a file, completely replacing any existing content. Creates parent "
+                    "directories automatically if they don't exist. Preferred over 'echo' or heredoc in the "
+                    "terminal because it safely handles special characters, newlines, and shell metacharacters "
+                    "without escaping issues.\n\n"
+                    "**Important:** This OVERWRITES the entire file. To make targeted edits to an existing file, "
+                    "use the 'patch' tool instead.\n"
+                    "**Paths:** Supports absolute paths, relative paths, and ~ expansion."
+                ),
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "path": {
                             "type": "string",
-                            "description": "Path to the file to write (will be created if doesn't exist)"
+                            "description": "Path to the file to write (will be created if it doesn't exist, overwritten if it does)"
                         },
                         "content": {
                             "type": "string",
-                            "description": "Content to write to the file"
+                            "description": "Complete content to write to the file"
                         }
                     },
                     "required": ["path", "content"]
@@ -750,36 +766,48 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "patch",
-                "description": "Modify files using either simple string replacement or V4A patch format. Mode 'replace' does find-and-replace with fuzzy matching. Mode 'patch' applies multi-file changes using V4A format (*** Begin/End Patch). Auto-runs syntax checks on modified files.",
+                "description": (
+                    "Modify existing files using targeted edits. Preferred over 'sed' or manual rewriting because "
+                    "it uses intelligent fuzzy matching that tolerates minor whitespace and indentation differences, "
+                    "and auto-runs syntax checks (Python, JS, TS, Go, Rust) after editing.\n\n"
+                    "**Replace mode (recommended):** Find a unique string in the file and replace it. Uses a "
+                    "9-strategy fuzzy matching chain (exact → line-trimmed → whitespace-normalized → "
+                    "indentation-flexible → context-aware) so small formatting differences won't cause failures. "
+                    "Returns a unified diff showing exactly what changed.\n\n"
+                    "**Patch mode:** Apply multi-file changes using V4A patch format for large-scale edits across "
+                    "multiple files in one call.\n\n"
+                    "**Auto-lint:** After every edit, automatically runs syntax checks and reports errors so you "
+                    "can fix them immediately."
+                ),
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "mode": {
                             "type": "string",
                             "enum": ["replace", "patch"],
-                            "description": "Edit mode: 'replace' for string replacement, 'patch' for V4A patch format",
+                            "description": "Edit mode: 'replace' for targeted find-and-replace, 'patch' for V4A multi-file patches",
                             "default": "replace"
                         },
                         "path": {
                             "type": "string",
-                            "description": "File path (required for 'replace' mode)"
+                            "description": "File path to edit (required for 'replace' mode)"
                         },
                         "old_string": {
                             "type": "string",
-                            "description": "Text to find and replace (required for 'replace' mode). Must be unique in file unless replace_all=true"
+                            "description": "Text to find in the file (required for 'replace' mode). Must be unique in the file unless replace_all=true. Include enough surrounding context to ensure uniqueness."
                         },
                         "new_string": {
                             "type": "string",
-                            "description": "Replacement text (required for 'replace' mode)"
+                            "description": "Replacement text (required for 'replace' mode). Can be empty string to delete the matched text."
                         },
                         "replace_all": {
                             "type": "boolean",
-                            "description": "Replace all occurrences instead of requiring unique match (default: false)",
+                            "description": "Replace all occurrences instead of requiring a unique match (default: false)",
                             "default": False
                         },
                         "patch": {
                             "type": "string",
-                            "description": "V4A format patch content (required for 'patch' mode). Format: *** Begin Patch / *** Update File: path / @@ context @@ / -removed / +added / *** End Patch"
+                            "description": "V4A format patch content (required for 'patch' mode). Format:\n*** Begin Patch\n*** Update File: path/to/file\n@@ context hint @@\n context line\n-removed line\n+added line\n*** End Patch"
                         }
                     },
                     "required": ["mode"]
@@ -790,7 +818,16 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "search",
-                "description": "Search for content in files or search for files by name. Use target='content' to search inside files (like grep), or target='files' to find files by name pattern (like glob/find). Results sorted by modification time (newest first).",
+                "description": (
+                    "Search for content inside files or find files by name. Preferred over 'grep' or 'find' "
+                    "in the terminal because it uses ripgrep (fast) with automatic fallback to grep, handles "
+                    "pagination, and returns structured results sorted by modification time (newest first).\n\n"
+                    "**Content search (target='content'):** Regex-powered search inside files with optional "
+                    "file type filtering and context lines. Three output modes: full matches with line numbers, "
+                    "file paths only, or match counts per file.\n\n"
+                    "**File search (target='files'):** Find files by glob pattern (e.g., '*.py', '*config*'). "
+                    "Results sorted by modification time so recently changed files appear first."
+                ),
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -801,12 +838,12 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
                         "target": {
                             "type": "string",
                             "enum": ["content", "files"],
-                            "description": "Search mode: 'content' searches inside files, 'files' searches for files by name",
+                            "description": "Search mode: 'content' searches inside files (like grep/rg), 'files' searches for files by name (like find/glob)",
                             "default": "content"
                         },
                         "path": {
                             "type": "string",
-                            "description": "Directory or file to search in (default: current directory)",
+                            "description": "Directory or file to search in (default: current working directory)",
                             "default": "."
                         },
                         "file_glob": {
@@ -815,7 +852,7 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
                         },
                         "limit": {
                             "type": "integer",
-                            "description": "Maximum number of results (default: 50)",
+                            "description": "Maximum number of results to return (default: 50)",
                             "default": 50
                         },
                         "offset": {
@@ -826,12 +863,12 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]:
                         "output_mode": {
                             "type": "string",
                             "enum": ["content", "files_only", "count"],
-                            "description": "For target='content': 'content' shows matches, 'files_only' shows file paths, 'count' shows match counts per file",
+                            "description": "Output format for content search: 'content' shows matching lines with line numbers, 'files_only' lists file paths, 'count' shows match counts per file",
                             "default": "content"
                         },
                         "context": {
                             "type": "integer",
-                            "description": "Lines of context around matches (only for target='content', output_mode='content')",
+                            "description": "Number of lines to show before and after each match (only for target='content', output_mode='content')",
                             "default": 0
                         }
                     },
@@ -909,6 +946,53 @@ def get_all_tool_names() -> List[str]:
     return tool_names
 
 
+# Master mapping of every tool name → its toolset.
+# This is the single source of truth for all valid tool names in the system.
+# Import TOOL_TO_TOOLSET_MAP from here whenever you need to check valid tools.
+TOOL_TO_TOOLSET_MAP = {
+    "web_search": "web_tools",
+    "web_extract": "web_tools",
+    "terminal": "terminal_tools",
+    "vision_analyze": "vision_tools",
+    "mixture_of_agents": "moa_tools",
+    "image_generate": "image_tools",
+    # Skills tools
+    "skills_categories": "skills_tools",
+    "skills_list": "skills_tools",
+    "skill_view": "skills_tools",
+    # Browser automation tools
+    "browser_navigate": "browser_tools",
+    "browser_snapshot": "browser_tools",
+    "browser_click": "browser_tools",
+    "browser_type": "browser_tools",
+    "browser_scroll": "browser_tools",
+    "browser_back": "browser_tools",
+    "browser_press": "browser_tools",
+    "browser_close": "browser_tools",
+    "browser_get_images": "browser_tools",
+    "browser_vision": "browser_tools",
+    # Cronjob management tools
+    "schedule_cronjob": "cronjob_tools",
+    "list_cronjobs": "cronjob_tools",
+    "remove_cronjob": "cronjob_tools",
+    # RL Training tools
+    "rl_list_environments": "rl_tools",
+    "rl_select_environment": "rl_tools",
+    "rl_get_current_config": "rl_tools",
+    "rl_edit_config": "rl_tools",
+    "rl_start_training": "rl_tools",
+    "rl_check_status": "rl_tools",
+    "rl_stop_training": "rl_tools",
+    "rl_get_results": "rl_tools",
+    "rl_list_runs": "rl_tools",
+    # File manipulation tools
+    "read_file": "file_tools",
+    "write_file": "file_tools",
+    "patch": "file_tools",
+    "search": "file_tools",
+}
+
+
 def get_toolset_for_tool(tool_name: str) -> str:
     """
     Get the toolset that a tool belongs to.
@@ -919,50 +1003,7 @@ def get_toolset_for_tool(tool_name: str) -> str:
     Returns:
         str: Name of the toolset, or "unknown" if not found
     """
-    toolset_mapping = {
-        "web_search": "web_tools",
-        "web_extract": "web_tools",
-        "terminal": "terminal_tools",
-        "vision_analyze": "vision_tools",
-        "mixture_of_agents": "moa_tools",
-        "image_generate": "image_tools",
-        # Skills tools
-        "skills_categories": "skills_tools",
-        "skills_list": "skills_tools",
-        "skill_view": "skills_tools",
-        # Browser automation tools
-        "browser_navigate": "browser_tools",
-        "browser_snapshot": "browser_tools",
-        "browser_click": "browser_tools",
-        "browser_type": "browser_tools",
-        "browser_scroll": "browser_tools",
-        "browser_back": "browser_tools",
-        "browser_press": "browser_tools",
-        "browser_close": "browser_tools",
-        "browser_get_images": "browser_tools",
-        "browser_vision": "browser_tools",
-        # Cronjob management tools
-        "schedule_cronjob": "cronjob_tools",
-        "list_cronjobs": "cronjob_tools",
-        "remove_cronjob": "cronjob_tools",
-        # RL Training tools
-        "rl_list_environments": "rl_tools",
-        "rl_select_environment": "rl_tools",
-        "rl_get_current_config": "rl_tools",
-        "rl_edit_config": "rl_tools",
-        "rl_start_training": "rl_tools",
-        "rl_check_status": "rl_tools",
-        "rl_stop_training": "rl_tools",
-        "rl_get_results": "rl_tools",
-        "rl_list_runs": "rl_tools",
-        # File manipulation tools
-        "read_file": "file_tools",
-        "write_file": "file_tools",
-        "patch": "file_tools",
-        "search": "file_tools",
-    }
-    
-    return toolset_mapping.get(tool_name, "unknown")
+    return TOOL_TO_TOOLSET_MAP.get(tool_name, "unknown")
 
 
 def get_tool_definitions(
diff --git a/run_agent.py b/run_agent.py
index eeb24dd8f5..078f8f0f4d 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1120,6 +1120,24 @@ class AIAgent:
             return content
         return content.replace("<REASONING_SCRATCHPAD>", "<think>").replace("</REASONING_SCRATCHPAD>", "</think>")
     
+    @staticmethod
+    def _has_incomplete_scratchpad(content: str) -> bool:
+        """
+        Check if content has an opening <REASONING_SCRATCHPAD> without a closing tag.
+        
+        This indicates the model ran out of output tokens mid-reasoning, producing
+        a broken turn that shouldn't be saved. The caller should retry or discard.
+        
+        Args:
+            content: Assistant message content to check
+            
+        Returns:
+            True if there's an unclosed scratchpad tag
+        """
+        if not content:
+            return False
+        return "<REASONING_SCRATCHPAD>" in content and "</REASONING_SCRATCHPAD>" not in content
+    
     def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
         """
         Convert internal message format to trajectory format for saving.
@@ -1204,6 +1222,11 @@ class AIAgent:
                         }
                         content += f"<tool_call>\n{json.dumps(tool_call_json, ensure_ascii=False)}\n</tool_call>\n"
                     
+                    # Ensure every gpt turn has a <think> block (empty if no reasoning)
+                    # so the format is consistent for training data
+                    if "<think>" not in content:
+                        content = "<think>\n</think>\n" + content
+                    
                     trajectory.append({
                         "from": "gpt",
                         "value": content.rstrip()
@@ -1256,6 +1279,10 @@ class AIAgent:
                     raw_content = msg["content"] or ""
                     content += self._convert_scratchpad_to_think(raw_content)
                     
+                    # Ensure every gpt turn has a <think> block (empty if no reasoning)
+                    if "<think>" not in content:
+                        content = "<think>\n</think>\n" + content
+                    
                     trajectory.append({
                         "from": "gpt",
                         "value": content.strip()
@@ -1903,6 +1930,48 @@ class AIAgent:
                 if assistant_message.content and not self.quiet_mode:
                     print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
                 
+                # Check for incomplete <REASONING_SCRATCHPAD> (opened but never closed)
+                # This means the model ran out of output tokens mid-reasoning — retry up to 2 times
+                if self._has_incomplete_scratchpad(assistant_message.content or ""):
+                    if not hasattr(self, '_incomplete_scratchpad_retries'):
+                        self._incomplete_scratchpad_retries = 0
+                    self._incomplete_scratchpad_retries += 1
+                    
+                    print(f"{self.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
+                    
+                    if self._incomplete_scratchpad_retries <= 2:
+                        print(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
+                        # Don't add the broken message, just retry
+                        continue
+                    else:
+                        # Max retries - discard this turn and save as partial
+                        print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
+                        self._incomplete_scratchpad_retries = 0
+                        
+                        rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
+                        
+                        try:
+                            cleanup_vm(effective_task_id)
+                        except Exception:
+                            pass
+                        try:
+                            cleanup_browser(effective_task_id)
+                        except Exception:
+                            pass
+                        
+                        return {
+                            "final_response": None,
+                            "messages": rolled_back_messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "partial": True,
+                            "error": "Incomplete REASONING_SCRATCHPAD after 2 retries"
+                        }
+                
+                # Reset incomplete scratchpad counter on clean response
+                if hasattr(self, '_incomplete_scratchpad_retries'):
+                    self._incomplete_scratchpad_retries = 0
+                
                 # Check for tool calls
                 if assistant_message.tool_calls:
                     if not self.quiet_mode:
diff --git a/toolset_distributions.py b/toolset_distributions.py
index 7f829c2784..0dc23b887b 100644
--- a/toolset_distributions.py
+++ b/toolset_distributions.py
@@ -198,10 +198,10 @@ DISTRIBUTIONS = {
         "toolsets": {
             "terminal": 97,   # 97% - terminal almost always available
             "file": 97,       # 97% - file tools almost always available
-            "web": 15,        # 15% - web search/scrape for documentation
-            "browser": 10,    # 10% - browser occasionally for web interaction
-            "vision": 8,      # 8% - vision analysis rarely
-            "image_gen": 3    # 3% - image generation very rarely
+            "web": 97,        # 97% - web search/scrape for documentation
+            "browser": 75,    # 75% - browser for web interaction
+            "vision": 50,      # 50% - vision analysis
+            "image_gen": 10    # 10% - image generation
         }
     },
     

From c441681dc2e45254018792d322519d194518c0db Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sun, 8 Feb 2026 12:56:40 -0800
Subject: [PATCH 47/48] Update default model to 'anthropic/claude-opus-4.6' and
 refine terminal working directory settings

- Changed the default LLM model in the setup wizard and example environment file to 'anthropic/claude-opus-4.6'.
- Updated terminal working directory settings in CLI and related files to use the current directory ('.') instead of '/tmp'.
- Enhanced documentation comments for clarity on terminal configuration and working directory behavior.
---
 .env.example             | 11 +++++++----
 cli.py                   |  4 ++--
 hermes_cli/setup.py      | 35 ++++++++++++++++++++---------------
 tools/file_operations.py |  4 ++--
 tools/terminal_tool.py   | 21 +++++++++++++++++----
 5 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/.env.example b/.env.example
index 85ecf09d73..905bd42223 100644
--- a/.env.example
+++ b/.env.example
@@ -10,8 +10,8 @@
 OPENROUTER_API_KEY=
 
 # Default model to use (OpenRouter format: provider/model)
-# Examples: anthropic/claude-sonnet-4, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
-LLM_MODEL=anthropic/claude-sonnet-4
+# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
+LLM_MODEL=anthropic/claude-opus-4.6
 
 # =============================================================================
 # TOOL API KEYS
@@ -46,8 +46,11 @@ TERMINAL_DOCKER_IMAGE=python:3.11
 TERMINAL_SINGULARITY_IMAGE=docker://python:3.11
 TERMINAL_MODAL_IMAGE=python:3.11
 
-# Working directory inside the container
-TERMINAL_CWD=/tmp
+# Working directory for terminal commands
+# For CLI: "." means current directory (resolved automatically from config.yaml)
+# For containers (docker/singularity/modal): absolute path inside the container
+# Usually managed by config.yaml (terminal.cwd) — uncomment to override
+# TERMINAL_CWD=.
 
 # Default command timeout in seconds
 TERMINAL_TIMEOUT=60
diff --git a/cli.py b/cli.py
index 66f0eafe42..73dffbee44 100755
--- a/cli.py
+++ b/cli.py
@@ -88,7 +88,7 @@ def load_cli_config() -> Dict[str, Any]:
         },
         "terminal": {
             "env_type": "local",
-            "cwd": "/tmp",
+            "cwd": ".",  # "." is resolved to os.getcwd() at runtime
             "timeout": 60,
             "lifetime_seconds": 300,
             "docker_image": "python:3.11",
@@ -839,7 +839,7 @@ class HermesCLI:
         """Display current configuration with kawaii ASCII art."""
         # Get terminal config from environment (which was set from cli-config.yaml)
         terminal_env = os.getenv("TERMINAL_ENV", "local")
-        terminal_cwd = os.getenv("TERMINAL_CWD", "/tmp")
+        terminal_cwd = os.getenv("TERMINAL_CWD", os.getcwd())
         terminal_timeout = os.getenv("TERMINAL_TIMEOUT", "60")
         
         config_path = Path(__file__).parent / 'cli-config.yaml'
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 75e019d9eb..3b52dd9044 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -501,11 +501,12 @@ def run_setup_wizard(args):
     # =========================================================================
     print_header("Default Model")
     
-    current_model = config.get('model', 'anthropic/claude-sonnet-4')
+    current_model = config.get('model', 'anthropic/claude-opus-4.6')
     print_info(f"Current: {current_model}")
     
     model_choices = [
-        "anthropic/claude-sonnet-4.5 (recommended)",
+        "anthropic/claude-opus-4.6 (recommended)",
+        "anthropic/claude-sonnet-4.5",
         "anthropic/claude-opus-4.5",
         "openai/gpt-5.2",
         "openai/gpt-5.2-codex",
@@ -518,27 +519,31 @@ def run_setup_wizard(args):
         f"Keep current ({current_model})"
     ]
     
-    model_idx = prompt_choice("Select default model:", model_choices, 10)  # Default: keep current
+    model_idx = prompt_choice("Select default model:", model_choices, 11)  # Default: keep current
     
     model_map = {
-        0: "anthropic/claude-sonnet-4.5",
-        1: "anthropic/claude-opus-4.5",
-        2: "openai/gpt-5.2",
-        3: "openai/gpt-5.2-codex",
-        4: "google/gemini-3-pro-preview",
-        5: "google/gemini-3-flash-preview",
-        6: "z-ai/glm-4.7",
-        7: "moonshotai/kimi-k2.5",
-        8: "minimax/minimax-m2.1",
+        0: "anthropic/claude-opus-4.6",
+        1: "anthropic/claude-sonnet-4.5",
+        2: "anthropic/claude-opus-4.5",
+        3: "openai/gpt-5.2",
+        4: "openai/gpt-5.2-codex",
+        5: "google/gemini-3-pro-preview",
+        6: "google/gemini-3-flash-preview",
+        7: "z-ai/glm-4.7",
+        8: "moonshotai/kimi-k2.5",
+        9: "minimax/minimax-m2.1",
     }
     
     if model_idx in model_map:
         config['model'] = model_map[model_idx]
-    elif model_idx == 9:  # Custom
-        custom = prompt("Enter model name (e.g., anthropic/claude-sonnet-4.5)")
+        # Also update LLM_MODEL in .env so it stays in sync (cli.py reads .env first)
+        save_env_value("LLM_MODEL", model_map[model_idx])
+    elif model_idx == 10:  # Custom
+        custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
         if custom:
             config['model'] = custom
-    # else: Keep current (model_idx == 10)
+            save_env_value("LLM_MODEL", custom)
+    # else: Keep current (model_idx == 11)
     
     # =========================================================================
     # Step 4: Terminal Backend
diff --git a/tools/file_operations.py b/tools/file_operations.py
index 2509df3c57..7d399dc9b0 100644
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -254,12 +254,12 @@ class ShellFileOperations(FileOperations):
         Args:
             terminal_env: Any object with execute(command, cwd) method.
                          Returns {"output": str, "returncode": int}
-            cwd: Working directory (defaults to env's cwd or /tmp)
+            cwd: Working directory (defaults to env's cwd or current directory)
         """
         self.env = terminal_env
         # Determine cwd from various possible sources
         self.cwd = cwd or getattr(terminal_env, 'cwd', None) or \
-                   getattr(getattr(terminal_env, 'config', None), 'cwd', None) or '/tmp'
+                   getattr(getattr(terminal_env, 'config', None), 'cwd', None) or os.getcwd()
         
         # Cache for command availability checks
         self._command_cache: Dict[str, bool] = {}
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 9fe1162ac6..93e250c861 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -961,7 +961,7 @@ class _ModalEnvironment:
     Note: stdin handling is not needed for Modal since it uses remote async execution.
     """
     
-    def __init__(self, image: str, cwd: str = "/", timeout: int = 60):
+    def __init__(self, image: str, cwd: str = "/root", timeout: int = 60):
         from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
         self._inner = SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout)
         self.cwd = cwd
@@ -1034,12 +1034,25 @@ def _get_env_config() -> Dict[str, Any]:
     """Get terminal environment configuration from environment variables."""
     # Default image with Python and Node.js for maximum compatibility
     default_image = "nikolaik/python-nodejs:python3.11-nodejs20"
+    env_type = os.getenv("TERMINAL_ENV", "local")
+    
+    # Default cwd depends on backend:
+    #   - local/ssh: current working directory (CLI resolves "." before we get here)
+    #   - docker/singularity: /tmp inside the container (singularity bind-mounts /scratch there)
+    #   - modal: /root (ephemeral cloud container, full filesystem access)
+    if env_type == "modal":
+        default_cwd = "/root"
+    elif env_type in ("docker", "singularity"):
+        default_cwd = "/tmp"
+    else:
+        default_cwd = os.getcwd()
+    
     return {
-        "env_type": os.getenv("TERMINAL_ENV", "local"),  # local, docker, singularity, modal, or ssh
+        "env_type": env_type,
         "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
         "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
         "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
-        "cwd": os.getenv("TERMINAL_CWD", "/tmp"),
+        "cwd": os.getenv("TERMINAL_CWD", default_cwd),
         "timeout": int(os.getenv("TERMINAL_TIMEOUT", "60")),
         "lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
         # SSH-specific config
@@ -1574,6 +1587,6 @@ if __name__ == "__main__":
     print(f"  TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
     print(f"  TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
     print(f"  TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
-    print(f"  TERMINAL_CWD: {os.getenv('TERMINAL_CWD', '/tmp')}")
+    print(f"  TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
     print(f"  TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
     print(f"  TERMINAL_LIFETIME_SECONDS: {os.getenv('TERMINAL_LIFETIME_SECONDS', '300')}")

From 192ce958c37d8e2c6e91bbfd521e81310fc4235a Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sun, 8 Feb 2026 13:31:45 -0800
Subject: [PATCH 48/48] Enhance CLI command handling and introduce resource
 cleanup features

- Added imports for resource cleanup during safe shutdown, including terminal and browser session cleanup.
- Refactored command handling to preserve original case for model names and prompt text, improving user experience.
- Introduced a dedicated interrupt queue to manage user input while the agent is running, preventing race conditions.
- Updated comments and documentation for clarity on command processing and input handling.
---
 cli.py | 119 +++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 82 insertions(+), 37 deletions(-)

diff --git a/cli.py b/cli.py
index 73dffbee44..bdae39474a 100755
--- a/cli.py
+++ b/cli.py
@@ -238,6 +238,10 @@ from toolsets import get_all_toolsets, get_toolset_info, resolve_toolset, valida
 # Cron job system for scheduled tasks
 from cron import create_job, list_jobs, remove_job, get_job, run_daemon as run_cron_daemon, tick as cron_tick
 
+# Resource cleanup imports for safe shutdown (terminal VMs, browser sessions)
+from tools.terminal_tool import cleanup_all_environments as _cleanup_all_terminals
+from tools.browser_tool import _emergency_cleanup_all_sessions as _cleanup_all_browsers
+
 # ============================================================================
 # ASCII Art & Branding
 # ============================================================================
@@ -1217,33 +1221,35 @@ class HermesCLI:
         Returns:
             bool: True to continue, False to exit
         """
-        cmd = command.lower().strip()
+        # Lowercase only for dispatch matching; preserve original case for arguments
+        cmd_lower = command.lower().strip()
+        cmd_original = command.strip()
         
-        if cmd in ("/quit", "/exit", "/q"):
+        if cmd_lower in ("/quit", "/exit", "/q"):
             return False
-        elif cmd == "/help":
+        elif cmd_lower == "/help":
             self.show_help()
-        elif cmd == "/tools":
+        elif cmd_lower == "/tools":
             self.show_tools()
-        elif cmd == "/toolsets":
+        elif cmd_lower == "/toolsets":
             self.show_toolsets()
-        elif cmd == "/config":
+        elif cmd_lower == "/config":
             self.show_config()
-        elif cmd == "/clear":
-            # Clear terminal screen
-            import os as _os
-            _os.system('clear' if _os.name != 'nt' else 'cls')
+        elif cmd_lower == "/clear":
+            # Clear terminal screen using Rich (portable, no shell needed)
+            self.console.clear()
             # Reset conversation
             self.conversation_history = []
             # Show fresh banner
             self.show_banner()
             print("  ✨ (◕‿◕)✨ Fresh start! Screen cleared and conversation reset.\n")
-        elif cmd == "/history":
+        elif cmd_lower == "/history":
             self.show_history()
-        elif cmd == "/reset":
+        elif cmd_lower == "/reset":
             self.reset_conversation()
-        elif cmd.startswith("/model"):
-            parts = cmd.split(maxsplit=1)
+        elif cmd_lower.startswith("/model"):
+            # Use original case so model names like "Anthropic/Claude-Opus-4" are preserved
+            parts = cmd_original.split(maxsplit=1)
             if len(parts) > 1:
                 new_model = parts[1]
                 self.model = new_model
@@ -1256,18 +1262,20 @@ class HermesCLI:
             else:
                 print(f"Current model: {self.model}")
                 print("  Usage: /model <model-name> to change")
-        elif cmd.startswith("/prompt"):
-            self._handle_prompt_command(cmd)
-        elif cmd.startswith("/personality"):
-            self._handle_personality_command(cmd)
-        elif cmd == "/save":
+        elif cmd_lower.startswith("/prompt"):
+            # Use original case so prompt text isn't lowercased
+            self._handle_prompt_command(cmd_original)
+        elif cmd_lower.startswith("/personality"):
+            # Use original case (handler lowercases the personality name itself)
+            self._handle_personality_command(cmd_original)
+        elif cmd_lower == "/save":
             self.save_conversation()
-        elif cmd.startswith("/cron"):
-            self._handle_cron_command(command)  # Use original command for proper parsing
-        elif cmd == "/platforms" or cmd == "/gateway":
+        elif cmd_lower.startswith("/cron"):
+            self._handle_cron_command(cmd_original)
+        elif cmd_lower == "/platforms" or cmd_lower == "/gateway":
             self._show_gateway_status()
         else:
-            self.console.print(f"[bold red]Unknown command: {cmd}[/]")
+            self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
             self.console.print("[dim #B8860B]Type /help for available commands[/]")
         
         return True
@@ -1276,6 +1284,11 @@ class HermesCLI:
         """
         Send a message to the agent and get a response.
         
+        Uses a dedicated _interrupt_queue (separate from _pending_input) to avoid
+        race conditions between the process_loop and interrupt monitoring. Non-command
+        messages typed while the agent is running go to _interrupt_queue; slash commands
+        and anything typed while idle go to _pending_input.
+        
         Args:
             message: The user's message
             
@@ -1307,21 +1320,22 @@ class HermesCLI:
             agent_thread = threading.Thread(target=run_agent)
             agent_thread.start()
             
-            # Monitor for new input in the pending queue while agent runs
+            # Monitor the dedicated interrupt queue while the agent runs.
+            # _interrupt_queue is separate from _pending_input, so process_loop
+            # and chat() never compete for the same queue.
             interrupt_msg = None
             while agent_thread.is_alive():
-                # Check if there's new input in the queue (from the persistent input area)
-                if hasattr(self, '_pending_input'):
+                if hasattr(self, '_interrupt_queue'):
                     try:
-                        interrupt_msg = self._pending_input.get(timeout=0.1)
+                        interrupt_msg = self._interrupt_queue.get(timeout=0.1)
                         if interrupt_msg:
                             print(f"\n⚡ New message detected, interrupting...")
                             self.agent.interrupt(interrupt_msg)
                             break
-                    except:
+                    except queue.Empty:
                         pass  # Queue empty or timeout, continue waiting
                 else:
-                    # Fallback if no queue (shouldn't happen)
+                    # Fallback for non-interactive mode (e.g., single-query)
                     agent_thread.join(0.1)
             
             agent_thread.join()  # Ensure agent thread completes
@@ -1356,10 +1370,11 @@ class HermesCLI:
                 print()
                 print("─" * 60)
             
-            # If we have a pending message from interrupt, process it immediately
-            if pending_message:
-                print(f"\n📨 Processing: '{pending_message[:50]}{'...' if len(pending_message) > 50 else ''}'")
-                return self.chat(pending_message)  # Recursive call to handle the new message
+            # If we have a pending message from interrupt, re-queue it for process_loop
+            # instead of recursing (avoids unbounded recursion from rapid interrupts)
+            if pending_message and hasattr(self, '_pending_input'):
+                print(f"\n📨 Queued: '{pending_message[:50]}{'...' if len(pending_message) > 50 else ''}'")
+                self._pending_input.put(pending_message)
             
             return response
             
@@ -1406,7 +1421,8 @@ class HermesCLI:
         
         # State for async operation
         self._agent_running = False
-        self._pending_input = queue.Queue()
+        self._pending_input = queue.Queue()     # For normal input (commands + new queries)
+        self._interrupt_queue = queue.Queue()   # For messages typed while agent is running
         self._should_exit = False
         self._last_ctrl_c_time = 0  # Track double Ctrl+C for force exit
         
@@ -1418,11 +1434,22 @@ class HermesCLI:
         
         @kb.add('enter')
         def handle_enter(event):
-            """Handle Enter key - submit input."""
+            """Handle Enter key - submit input.
+            
+            Routes to the correct queue based on agent state:
+            - Agent running: goes to _interrupt_queue (chat() monitors this)
+            - Agent idle: goes to _pending_input (process_loop monitors this)
+            Commands (starting with /) always go to _pending_input so they're
+            handled as commands, not sent as interrupt text to the agent.
+            """
             text = event.app.current_buffer.text.strip()
             if text:
-                # Store the input
-                self._pending_input.put(text)
+                if self._agent_running and not text.startswith("/"):
+                    # Agent is working - route to interrupt queue for chat() to pick up
+                    self._interrupt_queue.put(text)
+                else:
+                    # Agent idle, or it's a command - route to normal input queue
+                    self._pending_input.put(text)
                 # Clear the buffer
                 event.app.current_buffer.reset()
         
@@ -1542,6 +1569,11 @@ class HermesCLI:
         process_thread = threading.Thread(target=process_loop, daemon=True)
         process_thread.start()
         
+        # Register atexit cleanup so resources are freed even on unexpected exit
+        # (terminal VMs, browser sessions, etc.)
+        atexit.register(_cleanup_all_browsers)
+        atexit.register(_cleanup_all_terminals)
+        
         # Run the application with patch_stdout for proper output handling
         try:
             with patch_stdout():
@@ -1550,6 +1582,15 @@ class HermesCLI:
             pass
         finally:
             self._should_exit = True
+            # Explicitly clean up resources before exit
+            try:
+                _cleanup_all_terminals()
+            except Exception:
+                pass
+            try:
+                _cleanup_all_browsers()
+            except Exception:
+                pass
             print("\nGoodbye! ⚕")
 
 
@@ -1669,6 +1710,10 @@ def main(
         cli.show_toolsets()
         sys.exit(0)
     
+    # Register cleanup for all modes, incl. single-query (interactive run() registers again)
+    atexit.register(_cleanup_all_browsers)
+    atexit.register(_cleanup_all_terminals)
+    
     # Handle single query mode
     if query:
         cli.show_banner()