diff --git a/architecture/agents.md b/architecture/agents.md
new file mode 100644
index 000000000..625d4ccf5
--- /dev/null
+++ b/architecture/agents.md
@@ -0,0 +1,55 @@
+# Agents
+
+An agent can be viewed as an FSM using an LLM to generate inputs into the system that operates over a DAG.
+
+What this really means is that the agent is just a function without memory that uses text inputs and outputs in a
+defined order.
+
+```python
+def my_agent(*args, **kwargs) -> str:
+ # do whatever you want!
+ return "Hi I'm an agent!"
+```
+
+Now obviously, that's like saying water's wet, but we're going to be using that definition to inform our design of the
+library, namely, that we should *not* store agent state outside the function call.
+
+## The Agent Class
+
+So if we don't have state, why are we using a class?
+
+Well, we want to initialize things, we want to have some configuration, and we want to have some helper functions.
+Preferably all in a single place.
+
+```python
+class BaseAgent:
+ def agent_primitives(self) -> list[BaseAgent]:
+ # Returns a list of Agents that are utilized by this agent to generate inputs
+ # We use agent primitives here instead of subagents because these are going to be part
+ # of the message graph, not a subagent tool call.
+ raise NotImplementedError
+
+ def tools(self) -> list[BaseTool]:
+ # Returns a list of tools that the agent needs to run
+ raise NotImplementedError
+
+
+ def run(self, config, *args, **kwargs) -> ConversationGraph:
+ llm = get_llm(config)
+ tools = self.tools()
+ for agent in self.agent_primitives():
+ tools.extend(agent.tools())
+ tools = remove_duplicates(tools)
+ tools = initialize_tools(tools, config)
+ return self(llm, tools, config, *args, **kwargs)
+
+    # Subclasses override this; run() invokes it through self(llm, tools, config, ...)
+    def __call__(self, llm, tools, config, *args, **kwargs) -> ConversationGraph:
+ # Returns a ConversationGraph that can be parsed to get the output of the agent
+ # Use w/e args/kwargs you want, as long as llm/tools/config are satisfied.
+ raise NotImplementedError
+```
+
+Doesn't seem too bad (I hope), it is a bit annoying that we don't initialize everything in the constructor, but
+hopefully we all kinda like it :)
+
diff --git a/architecture/llm_client.md b/architecture/llm_client.md
new file mode 100644
index 000000000..fe15d23a4
--- /dev/null
+++ b/architecture/llm_client.md
@@ -0,0 +1,14 @@
+# LLM Client
+
+A quick wrapper over openai apis
+
+## Responsibilities
+
+- Transform "normal" chat/completions requests into graphs
+- Translate graphs into LLM requests
+- Keep a history of graphs parsed by it
+ - On Policy Data
+ - Deduplicating graphs, so we don't keep previous history as separate graphs
+
+## How to use
+Exactly the same as the openai api! Just with the additional support of graph inputs and outputs.
\ No newline at end of file
diff --git a/architecture/message_graph.md b/architecture/message_graph.md
new file mode 100644
index 000000000..251a3a416
--- /dev/null
+++ b/architecture/message_graph.md
@@ -0,0 +1,114 @@
+# Message Graph
+
+```mermaid
+graph TD
+ %% Message nodes
+ SystemMsg["📋 System Message
Role: System
Content: Messages are nodes in a graph"]
+ UserMsg["👤 User Message
Role: User
Content: But messages aren't the only thing in the graph"]
+ subgraph PrevMessages["Previous Messages"]
+ PrevSystemMsg["📋 System Message
Role: System
Content: Edits are kept in the graph as context"]
+ PrevUserMsg["👤 User Message
Role: User
Content: So we can ensure they're immutable while keeping them editable"]
+ end
+
+ %% Chat Response as a subgraph
+ subgraph ChatResponseBox["💬 Chat Response"]
+ ChatMetadata["📊 Metadata
Temp: 1.0
..."]
+ ChatResponseText["📝 Response
Hello, Here's a subagent call: <tool>subagent</tool>"]
+ ChatContent["Content: Hello, Here's a subagent call..."]
+ end
+
+ %% Tool Response as a subgraph
+ subgraph ToolResponseBox["🔧 Tool Response"]
+ subgraph ToolMetadata["📊 Tool Metadata"]
+ ToolMetadataLength["Length: 3"]
+ subgraph ToolChat["💭 Subagent Chat"]
+ SubagentSystem["📋 System
Content: Subagent call received"]
+ SubagentUser["👤 User
Content: Process this request"]
+ SubagentAssistant["🤖 Assistant
Content: Processing..."]
+ SubagentSystem --> SubagentUser
+ SubagentUser --> SubagentAssistant
+ end
+ end
+ ToolContent["Content: Subagent call output"]
+ end
+
+ %% Graph flow connections
+ SystemMsg --> UserMsg
+ PrevSystemMsg --> PrevUserMsg
+ PrevMessages -.-> UserMsg
+ UserMsg --> ChatResponseBox
+ ChatResponseBox --> ToolResponseBox
+
+ class SystemMsg,UserMsg messageNode
+ class ChatResponseBox responseNode
+ class ToolResponseBox responseNode
+ class ChatMetadata,ChatResponseText,ChatContent,ToolMetadata,ToolChat,ToolContent,ToolMetadataLength metadataNode
+```
+
+Messages should be a graph (DAG, specifically) of immutable elements.
+
+## Why immutable elements?
+We want to train on policy
+- This means the context cannot change after we call a response.
+
+## Why a graph?
+Nodes and connections are a natural way to represent the flow of information in an agent conversation.
+
+## Will this be annoying to deal with?
+
+It shouldn't be! While there will be internal stuff that may look ???, for the interface, it should be as simple as your
+normal context window edits, so `message_history[2]['content'] = my_edit`, but internally we'll deal with the recordkeeping
+and how this ends up parsing into on policy training data, if requested.
+
+## Edges
+
+Edges are the connections between nodes, and there are two types we are concerned with:
+- **Sequential edges**: These represent the flow of conversation, connecting messages in the order they were sent. For example, a user message followed by an assistant response.
+- **Parallel edges**: These represent versioning, e.g. edit history, context squishing, etc.
+We, however, are only concerned about parallel edges when we break the prefix, and ignore any other parallel edges.
+
+## So what does this look like in practice?
+
+```python
+import copy
+
+
+class MessageGraph:
+ def __init__(self):
+ self.messages = []
+ self.prev_graph = None
+
+ def append(self, message):
+ self.messages.append(message)
+
+ def __getitem__(self, index):
+ return self.messages[index]
+
+ def __setitem__(self, key, value):
+        # check if an assistant message is after this index
+ needs_new_graph = False
+ first_idx = -1
+ for i in range(key, len(self.messages)):
+ if (i == key) and (value['role'] == 'assistant') and (value['content'] == self.messages[i]['content']):
+ # no op
+ return
+ needs_new_graph = needs_new_graph or (self.messages[i]['role'] == 'assistant')
+ if needs_new_graph and first_idx == -1:
+ first_idx = i
+ if needs_new_graph:
+ self.prev_graph = copy.deepcopy(self)
+ self.messages[key] = value
+
+ def __len__(self):
+ return len(self.messages)
+
+ def __eq__(self, other):
+ return "\n\n".join(f"{msg['role']}: {msg['content']}" for msg in self) == "\n\n".join(
+ f"{msg['role']}: {msg['content']}" for msg in other)
+
+
+# in use
+messages = MessageGraph()
+messages.append({'role': 'system', 'content': 'Hello, I am a system message'})
+messages[0] = {'role': 'user', 'content': 'Hello, I am a user message'}
+```
\ No newline at end of file
diff --git a/architecture/tools.md b/architecture/tools.md
new file mode 100644
index 000000000..b899c5ebd
--- /dev/null
+++ b/architecture/tools.md
@@ -0,0 +1,16 @@
+# Tools
+
+Not much on this, yet. Tools are just a stateful wrapper around a function, so we can do things like:
+- Keep a docker container running
+- Keep a game online
+
+```python
+class BaseTool:
+ def definitions(self) -> List[Dict[str, Any]]:
+ # OpenAI API compatible definitions
+ raise NotImplementedError
+
+ def __call__(self, *args, **kwargs) -> Dict[str, Any]:
+ # Returns at minimum {'role': 'tool', 'content': '...'}
+ raise NotImplementedError
+```
\ No newline at end of file
diff --git a/batch_runner.py b/batch_runner.py
index 8943b4e04..16f461a7c 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -98,10 +98,9 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
# Terminal wraps its response in a "content" field
if "content" in content_json and isinstance(content_json["content"], dict):
inner_content = content_json["content"]
- # Check for actual error (non-null error field or non-zero exit code)
- has_error = (inner_content.get("error") is not None or
- inner_content.get("exit_code", 0) != 0)
- if has_error:
+ # Check for actual error (non-null error field)
+ # Note: non-zero exit codes are not failures - the model can self-correct
+ if inner_content.get("error") is not None:
is_success = False
# Check for "success": false pattern used by some tools
diff --git a/model_tools.py b/model_tools.py
index 9e5a6e8cf..eb27b7535 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -31,7 +31,9 @@ import asyncio
from typing import Dict, Any, List, Optional
from tools.web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_firecrawl_api_key
-from tools.terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOOL_DESCRIPTION
+from tools.simple_terminal_tool import simple_terminal_tool, check_requirements as check_simple_terminal_requirements, SIMPLE_TERMINAL_TOOL_DESCRIPTION
+# Keep old terminal tool for backwards compatibility if needed
+# from tools.terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOOL_DESCRIPTION
from tools.vision_tools import vision_analyze_tool, check_vision_requirements
from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
@@ -111,7 +113,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
"""
Get tool definitions for terminal tools in OpenAI's expected format.
-
+
Returns:
List[Dict]: List of terminal tool definitions compatible with OpenAI API
"""
@@ -120,7 +122,7 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
"type": "function",
"function": {
"name": "terminal",
- "description": TERMINAL_TOOL_DESCRIPTION,
+ "description": SIMPLE_TERMINAL_TOOL_DESCRIPTION,
"parameters": {
"type": "object",
"properties": {
@@ -128,28 +130,18 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
"type": "string",
"description": "The command to execute on the VM"
},
- "input_keys": {
- "type": "string",
- "description": "Keystrokes to send to the most recent interactive session (e.g., 'hello\\n' for typing hello + Enter). If no active session exists, this will be ignored."
- },
"background": {
"type": "boolean",
"description": "Whether to run the command in the background (default: false)",
"default": False
},
- "idle_threshold": {
- "type": "number",
- "description": "Seconds to wait for output before considering session idle (default: 5.0)",
- "default": 5.0,
- "minimum": 0.1
- },
"timeout": {
"type": "integer",
"description": "Command timeout in seconds (optional)",
"minimum": 1
}
},
- "required": []
+ "required": ["command"]
}
}
}
@@ -262,11 +254,11 @@ def get_all_tool_names() -> List[str]:
# Web tools
if check_firecrawl_api_key():
tool_names.extend(["web_search", "web_extract", "web_crawl"])
-
- # Terminal tools
- if check_hecate_requirements():
+
+ # Terminal tools
+ if check_simple_terminal_requirements():
tool_names.extend(["terminal"])
-
+
# Vision tools
if check_vision_requirements():
tool_names.extend(["vision_analyze"])
@@ -346,11 +338,11 @@ def get_tool_definitions(
if check_firecrawl_api_key():
for tool in get_web_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
-
- if check_hecate_requirements():
+
+ if check_simple_terminal_requirements():
for tool in get_terminal_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
-
+
if check_vision_requirements():
for tool in get_vision_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
@@ -494,12 +486,10 @@ def handle_terminal_function_call(function_name: str, function_args: Dict[str, A
"""
if function_name == "terminal":
command = function_args.get("command")
- input_keys = function_args.get("input_keys")
background = function_args.get("background", False)
- idle_threshold = function_args.get("idle_threshold", 5.0)
timeout = function_args.get("timeout")
- return terminal_tool(command, input_keys, None, background, idle_threshold, timeout, task_id)
+ return simple_terminal_tool(command=command, background=background, timeout=timeout, task_id=task_id)
else:
return json.dumps({"error": f"Unknown terminal function: {function_name}"}, ensure_ascii=False)
@@ -681,10 +671,10 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
"requirements": ["FIRECRAWL_API_KEY environment variable"]
},
"terminal_tools": {
- "available": check_hecate_requirements(),
- "tools": ["terminal_tool"],
- "description": "Execute commands with optional interactive session support on Linux VMs",
- "requirements": ["MORPH_API_KEY environment variable", "hecate package"]
+ "available": check_simple_terminal_requirements(),
+ "tools": ["simple_terminal_tool"],
+ "description": "Execute commands on secure Linux VMs without session persistence",
+ "requirements": ["MORPH_API_KEY environment variable"]
},
"vision_tools": {
"available": check_vision_requirements(),
@@ -711,13 +701,13 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
def check_toolset_requirements() -> Dict[str, bool]:
"""
Check if all requirements for available toolsets are met.
-
+
Returns:
Dict: Status of each toolset's requirements
"""
return {
"web_tools": check_firecrawl_api_key(),
- "terminal_tools": check_hecate_requirements(),
+ "terminal_tools": check_simple_terminal_requirements(),
"vision_tools": check_vision_requirements(),
"moa_tools": check_moa_requirements(),
"image_tools": check_image_generation_requirements()
diff --git a/requirements.txt b/requirements.txt
index d0a1bb734..f9b9514dd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,9 @@
firecrawl-py
openai
fal-client
+fire
+hecate @ git+ssh://git@github.com/NousResearch/hecate.git
+tenacity
python-dotenv
fire
-httpx
\ No newline at end of file
+httpx
diff --git a/run_agent.py b/run_agent.py
index 7e6f901e2..e080d843b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -460,8 +460,8 @@ class AIAgent:
api_start_time = time.time()
retry_count = 0
- max_retries = 3
-
+ max_retries = 6 # Increased to allow longer backoff periods
+
while retry_count <= max_retries:
try:
# Build OpenRouter provider preferences if specified
@@ -494,9 +494,9 @@ class AIAgent:
if self.verbose_logging:
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
-
+
break # Success, exit retry loop
-
+
except Exception as api_error:
retry_count += 1
elapsed_time = time.time() - api_start_time
@@ -515,10 +515,11 @@ class AIAgent:
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
raise api_error
-
- wait_time = min(2 ** retry_count, 10) # Exponential backoff, max 10s
- print(f"{self.log_prefix}⏳ Retrying in {wait_time}s...")
- logging.warning(f"API retry {retry_count}/{max_retries} after {error_type}: {error_msg[:200]}")
+
+ wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
+ print(f"⚠️ OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
+ print(f"⏳ Retrying in {wait_time}s...")
+ logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
time.sleep(wait_time)
try:
@@ -624,11 +625,11 @@ class AIAgent:
"reasoning": reasoning_content # Store reasoning for trajectory
})
- print(f"🎉 Conversation completed after {api_call_count} API call(s)")
+ print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
break
except Exception as e:
- error_msg = f"Error during API call #{api_call_count}: {str(e)}"
+ error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
print(f"❌ {error_msg}")
if self.verbose_logging:
diff --git a/tools/mixture_of_agents_tool.py b/tools/mixture_of_agents_tool.py
index a0e8db1ee..c94d9e1de 100644
--- a/tools/mixture_of_agents_tool.py
+++ b/tools/mixture_of_agents_tool.py
@@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st
async def _run_reference_model_safe(
- model: str,
- user_prompt: str,
+ model: str,
+ user_prompt: str,
temperature: float = REFERENCE_TEMPERATURE,
max_tokens: int = 32000,
- max_retries: int = 3
+ max_retries: int = 6
) -> tuple[str, str, bool]:
"""
Run a single reference model with retry logic and graceful failure handling.
@@ -212,8 +212,8 @@ async def _run_reference_model_safe(
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
if attempt < max_retries - 1:
- # Exponential backoff for rate limiting
- sleep_time = 2 ** attempt
+ # Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
+ sleep_time = min(2 ** (attempt + 1), 60)
print(f" Retrying in {sleep_time}s...")
await asyncio.sleep(sleep_time)
else:
diff --git a/tools/simple_terminal_tool.py b/tools/simple_terminal_tool.py
new file mode 100644
index 000000000..6ebfeeda7
--- /dev/null
+++ b/tools/simple_terminal_tool.py
@@ -0,0 +1,395 @@
+#!/usr/bin/env python3
+"""
+Simple Terminal Tool Module
+
+A simplified terminal tool that executes commands on MorphCloud VMs without tmux.
+No session persistence, no interactive app support - just simple command execution.
+
+Features:
+- Direct SSH command execution
+- Background task support
+- VM lifecycle management with TTL
+- Automatic cleanup after inactivity
+
+Usage:
+ from simple_terminal_tool import simple_terminal_tool
+
+ # Execute a simple command
+ result = simple_terminal_tool("ls -la")
+
+ # Execute in background
+ result = simple_terminal_tool("python server.py", background=True)
+"""
+
+import json
+import os
+import time
+import threading
+import atexit
+from typing import Optional, Dict, Any
+
+# Tool description for LLM
+SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM environment.
+
+**Environment:**
+- Minimal Debian-based OS with internet access
+- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
+- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
+
+**Command Execution:**
+- Simple commands: Just provide the 'command' parameter
+- Background processes: Set 'background': True for servers/long-running tasks
+- Command timeout: Optional 'timeout' parameter in seconds
+
+**Examples:**
+- Run command: `{"command": "ls -la"}`
+- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": true}`
+- With timeout: `{"command": "long_task.sh", "timeout": 300}`
+
+**Best Practices:**
+- Run servers/long processes in background
+- Monitor disk usage for large tasks
+- Install whatever tools you need with sudo apt-get
+- Do not be afraid to run pip with --break-system-packages
+
+**Things to avoid**
+- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
+"""
+
+# Global state for VM lifecycle management
+_active_instances: Dict[str, Any] = {}
+_last_activity: Dict[str, float] = {}
+_instance_lock = threading.Lock()
+_cleanup_thread = None
+_cleanup_running = False
+
+
+def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
+ """Clean up VMs that have been inactive for longer than vm_lifetime_seconds."""
+ global _active_instances, _last_activity
+
+ current_time = time.time()
+ tasks_to_cleanup = []
+
+ with _instance_lock:
+ for task_id, last_time in list(_last_activity.items()):
+ if current_time - last_time > vm_lifetime_seconds:
+ tasks_to_cleanup.append(task_id)
+
+ for task_id in tasks_to_cleanup:
+ try:
+ if task_id in _active_instances:
+ instance = _active_instances[task_id]
+ if hasattr(instance, 'terminate'):
+ instance.terminate()
+ elif hasattr(instance, 'stop'):
+ instance.stop()
+ elif hasattr(instance, 'delete'):
+ instance.delete()
+
+ del _active_instances[task_id]
+ print(f"[VM Cleanup] Terminated inactive VM for task: {task_id}")
+
+ if task_id in _last_activity:
+ del _last_activity[task_id]
+
+ except Exception as e:
+ # 404 errors are benign - VM already cleaned up by TTL
+ error_str = str(e)
+ if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+ print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+ else:
+ print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
+
+
+def _cleanup_thread_worker():
+ """Background thread worker that periodically cleans up inactive VMs."""
+ global _cleanup_running
+
+ while _cleanup_running:
+ try:
+ vm_lifetime = int(os.getenv("HECATE_VM_LIFETIME_SECONDS", "300"))
+ _cleanup_inactive_vms(vm_lifetime)
+ except Exception as e:
+ print(f"[VM Cleanup] Error in cleanup thread: {e}")
+
+ for _ in range(60):
+ if not _cleanup_running:
+ break
+ time.sleep(1)
+
+
+def _start_cleanup_thread():
+ """Start the background cleanup thread if not already running."""
+ global _cleanup_thread, _cleanup_running
+
+ with _instance_lock:
+ if _cleanup_thread is None or not _cleanup_thread.is_alive():
+ _cleanup_running = True
+ _cleanup_thread = threading.Thread(target=_cleanup_thread_worker, daemon=True)
+ _cleanup_thread.start()
+
+
+def _stop_cleanup_thread():
+ """Stop the background cleanup thread."""
+ global _cleanup_running
+ _cleanup_running = False
+ if _cleanup_thread is not None:
+ _cleanup_thread.join(timeout=5)
+
+
+def cleanup_vm(task_id: str):
+ """Manually clean up a specific VM by task_id."""
+ global _active_instances, _last_activity
+
+ with _instance_lock:
+ try:
+ if task_id in _active_instances:
+ instance = _active_instances[task_id]
+ if hasattr(instance, 'terminate'):
+ instance.terminate()
+ elif hasattr(instance, 'stop'):
+ instance.stop()
+ elif hasattr(instance, 'delete'):
+ instance.delete()
+
+ del _active_instances[task_id]
+ print(f"[VM Cleanup] Manually terminated VM for task: {task_id}")
+
+ if task_id in _last_activity:
+ del _last_activity[task_id]
+
+ except Exception as e:
+ # 404 errors are benign - VM already cleaned up by TTL
+ error_str = str(e)
+ if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
+ print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
+ else:
+ print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
+
+
+atexit.register(_stop_cleanup_thread)
+
+
+def _execute_ssh_command(instance, command: str, timeout: Optional[int] = None) -> Dict[str, Any]:
+ """
+ Execute a command via SSH on the VM instance.
+
+ Args:
+ instance: MorphVM instance
+ command: Command to execute
+ timeout: Optional timeout in seconds
+
+ Returns:
+ dict with stdout, stderr, returncode
+ """
+ ssh_context_manager = None
+ try:
+ # Use the instance's SSH context manager
+ ssh_context_manager = instance.ssh()
+ ssh_context = ssh_context_manager.__enter__()
+
+ # Execute the command
+ result = ssh_context.run(command, get_pty=False, timeout=timeout or 120)
+
+ # Close the SSH connection
+ if ssh_context_manager:
+ try:
+ ssh_context_manager.__exit__(None, None, None)
+ except:
+ pass
+
+ return {
+ "stdout": result.stdout or "",
+ "stderr": result.stderr or "",
+ "returncode": result.returncode
+ }
+
+ except Exception as e:
+ # Close connection on error
+ if ssh_context_manager:
+ try:
+ ssh_context_manager.__exit__(None, None, None)
+ except:
+ pass
+
+ # Check if it's a timeout
+ error_str = str(e).lower()
+ if "timeout" in error_str:
+ return {
+ "stdout": "",
+ "stderr": f"Command timed out after {timeout or 120} seconds",
+ "returncode": 124
+ }
+
+ return {
+ "stdout": "",
+ "stderr": f"SSH execution failed: {str(e)}",
+ "returncode": -1
+ }
+
+
+def simple_terminal_tool(
+ command: str,
+ background: bool = False,
+ timeout: Optional[int] = None,
+ task_id: Optional[str] = None
+) -> str:
+ """
+ Execute a command on a MorphCloud VM without session persistence.
+
+ Args:
+ command: The command to execute
+ background: Whether to run in background (default: False)
+ timeout: Command timeout in seconds (default: 120)
+ task_id: Unique identifier for VM isolation (optional)
+
+ Returns:
+ str: JSON string with output, exit_code, and error fields
+
+ Examples:
+ # Execute a simple command
+ >>> result = simple_terminal_tool(command="ls -la /tmp")
+
+ # Run a background task
+ >>> result = simple_terminal_tool(command="python server.py", background=True)
+
+ # With custom timeout
+ >>> result = simple_terminal_tool(command="long_task.sh", timeout=300)
+ """
+ global _active_instances, _last_activity
+
+ try:
+ # Import required modules
+ try:
+ from morphcloud.api import MorphCloudClient
+ except ImportError as import_error:
+ return json.dumps({
+ "output": "",
+ "exit_code": -1,
+ "error": f"Terminal tool disabled: {import_error}",
+ "status": "disabled"
+ }, ensure_ascii=False)
+
+ # Get configuration
+ vm_ttl_seconds = int(os.getenv("HECATE_VM_TTL_SECONDS", "1200"))
+ snapshot_id = os.getenv("HECATE_DEFAULT_SNAPSHOT_ID", "snapshot_defv9tjg")
+
+ # Check API key
+ morph_api_key = os.getenv("MORPH_API_KEY")
+ if not morph_api_key:
+ return json.dumps({
+ "output": "",
+ "exit_code": -1,
+ "error": "MORPH_API_KEY environment variable not set",
+ "status": "disabled"
+ }, ensure_ascii=False)
+
+ # Use task_id for VM isolation
+ effective_task_id = task_id or "default"
+
+ # Start cleanup thread
+ _start_cleanup_thread()
+
+ # Get or create VM instance
+ with _instance_lock:
+ if effective_task_id not in _active_instances:
+ morph_client = MorphCloudClient(api_key=morph_api_key)
+ _active_instances[effective_task_id] = morph_client.instances.start(
+ snapshot_id=snapshot_id,
+ ttl_seconds=vm_ttl_seconds,
+ ttl_action="stop"
+ )
+
+ # Update last activity time
+ _last_activity[effective_task_id] = time.time()
+ instance = _active_instances[effective_task_id]
+
+ # Wait for instance to be ready
+ instance.wait_until_ready()
+
+ # Prepare command for execution
+ if background:
+ # Run in background with nohup and redirect output
+ exec_command = f"nohup {command} > /tmp/bg_output.log 2>&1 &"
+ result = _execute_ssh_command(instance, exec_command, timeout=10)
+
+ # For background tasks, return immediately with info
+ if result["returncode"] == 0:
+ return json.dumps({
+ "output": "Background task started successfully",
+ "exit_code": 0,
+ "error": None
+ }, ensure_ascii=False)
+ else:
+ return json.dumps({
+ "output": result["stdout"],
+ "exit_code": result["returncode"],
+ "error": result["stderr"]
+ }, ensure_ascii=False)
+ else:
+ # Run foreground command
+ result = _execute_ssh_command(instance, command, timeout=timeout)
+
+ # Combine stdout and stderr for output
+ output = result["stdout"]
+ if result["stderr"] and result["returncode"] != 0:
+ output = f"{output}\n{result['stderr']}" if output else result["stderr"]
+
+ return json.dumps({
+ "output": output.strip(),
+ "exit_code": result["returncode"],
+ "error": result["stderr"] if result["returncode"] != 0 else None
+ }, ensure_ascii=False)
+
+ except Exception as e:
+ return json.dumps({
+ "output": "",
+ "exit_code": -1,
+ "error": f"Failed to execute command: {str(e)}",
+ "status": "error"
+ }, ensure_ascii=False)
+
+
+def check_requirements() -> bool:
+ """Check if all requirements for the simple terminal tool are met."""
+ required_vars = ["MORPH_API_KEY"]
+ missing_required = [var for var in required_vars if not os.getenv(var)]
+
+ if missing_required:
+ print(f"Missing required environment variables: {', '.join(missing_required)}")
+ return False
+
+ try:
+ from morphcloud.api import MorphCloudClient
+ return True
+ except Exception as e:
+ print(f"MorphCloud not available: {e}")
+ return False
+
+
+if __name__ == "__main__":
+ """Simple test when run directly."""
+ print("Simple Terminal Tool Module")
+ print("=" * 40)
+
+ if not check_requirements():
+ print("Requirements not met. Please check the messages above.")
+ exit(1)
+
+ print("All requirements met!")
+ print("\nAvailable Tool:")
+ print(" - simple_terminal_tool: Execute commands without session persistence")
+
+ print("\nUsage Examples:")
+ print(" # Execute a command")
+ print(" result = simple_terminal_tool(command='ls -la')")
+ print(" ")
+ print(" # Run a background task")
+ print(" result = simple_terminal_tool(command='python server.py', background=True)")
+
+ print("\nEnvironment Variables:")
+ print(f" MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}")
+ print(f" HECATE_VM_TTL_SECONDS: {os.getenv('HECATE_VM_TTL_SECONDS', '1200')} (default: 1200 / 20 minutes)")
+ print(f" HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300 / 5 minutes)")
+ print(f" HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_defv9tjg')}")
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index e4b436426..3d6ea11cd 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -280,7 +280,7 @@ def terminal_tool(
# Get configuration from environment
vm_lifetime_seconds = int(os.getenv("HECATE_VM_LIFETIME_SECONDS", "300"))
vm_ttl_seconds = int(os.getenv("HECATE_VM_TTL_SECONDS", "1200")) # 20 minutes default
- snapshot_id = os.getenv("HECATE_DEFAULT_SNAPSHOT_ID", "snapshot_defv9tjg")
+ snapshot_id = os.getenv("HECATE_DEFAULT_SNAPSHOT_ID", "snapshot_1a8xowaq")
# Check API key
morph_api_key = os.getenv("MORPH_API_KEY")
@@ -453,4 +453,4 @@ if __name__ == "__main__":
print(f" OPENAI_API_KEY: {'Set' if os.getenv('OPENAI_API_KEY') else 'Not set (optional)'}")
print(f" HECATE_VM_TTL_SECONDS: {os.getenv('HECATE_VM_TTL_SECONDS', '1200')} (default: 1200 / 20 minutes)")
print(f" HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300 / 5 minutes)")
- print(f" HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_defv9tjg')} (default: snapshot_defv9tjg)")
+ print(f" HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_1a8xowaq')} (default: snapshot_1a8xowaq)")
diff --git a/tools/web_tools.py b/tools/web_tools.py
index edafc282c..2f21f27fc 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
# Call the LLM asynchronously with retry logic for flaky API
- max_retries = 3
+ max_retries = 6
retry_delay = 2 # Start with 2 seconds
last_error = None
-
+
for attempt in range(max_retries):
try:
response = await summarizer_client.chat.completions.create(
@@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
- retry_delay *= 2 # Exponential backoff: 2s, 4s, 8s
+ retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
else:
# All retries exhausted
raise last_error