Merge branch 'NousResearch:main' into docker-optimization

This commit is contained in:
Bryan Cross 2026-03-30 15:21:45 -05:00 committed by GitHub
commit fdef0456a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1432 additions and 82 deletions

View file

@ -162,6 +162,21 @@ def _is_oauth_token(key: str) -> bool:
return True
def _requires_bearer_auth(base_url: str | None) -> bool:
"""Return True for Anthropic-compatible providers that require Bearer auth.
Some third-party /anthropic endpoints implement Anthropic's Messages API but
require Authorization: Bearer instead of Anthropic's native x-api-key header.
MiniMax's global and China Anthropic-compatible endpoints follow this pattern.
"""
if not base_url:
return False
normalized = base_url.rstrip("/").lower()
return normalized.startswith("https://api.minimax.io/anthropic") or normalized.startswith(
"https://api.minimaxi.com/anthropic"
)
def build_anthropic_client(api_key: str, base_url: str = None):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
@ -180,7 +195,17 @@ def build_anthropic_client(api_key: str, base_url: str = None):
if base_url:
kwargs["base_url"] = base_url
if _is_oauth_token(api_key):
if _requires_bearer_auth(base_url):
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
# Authorization: Bearer even for regular API keys. Route those endpoints
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
# Check this before OAuth token shape detection because MiniMax secrets do
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
# Anthropic OAuth/setup tokens.
kwargs["auth_token"] = api_key
if _COMMON_BETAS:
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
elif _is_oauth_token(api_key):
# OAuth access token / setup-token → Bearer auth + Claude Code identity.
# Anthropic routes OAuth requests based on user-agent and headers;
# without Claude Code's fingerprint, requests get intermittent 500s.

24
cli.py
View file

@ -2837,6 +2837,28 @@ class HermesCLI:
print(" Example: python cli.py --toolsets web,terminal")
print()
def _handle_profile_command(self):
"""Display active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
home = get_hermes_home()
display = display_hermes_home()
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
print()
if profile_name:
print(f" Profile: {profile_name}")
else:
print(" Profile: default")
print(f" Home: {display}")
print()
def show_config(self):
"""Display current configuration with kawaii ASCII art."""
# Get terminal config from environment (which was set from cli-config.yaml)
@ -3679,6 +3701,8 @@ class HermesCLI:
return False
elif canonical == "help":
self.show_help()
elif canonical == "profile":
self._handle_profile_command()
elif canonical == "tools":
self._handle_tools_command(cmd_original)
elif canonical == "toolsets":

View file

@ -476,6 +476,13 @@ class GatewayRunner:
self._honcho_managers: Dict[str, Any] = {}
self._honcho_configs: Dict[str, Any] = {}
# Rate-limit compression warning messages sent to users.
# Keyed by chat_id — value is the timestamp of the last warning sent.
# Prevents the warning from firing on every message when a session
# remains above the threshold after compression.
self._compression_warn_sent: Dict[str, float] = {}
self._compression_warn_cooldown: int = 3600 # seconds (1 hour)
# Ensure tirith security scanner is available (downloads if needed)
try:
from tools.tirith_security import ensure_installed
@ -1865,6 +1872,9 @@ class GatewayRunner:
if canonical == "commands":
return await self._handle_commands_command(event)
if canonical == "profile":
return await self._handle_profile_command(event)
if canonical == "status":
return await self._handle_status_command(event)
@ -2400,13 +2410,18 @@ class GatewayRunner:
pass
# Still too large after compression — warn user
# Rate-limited to once per cooldown period per
# chat to avoid spamming on every message.
if _new_tokens >= _warn_token_threshold:
logger.warning(
"Session hygiene: still ~%s tokens after "
"compression — suggesting /reset",
f"{_new_tokens:,}",
)
if _hyg_adapter:
_now = time.time()
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
self._compression_warn_sent[source.chat_id] = _now
try:
await _hyg_adapter.send(
source.chat_id,
@ -2428,7 +2443,10 @@ class GatewayRunner:
if _approx_tokens >= _warn_token_threshold:
_hyg_adapter = self.adapters.get(source.platform)
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
if _hyg_adapter:
_now = time.time()
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
self._compression_warn_sent[source.chat_id] = _now
try:
await _hyg_adapter.send(
source.chat_id,
@ -3055,6 +3073,36 @@ class GatewayRunner:
return f"{header}\n\n{session_info}"
return header
async def _handle_profile_command(self, event: MessageEvent) -> str:
"""Handle /profile — show active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
from pathlib import Path
home = get_hermes_home()
display = display_hermes_home()
# Detect profile name from HERMES_HOME path
# Profile paths look like: ~/.hermes/profiles/<name>
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
if profile_name:
lines = [
f"👤 **Profile:** `{profile_name}`",
f"📂 **Home:** `{display}`",
]
else:
lines = [
"👤 **Profile:** default",
f"📂 **Home:** `{display}`",
]
return "\n".join(lines)
async def _handle_status_command(self, event: MessageEvent) -> str:
"""Handle /status command."""
source = event.source

View file

@ -71,6 +71,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("q",), args_hint="<prompt>"),
CommandDef("status", "Show session info", "Session",
gateway_only=True),
CommandDef("profile", "Show active profile name and home directory", "Info"),
CommandDef("sethome", "Set this chat as the home channel", "Session",
gateway_only=True, aliases=("set-home",)),
CommandDef("resume", "Resume a previously-named session", "Session",
@ -366,27 +367,41 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]:
"""Return Telegram menu commands (built-in + active skills), capped to the Bot API limit.
"""Return Telegram menu commands capped to the Bot API limit.
Built-in commands come first, then active skill commands. Commands beyond
``max_commands`` remain callable in the gateway; they are just omitted from
Telegram's native slash-command picker.
Priority order (higher priority = never bumped by overflow):
1. Core CommandDef commands (always included)
2. Plugin slash commands (take precedence over skills)
3. Built-in skill commands (fill remaining slots, alphabetical)
Skills are the only tier that gets trimmed when the cap is hit.
User-installed hub skills are excluded accessible via /skills.
Returns:
(menu_commands, hidden_count) where hidden_count is the number of
commands omitted due to the cap.
skill commands omitted due to the cap.
"""
all_commands = list(telegram_bot_commands())
# Append active BUILT-IN skill commands only (not user-installed hub skills).
# User-installed skills stay accessible via /skills and by typing the command
# directly, but don't clutter the Telegram menu.
# Plugin slash commands get priority over skills
try:
from hermes_cli.plugins import get_plugin_manager
pm = get_plugin_manager()
plugin_cmds = getattr(pm, "_plugin_commands", {})
for cmd_name in sorted(plugin_cmds):
tg_name = cmd_name.replace("-", "_")
desc = "Plugin command"
if len(desc) > 40:
desc = desc[:37] + "..."
all_commands.append((tg_name, desc))
except Exception:
pass
# Remaining slots go to built-in skill commands (not hub-installed).
skill_entries: list[tuple[str, str]] = []
try:
from agent.skill_commands import get_skill_commands
from tools.skills_tool import SKILLS_DIR
# Built-in skills are synced to SKILLS_DIR (~/.hermes/skills/).
# Hub-installed skills go into SKILLS_DIR/.hub/. Exclude .hub/ skills
# from the menu — they're user-installed, not repo built-in.
_skills_dir = str(SKILLS_DIR.resolve())
_hub_dir = str((SKILLS_DIR / ".hub").resolve())
skill_cmds = get_skill_commands()
@ -396,18 +411,21 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
if not skill_path.startswith(_skills_dir):
continue
if skill_path.startswith(_hub_dir):
continue # hub-installed, not built-in
continue
name = cmd_key.lstrip("/").replace("-", "_")
desc = info.get("description", "")
# Keep descriptions short — setMyCommands has an undocumented
# total payload limit. 40 chars fits 100 commands safely.
if len(desc) > 40:
desc = desc[:37] + "..."
all_commands.append((name, desc))
skill_entries.append((name, desc))
except Exception:
pass
hidden_count = max(0, len(all_commands) - max_commands)
# Skills fill remaining slots — they're the only tier that gets trimmed
remaining_slots = max(0, max_commands - len(all_commands))
hidden_count = max(0, len(skill_entries) - remaining_slots)
all_commands.extend(skill_entries[:remaining_slots])
return all_commands[:max_commands], hidden_count

View file

@ -706,6 +706,14 @@ OPTIONAL_ENV_VARS = {
"password": True,
"category": "tool",
},
"CAMOFOX_URL": {
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
"prompt": "Camofox server URL",
"url": "https://github.com/jo-inc/camofox-browser",
"tools": ["browser_navigate", "browser_click"],
"password": False,
"category": "tool",
},
"FAL_KEY": {
"description": "FAL API key for image generation",
"prompt": "FAL API key",

View file

@ -601,13 +601,15 @@ def _print_setup_summary(config: dict, hermes_home):
Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
).exists()
)
if get_env_value("BROWSERBASE_API_KEY"):
if get_env_value("CAMOFOX_URL"):
tool_status.append(("Browser Automation (Camofox)", True, None))
elif get_env_value("BROWSERBASE_API_KEY"):
tool_status.append(("Browser Automation (Browserbase)", True, None))
elif _ab_found:
tool_status.append(("Browser Automation (local)", True, None))
else:
tool_status.append(
("Browser Automation", False, "npm install -g agent-browser")
("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL")
)
# FAL (image generation)

View file

@ -273,6 +273,16 @@ TOOL_CATEGORIES = {
"browser_provider": "browser-use",
"post_setup": "browserbase",
},
{
"name": "Camofox",
"tag": "Local anti-detection browser (Firefox/Camoufox)",
"env_vars": [
{"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377",
"url": "https://github.com/jo-inc/camofox-browser"},
],
"browser_provider": "camofox",
"post_setup": "camofox",
},
],
},
"homeassistant": {
@ -337,6 +347,28 @@ def _run_post_setup(post_setup_key: str):
elif not node_modules.exists():
_print_warning(" Node.js not found - browser tools require: npm install (in hermes-agent directory)")
elif post_setup_key == "camofox":
camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser"
if not camofox_dir.exists() and shutil.which("npm"):
_print_info(" Installing Camofox browser server...")
import subprocess
result = subprocess.run(
["npm", "install", "--silent"],
capture_output=True, text=True, cwd=str(PROJECT_ROOT)
)
if result.returncode == 0:
_print_success(" Camofox installed")
else:
_print_warning(" npm install failed - run manually: npm install")
if camofox_dir.exists():
_print_info(" Start the Camofox server:")
_print_info(" npx @askjo/camoufox-browser")
_print_info(" First run downloads the Camoufox engine (~300MB)")
_print_info(" Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser")
elif not shutil.which("npm"):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 jo-inc/camofox-browser")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")

View file

@ -16,7 +16,8 @@
},
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
"dependencies": {
"agent-browser": "^0.13.0"
"agent-browser": "^0.13.0",
"@askjo/camoufox-browser": "^1.0.0"
},
"engines": {
"node": ">=18.0.0"

View file

@ -5221,11 +5221,8 @@ class AIAgent:
except Exception as e:
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
# Reset context pressure warning and token estimate — usage drops
# after compaction. Without this, the stale last_prompt_tokens from
# the previous API call causes the pressure calculation to stay at
# >1000% and spam warnings / re-trigger compression in a loop.
self._context_pressure_warned = False
# Update token estimate after compaction so pressure calculations
# use the post-compression count, not the stale pre-compression one.
_compressed_est = (
estimate_tokens_rough(new_system_prompt)
+ estimate_messages_tokens_rough(compressed)
@ -5233,6 +5230,16 @@ class AIAgent:
self.context_compressor.last_prompt_tokens = _compressed_est
self.context_compressor.last_completion_tokens = 0
# Only reset the pressure warning if compression actually brought
# us below the warning level (85% of threshold). When compression
# can't reduce enough (e.g. threshold is very low, or system prompt
# alone exceeds the warning level), keep the flag set to prevent
# spamming the user with repeated warnings every loop iteration.
if self.context_compressor.threshold_tokens > 0:
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
if _post_progress < 0.85:
self._context_pressure_warned = False
return compressed, new_system_prompt
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:

View file

@ -212,6 +212,49 @@ class TestSessionHygieneWarnThreshold:
assert post_compress_tokens < warn_threshold
class TestCompressionWarnRateLimit:
"""Compression warning messages must be rate-limited per chat_id."""
def _make_runner(self):
from unittest.mock import MagicMock, patch
with patch("gateway.run.load_gateway_config"), \
patch("gateway.run.SessionStore"), \
patch("gateway.run.DeliveryRouter"):
from gateway.run import GatewayRunner
runner = GatewayRunner.__new__(GatewayRunner)
runner._compression_warn_sent = {}
runner._compression_warn_cooldown = 3600
return runner
def test_first_warn_is_sent(self):
runner = self._make_runner()
now = 1_000_000.0
last = runner._compression_warn_sent.get("chat:1", 0)
assert now - last >= runner._compression_warn_cooldown
def test_second_warn_suppressed_within_cooldown(self):
runner = self._make_runner()
now = 1_000_000.0
runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago
last = runner._compression_warn_sent.get("chat:1", 0)
assert now - last < runner._compression_warn_cooldown
def test_warn_allowed_after_cooldown(self):
runner = self._make_runner()
now = 1_000_000.0
runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown
last = runner._compression_warn_sent.get("chat:1", 0)
assert now - last >= runner._compression_warn_cooldown
def test_rate_limit_is_per_chat(self):
"""Rate-limiting one chat must not suppress warnings for another."""
runner = self._make_runner()
now = 1_000_000.0
runner._compression_warn_sent["chat:1"] = now - 60 # suppressed
last_other = runner._compression_warn_sent.get("chat:2", 0)
assert now - last_other >= runner._compression_warn_cooldown
class TestEstimatedTokenThreshold:
"""Verify that hygiene thresholds are always below the model's context
limit for both actual and estimated token counts.

View file

@ -81,6 +81,19 @@ class TestBuildAnthropicClient:
kwargs = mock_sdk.Anthropic.call_args[1]
assert kwargs["base_url"] == "https://custom.api.com"
def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self):
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
build_anthropic_client(
"minimax-secret-123",
base_url="https://api.minimax.io/anthropic",
)
kwargs = mock_sdk.Anthropic.call_args[1]
assert kwargs["auth_token"] == "minimax-secret-123"
assert "api_key" not in kwargs
assert kwargs["default_headers"] == {
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
}
class TestReadClaudeCodeCredentials:
def test_reads_valid_credentials(self, tmp_path, monkeypatch):

View file

@ -0,0 +1,115 @@
"""Tests for trajectory_compressor AsyncOpenAI event loop binding.
The AsyncOpenAI client was created once at __init__ time and stored as an
instance attribute. When process_directory() calls asyncio.run() which
creates and closes a fresh event loop the client's internal httpx
transport remains bound to the now-closed loop. A second call to
process_directory() would fail with "Event loop is closed".
The fix creates the AsyncOpenAI client lazily via _get_async_client() so
each asyncio.run() gets a client bound to the current loop.
"""
import types
from unittest.mock import MagicMock, patch
import pytest
class TestAsyncClientLazyCreation:
"""trajectory_compressor.py — _get_async_client()"""
def test_async_client_none_after_init(self):
"""async_client should be None after __init__ (not eagerly created)."""
from trajectory_compressor import TrajectoryCompressor
comp = TrajectoryCompressor.__new__(TrajectoryCompressor)
comp.config = MagicMock()
comp.config.base_url = "https://api.example.com/v1"
comp.config.api_key_env = "TEST_API_KEY"
comp._use_call_llm = False
comp.async_client = None
comp._async_client_api_key = "test-key"
assert comp.async_client is None
def test_get_async_client_creates_new_client(self):
"""_get_async_client() should create a fresh AsyncOpenAI instance."""
from trajectory_compressor import TrajectoryCompressor
comp = TrajectoryCompressor.__new__(TrajectoryCompressor)
comp.config = MagicMock()
comp.config.base_url = "https://api.example.com/v1"
comp._async_client_api_key = "test-key"
comp.async_client = None
mock_async_openai = MagicMock()
with patch("openai.AsyncOpenAI", mock_async_openai):
client = comp._get_async_client()
mock_async_openai.assert_called_once_with(
api_key="test-key",
base_url="https://api.example.com/v1",
)
assert comp.async_client is not None
def test_get_async_client_creates_fresh_each_call(self):
"""Each call to _get_async_client() creates a NEW client instance,
so it binds to the current event loop."""
from trajectory_compressor import TrajectoryCompressor
comp = TrajectoryCompressor.__new__(TrajectoryCompressor)
comp.config = MagicMock()
comp.config.base_url = "https://api.example.com/v1"
comp._async_client_api_key = "test-key"
comp.async_client = None
call_count = 0
instances = []
def mock_constructor(**kwargs):
nonlocal call_count
call_count += 1
instance = MagicMock()
instances.append(instance)
return instance
with patch("openai.AsyncOpenAI", side_effect=mock_constructor):
client1 = comp._get_async_client()
client2 = comp._get_async_client()
# Should have created two separate instances
assert call_count == 2
assert instances[0] is not instances[1]
class TestSourceLineVerification:
"""Verify the actual source has the lazy pattern applied."""
@staticmethod
def _read_file() -> str:
import os
base = os.path.dirname(os.path.dirname(__file__))
with open(os.path.join(base, "trajectory_compressor.py")) as f:
return f.read()
def test_no_eager_async_openai_in_init(self):
"""__init__ should NOT create AsyncOpenAI eagerly."""
src = self._read_file()
# The old pattern: self.async_client = AsyncOpenAI(...) in _init_summarizer
# should not exist — only self.async_client = None
lines = src.split("\n")
for i, line in enumerate(lines, 1):
if "self.async_client = AsyncOpenAI(" in line and "_get_async_client" not in lines[max(0,i-3):i+1]:
# Allow it inside _get_async_client method
# Check if we're inside _get_async_client by looking at context
context = "\n".join(lines[max(0,i-10):i+1])
if "_get_async_client" not in context:
pytest.fail(
f"Line {i}: AsyncOpenAI created eagerly outside _get_async_client()"
)
def test_get_async_client_method_exists(self):
"""_get_async_client method should exist."""
src = self._read_file()
assert "def _get_async_client(self)" in src

View file

@ -0,0 +1,290 @@
"""Tests for the Camofox browser backend."""
import json
import os
from unittest.mock import MagicMock, patch
import pytest
from tools.browser_camofox import (
camofox_back,
camofox_click,
camofox_close,
camofox_console,
camofox_get_images,
camofox_navigate,
camofox_press,
camofox_scroll,
camofox_snapshot,
camofox_type,
camofox_vision,
check_camofox_available,
cleanup_all_camofox_sessions,
is_camofox_mode,
)
# ---------------------------------------------------------------------------
# Configuration detection
# ---------------------------------------------------------------------------
class TestCamofoxMode:
def test_disabled_by_default(self, monkeypatch):
monkeypatch.delenv("CAMOFOX_URL", raising=False)
assert is_camofox_mode() is False
def test_enabled_when_url_set(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
assert is_camofox_mode() is True
def test_health_check_unreachable(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")
assert check_camofox_available() is False
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _mock_response(status=200, json_data=None):
resp = MagicMock()
resp.status_code = status
resp.json.return_value = json_data or {}
resp.content = b"\x89PNG\r\n\x1a\nfake"
resp.raise_for_status = MagicMock()
return resp
# ---------------------------------------------------------------------------
# Navigate
# ---------------------------------------------------------------------------
class TestCamofoxNavigate:
@patch("tools.browser_camofox.requests.post")
def test_creates_tab_on_first_navigate(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab1", "url": "https://example.com"})
result = json.loads(camofox_navigate("https://example.com", task_id="t1"))
assert result["success"] is True
assert result["url"] == "https://example.com"
@patch("tools.browser_camofox.requests.post")
def test_navigates_existing_tab(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
# First call creates tab
mock_post.return_value = _mock_response(json_data={"tabId": "tab2", "url": "https://a.com"})
camofox_navigate("https://a.com", task_id="t2")
# Second call navigates
mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://b.com"})
result = json.loads(camofox_navigate("https://b.com", task_id="t2"))
assert result["success"] is True
assert result["url"] == "https://b.com"
def test_connection_error_returns_helpful_message(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")
result = json.loads(camofox_navigate("https://example.com", task_id="t_err"))
assert result["success"] is False
assert "Cannot connect" in result["error"]
# ---------------------------------------------------------------------------
# Snapshot
# ---------------------------------------------------------------------------
class TestCamofoxSnapshot:
def test_no_session_returns_error(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
result = json.loads(camofox_snapshot(task_id="no_such_task"))
assert result["success"] is False
assert "browser_navigate" in result["error"]
@patch("tools.browser_camofox.requests.post")
@patch("tools.browser_camofox.requests.get")
def test_returns_snapshot(self, mock_get, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
# Create session
mock_post.return_value = _mock_response(json_data={"tabId": "tab3", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t3")
# Return snapshot
mock_get.return_value = _mock_response(json_data={
"snapshot": "- heading \"Test\" [e1]\n- button \"Submit\" [e2]",
"refsCount": 2,
})
result = json.loads(camofox_snapshot(task_id="t3"))
assert result["success"] is True
assert "[e1]" in result["snapshot"]
assert result["element_count"] == 2
# ---------------------------------------------------------------------------
# Click / Type / Scroll / Back / Press
# ---------------------------------------------------------------------------
class TestCamofoxInteractions:
@patch("tools.browser_camofox.requests.post")
def test_click(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab4", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t4")
mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://x.com"})
result = json.loads(camofox_click("@e5", task_id="t4"))
assert result["success"] is True
assert result["clicked"] == "e5"
@patch("tools.browser_camofox.requests.post")
def test_type(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab5", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t5")
mock_post.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_type("@e3", "hello world", task_id="t5"))
assert result["success"] is True
assert result["typed"] == "hello world"
@patch("tools.browser_camofox.requests.post")
def test_scroll(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab6", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t6")
mock_post.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_scroll("down", task_id="t6"))
assert result["success"] is True
assert result["scrolled"] == "down"
@patch("tools.browser_camofox.requests.post")
def test_back(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab7", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t7")
mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://prev.com"})
result = json.loads(camofox_back(task_id="t7"))
assert result["success"] is True
@patch("tools.browser_camofox.requests.post")
def test_press(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab8", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t8")
mock_post.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_press("Enter", task_id="t8"))
assert result["success"] is True
assert result["pressed"] == "Enter"
# ---------------------------------------------------------------------------
# Close
# ---------------------------------------------------------------------------
class TestCamofoxClose:
@patch("tools.browser_camofox.requests.delete")
@patch("tools.browser_camofox.requests.post")
def test_close_session(self, mock_post, mock_delete, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab9", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t9")
mock_delete.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_close(task_id="t9"))
assert result["success"] is True
assert result["closed"] is True
def test_close_nonexistent_session(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
result = json.loads(camofox_close(task_id="nonexistent"))
assert result["success"] is True
# ---------------------------------------------------------------------------
# Console (limited support)
# ---------------------------------------------------------------------------
class TestCamofoxConsole:
def test_console_returns_empty_with_note(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
result = json.loads(camofox_console(task_id="t_console"))
assert result["success"] is True
assert result["total_messages"] == 0
assert "not available" in result["note"]
# ---------------------------------------------------------------------------
# Images
# ---------------------------------------------------------------------------
class TestCamofoxGetImages:
@patch("tools.browser_camofox.requests.post")
@patch("tools.browser_camofox.requests.get")
def test_get_images(self, mock_get, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t10")
mock_get.return_value = _mock_response(json_data={
"images": [{"src": "https://x.com/img.png", "alt": "Logo"}],
})
result = json.loads(camofox_get_images(task_id="t10"))
assert result["success"] is True
assert result["count"] == 1
assert result["images"][0]["src"] == "https://x.com/img.png"
# ---------------------------------------------------------------------------
# Routing integration — verify browser_tool routes to camofox
# ---------------------------------------------------------------------------
class TestBrowserToolRouting:
"""Verify that browser_tool.py delegates to camofox when CAMOFOX_URL is set."""
@patch("tools.browser_camofox.requests.post")
def test_browser_navigate_routes_to_camofox(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab_rt", "url": "https://example.com"})
from tools.browser_tool import browser_navigate
# Bypass SSRF check for test URL
with patch("tools.browser_tool._is_safe_url", return_value=True):
result = json.loads(browser_navigate("https://example.com", task_id="t_route"))
assert result["success"] is True
def test_check_requirements_passes_with_camofox(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
from tools.browser_tool import check_browser_requirements
assert check_browser_requirements() is True
# ---------------------------------------------------------------------------
# Cleanup helper
# ---------------------------------------------------------------------------
class TestCamofoxCleanup:
@patch("tools.browser_camofox.requests.post")
@patch("tools.browser_camofox.requests.delete")
def test_cleanup_all(self, mock_delete, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab_c", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t_cleanup")
mock_delete.return_value = _mock_response(json_data={"ok": True})
cleanup_all_camofox_sessions()
# Session should be gone
result = json.loads(camofox_snapshot(task_id="t_cleanup"))
assert result["success"] is False

496
tools/browser_camofox.py Normal file
View file

@ -0,0 +1,496 @@
"""Camofox browser backend — local anti-detection browser via REST API.
Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox
fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1
to our browser tool interface: accessibility snapshots with element refs,
click/type/scroll by ref, screenshots, etc.
When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser
tools route through this module instead of the ``agent-browser`` CLI.
Setup::
# Option 1: npm
git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
npm install && npm start # downloads Camoufox (~300MB) on first run
# Option 2: Docker
docker run -p 9377:9377 jo-inc/camofox-browser
Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
"""
from __future__ import annotations
import base64
import json
import logging
import os
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
import requests
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
def get_camofox_url() -> str:
"""Return the configured Camofox server URL, or empty string."""
return os.getenv("CAMOFOX_URL", "").rstrip("/")
def is_camofox_mode() -> bool:
"""True when Camofox backend is configured."""
return bool(get_camofox_url())
def check_camofox_available() -> bool:
"""Verify the Camofox server is reachable."""
url = get_camofox_url()
if not url:
return False
try:
resp = requests.get(f"{url}/health", timeout=5)
return resp.status_code == 200
except Exception:
return False
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
# Maps task_id -> {"user_id": str, "tab_id": str|None}
_sessions: Dict[str, Dict[str, Any]] = {}
_sessions_lock = threading.Lock()
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
"""Get or create a camofox session for the given task."""
task_id = task_id or "default"
with _sessions_lock:
if task_id in _sessions:
return _sessions[task_id]
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
}
_sessions[task_id] = session
return session
def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
"""Ensure a tab exists for the session, creating one if needed."""
session = _get_session(task_id)
if session["tab_id"]:
return session
base = get_camofox_url()
resp = requests.post(
f"{base}/tabs",
json={
"userId": session["user_id"],
"sessionKey": session["session_key"],
"url": url,
},
timeout=_DEFAULT_TIMEOUT,
)
resp.raise_for_status()
data = resp.json()
session["tab_id"] = data.get("tabId")
return session
def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]:
"""Remove and return session info."""
task_id = task_id or "default"
with _sessions_lock:
return _sessions.pop(task_id, None)
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict:
"""POST JSON to camofox and return parsed response."""
url = f"{get_camofox_url()}{path}"
resp = requests.post(url, json=body, timeout=timeout)
resp.raise_for_status()
return resp.json()
def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
"""GET from camofox and return parsed response."""
url = f"{get_camofox_url()}{path}"
resp = requests.get(url, params=params, timeout=timeout)
resp.raise_for_status()
return resp.json()
def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response:
"""GET from camofox and return raw response (for binary data)."""
url = f"{get_camofox_url()}{path}"
resp = requests.get(url, params=params, timeout=timeout)
resp.raise_for_status()
return resp
def _delete(path: str, body: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
"""DELETE to camofox and return parsed response."""
url = f"{get_camofox_url()}{path}"
resp = requests.delete(url, json=body, timeout=timeout)
resp.raise_for_status()
return resp.json()
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
"""Navigate to a URL via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
# Create tab with the target URL directly
session = _ensure_tab(task_id, url)
data = {"ok": True, "url": url}
else:
# Navigate existing tab
data = _post(
f"/tabs/{session['tab_id']}/navigate",
{"userId": session["user_id"], "url": url},
timeout=60,
)
return json.dumps({
"success": True,
"url": data.get("url", url),
"title": data.get("title", ""),
})
except requests.HTTPError as e:
return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
except requests.ConnectionError:
return json.dumps({
"success": False,
"error": f"Cannot connect to Camofox at {get_camofox_url()}. "
"Is the server running? Start with: npm start (in camofox-browser dir) "
"or: docker run -p 9377:9377 jo-inc/camofox-browser",
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_snapshot(full: bool = False, task_id: Optional[str] = None,
user_task: Optional[str] = None) -> str:
"""Get accessibility tree snapshot from Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
data = _get(
f"/tabs/{session['tab_id']}/snapshot",
params={"userId": session["user_id"]},
)
snapshot = data.get("snapshot", "")
refs_count = data.get("refsCount", 0)
# Apply same summarization logic as the main browser tool
from tools.browser_tool import (
SNAPSHOT_SUMMARIZE_THRESHOLD,
_extract_relevant_content,
_truncate_snapshot,
)
if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD:
if user_task:
snapshot = _extract_relevant_content(snapshot, user_task)
else:
snapshot = _truncate_snapshot(snapshot)
return json.dumps({
"success": True,
"snapshot": snapshot,
"element_count": refs_count,
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_click(ref: str, task_id: Optional[str] = None) -> str:
"""Click an element by ref via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
# Strip @ prefix if present (our tool convention)
clean_ref = ref.lstrip("@")
data = _post(
f"/tabs/{session['tab_id']}/click",
{"userId": session["user_id"], "ref": clean_ref},
)
return json.dumps({
"success": True,
"clicked": clean_ref,
"url": data.get("url", ""),
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
"""Type text into an element by ref via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
clean_ref = ref.lstrip("@")
_post(
f"/tabs/{session['tab_id']}/type",
{"userId": session["user_id"], "ref": clean_ref, "text": text},
)
return json.dumps({
"success": True,
"typed": text,
"element": clean_ref,
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
"""Scroll the page via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
_post(
f"/tabs/{session['tab_id']}/scroll",
{"userId": session["user_id"], "direction": direction},
)
return json.dumps({"success": True, "scrolled": direction})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_back(task_id: Optional[str] = None) -> str:
"""Navigate back via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
data = _post(
f"/tabs/{session['tab_id']}/back",
{"userId": session["user_id"]},
)
return json.dumps({"success": True, "url": data.get("url", "")})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_press(key: str, task_id: Optional[str] = None) -> str:
"""Press a keyboard key via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
_post(
f"/tabs/{session['tab_id']}/press",
{"userId": session["user_id"], "key": key},
)
return json.dumps({"success": True, "pressed": key})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_close(task_id: Optional[str] = None) -> str:
"""Close the browser session via Camofox."""
try:
session = _drop_session(task_id)
if not session:
return json.dumps({"success": True, "closed": True})
_delete(
f"/sessions/{session['user_id']}",
)
return json.dumps({"success": True, "closed": True})
except Exception as e:
return json.dumps({"success": True, "closed": True, "warning": str(e)})
def camofox_get_images(task_id: Optional[str] = None) -> str:
"""Get images on the current page via Camofox.
Extracts image information from the accessibility tree snapshot,
since Camofox does not expose a dedicated /images endpoint.
"""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
import re
data = _get(
f"/tabs/{session['tab_id']}/snapshot",
params={"userId": session["user_id"]},
)
snapshot = data.get("snapshot", "")
# Parse img elements from the accessibility tree.
# Format: img "alt text" or img "alt text" [eN]
# URLs appear on /url: lines following img entries
images = []
lines = snapshot.split("\n")
for i, line in enumerate(lines):
stripped = line.strip()
if stripped.startswith("- img ") or stripped.startswith("img "):
alt_match = re.search(r'img\s+"([^"]*)"', stripped)
alt = alt_match.group(1) if alt_match else ""
# Look for URL on the next line
src = ""
if i + 1 < len(lines):
url_match = re.search(r'/url:\s*(\S+)', lines[i + 1].strip())
if url_match:
src = url_match.group(1)
if alt or src:
images.append({"src": src, "alt": alt})
return json.dumps({
"success": True,
"images": images,
"count": len(images),
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_vision(question: str, annotate: bool = False,
task_id: Optional[str] = None) -> str:
"""Take a screenshot and analyze it with vision AI via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
# Get screenshot as binary PNG
resp = _get_raw(
f"/tabs/{session['tab_id']}/screenshot",
params={"userId": session["user_id"]},
)
# Save screenshot to cache
from hermes_constants import get_hermes_home
screenshots_dir = get_hermes_home() / "browser_screenshots"
screenshots_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png")
with open(screenshot_path, "wb") as f:
f.write(resp.content)
# Encode for vision LLM
img_b64 = base64.b64encode(resp.content).decode("utf-8")
# Also get annotated snapshot if requested
annotation_context = ""
if annotate:
try:
snap_data = _get(
f"/tabs/{session['tab_id']}/snapshot",
params={"userId": session["user_id"]},
)
annotation_context = f"\n\nAccessibility tree (element refs for interaction):\n{snap_data.get('snapshot', '')[:3000]}"
except Exception:
pass
# Send to vision LLM
from agent.auxiliary_client import call_llm
vision_prompt = (
f"Analyze this browser screenshot and answer: {question}"
f"{annotation_context}"
)
try:
from hermes_cli.config import load_config
_cfg = load_config()
_vision_timeout = int(_cfg.get("auxiliary", {}).get("vision", {}).get("timeout", 120))
except Exception:
_vision_timeout = 120
analysis = call_llm(
messages=[{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{img_b64}",
},
},
],
}],
task="vision",
timeout=_vision_timeout,
)
return json.dumps({
"success": True,
"analysis": analysis,
"screenshot_path": screenshot_path,
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str:
"""Get console output — limited support in Camofox.
Camofox does not expose browser console logs via its REST API.
Returns an empty result with a note.
"""
return json.dumps({
"success": True,
"console_messages": [],
"js_errors": [],
"total_messages": 0,
"total_errors": 0,
"note": "Console log capture is not available with the Camofox backend. "
"Use browser_snapshot or browser_vision to inspect page state.",
})
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
def cleanup_all_camofox_sessions() -> None:
"""Close all active camofox sessions."""
with _sessions_lock:
sessions = list(_sessions.items())
for task_id, session in sessions:
try:
_delete(f"/sessions/{session['user_id']}")
except Exception:
pass
with _sessions_lock:
_sessions.clear()

View file

@ -79,6 +79,14 @@ from tools.browser_providers.base import CloudBrowserProvider
from tools.browser_providers.browserbase import BrowserbaseProvider
from tools.browser_providers.browser_use import BrowserUseProvider
# Camofox local anti-detection browser backend (optional).
# When CAMOFOX_URL is set, all browser operations route through the
# camofox REST API instead of the agent-browser CLI.
try:
from tools.browser_camofox import is_camofox_mode as _is_camofox_mode
except ImportError:
_is_camofox_mode = lambda: False # noqa: E731
logger = logging.getLogger(__name__)
# Standard PATH entries for environments with minimal PATH (e.g. systemd services).
@ -1046,6 +1054,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
"blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
})
# Camofox backend — delegate after safety checks pass
if _is_camofox_mode():
from tools.browser_camofox import camofox_navigate
return camofox_navigate(url, task_id)
effective_task_id = task_id or "default"
# Get session info to check if this is a new session
@ -1135,6 +1148,10 @@ def browser_snapshot(
Returns:
JSON string with page snapshot
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_snapshot
return camofox_snapshot(full, task_id, user_task)
effective_task_id = task_id or "default"
# Build command args based on full flag
@ -1180,6 +1197,10 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with click result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_click
return camofox_click(ref, task_id)
effective_task_id = task_id or "default"
# Ensure ref starts with @
@ -1212,6 +1233,10 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with type result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_type
return camofox_type(ref, text, task_id)
effective_task_id = task_id or "default"
# Ensure ref starts with @
@ -1245,6 +1270,10 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with scroll result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_scroll
return camofox_scroll(direction, task_id)
effective_task_id = task_id or "default"
# Validate direction
@ -1278,6 +1307,10 @@ def browser_back(task_id: Optional[str] = None) -> str:
Returns:
JSON string with navigation result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_back
return camofox_back(task_id)
effective_task_id = task_id or "default"
result = _run_browser_command(effective_task_id, "back", [])
@ -1305,6 +1338,10 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with key press result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_press
return camofox_press(key, task_id)
effective_task_id = task_id or "default"
result = _run_browser_command(effective_task_id, "press", [key])
@ -1330,6 +1367,10 @@ def browser_close(task_id: Optional[str] = None) -> str:
Returns:
JSON string with close result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_close
return camofox_close(task_id)
effective_task_id = task_id or "default"
with _cleanup_lock:
had_session = effective_task_id in _active_sessions
@ -1358,6 +1399,10 @@ def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
Returns:
JSON string with console messages and JS errors
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_console
return camofox_console(clear, task_id)
effective_task_id = task_id or "default"
console_args = ["--clear"] if clear else []
@ -1452,6 +1497,10 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
Returns:
JSON string with list of images (src and alt)
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_get_images
return camofox_get_images(task_id)
effective_task_id = task_id or "default"
# Use eval to run JavaScript that extracts images
@ -1516,6 +1565,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
Returns:
JSON string with vision analysis results and screenshot_path
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_vision
return camofox_vision(question, annotate, task_id)
import base64
import uuid as uuid_mod
from pathlib import Path
@ -1804,6 +1857,10 @@ def check_browser_requirements() -> bool:
Returns:
True if all requirements are met, False otherwise
"""
# Camofox backend — only needs the server URL, no agent-browser CLI
if _is_camofox_mode():
return True
# The agent-browser CLI is always required
try:
_find_agent_browser()

View file

@ -375,15 +375,34 @@ class TrajectoryCompressor:
raise RuntimeError(
f"Missing API key. Set {self.config.api_key_env} "
f"environment variable.")
from openai import OpenAI, AsyncOpenAI
from openai import OpenAI
self.client = OpenAI(
api_key=api_key, base_url=self.config.base_url)
self.async_client = AsyncOpenAI(
api_key=api_key, base_url=self.config.base_url)
# AsyncOpenAI is created lazily in _get_async_client() so it
# binds to the current event loop — avoids "Event loop is closed"
# when process_directory() is called multiple times (each call
# creates a new loop via asyncio.run()).
self.async_client = None
self._async_client_api_key = api_key
print(f"✅ Initialized summarizer client: {self.config.summarization_model}")
print(f" Max concurrent requests: {self.config.max_concurrent_requests}")
def _get_async_client(self):
"""Return an AsyncOpenAI client bound to the current event loop.
Created lazily so that each ``asyncio.run()`` call in
``process_directory()`` gets a client tied to its own loop,
avoiding "Event loop is closed" errors on repeated calls.
"""
from openai import AsyncOpenAI
# Always create a fresh client so it binds to the running loop.
self.async_client = AsyncOpenAI(
api_key=self._async_client_api_key,
base_url=self.config.base_url,
)
return self.async_client
def _detect_provider(self) -> str:
"""Detect the provider name from the configured base_url."""
url = (self.config.base_url or "").lower()
@ -615,7 +634,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
max_tokens=self.config.summary_target_tokens * 2,
)
else:
response = await self.async_client.chat.completions.create(
response = await self._get_async_client().chat.completions.create(
model=self.config.summarization_model,
messages=[{"role": "user", "content": prompt}],
temperature=self.config.temperature,

View file

@ -699,65 +699,171 @@ Use this when you want lower latency or cost without fully changing your default
## Terminal Backend Configuration
Configure which environment the agent uses for terminal commands:
Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container.
```yaml
terminal:
backend: local # or: docker, ssh, singularity, modal, daytona
cwd: "." # Working directory ("." = current dir)
timeout: 180 # Command timeout in seconds
# Docker-specific settings
docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into /workspace.
docker_forward_env: # Optional explicit allowlist for env passthrough
- "GITHUB_TOKEN"
docker_volumes: # Additional explicit host mounts
- "/home/user/projects:/workspace/projects"
- "/home/user/data:/data:ro" # :ro for read-only
# Container resource limits (docker, singularity, modal, daytona)
container_cpu: 1 # CPU cores
container_memory: 5120 # MB (default 5GB)
container_disk: 51200 # MB (default 50GB)
container_persistent: true # Persist filesystem across sessions
# Persistent shell — keep a long-lived bash process across commands
persistent_shell: true # Enabled by default for SSH backend
backend: local # local | docker | ssh | modal | daytona | singularity
cwd: "." # Working directory ("." = current dir for local, "/root" for containers)
timeout: 180 # Per-command timeout in seconds
```
### Backend Overview
| Backend | Where commands run | Isolation | Best for |
|---------|-------------------|-----------|----------|
| **local** | Your machine directly | None | Development, personal use |
| **docker** | Docker container | Full (namespaces, cap-drop) | Safe sandboxing, CI/CD |
| **ssh** | Remote server via SSH | Network boundary | Remote dev, powerful hardware |
| **modal** | Modal cloud sandbox | Full (cloud VM) | Ephemeral cloud compute, evals |
| **daytona** | Daytona workspace | Full (cloud container) | Managed cloud dev environments |
| **singularity** | Singularity/Apptainer container | Namespaces (--containall) | HPC clusters, shared machines |
### Local Backend
The default. Commands run directly on your machine with no isolation. No special setup required.
```yaml
terminal:
backend: local
```
:::warning
The agent has the same filesystem access as your user account. Use `hermes tools` to disable tools you don't want, or switch to Docker for sandboxing.
:::
### Docker Backend
Runs commands inside a Docker container with security hardening (all capabilities dropped, no privilege escalation, PID limits).
```yaml
terminal:
backend: docker
docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
docker_mount_cwd_to_workspace: false # Mount launch dir into /workspace
docker_forward_env: # Env vars to forward into container
- "GITHUB_TOKEN"
docker_volumes: # Host directory mounts
- "/home/user/projects:/workspace/projects"
- "/home/user/data:/data:ro" # :ro for read-only
# Resource limits
container_cpu: 1 # CPU cores (0 = unlimited)
container_memory: 5120 # MB (0 = unlimited)
container_disk: 51200 # MB (requires overlay2 on XFS+pquota)
container_persistent: true # Persist /workspace and /root across sessions
```
**Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle).
**Container lifecycle:** Each session starts a long-lived container (`docker run -d ... sleep 2h`). Commands run via `docker exec` with a login shell. On cleanup, the container is stopped and removed.
**Security hardening:**
- `--cap-drop ALL` with only `DAC_OVERRIDE`, `CHOWN`, `FOWNER` added back
- `--security-opt no-new-privileges`
- `--pids-limit 256`
- Size-limited tmpfs for `/tmp` (512MB), `/var/tmp` (256MB), `/run` (64MB)
**Credential forwarding:** Env vars listed in `docker_forward_env` are resolved from your shell environment first, then `~/.hermes/.env`. Skills can also declare `required_environment_variables` which are merged automatically.
### SSH Backend
Runs commands on a remote server over SSH. Uses ControlMaster for connection reuse (5-minute idle keepalive). Persistent shell is enabled by default — state (cwd, env vars) survives across commands.
```yaml
terminal:
backend: ssh
persistent_shell: true # Keep a long-lived bash session (default: true)
```
**Required environment variables:**
```bash
TERMINAL_SSH_HOST=my-server.example.com
TERMINAL_SSH_USER=ubuntu
```
**Optional:**
| Variable | Default | Description |
|----------|---------|-------------|
| `TERMINAL_SSH_PORT` | `22` | SSH port |
| `TERMINAL_SSH_KEY` | (system default) | Path to SSH private key |
| `TERMINAL_SSH_PERSISTENT` | `true` | Enable persistent shell |
**How it works:** Connects at init time with `BatchMode=yes` and `StrictHostKeyChecking=accept-new`. Persistent shell keeps a single `bash -l` process alive on the remote host, communicating via temporary files. Commands that need `stdin_data` or `sudo` automatically fall back to one-shot mode.
### Modal Backend
Runs commands in a [Modal](https://modal.com) cloud sandbox. Each task gets an isolated VM with configurable CPU, memory, and disk. Filesystem can be snapshot/restored across sessions.
```yaml
terminal:
backend: modal
container_cpu: 1 # CPU cores
container_memory: 5120 # MB (5GB)
container_disk: 51200 # MB (50GB)
container_persistent: true # Snapshot/restore filesystem
```
**Required:** Either `MODAL_TOKEN_ID` + `MODAL_TOKEN_SECRET` environment variables, or a `~/.modal.toml` config file.
**Persistence:** When enabled, the sandbox filesystem is snapshotted on cleanup and restored on next session. Snapshots are tracked in `~/.hermes/modal_snapshots.json`.
**Credential files:** Automatically mounted from `~/.hermes/` (OAuth tokens, etc.) and synced before each command.
### Daytona Backend
Runs commands in a [Daytona](https://daytona.io) managed workspace. Supports stop/resume for persistence.
```yaml
terminal:
backend: daytona
container_cpu: 1 # CPU cores
container_memory: 5120 # MB → converted to GiB
container_disk: 10240 # MB → converted to GiB (max 10 GiB)
container_persistent: true # Stop/resume instead of delete
```
**Required:** `DAYTONA_API_KEY` environment variable.
**Persistence:** When enabled, sandboxes are stopped (not deleted) on cleanup and resumed on next session. Sandbox names follow the pattern `hermes-{task_id}`.
**Disk limit:** Daytona enforces a 10 GiB maximum. Requests above this are capped with a warning.
### Singularity/Apptainer Backend
Runs commands in a [Singularity/Apptainer](https://apptainer.org) container. Designed for HPC clusters and shared machines where Docker isn't available.
```yaml
terminal:
backend: singularity
singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20"
container_cpu: 1 # CPU cores
container_memory: 5120 # MB
container_persistent: true # Writable overlay persists across sessions
```
**Requirements:** `apptainer` or `singularity` binary in `$PATH`.
**Image handling:** Docker URLs (`docker://...`) are automatically converted to SIF files and cached. Existing `.sif` files are used directly.
**Scratch directory:** Resolved in order: `TERMINAL_SCRATCH_DIR``TERMINAL_SANDBOX_DIR/singularity``/scratch/$USER/hermes-agent` (HPC convention) → `~/.hermes/sandboxes/singularity`.
**Isolation:** Uses `--containall --no-home` for full namespace isolation without mounting the host home directory.
### Common Terminal Backend Issues
If terminal commands fail immediately or the terminal tool is reported as disabled, check the following:
If terminal commands fail immediately or the terminal tool is reported as disabled:
- **Local backend**
- No special requirements. This is the safest default when you are just getting started.
- **Local** — No special requirements. The safest default when getting started.
- **Docker** — Run `docker version` to verify Docker is working. If it fails, fix Docker or `hermes config set terminal.backend local`.
- **SSH** — Both `TERMINAL_SSH_HOST` and `TERMINAL_SSH_USER` must be set. Hermes logs a clear error if either is missing.
- **Modal** — Needs `MODAL_TOKEN_ID` env var or `~/.modal.toml`. Run `hermes doctor` to check.
- **Daytona** — Needs `DAYTONA_API_KEY`. The Daytona SDK handles server URL configuration.
- **Singularity** — Needs `apptainer` or `singularity` in `$PATH`. Common on HPC clusters.
- **Docker backend**
- Ensure Docker Desktop (or the Docker daemon) is installed and running.
- Hermes needs to be able to find the `docker` CLI. It checks your `$PATH` first and also probes common Docker Desktop install locations on macOS. Run:
```bash
docker version
```
If this fails, fix your Docker installation or switch back to the local backend:
```bash
hermes config set terminal.backend local
```
- **SSH backend**
- Both `TERMINAL_SSH_HOST` and `TERMINAL_SSH_USER` must be set, for example:
```bash
export TERMINAL_ENV=ssh
export TERMINAL_SSH_HOST=my-server.example.com
export TERMINAL_SSH_USER=ubuntu
```
- If either value is missing, Hermes will log a clear error and refuse to use the SSH backend.
- **Modal backend**
- You need either a `MODAL_TOKEN_ID` environment variable or a `~/.modal.toml` config file.
- If neither is present, the backend check fails and Hermes will report that the Modal backend is not available.
When in doubt, set `terminal.backend` back to `local` and verify that commands run there first.
When in doubt, set `terminal.backend` back to `local` and verify commands run there first.
### Docker Volume Mounts

View file

@ -65,6 +65,12 @@ const config: Config = {
defaultMode: 'dark',
respectPrefersColorScheme: true,
},
docs: {
sidebar: {
hideable: true,
autoCollapseCategories: true,
},
},
navbar: {
title: 'Hermes Agent',
logo: {

View file

@ -5,7 +5,7 @@ const sidebars: SidebarsConfig = {
{
type: 'category',
label: 'Getting Started',
collapsed: false,
collapsed: true,
items: [
'getting-started/quickstart',
'getting-started/installation',
@ -17,7 +17,7 @@ const sidebars: SidebarsConfig = {
{
type: 'category',
label: 'Guides & Tutorials',
collapsed: false,
collapsed: true,
items: [
'guides/tips',
'guides/daily-briefing-bot',
@ -32,7 +32,7 @@ const sidebars: SidebarsConfig = {
{
type: 'category',
label: 'User Guide',
collapsed: false,
collapsed: true,
items: [
'user-guide/cli',
'user-guide/configuration',

View file

@ -199,6 +199,46 @@ pre.prism-code.language-ascii code {
border: 1px solid rgba(255, 215, 0, 0.08);
}
/* ─── Mobile sidebar improvements ─────────────────────────────────────────── */
/* Larger touch targets on mobile */
@media (max-width: 996px) {
.menu__link {
padding: 0.6rem 0.75rem;
font-size: 0.95rem;
}
.menu__list-item-collapsible > .menu__link {
font-weight: 600;
font-size: 1rem;
padding: 0.75rem 0.75rem;
border-bottom: 1px solid rgba(255, 215, 0, 0.06);
}
/* Category caret — more visible */
.menu__caret::before {
background-size: 1.5rem 1.5rem;
}
/* Indent subcategories clearly */
.menu__list .menu__list {
padding-left: 0.75rem;
border-left: 1px solid rgba(255, 215, 0, 0.06);
margin-left: 0.5rem;
}
/* Sidebar overlay — slightly more opaque for readability */
.navbar-sidebar__backdrop {
background-color: rgba(0, 0, 0, 0.6);
}
/* Sidebar width on mobile — use more of the screen */
.navbar-sidebar {
width: 85vw;
max-width: 360px;
}
}
/* Hero banner for docs landing if needed */
.hero--hermes {
background: linear-gradient(135deg, #07070d 0%, #0f0f18 100%);