diff --git a/hermes_cli/config.py b/hermes_cli/config.py index e2503ebec..56d102692 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -706,6 +706,14 @@ OPTIONAL_ENV_VARS = { "password": True, "category": "tool", }, + "CAMOFOX_URL": { + "description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)", + "prompt": "Camofox server URL", + "url": "https://github.com/jo-inc/camofox-browser", + "tools": ["browser_navigate", "browser_click"], + "password": False, + "category": "tool", + }, "FAL_KEY": { "description": "FAL API key for image generation", "prompt": "FAL API key", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 35695144d..304f34f56 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -601,13 +601,15 @@ def _print_setup_summary(config: dict, hermes_home): Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser" ).exists() ) - if get_env_value("BROWSERBASE_API_KEY"): + if get_env_value("CAMOFOX_URL"): + tool_status.append(("Browser Automation (Camofox)", True, None)) + elif get_env_value("BROWSERBASE_API_KEY"): tool_status.append(("Browser Automation (Browserbase)", True, None)) elif _ab_found: tool_status.append(("Browser Automation (local)", True, None)) else: tool_status.append( - ("Browser Automation", False, "npm install -g agent-browser") + ("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL") ) # FAL (image generation) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 91496d45d..63e26d362 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -273,6 +273,16 @@ TOOL_CATEGORIES = { "browser_provider": "browser-use", "post_setup": "browserbase", }, + { + "name": "Camofox", + "tag": "Local anti-detection browser (Firefox/Camoufox)", + "env_vars": [ + {"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377", + "url": "https://github.com/jo-inc/camofox-browser"}, 
+ ], + "browser_provider": "camofox", + "post_setup": "camofox", + }, ], }, "homeassistant": { @@ -337,6 +347,28 @@ def _run_post_setup(post_setup_key: str): elif not node_modules.exists(): _print_warning(" Node.js not found - browser tools require: npm install (in hermes-agent directory)") + elif post_setup_key == "camofox": + camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser" + if not camofox_dir.exists() and shutil.which("npm"): + _print_info(" Installing Camofox browser server...") + import subprocess + result = subprocess.run( + ["npm", "install", "--silent"], + capture_output=True, text=True, cwd=str(PROJECT_ROOT) + ) + if result.returncode == 0: + _print_success(" Camofox installed") + else: + _print_warning(" npm install failed - run manually: npm install") + if camofox_dir.exists(): + _print_info(" Start the Camofox server:") + _print_info(" npx @askjo/camoufox-browser") + _print_info(" First run downloads the Camoufox engine (~300MB)") + _print_info(" Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser") + elif not shutil.which("npm"): + _print_warning(" Node.js not found. 
Install Camofox via Docker:") + _print_info(" docker run -p 9377:9377 jo-inc/camofox-browser") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") diff --git a/package.json b/package.json index 5e593367b..309217c82 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ }, "homepage": "https://github.com/NousResearch/Hermes-Agent#readme", "dependencies": { - "agent-browser": "^0.13.0" + "agent-browser": "^0.13.0", + "@askjo/camoufox-browser": "^1.0.0" }, "engines": { "node": ">=18.0.0" diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py new file mode 100644 index 000000000..a59862b9b --- /dev/null +++ b/tests/tools/test_browser_camofox.py @@ -0,0 +1,290 @@ +"""Tests for the Camofox browser backend.""" + +import json +import os +from unittest.mock import MagicMock, patch + +import pytest + +from tools.browser_camofox import ( + camofox_back, + camofox_click, + camofox_close, + camofox_console, + camofox_get_images, + camofox_navigate, + camofox_press, + camofox_scroll, + camofox_snapshot, + camofox_type, + camofox_vision, + check_camofox_available, + cleanup_all_camofox_sessions, + is_camofox_mode, +) + + +# --------------------------------------------------------------------------- +# Configuration detection +# --------------------------------------------------------------------------- + + +class TestCamofoxMode: + def test_disabled_by_default(self, monkeypatch): + monkeypatch.delenv("CAMOFOX_URL", raising=False) + assert is_camofox_mode() is False + + def test_enabled_when_url_set(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + assert is_camofox_mode() is True + + def test_health_check_unreachable(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999") + assert check_camofox_available() is False + + +# --------------------------------------------------------------------------- +# Helpers +# 
# ---------------------------------------------------------------------------


def _mock_response(status=200, json_data=None):
    """Build a ``MagicMock`` that stands in for a ``requests.Response``.

    Args:
        status: Value exposed as ``status_code``.
        json_data: Payload returned by ``.json()``. ``None`` becomes ``{}``;
            any other value (including empty containers) is returned as-is.

    Returns:
        A mock whose ``content`` is a short fake PNG byte string and whose
        ``raise_for_status`` is a no-op mock.
    """
    resp = MagicMock()
    resp.status_code = status
    resp.json.return_value = {} if json_data is None else json_data
    resp.content = b"\x89PNG\r\n\x1a\nfake"
    resp.raise_for_status = MagicMock()
    return resp


# ---------------------------------------------------------------------------
# Navigate
# ---------------------------------------------------------------------------


class TestCamofoxNavigate:
    """Navigation: tab creation, reuse of an existing tab, connection errors."""

    @patch("tools.browser_camofox.requests.post")
    def test_creates_tab_on_first_navigate(self, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        mock_post.return_value = _mock_response(
            json_data={"tabId": "tab1", "url": "https://example.com"}
        )

        result = json.loads(camofox_navigate("https://example.com", task_id="t1"))
        assert result["success"] is True
        assert result["url"] == "https://example.com"

    @patch("tools.browser_camofox.requests.post")
    def test_navigates_existing_tab(self, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        # First call creates the tab.
        mock_post.return_value = _mock_response(
            json_data={"tabId": "tab2", "url": "https://a.com"}
        )
        camofox_navigate("https://a.com", task_id="t2")

        # Second call must reuse it and hit the navigate endpoint.
        mock_post.return_value = _mock_response(
            json_data={"ok": True, "url": "https://b.com"}
        )
        result = json.loads(camofox_navigate("https://b.com", task_id="t2"))
        assert result["success"] is True
        assert result["url"] == "https://b.com"
        assert mock_post.call_args[0][0].endswith("/tabs/tab2/navigate")

    @patch("tools.browser_camofox.requests.post")
    def test_connection_error_returns_helpful_message(self, mock_post, monkeypatch):
        # Inject the failure instead of relying on nothing listening on a
        # local port: keeps the test deterministic and free of network I/O.
        import requests

        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")
        mock_post.side_effect = requests.ConnectionError("connection refused")

        result = json.loads(camofox_navigate("https://example.com", task_id="t_err"))
        assert result["success"] is False
        assert "Cannot connect" in result["error"]


# ---------------------------------------------------------------------------
# Snapshot
# ---------------------------------------------------------------------------


class TestCamofoxSnapshot:
    """Snapshot retrieval with and without an open tab."""

    def test_no_session_returns_error(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        payload = camofox_snapshot(task_id="no_such_task")
        result = json.loads(payload)
        assert result["success"] is False
        assert "browser_navigate" in result["error"]

    @patch("tools.browser_camofox.requests.post")
    @patch("tools.browser_camofox.requests.get")
    def test_returns_snapshot(self, mock_get, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        # Open a tab so the snapshot call has a session to work with.
        mock_post.return_value = _mock_response(
            json_data={"tabId": "tab3", "url": "https://x.com"}
        )
        camofox_navigate("https://x.com", task_id="t3")

        # Canned accessibility tree served by the mocked GET.
        tree = "- heading \"Test\" [e1]\n- button \"Submit\" [e2]"
        mock_get.return_value = _mock_response(
            json_data={"snapshot": tree, "refsCount": 2}
        )
        result = json.loads(camofox_snapshot(task_id="t3"))
        assert result["success"] is True
        assert "[e1]" in result["snapshot"]
        assert result["element_count"] == 2


# ---------------------------------------------------------------------------
# Click / Type / Scroll / Back / Press
# ---------------------------------------------------------------------------


class TestCamofoxInteractions:
    """Each interaction is exercised against a freshly opened, mocked tab."""

    @staticmethod
    def _open_tab(monkeypatch, mock_post, task_id, tab_id):
        """Point at the fake server and create a tab for *task_id*."""
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        mock_post.return_value = _mock_response(
            json_data={"tabId": tab_id, "url": "https://x.com"}
        )
        camofox_navigate("https://x.com", task_id=task_id)

    @patch("tools.browser_camofox.requests.post")
    def test_click(self, mock_post, monkeypatch):
        self._open_tab(monkeypatch, mock_post, "t4", "tab4")

        mock_post.return_value = _mock_response(
            json_data={"ok": True, "url": "https://x.com"}
        )
        result = json.loads(camofox_click("@e5", task_id="t4"))
        assert result["success"] is True
        assert result["clicked"] == "e5"

    @patch("tools.browser_camofox.requests.post")
    def test_type(self, mock_post, monkeypatch):
        self._open_tab(monkeypatch, mock_post, "t5", "tab5")

        mock_post.return_value = _mock_response(json_data={"ok": True})
        result = json.loads(camofox_type("@e3", "hello world", task_id="t5"))
        assert result["success"] is True
        assert result["typed"] == "hello world"

    @patch("tools.browser_camofox.requests.post")
    def test_scroll(self, mock_post, monkeypatch):
        self._open_tab(monkeypatch, mock_post, "t6", "tab6")

        mock_post.return_value = _mock_response(json_data={"ok": True})
        result = json.loads(camofox_scroll("down", task_id="t6"))
        assert result["success"] is True
        assert result["scrolled"] == "down"

    @patch("tools.browser_camofox.requests.post")
    def test_back(self, mock_post, monkeypatch):
        self._open_tab(monkeypatch, mock_post, "t7", "tab7")

        mock_post.return_value = _mock_response(
            json_data={"ok": True, "url": "https://prev.com"}
        )
        result = json.loads(camofox_back(task_id="t7"))
        assert result["success"] is True

    @patch("tools.browser_camofox.requests.post")
    def test_press(self, mock_post, monkeypatch):
        self._open_tab(monkeypatch, mock_post, "t8", "tab8")

        mock_post.return_value = _mock_response(json_data={"ok": True})
        result = json.loads(camofox_press("Enter", task_id="t8"))
        assert result["success"] is True
        assert result["pressed"] == "Enter"


# ---------------------------------------------------------------------------
# Close
--------------------------------------------------------------------------- + + +class TestCamofoxClose: + @patch("tools.browser_camofox.requests.delete") + @patch("tools.browser_camofox.requests.post") + def test_close_session(self, mock_post, mock_delete, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab9", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t9") + + mock_delete.return_value = _mock_response(json_data={"ok": True}) + result = json.loads(camofox_close(task_id="t9")) + assert result["success"] is True + assert result["closed"] is True + + def test_close_nonexistent_session(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + result = json.loads(camofox_close(task_id="nonexistent")) + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Console (limited support) +# --------------------------------------------------------------------------- + + +class TestCamofoxConsole: + def test_console_returns_empty_with_note(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + result = json.loads(camofox_console(task_id="t_console")) + assert result["success"] is True + assert result["total_messages"] == 0 + assert "not available" in result["note"] + + +# --------------------------------------------------------------------------- +# Images +# --------------------------------------------------------------------------- + + +class TestCamofoxGetImages: + @patch("tools.browser_camofox.requests.post") + @patch("tools.browser_camofox.requests.get") + def test_get_images(self, mock_get, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"}) + camofox_navigate("https://x.com", task_id="t10") + + 
mock_get.return_value = _mock_response(json_data={ + "images": [{"src": "https://x.com/img.png", "alt": "Logo"}], + }) + result = json.loads(camofox_get_images(task_id="t10")) + assert result["success"] is True + assert result["count"] == 1 + assert result["images"][0]["src"] == "https://x.com/img.png" + + +# --------------------------------------------------------------------------- +# Routing integration — verify browser_tool routes to camofox +# --------------------------------------------------------------------------- + + +class TestBrowserToolRouting: + """Verify that browser_tool.py delegates to camofox when CAMOFOX_URL is set.""" + + @patch("tools.browser_camofox.requests.post") + def test_browser_navigate_routes_to_camofox(self, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab_rt", "url": "https://example.com"}) + + from tools.browser_tool import browser_navigate + # Bypass SSRF check for test URL + with patch("tools.browser_tool._is_safe_url", return_value=True): + result = json.loads(browser_navigate("https://example.com", task_id="t_route")) + assert result["success"] is True + + def test_check_requirements_passes_with_camofox(self, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + from tools.browser_tool import check_browser_requirements + assert check_browser_requirements() is True + + +# --------------------------------------------------------------------------- +# Cleanup helper +# --------------------------------------------------------------------------- + + +class TestCamofoxCleanup: + @patch("tools.browser_camofox.requests.post") + @patch("tools.browser_camofox.requests.delete") + def test_cleanup_all(self, mock_delete, mock_post, monkeypatch): + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + mock_post.return_value = _mock_response(json_data={"tabId": "tab_c", "url": "https://x.com"}) + 
camofox_navigate("https://x.com", task_id="t_cleanup") + + mock_delete.return_value = _mock_response(json_data={"ok": True}) + cleanup_all_camofox_sessions() + + # Session should be gone + result = json.loads(camofox_snapshot(task_id="t_cleanup")) + assert result["success"] is False diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py new file mode 100644 index 000000000..b1925d2c6 --- /dev/null +++ b/tools/browser_camofox.py @@ -0,0 +1,496 @@ +"""Camofox browser backend — local anti-detection browser via REST API. + +Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox +fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1 +to our browser tool interface: accessibility snapshots with element refs, +click/type/scroll by ref, screenshots, etc. + +When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser +tools route through this module instead of the ``agent-browser`` CLI. + +Setup:: + + # Option 1: npm + git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser + npm install && npm start # downloads Camoufox (~300MB) on first run + + # Option 2: Docker + docker run -p 9377:9377 jo-inc/camofox-browser + +Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``. 
from __future__ import annotations

import base64
import json
import logging
import os
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional

import requests

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

_DEFAULT_TIMEOUT = 30  # seconds per HTTP request
_SNAPSHOT_MAX_CHARS = 80_000  # camofox paginates at this limit


def get_camofox_url() -> str:
    """Return the configured Camofox server URL, or an empty string.

    Reads ``CAMOFOX_URL`` from the environment on every call. Surrounding
    whitespace and trailing slashes are removed so that a blank value does
    not count as "configured" and paths can be appended directly.
    """
    return os.getenv("CAMOFOX_URL", "").strip().rstrip("/")


def is_camofox_mode() -> bool:
    """Return True when a Camofox server URL is configured."""
    return bool(get_camofox_url())


def check_camofox_available() -> bool:
    """Return True when ``GET {CAMOFOX_URL}/health`` answers with HTTP 200.

    Any failure (no URL configured, connection error, timeout, malformed
    URL) yields ``False``; this is a best-effort probe and never raises.
    """
    url = get_camofox_url()
    if not url:
        return False
    try:
        resp = requests.get(f"{url}/health", timeout=5)
    except Exception:
        # Deliberately broad: callers only want a yes/no answer.
        return False
    return resp.status_code == 200


# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
# Maps task_id -> {"user_id": str, "tab_id": str|None, "session_key": str}
_sessions: Dict[str, Dict[str, Any]] = {}
_sessions_lock = threading.Lock()


def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
    """Return the session record for *task_id*, creating it if absent.

    A falsy *task_id* maps to ``"default"``. A new record gets a random
    ``user_id``, no tab, and a ``session_key`` built from the first 16
    characters of the task id. Note that a record is registered even when
    the caller only wants to check whether a tab exists.
    """
    task_id = task_id or "default"
    with _sessions_lock:
        session = _sessions.get(task_id)
        if session is None:
            session = {
                "user_id": f"hermes_{uuid.uuid4().hex[:10]}",
                "tab_id": None,
                "session_key": f"task_{task_id[:16]}",
            }
            _sessions[task_id] = session
        return session


def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
    """Ensure the session for *task_id* has a tab, creating one at *url*.

    Returns:
        The session record, with ``tab_id`` populated.

    Raises:
        requests.RequestException: The ``POST /tabs`` request failed.
        RuntimeError: The server answered without a ``tabId``. Previously
            this was stored as ``None`` and surfaced later as a confusing
            "No browser session" error after a "successful" navigate.
    """
    session = _get_session(task_id)
    if session["tab_id"]:
        return session
    resp = requests.post(
        f"{get_camofox_url()}/tabs",
        json={
            "userId": session["user_id"],
            "sessionKey": session["session_key"],
            "url": url,
        },
        timeout=_DEFAULT_TIMEOUT,
    )
    resp.raise_for_status()
    tab_id = resp.json().get("tabId")
    if not tab_id:
        raise RuntimeError("Camofox did not return a tabId when creating a tab")
    session["tab_id"] = tab_id
    return session


def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Remove and return the session record for *task_id* (``None`` if absent)."""
    task_id = task_id or "default"
    with _sessions_lock:
        return _sessions.pop(task_id, None)


# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------

def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """POST *body* as JSON to *path* on the Camofox server; return parsed JSON.

    Raises:
        requests.RequestException: On connection failure or a non-2xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.post(url, json=body, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


def _get(path: str, params: Optional[dict] = None,
         timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """GET *path* from the Camofox server and return the parsed JSON body.

    Raises:
        requests.RequestException: On connection failure or a non-2xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


def _get_raw(path: str, params: Optional[dict] = None,
             timeout: int = _DEFAULT_TIMEOUT) -> requests.Response:
    """GET *path* and return the raw response (used for binary payloads).

    Raises:
        requests.RequestException: On connection failure or a non-2xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    return resp


def _delete(path: str, body: Optional[dict] = None,
            timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send DELETE to *path* and return the parsed JSON body.

    An empty response body (for example HTTP 204) yields ``{}`` instead of
    a JSON decode error, so a successful delete is not reported as failed.

    Raises:
        requests.RequestException: On connection failure or a non-2xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.delete(url, json=body, timeout=timeout)
    resp.raise_for_status()
    if not resp.content:
        return {}
    return resp.json()


# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------

_NO_SESSION_ERROR = "No browser session. Call browser_navigate first."


def _failure(message: str) -> str:
    """Serialize a failed tool result carrying *message*."""
    return json.dumps({"success": False, "error": message})


def _parse_snapshot_images(snapshot: str) -> list:
    """Extract ``{"src", "alt"}`` dicts from an accessibility-tree snapshot.

    An image is a line starting with ``- img `` or ``img `` (after
    stripping). Its alt text is the first double-quoted string after
    ``img``; its source is taken from a ``/url:`` entry on the immediately
    following line, if any. Entries with neither alt nor src are dropped.
    """
    import re

    images = []
    lines = snapshot.split("\n")
    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped.startswith(("- img ", "img ")):
            continue
        alt_match = re.search(r'img\s+"([^"]*)"', stripped)
        alt = alt_match.group(1) if alt_match else ""
        src = ""
        if index + 1 < len(lines):
            url_match = re.search(r'/url:\s*(\S+)', lines[index + 1].strip())
            if url_match:
                src = url_match.group(1)
        if alt or src:
            images.append({"src": src, "alt": alt})
    return images


def _vision_answer_text(response: Any) -> Any:
    """Reduce an LLM call result to something ``json.dumps`` can emit.

    Strings pass through. Objects exposing ``choices[0].message.content``
    are reduced to that content. Anything else is returned unchanged.

    NOTE(review): the return type of ``call_llm`` is not visible from this
    module; this handles both a plain string and a chat-completion-style
    object. Confirm against ``agent.auxiliary_client``.
    """
    if isinstance(response, str):
        return response
    choices = getattr(response, "choices", None)
    if choices:
        message = getattr(choices[0], "message", None)
        content = getattr(message, "content", None)
        return "" if content is None else str(content)
    return response


def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
    """Navigate the task's tab to *url*, creating the tab on first use.

    Returns:
        JSON string: ``{"success": True, "url", "title"}`` on success, or
        ``{"success": False, "error"}`` on failure. Never raises.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            # Create the tab with the target URL directly.
            session = _ensure_tab(task_id, url)
            if not session["tab_id"]:
                # Without a tab id every later call would report
                # "No browser session", so do not claim success here.
                return _failure("Navigation failed: Camofox did not return a tab id")
            data = {"ok": True, "url": url}
        else:
            # Navigate the existing tab; page loads get a longer timeout.
            data = _post(
                f"/tabs/{session['tab_id']}/navigate",
                {"userId": session["user_id"], "url": url},
                timeout=60,
            )
        return json.dumps({
            "success": True,
            "url": data.get("url", url),
            "title": data.get("title", ""),
        })
    except requests.HTTPError as e:
        return _failure(f"Navigation failed: {e}")
    except requests.ConnectionError:
        return json.dumps({
            "success": False,
            "error": f"Cannot connect to Camofox at {get_camofox_url()}. "
                     "Is the server running? Start with: npm start (in camofox-browser dir) "
                     "or: docker run -p 9377:9377 jo-inc/camofox-browser",
        })
    except Exception as e:
        return _failure(str(e))


def camofox_snapshot(full: bool = False, task_id: Optional[str] = None,
                     user_task: Optional[str] = None) -> str:
    """Return the accessibility-tree snapshot of the task's tab.

    Args:
        full: Accepted for signature parity with ``browser_snapshot``; it is
            not used by this backend.
        task_id: Task whose session/tab to read.
        user_task: When given and the snapshot exceeds the summarize
            threshold, the snapshot is condensed relative to this task;
            otherwise an oversized snapshot is truncated.

    Returns:
        JSON string with ``snapshot`` and ``element_count``, or an error.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        data = _get(
            f"/tabs/{session['tab_id']}/snapshot",
            params={"userId": session["user_id"]},
        )
        snapshot = data.get("snapshot", "")
        refs_count = data.get("refsCount", 0)

        # Reuse the main browser tool's size handling. Imported lazily
        # because browser_tool itself imports this module.
        from tools.browser_tool import (
            SNAPSHOT_SUMMARIZE_THRESHOLD,
            _extract_relevant_content,
            _truncate_snapshot,
        )

        if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD:
            if user_task:
                snapshot = _extract_relevant_content(snapshot, user_task)
            else:
                snapshot = _truncate_snapshot(snapshot)

        return json.dumps({
            "success": True,
            "snapshot": snapshot,
            "element_count": refs_count,
        })
    except Exception as e:
        return _failure(str(e))


def camofox_click(ref: str, task_id: Optional[str] = None) -> str:
    """Click the element identified by *ref* (leading ``@`` is optional)."""
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        # Our tool convention prefixes refs with "@"; the server wants them bare.
        clean_ref = ref.lstrip("@")
        data = _post(
            f"/tabs/{session['tab_id']}/click",
            {"userId": session["user_id"], "ref": clean_ref},
        )
        return json.dumps({
            "success": True,
            "clicked": clean_ref,
            "url": data.get("url", ""),
        })
    except Exception as e:
        return _failure(str(e))


def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
    """Type *text* into the element identified by *ref*."""
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        clean_ref = ref.lstrip("@")
        _post(
            f"/tabs/{session['tab_id']}/type",
            {"userId": session["user_id"], "ref": clean_ref, "text": text},
        )
        return json.dumps({
            "success": True,
            "typed": text,
            "element": clean_ref,
        })
    except Exception as e:
        return _failure(str(e))


def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
    """Scroll the page; *direction* is forwarded to the server unvalidated."""
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        _post(
            f"/tabs/{session['tab_id']}/scroll",
            {"userId": session["user_id"], "direction": direction},
        )
        return json.dumps({"success": True, "scrolled": direction})
    except Exception as e:
        return _failure(str(e))


def camofox_back(task_id: Optional[str] = None) -> str:
    """Navigate the task's tab back one history entry."""
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        data = _post(
            f"/tabs/{session['tab_id']}/back",
            {"userId": session["user_id"]},
        )
        return json.dumps({"success": True, "url": data.get("url", "")})
    except Exception as e:
        return _failure(str(e))


def camofox_press(key: str, task_id: Optional[str] = None) -> str:
    """Press keyboard key *key* in the task's tab."""
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        _post(
            f"/tabs/{session['tab_id']}/press",
            {"userId": session["user_id"], "key": key},
        )
        return json.dumps({"success": True, "pressed": key})
    except Exception as e:
        return _failure(str(e))


def camofox_close(task_id: Optional[str] = None) -> str:
    """Forget the task's session and ask the server to delete it.

    Always reports success: the local record is dropped first, and a
    failed server-side delete is surfaced only as a ``warning`` field.
    """
    try:
        session = _drop_session(task_id)
        if not session:
            return json.dumps({"success": True, "closed": True})

        _delete(f"/sessions/{session['user_id']}")
        return json.dumps({"success": True, "closed": True})
    except Exception as e:
        return json.dumps({"success": True, "closed": True, "warning": str(e)})


def camofox_get_images(task_id: Optional[str] = None) -> str:
    """List images on the current page.

    Image information is parsed out of the accessibility-tree snapshot
    rather than fetched from a dedicated endpoint.

    Returns:
        JSON string with ``images`` (list of ``{"src", "alt"}``) and
        ``count``, or an error.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        data = _get(
            f"/tabs/{session['tab_id']}/snapshot",
            params={"userId": session["user_id"]},
        )
        images = _parse_snapshot_images(data.get("snapshot", ""))
        return json.dumps({
            "success": True,
            "images": images,
            "count": len(images),
        })
    except Exception as e:
        return _failure(str(e))


def camofox_vision(question: str, annotate: bool = False,
                   task_id: Optional[str] = None) -> str:
    """Screenshot the task's tab and have the vision LLM answer *question*.

    Args:
        question: What to ask about the screenshot.
        annotate: When true, up to 3000 characters of the accessibility
            snapshot are appended to the prompt so the answer can cite
            element refs. A failure to fetch it is logged and ignored.
        task_id: Task whose session/tab to capture.

    Returns:
        JSON string with ``analysis`` and ``screenshot_path``, or an error.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return _failure(_NO_SESSION_ERROR)

        tab_path = f"/tabs/{session['tab_id']}"
        user_params = {"userId": session["user_id"]}

        # Screenshot arrives as binary PNG.
        resp = _get_raw(f"{tab_path}/screenshot", params=user_params)

        # Persist it so the caller can reference the file afterwards.
        from hermes_constants import get_hermes_home
        screenshots_dir = get_hermes_home() / "browser_screenshots"
        screenshots_dir.mkdir(parents=True, exist_ok=True)
        screenshot_path = str(
            screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png"
        )
        with open(screenshot_path, "wb") as f:
            f.write(resp.content)

        img_b64 = base64.b64encode(resp.content).decode("utf-8")

        annotation_context = ""
        if annotate:
            try:
                snap_data = _get(f"{tab_path}/snapshot", params=user_params)
                annotation_context = f"\n\nAccessibility tree (element refs for interaction):\n{snap_data.get('snapshot', '')[:3000]}"
            except Exception as exc:
                # Best effort: the screenshot alone is still useful.
                logger.debug("Camofox snapshot for vision annotation failed: %s", exc)

        from agent.auxiliary_client import call_llm

        vision_prompt = (
            f"Analyze this browser screenshot and answer: {question}"
            f"{annotation_context}"
        )

        try:
            from hermes_cli.config import load_config
            _cfg = load_config()
            _vision_timeout = int(_cfg.get("auxiliary", {}).get("vision", {}).get("timeout", 120))
        except Exception:
            _vision_timeout = 120

        response = call_llm(
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": vision_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{img_b64}",
                        },
                    },
                ],
            }],
            task="vision",
            timeout=_vision_timeout,
        )

        return json.dumps({
            "success": True,
            "analysis": _vision_answer_text(response),
            "screenshot_path": screenshot_path,
        })
    except Exception as e:
        return _failure(str(e))


def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str:
    """Return an empty console report with an explanatory note.

    This backend makes no request for console logs; both arguments are
    accepted for signature parity with ``browser_console`` and ignored.
    """
    return json.dumps({
        "success": True,
        "console_messages": [],
        "js_errors": [],
        "total_messages": 0,
        "total_errors": 0,
        "note": "Console log capture is not available with the Camofox backend. "
                "Use browser_snapshot or browser_vision to inspect page state.",
    })


# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------

def cleanup_all_camofox_sessions() -> None:
    """Close every tracked Camofox session (best effort, never raises).

    The registry is snapshotted and cleared in one critical section, so a
    session created while the server-side deletes are in flight is not
    silently discarded. Individual delete failures are logged and skipped.
    """
    with _sessions_lock:
        sessions = list(_sessions.values())
        _sessions.clear()
    for session in sessions:
        try:
            _delete(f"/sessions/{session['user_id']}")
        except Exception as exc:
            logger.debug(
                "Failed to close Camofox session %s: %s", session["user_id"], exc
            )
@@ -1046,6 +1054,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: "blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]}, }) + # Camofox backend — delegate after safety checks pass + if _is_camofox_mode(): + from tools.browser_camofox import camofox_navigate + return camofox_navigate(url, task_id) + effective_task_id = task_id or "default" # Get session info to check if this is a new session @@ -1135,6 +1148,10 @@ def browser_snapshot( Returns: JSON string with page snapshot """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_snapshot + return camofox_snapshot(full, task_id, user_task) + effective_task_id = task_id or "default" # Build command args based on full flag @@ -1180,6 +1197,10 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str: Returns: JSON string with click result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_click + return camofox_click(ref, task_id) + effective_task_id = task_id or "default" # Ensure ref starts with @ @@ -1212,6 +1233,10 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str: Returns: JSON string with type result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_type + return camofox_type(ref, text, task_id) + effective_task_id = task_id or "default" # Ensure ref starts with @ @@ -1245,6 +1270,10 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: Returns: JSON string with scroll result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_scroll + return camofox_scroll(direction, task_id) + effective_task_id = task_id or "default" # Validate direction @@ -1278,6 +1307,10 @@ def browser_back(task_id: Optional[str] = None) -> str: Returns: JSON string with navigation result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_back + return camofox_back(task_id) + effective_task_id = task_id 
or "default" result = _run_browser_command(effective_task_id, "back", []) @@ -1305,6 +1338,10 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str: Returns: JSON string with key press result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_press + return camofox_press(key, task_id) + effective_task_id = task_id or "default" result = _run_browser_command(effective_task_id, "press", [key]) @@ -1330,6 +1367,10 @@ def browser_close(task_id: Optional[str] = None) -> str: Returns: JSON string with close result """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_close + return camofox_close(task_id) + effective_task_id = task_id or "default" with _cleanup_lock: had_session = effective_task_id in _active_sessions @@ -1358,6 +1399,10 @@ def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str: Returns: JSON string with console messages and JS errors """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_console + return camofox_console(clear, task_id) + effective_task_id = task_id or "default" console_args = ["--clear"] if clear else [] @@ -1452,6 +1497,10 @@ def browser_get_images(task_id: Optional[str] = None) -> str: Returns: JSON string with list of images (src and alt) """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_get_images + return camofox_get_images(task_id) + effective_task_id = task_id or "default" # Use eval to run JavaScript that extracts images @@ -1516,6 +1565,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] Returns: JSON string with vision analysis results and screenshot_path """ + if _is_camofox_mode(): + from tools.browser_camofox import camofox_vision + return camofox_vision(question, annotate, task_id) + import base64 import uuid as uuid_mod from pathlib import Path @@ -1804,6 +1857,10 @@ def check_browser_requirements() -> bool: Returns: True if all requirements are met, False otherwise """ 
+ # Camofox backend — only needs the server URL, no agent-browser CLI + if _is_camofox_mode(): + return True + # The agent-browser CLI is always required try: _find_agent_browser()