"""Camofox browser backend — local anti-detection browser via REST API. Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1 to our browser tool interface: accessibility snapshots with element refs, click/type/scroll by ref, screenshots, etc. When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser tools route through this module instead of the ``agent-browser`` CLI. Setup:: # Option 1: npm git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser npm install && npm start # downloads Camoufox (~300MB) on first run # Option 2: Docker docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``. For Docker Camofox, optionally set ``CAMOFOX_REWRITE_LOOPBACK_URLS=true`` so page URLs like ``http://127.0.0.1:3000`` are opened inside the container as ``http://host.docker.internal:3000``. """ from __future__ import annotations import base64 import json import logging import os import threading import uuid from typing import Any, Dict, Optional from urllib.parse import SplitResult, urlsplit, urlunsplit import requests from hermes_cli.config import cfg_get, load_config from tools.browser_camofox_state import get_camofox_identity from tools.registry import tool_error logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- _DEFAULT_TIMEOUT = 30 # seconds per HTTP request _SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit _vnc_url: Optional[str] = None # cached from /health response _vnc_url_checked = False # only probe once per process def get_camofox_url() -> str: """Return the configured Camofox server URL, or empty string.""" return os.getenv("CAMOFOX_URL", "").rstrip("/") def is_camofox_mode() -> bool: """True when Camofox backend is configured and no CDP override is active. When the user has explicitly connected to a live Chromium-family browser via ``/browser connect`` (which sets ``BROWSER_CDP_URL``), the CDP connection takes priority over Camofox so the browser tools operate on the real browser instead of being silently routed to the Camofox backend. """ if os.getenv("BROWSER_CDP_URL", "").strip(): return False return bool(get_camofox_url()) def check_camofox_available() -> bool: """Verify the Camofox server is reachable.""" global _vnc_url, _vnc_url_checked url = get_camofox_url() if not url: return False try: resp = requests.get(f"{url}/health", timeout=5) if resp.status_code == 200 and not _vnc_url_checked: try: data = resp.json() vnc_port = data.get("vncPort") if isinstance(vnc_port, int) and 1 <= vnc_port <= 65535: from urllib.parse import urlparse parsed = urlparse(url) host = parsed.hostname or "localhost" _vnc_url = f"http://{host}:{vnc_port}" except (ValueError, KeyError): pass _vnc_url_checked = True return resp.status_code == 200 except Exception: return False def get_vnc_url() -> Optional[str]: """Return the VNC URL if the Camofox server exposes one, or None.""" if not _vnc_url_checked: check_camofox_available() return _vnc_url def _get_camofox_config() -> Dict[str, Any]: """Return the ``browser.camofox`` config block, or an empty dict.""" try: camofox_cfg = load_config().get("browser", {}).get("camofox", {}) except Exception as exc: logger.warning("camofox config check failed, defaulting to disabled: %s", exc) return {} return camofox_cfg if isinstance(camofox_cfg, dict) else {} def _managed_persistence_enabled() -> bool: """Return whether Hermes-managed persistence is enabled for Camofox. When enabled, sessions use a stable profile-scoped userId so the Camofox server can map it to a persistent browser profile directory. When disabled (default), each session gets a random userId (ephemeral). Controlled by ``browser.camofox.managed_persistence`` in config.yaml. """ return bool(_get_camofox_config().get("managed_persistence")) def _camofox_identity_override(task_id: Optional[str], camofox_cfg: Dict[str, Any]) -> Optional[Dict[str, str]]: """Return an externally configured Camofox identity, if one is set. Integrations that own the visible Camofox browser can set a shared user ID so Hermes operates in the same browser profile instead of creating a separate private session. """ user_id = os.getenv("CAMOFOX_USER_ID", "").strip() or str(camofox_cfg.get("user_id") or "").strip() if not user_id: return None session_key = ( os.getenv("CAMOFOX_SESSION_KEY", "").strip() or str(camofox_cfg.get("session_key") or "").strip() or f"task_{(task_id or 'default')[:16]}" ) return {"user_id": user_id, "session_key": session_key} def _env_flag(name: str) -> Optional[bool]: raw = os.getenv(name, "").strip().lower() if not raw: return None if raw in {"1", "true", "yes", "on"}: return True if raw in {"0", "false", "no", "off"}: return False logger.debug("Ignoring invalid boolean env %s=%r", name, raw) return None def _adopt_existing_tab_enabled(camofox_cfg: Dict[str, Any]) -> bool: """Return whether Hermes should recover an existing Camofox tab ID.""" env_value = _env_flag("CAMOFOX_ADOPT_EXISTING_TAB") if env_value is not None: return env_value return bool(camofox_cfg.get("adopt_existing_tab")) def _loopback_rewrite_enabled(camofox_cfg: Dict[str, Any]) -> bool: """Return whether loopback navigation URLs should be rewritten for Docker. ``CAMOFOX_URL`` itself often points at a host-published Docker port such as ``http://127.0.0.1:9377``. That is correct for Hermes talking to the Camofox control API, but a page URL like ``http://127.0.0.1:3000`` is opened by the browser *inside* the Docker container. In that context loopback points at the container, not the host running the web app. The rewrite is opt-in because non-Docker Camofox installs run the browser on the host, where loopback URLs are already correct. """ env_value = _env_flag("CAMOFOX_REWRITE_LOOPBACK_URLS") if env_value is not None: return env_value return bool(camofox_cfg.get("rewrite_loopback_urls")) def _loopback_rewrite_host(camofox_cfg: Dict[str, Any]) -> str: """Return the host alias used when rewriting loopback page URLs.""" return ( os.getenv("CAMOFOX_LOOPBACK_HOST_ALIAS", "").strip() or str(camofox_cfg.get("loopback_host_alias") or "").strip() or "host.docker.internal" ) def _is_loopback_hostname(hostname: Optional[str]) -> bool: """Return True for localhost/127.0.0.0/8/::1-style hostnames.""" if not hostname: return False host = hostname.strip().strip("[]").lower() if host in {"localhost", "localhost.localdomain"}: return True try: import ipaddress return ipaddress.ip_address(host).is_loopback except ValueError: return False def _rewrite_loopback_url_for_camofox(url: str) -> tuple[str, Optional[Dict[str, str]]]: """Rewrite loopback page URLs for Docker-hosted Camofox, if configured. Returns ``(rewritten_url, metadata)``. ``metadata`` is present only when a rewrite happened so the tool result can disclose the change to the model. """ camofox_cfg = _get_camofox_config() if not _loopback_rewrite_enabled(camofox_cfg): return url, None try: parsed = urlsplit(url) except ValueError: return url, None if parsed.scheme not in {"http", "https"} or not _is_loopback_hostname(parsed.hostname): return url, None alias = _loopback_rewrite_host(camofox_cfg) if not alias: return url, None userinfo = "" if parsed.username: userinfo = parsed.username if parsed.password: userinfo += f":{parsed.password}" userinfo += "@" host_part = f"[{alias}]" if ":" in alias and not alias.startswith("[") else alias port_part = f":{parsed.port}" if parsed.port else "" rewritten = urlunsplit( SplitResult(parsed.scheme, f"{userinfo}{host_part}{port_part}", parsed.path, parsed.query, parsed.fragment) ) return rewritten, { "from": parsed.hostname or "", "to": alias, "original_url": url, "rewritten_url": rewritten, } # --------------------------------------------------------------------------- # Session management # --------------------------------------------------------------------------- # Maps task_id -> {"user_id": str, "tab_id": str|None} _sessions: Dict[str, Dict[str, Any]] = {} _sessions_lock = threading.Lock() def _adopt_existing_tab(session: Dict[str, Any]) -> Dict[str, Any]: """Attach process-local state to an already-open managed Camofox tab. Some integrations own the visible Camofox tab outside Hermes. Gateway restarts can leave this module's in-memory session cache empty even though Camofox still has that tab, so rehydrate tab_id before creating a new tab. """ if session.get("tab_id") or not session.get("adopt_existing_tab"): return session if not get_camofox_url(): return session try: tabs = _get("/tabs", params={"userId": session["user_id"]}, timeout=5).get("tabs", []) except Exception as exc: logger.debug("Camofox tab adoption failed for %s: %s", session.get("user_id"), exc) return session if not isinstance(tabs, list) or not tabs: return session session_key = session.get("session_key") matching_tabs = [ tab for tab in tabs if isinstance(tab, dict) and tab.get("listItemId") == session_key ] candidates = matching_tabs or [tab for tab in tabs if isinstance(tab, dict)] latest = candidates[-1] if candidates else None tab_id = latest.get("tabId") if isinstance(latest, dict) else None if isinstance(tab_id, str) and tab_id: session["tab_id"] = tab_id logger.debug("Adopted existing Camofox tab %s for %s", tab_id, session.get("user_id")) return session def _get_session(task_id: Optional[str]) -> Dict[str, Any]: """Get or create a camofox session for the given task. When managed persistence is enabled, uses a deterministic userId derived from the Hermes profile so the Camofox server can map it to the same persistent browser profile across restarts. """ task_id = task_id or "default" with _sessions_lock: if task_id in _sessions: return _adopt_existing_tab(_sessions[task_id]) camofox_cfg = _get_camofox_config() identity_override = _camofox_identity_override(task_id, camofox_cfg) if identity_override: session = { "user_id": identity_override["user_id"], "tab_id": None, "session_key": identity_override["session_key"], "managed": True, "adopt_existing_tab": _adopt_existing_tab_enabled(camofox_cfg), } elif bool(camofox_cfg.get("managed_persistence")): identity = get_camofox_identity(task_id) session = { "user_id": identity["user_id"], "tab_id": None, "session_key": identity["session_key"], "managed": True, "adopt_existing_tab": _adopt_existing_tab_enabled(camofox_cfg), } else: session = { "user_id": f"hermes_{uuid.uuid4().hex[:10]}", "tab_id": None, "session_key": f"task_{task_id[:16]}", "managed": False, "adopt_existing_tab": False, } _sessions[task_id] = session return _adopt_existing_tab(session) def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]: """Ensure a tab exists for the session, creating one if needed.""" session = _get_session(task_id) if session["tab_id"]: return session base = get_camofox_url() resp = requests.post( f"{base}/tabs", json={ "userId": session["user_id"], "sessionKey": session["session_key"], "url": url, }, timeout=_DEFAULT_TIMEOUT, ) resp.raise_for_status() data = resp.json() session["tab_id"] = data.get("tabId") return session def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]: """Remove and return session info.""" task_id = task_id or "default" with _sessions_lock: return _sessions.pop(task_id, None) def camofox_soft_cleanup(task_id: Optional[str] = None) -> bool: """Release the in-memory session without destroying the server-side context. When managed persistence is enabled the browser profile (and its cookies) must survive across agent tasks. This helper drops only the local tracking entry and returns ``True``. When managed persistence is *not* enabled it does nothing and returns ``False`` so the caller can fall back to :func:`camofox_close`. """ camofox_cfg = _get_camofox_config() if bool(camofox_cfg.get("managed_persistence")) or _camofox_identity_override(task_id, camofox_cfg): _drop_session(task_id) logger.debug("Camofox soft cleanup for task %s (managed persistence)", task_id) return True return False # --------------------------------------------------------------------------- # HTTP helpers # --------------------------------------------------------------------------- def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict: """POST JSON to camofox and return parsed response.""" url = f"{get_camofox_url()}{path}" resp = requests.post(url, json=body, timeout=timeout) resp.raise_for_status() return resp.json() def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict: """GET from camofox and return parsed response.""" url = f"{get_camofox_url()}{path}" resp = requests.get(url, params=params, timeout=timeout) resp.raise_for_status() return resp.json() def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response: """GET from camofox and return raw response (for binary data).""" url = f"{get_camofox_url()}{path}" resp = requests.get(url, params=params, timeout=timeout) resp.raise_for_status() return resp def _delete(path: str, body: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict: """DELETE to camofox and return parsed response.""" url = f"{get_camofox_url()}{path}" resp = requests.delete(url, json=body, timeout=timeout) resp.raise_for_status() return resp.json() # --------------------------------------------------------------------------- # Tool implementations # --------------------------------------------------------------------------- def camofox_navigate(url: str, task_id: Optional[str] = None) -> str: """Navigate to a URL via Camofox.""" try: browser_url, rewrite_info = _rewrite_loopback_url_for_camofox(url) session = _get_session(task_id) if not session["tab_id"]: # Create tab with the target URL directly session = _ensure_tab(task_id, browser_url) data = {"ok": True, "url": browser_url} else: # Navigate existing tab data = _post( f"/tabs/{session['tab_id']}/navigate", {"userId": session["user_id"], "url": browser_url}, timeout=60, ) result = { "success": True, "url": data.get("url", browser_url), "title": data.get("title", ""), } if rewrite_info: result["requested_url"] = url result["url_rewrite"] = rewrite_info result["warning"] = ( "Rewrote loopback URL for Docker-hosted Camofox: " f"{rewrite_info['from']} -> {rewrite_info['to']}" ) vnc = get_vnc_url() if vnc: result["vnc_url"] = vnc result["vnc_hint"] = ( "Browser is visible via VNC. " "Share this link with the user so they can watch the browser live." ) # Auto-take a compact snapshot so the model can act immediately try: snap_data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) snapshot_text = snap_data.get("snapshot", "") from tools.browser_tool import ( SNAPSHOT_SUMMARIZE_THRESHOLD, _truncate_snapshot, ) if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD: snapshot_text = _truncate_snapshot(snapshot_text) result["snapshot"] = snapshot_text result["element_count"] = snap_data.get("refsCount", 0) except Exception: pass # Navigation succeeded; snapshot is a bonus return json.dumps(result) except requests.HTTPError as e: return tool_error(f"Navigation failed: {e}", success=False) except requests.ConnectionError: return json.dumps({ "success": False, "error": f"Cannot connect to Camofox at {get_camofox_url()}. " "Is the server running? Start with: npm start (in camofox-browser dir) " "or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser", }) except Exception as e: return tool_error(str(e), success=False) def camofox_snapshot(full: bool = False, task_id: Optional[str] = None, user_task: Optional[str] = None) -> str: """Get accessibility tree snapshot from Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) snapshot = data.get("snapshot", "") refs_count = data.get("refsCount", 0) # Apply same summarization logic as the main browser tool from tools.browser_tool import ( SNAPSHOT_SUMMARIZE_THRESHOLD, _extract_relevant_content, _truncate_snapshot, ) if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD: if user_task: snapshot = _extract_relevant_content(snapshot, user_task) else: snapshot = _truncate_snapshot(snapshot) return json.dumps({ "success": True, "snapshot": snapshot, "element_count": refs_count, }) except Exception as e: return tool_error(str(e), success=False) def camofox_click(ref: str, task_id: Optional[str] = None) -> str: """Click an element by ref via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) # Strip @ prefix if present (our tool convention) clean_ref = ref.lstrip("@") data = _post( f"/tabs/{session['tab_id']}/click", {"userId": session["user_id"], "ref": clean_ref}, ) return json.dumps({ "success": True, "clicked": clean_ref, "url": data.get("url", ""), }) except Exception as e: return tool_error(str(e), success=False) def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: """Type text into an element by ref via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) clean_ref = ref.lstrip("@") _post( f"/tabs/{session['tab_id']}/type", {"userId": session["user_id"], "ref": clean_ref, "text": text}, ) return json.dumps({ "success": True, "typed": text, "element": clean_ref, }) except Exception as e: return tool_error(str(e), success=False) def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: """Scroll the page via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) _post( f"/tabs/{session['tab_id']}/scroll", {"userId": session["user_id"], "direction": direction}, ) return json.dumps({"success": True, "scrolled": direction}) except Exception as e: return tool_error(str(e), success=False) def camofox_back(task_id: Optional[str] = None) -> str: """Navigate back via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) data = _post( f"/tabs/{session['tab_id']}/back", {"userId": session["user_id"]}, ) return json.dumps({"success": True, "url": data.get("url", "")}) except Exception as e: return tool_error(str(e), success=False) def camofox_press(key: str, task_id: Optional[str] = None) -> str: """Press a keyboard key via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) _post( f"/tabs/{session['tab_id']}/press", {"userId": session["user_id"], "key": key}, ) return json.dumps({"success": True, "pressed": key}) except Exception as e: return tool_error(str(e), success=False) def camofox_close(task_id: Optional[str] = None) -> str: """Close the browser session via Camofox.""" try: session = _drop_session(task_id) if not session: return json.dumps({"success": True, "closed": True}) _delete( f"/sessions/{session['user_id']}", ) return json.dumps({"success": True, "closed": True}) except Exception as e: return json.dumps({"success": True, "closed": True, "warning": str(e)}) def camofox_get_images(task_id: Optional[str] = None) -> str: """Get images on the current page via Camofox. Extracts image information from the accessibility tree snapshot, since Camofox does not expose a dedicated /images endpoint. """ try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) import re data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) snapshot = data.get("snapshot", "") # Parse img elements from the accessibility tree. # Format: img "alt text" or img "alt text" [eN] # URLs appear on /url: lines following img entries images = [] lines = snapshot.split("\n") for i, line in enumerate(lines): stripped = line.strip() if stripped.startswith(("- img ", "img ")): alt_match = re.search(r'img\s+"([^"]*)"', stripped) alt = alt_match.group(1) if alt_match else "" # Look for URL on the next line src = "" if i + 1 < len(lines): url_match = re.search(r'/url:\s*(\S+)', lines[i + 1].strip()) if url_match: src = url_match.group(1) if alt or src: images.append({"src": src, "alt": alt}) return json.dumps({ "success": True, "images": images, "count": len(images), }) except Exception as e: return tool_error(str(e), success=False) def camofox_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> str: """Take a screenshot and analyze it with vision AI via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return tool_error("No browser session. Call browser_navigate first.", success=False) # Get screenshot as binary PNG resp = _get_raw( f"/tabs/{session['tab_id']}/screenshot", params={"userId": session["user_id"]}, ) # Save screenshot to cache from hermes_constants import get_hermes_home screenshots_dir = get_hermes_home() / "browser_screenshots" screenshots_dir.mkdir(parents=True, exist_ok=True) screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png") with open(screenshot_path, "wb") as f: f.write(resp.content) # Encode for vision LLM img_b64 = base64.b64encode(resp.content).decode("utf-8") # Also get annotated snapshot if requested annotation_context = "" if annotate: try: snap_data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) annotation_context = f"\n\nAccessibility tree (element refs for interaction):\n{snap_data.get('snapshot', '')[:3000]}" except Exception: pass # Redact secrets from annotation context before sending to vision LLM. # The screenshot image itself cannot be redacted, but at least the # text-based accessibility tree snippet won't leak secret values. from agent.redact import redact_sensitive_text annotation_context = redact_sensitive_text(annotation_context) # Send to vision LLM from agent.auxiliary_client import call_llm vision_prompt = ( f"Analyze this browser screenshot and answer: {question}" f"{annotation_context}" ) try: _cfg = load_config() _vision_cfg = cfg_get(_cfg, "auxiliary", "vision", default={}) _vision_timeout = float(_vision_cfg.get("timeout", 120)) _vision_temperature = float(_vision_cfg.get("temperature", 0.1)) except Exception: _vision_timeout = 120.0 _vision_temperature = 0.1 response = call_llm( messages=[{ "role": "user", "content": [ {"type": "text", "text": vision_prompt}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{img_b64}", }, }, ], }], task="vision", temperature=_vision_temperature, timeout=_vision_timeout, ) analysis = (response.choices[0].message.content or "").strip() if response.choices else "" # Redact secrets the vision LLM may have read from the screenshot. from agent.redact import redact_sensitive_text analysis = redact_sensitive_text(analysis) return json.dumps({ "success": True, "analysis": analysis, "screenshot_path": screenshot_path, }) except Exception as e: return tool_error(str(e), success=False) def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str: """Get console output — limited support in Camofox. Camofox does not expose browser console logs via its REST API. Returns an empty result with a note. """ return json.dumps({ "success": True, "console_messages": [], "js_errors": [], "total_messages": 0, "total_errors": 0, "note": "Console log capture is not available with the Camofox backend. " "Use browser_snapshot or browser_vision to inspect page state.", })