hermes-agent/tests/tools/test_browser_supervisor.py

"""Integration tests for tools.browser_supervisor.

Exercises the supervisor end-to-end against a real local Chrome
(``--remote-debugging-port``).  Skipped when Chrome is not installed
— these are the tests that actually verify the CDP wire protocol
works, since mock-CDP unit tests can only prove the happy paths we
thought to model.

Run manually:
    scripts/run_tests.sh tests/tools/test_browser_supervisor.py

Automated: skipped in CI unless ``HERMES_E2E_BROWSER=1`` is set.
"""

from __future__ import annotations

import asyncio
import base64
import json
import os
import shutil
import subprocess
import tempfile
import time

import pytest


pytestmark = pytest.mark.skipif(
    not shutil.which("google-chrome") and not shutil.which("chromium"),
    reason="Chrome/Chromium not installed",
)


def _find_chrome() -> str:
    for candidate in ("google-chrome", "chromium", "chromium-browser"):
        path = shutil.which(candidate)
        if path:
            return path
    pytest.skip("no Chrome binary found")


@pytest.fixture
def chrome_cdp(worker_id):
    """Start a headless Chrome with --remote-debugging-port, yield its WS URL.

    Uses a unique port per xdist worker to avoid cross-worker collisions.
    Always launches with ``--site-per-process`` so cross-origin iframes
    become real OOPIFs (needed by the iframe interaction tests).
    """
    import socket

    # xdist worker_id is "master" in single-process mode or "gw0".."gwN" otherwise.
    if worker_id == "master":
        port_offset = 0
    else:
        port_offset = int(worker_id.lstrip("gw"))
    port = 9225 + port_offset
    profile = tempfile.mkdtemp(prefix="hermes-supervisor-test-")
    proc = subprocess.Popen(
        [
            _find_chrome(),
            f"--remote-debugging-port={port}",
            f"--user-data-dir={profile}",
            "--no-first-run",
            "--no-default-browser-check",
            "--headless=new",
            "--disable-gpu",
            "--site-per-process",  # force OOPIFs for cross-origin iframes
        ],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    ws_url = None
    deadline = time.monotonic() + 15
    while time.monotonic() < deadline:
        try:
            import urllib.request
            with urllib.request.urlopen(
                f"http://127.0.0.1:{port}/json/version", timeout=1
            ) as r:
                info = json.loads(r.read().decode())
                ws_url = info["webSocketDebuggerUrl"]
                break
        except Exception:
            time.sleep(0.25)
    if ws_url is None:
        proc.terminate()
        proc.wait(timeout=5)
        shutil.rmtree(profile, ignore_errors=True)
        pytest.skip("Chrome didn't expose CDP in time")

    yield ws_url, port

    proc.terminate()
    try:
        proc.wait(timeout=3)
    except Exception:
        proc.kill()
    shutil.rmtree(profile, ignore_errors=True)


def _test_page_url() -> str:
    html = """<!doctype html>
<html><head><title>Supervisor pytest</title></head><body>
<h1>Supervisor pytest</h1>
<iframe id="inner" srcdoc="<body><h2>frame-marker</h2></body>" width="400" height="100"></iframe>
</body></html>"""
    return "data:text/html;base64," + base64.b64encode(html.encode()).decode()


def _fire_on_page(cdp_url: str, expression: str) -> None:
    """Navigate the first page target to a data URL and fire `expression`."""
    import asyncio
    import websockets as _ws_mod

    async def run():
        async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws:
            next_id = [1]

            async def call(method, params=None, session_id=None):
                cid = next_id[0]
                next_id[0] += 1
                p = {"id": cid, "method": method}
                if params:
                    p["params"] = params
                if session_id:
                    p["sessionId"] = session_id
                await ws.send(json.dumps(p))
                async for raw in ws:
                    m = json.loads(raw)
                    if m.get("id") == cid:
                        return m

            targets = (await call("Target.getTargets"))["result"]["targetInfos"]
            page = next(t for t in targets if t.get("type") == "page")
            attach = await call(
                "Target.attachToTarget", {"targetId": page["targetId"], "flatten": True}
            )
            sid = attach["result"]["sessionId"]
            await call("Page.navigate", {"url": _test_page_url()}, session_id=sid)
            await asyncio.sleep(1.5)  # let the page load
            await call(
                "Runtime.evaluate",
                {"expression": expression, "returnByValue": True},
                session_id=sid,
            )

    asyncio.run(run())


@pytest.fixture
def supervisor_registry():
    """Yield the global registry and tear down any supervisors after the test."""
    from tools.browser_supervisor import SUPERVISOR_REGISTRY

    yield SUPERVISOR_REGISTRY
    SUPERVISOR_REGISTRY.stop_all()


def _wait_for_dialog(supervisor, timeout: float = 5.0):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        snap = supervisor.snapshot()
        if snap.pending_dialogs:
            return snap.pending_dialogs
        time.sleep(0.1)
    return ()


def test_supervisor_start_and_snapshot(chrome_cdp, supervisor_registry):
    """Supervisor attaches, exposes an active snapshot with a top frame."""
    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-1", cdp_url=cdp_url)

    # Navigate so the frame tree populates.
    _fire_on_page(cdp_url, "/* no dialog */ void 0")

    # Give a moment for frame events to propagate
    time.sleep(1.0)
    snap = supervisor.snapshot()
    assert snap.active is True
    assert snap.task_id == "pytest-1"
    assert snap.pending_dialogs == ()
    # At minimum a top frame should exist after the navigate.
    assert snap.frame_tree.get("top") is not None


def test_main_frame_alert_detection_and_dismiss(chrome_cdp, supervisor_registry):
    """alert() in the main frame surfaces and can be dismissed via the sync API."""
    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-2", cdp_url=cdp_url)

    _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-MAIN-ALERT'), 50)")
    dialogs = _wait_for_dialog(supervisor)
    assert dialogs, "no dialog detected"
    d = dialogs[0]
    assert d.type == "alert"
    assert "PYTEST-MAIN-ALERT" in d.message

    result = supervisor.respond_to_dialog("dismiss")
    assert result["ok"] is True
    # State cleared after dismiss
    time.sleep(0.3)
    assert supervisor.snapshot().pending_dialogs == ()


def test_iframe_contentwindow_alert(chrome_cdp, supervisor_registry):
    """alert() fired from inside a same-origin iframe surfaces too."""
    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-3", cdp_url=cdp_url)

    _fire_on_page(
        cdp_url,
        "setTimeout(() => document.querySelector('#inner').contentWindow.alert('PYTEST-IFRAME'), 50)",
    )
    dialogs = _wait_for_dialog(supervisor)
    assert dialogs, "no iframe dialog detected"
    assert any("PYTEST-IFRAME" in d.message for d in dialogs)

    result = supervisor.respond_to_dialog("accept")
    assert result["ok"] is True


def test_prompt_dialog_with_response_text(chrome_cdp, supervisor_registry):
    """prompt() gets our prompt_text back inside the page."""
    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-4", cdp_url=cdp_url)

    # Fire a prompt and stash the answer on window
    _fire_on_page(
        cdp_url,
        "setTimeout(() => { window.__promptResult = prompt('give me a token', 'default-x'); }, 50)",
    )
    dialogs = _wait_for_dialog(supervisor)
    assert dialogs
    d = dialogs[0]
    assert d.type == "prompt"
    assert d.default_prompt == "default-x"

    result = supervisor.respond_to_dialog("accept", prompt_text="PYTEST-PROMPT-REPLY")
    assert result["ok"] is True


def test_respond_with_no_pending_dialog_errors_cleanly(chrome_cdp, supervisor_registry):
    """Calling respond_to_dialog when nothing is pending returns a clean error, not an exception."""
    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-5", cdp_url=cdp_url)

    result = supervisor.respond_to_dialog("accept")
    assert result["ok"] is False
    assert "no dialog" in result["error"].lower()


def test_auto_dismiss_policy(chrome_cdp, supervisor_registry):
    """auto_dismiss policy clears dialogs without the agent responding."""
    from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS

    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(
        task_id="pytest-6",
        cdp_url=cdp_url,
        dialog_policy=DIALOG_POLICY_AUTO_DISMISS,
    )

    _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-AUTO-DISMISS'), 50)")
    # Give the supervisor a moment to see + auto-dismiss
    time.sleep(2.0)
    snap = supervisor.snapshot()
    # Nothing pending because auto-dismiss cleared it immediately
    assert snap.pending_dialogs == ()


def test_registry_idempotent_get_or_start(chrome_cdp, supervisor_registry):
    """Calling get_or_start twice with the same (task, url) returns the same instance."""
    cdp_url, _port = chrome_cdp
    a = supervisor_registry.get_or_start(task_id="pytest-idem", cdp_url=cdp_url)
    b = supervisor_registry.get_or_start(task_id="pytest-idem", cdp_url=cdp_url)
    assert a is b


def test_registry_stop(chrome_cdp, supervisor_registry):
    """stop() tears down the supervisor and snapshot reports inactive."""
    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-stop", cdp_url=cdp_url)
    assert supervisor.snapshot().active is True
    supervisor_registry.stop("pytest-stop")
    # Post-stop snapshot reports inactive; supervisor obj may still exist
    assert supervisor.snapshot().active is False


def test_browser_dialog_tool_no_supervisor():
    """browser_dialog returns a clear error when no supervisor is attached."""
    from tools.browser_dialog_tool import browser_dialog

    r = json.loads(browser_dialog(action="accept", task_id="nonexistent-task"))
    assert r["success"] is False
    assert "No CDP supervisor" in r["error"]


def test_browser_dialog_invalid_action(chrome_cdp, supervisor_registry):
    """browser_dialog rejects actions that aren't accept/dismiss."""
    from tools.browser_dialog_tool import browser_dialog

    cdp_url, _port = chrome_cdp
    supervisor_registry.get_or_start(task_id="pytest-bad-action", cdp_url=cdp_url)

    r = json.loads(browser_dialog(action="eat", task_id="pytest-bad-action"))
    assert r["success"] is False
    assert "accept" in r["error"] and "dismiss" in r["error"]


def test_recent_dialogs_ring_buffer(chrome_cdp, supervisor_registry):
    """Closed dialogs show up in recent_dialogs with a closed_by tag."""
    from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS

    cdp_url, _port = chrome_cdp
    sv = supervisor_registry.get_or_start(
        task_id="pytest-recent",
        cdp_url=cdp_url,
        dialog_policy=DIALOG_POLICY_AUTO_DISMISS,
    )

    _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-RECENT'), 50)")
    # Wait for auto-dismiss to cycle the dialog through
    deadline = time.time() + 5
    while time.time() < deadline:
        recent = sv.snapshot().recent_dialogs
        if recent and any("PYTEST-RECENT" in r.message for r in recent):
            break
        time.sleep(0.1)

    recent = sv.snapshot().recent_dialogs
    assert recent, "recent_dialogs should contain the auto-dismissed dialog"
    match = next((r for r in recent if "PYTEST-RECENT" in r.message), None)
    assert match is not None
    assert match.type == "alert"
    assert match.closed_by == "auto_policy"
    assert match.closed_at >= match.opened_at


def test_browser_dialog_tool_end_to_end(chrome_cdp, supervisor_registry):
    """Full agent-path check: fire an alert, call the tool handler directly."""
    from tools.browser_dialog_tool import browser_dialog

    cdp_url, _port = chrome_cdp
    supervisor = supervisor_registry.get_or_start(task_id="pytest-tool", cdp_url=cdp_url)

    _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-TOOL-END2END'), 50)")
    assert _wait_for_dialog(supervisor), "no dialog detected via wait_for_dialog"

    r = json.loads(browser_dialog(action="dismiss", task_id="pytest-tool"))
    assert r["success"] is True
    assert r["action"] == "dismiss"
    assert "PYTEST-TOOL-END2END" in r["dialog"]["message"]


def test_browser_cdp_frame_id_routes_via_supervisor(chrome_cdp, supervisor_registry, monkeypatch):
    """browser_cdp(frame_id=...) routes Runtime.evaluate through supervisor.

    Mocks the supervisor with a known frame and verifies browser_cdp sends
    the call via the supervisor's loop rather than opening a stateless
    WebSocket. This is the path that makes cross-origin iframe eval work
    on Browserbase.
    """
    cdp_url, _port = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="frame-id-test", cdp_url=cdp_url)
    assert sv.snapshot().active

    # Inject a fake OOPIF frame pointing at the SUPERVISOR's own page session
    # so we can verify routing. We fake is_oopif=True so the code path
    # treats it as an OOPIF child.
    import tools.browser_supervisor as _bs
    with sv._state_lock:
        fake_frame_id = "FAKE-FRAME-001"
        sv._frames[fake_frame_id] = _bs.FrameInfo(
            frame_id=fake_frame_id,
            url="fake://",
            origin="",
            parent_frame_id=None,
            is_oopif=True,
            cdp_session_id=sv._page_session_id,  # route at page scope
        )

    # Route the tool through the supervisor. Should succeed and return
    # something that clearly came from CDP.
    from tools.browser_cdp_tool import browser_cdp
    result = browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1 + 1", "returnByValue": True},
        frame_id=fake_frame_id,
        task_id="frame-id-test",
    )
    r = json.loads(result)
    assert r.get("success") is True, f"expected success, got: {r}"
    assert r.get("frame_id") == fake_frame_id
    assert r.get("session_id") == sv._page_session_id
    value = r.get("result", {}).get("result", {}).get("value")
    assert value == 2, f"expected 2, got {value!r}"


def test_browser_cdp_frame_id_real_oopif_smoke_documented():
    """Document that real-OOPIF E2E was manually verified — see PR #14540.

    A pytest version of this hits an asyncio version-quirk in the venv
    (3.11) that doesn't show up in standalone scripts (3.13 + system
    websockets). The mechanism IS verified end-to-end by two separate
    smoke scripts in /tmp/dialog-iframe-test/:

      * smoke_local_oopif.py   — local Chrome + 2 http servers on
        different hostnames + --site-per-process. Outer page on
        localhost:18905, iframe src=http://127.0.0.1:18906. Calls
        browser_cdp(method='Runtime.evaluate', frame_id=<OOPIF>) and
        verifies inner page's title comes back from the OOPIF session.
        PASSED on 2026-04-23: iframe document.title = 'INNER-FRAME-XYZ'

      * smoke_bb_iframe_agent_path.py — Browserbase + real cross-origin
        iframe (src=https://example.com/). Same browser_cdp(frame_id=)
        path. PASSED on 2026-04-23: iframe document.title =
        'Example Domain'

    The test_browser_cdp_frame_id_routes_via_supervisor pytest covers
    the supervisor-routing plumbing with a fake injected OOPIF.
    """
    pytest.skip(
        "Real-OOPIF E2E verified manually with smoke_local_oopif.py and "
        "smoke_bb_iframe_agent_path.py — pytest version hits an asyncio "
        "version quirk between venv (3.11) and standalone (3.13). "
        "Smoke logs preserved in /tmp/dialog-iframe-test/."
    )


def test_browser_cdp_frame_id_missing_supervisor():
    """browser_cdp(frame_id=...) errors cleanly when no supervisor is attached."""
    from tools.browser_cdp_tool import browser_cdp
    result = browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1"},
        frame_id="any-frame-id",
        task_id="no-such-task",
    )
    r = json.loads(result)
    assert r.get("success") is not True
    assert "supervisor" in (r.get("error") or "").lower()


def test_browser_cdp_frame_id_not_in_frame_tree(chrome_cdp, supervisor_registry):
    """browser_cdp(frame_id=...) errors when the frame_id isn't known."""
    cdp_url, _port = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="bad-frame-test", cdp_url=cdp_url)
    assert sv.snapshot().active

    from tools.browser_cdp_tool import browser_cdp
    result = browser_cdp(
        method="Runtime.evaluate",
        params={"expression": "1"},
        frame_id="nonexistent-frame",
        task_id="bad-frame-test",
    )
    r = json.loads(result)
    assert r.get("success") is not True
    assert "not found" in (r.get("error") or "").lower()


def test_bridge_captures_prompt_and_returns_reply_text(chrome_cdp, supervisor_registry):
    """End-to-end: agent's prompt_text round-trips INTO the page's JS.

    Proves the bridge isn't just catching dialogs — it's properly round-
    tripping our reply back into the page via Fetch.fulfillRequest, so
    ``prompt()`` actually returns the agent-supplied string to the page.
    """
    import base64 as _b64

    cdp_url, _port = chrome_cdp
    sv = supervisor_registry.get_or_start(task_id="pytest-bridge-prompt", cdp_url=cdp_url)

    # Page fires prompt and stashes the return value on window.
    html = """<!doctype html><html><body><script>
      window.__ret = null;
      setTimeout(() => { window.__ret = prompt('PROMPT-MSG', 'default'); }, 50);
    </script></body></html>"""
    url = "data:text/html;base64," + _b64.b64encode(html.encode()).decode()

    import asyncio as _asyncio
    import websockets as _ws_mod

    async def nav_and_read():
        async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws:
            nid = [1]
            pending: dict = {}

            async def reader_fn():
                try:
                    async for raw in ws:
                        m = json.loads(raw)
                        if "id" in m:
                            fut = pending.pop(m["id"], None)
                            if fut and not fut.done():
                                fut.set_result(m)
                except Exception:
                    pass

            rd = _asyncio.create_task(reader_fn())

            async def call(method, params=None, sid=None):
                c = nid[0]; nid[0] += 1
                p = {"id": c, "method": method}
                if params: p["params"] = params
                if sid: p["sessionId"] = sid
                fut = _asyncio.get_event_loop().create_future()
                pending[c] = fut
                await ws.send(json.dumps(p))
                return await _asyncio.wait_for(fut, timeout=20)

            try:
                t = (await call("Target.getTargets"))["result"]["targetInfos"]
                pg = next(x for x in t if x.get("type") == "page")
                a = await call("Target.attachToTarget", {"targetId": pg["targetId"], "flatten": True})
                sid = a["result"]["sessionId"]

                # Fire navigate but don't await — prompt() blocks the page
                nav_id = nid[0]; nid[0] += 1
                nav_fut = _asyncio.get_event_loop().create_future()
                pending[nav_id] = nav_fut
                await ws.send(json.dumps({"id": nav_id, "method": "Page.navigate", "params": {"url": url}, "sessionId": sid}))

                # Wait for supervisor to see the prompt
                deadline = time.monotonic() + 10
                dialog = None
                while time.monotonic() < deadline:
                    snap = sv.snapshot()
                    if snap.pending_dialogs:
                        dialog = snap.pending_dialogs[0]
                        break
                    await _asyncio.sleep(0.05)
                assert dialog is not None, "no dialog captured"
                assert dialog.bridge_request_id is not None, "expected bridge path"
                assert dialog.type == "prompt"

                # Agent responds
                resp = sv.respond_to_dialog("accept", prompt_text="AGENT-SUPPLIED-REPLY")
                assert resp["ok"] is True

                # Wait for nav to complete + read back
                try:
                    await _asyncio.wait_for(nav_fut, timeout=10)
                except Exception:
                    pass
                await _asyncio.sleep(0.5)
                r = await call(
                    "Runtime.evaluate",
                    {"expression": "window.__ret", "returnByValue": True},
                    sid=sid,
                )
                return r.get("result", {}).get("result", {}).get("value")
            finally:
                rd.cancel()
                try: await rd
                except BaseException: pass

    value = asyncio.run(nav_and_read())
    assert value == "AGENT-SUPPLIED-REPLY", f"expected AGENT-SUPPLIED-REPLY, got {value!r}"