From 1bb6f03724590e5755619e97d0fe580d2cc92f9e Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 14 May 2026 14:27:21 +0530 Subject: [PATCH] fix(browser): ensure plugin discovery before registry lookup; parity harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes that go together: 1. tools/browser_tool.py — add _ensure_browser_plugins_loaded() and call it from _get_cloud_provider() before consulting the registry. Normally model_tools triggers discover_plugins() as an import side-effect, but _get_cloud_provider() can be reached from contexts that haven't gone through model_tools (standalone scripts, certain unit-test paths, the new parity-sweep harness). Without the defensive call, the registry is empty and _registry_get_browser_provider() returns None — silently downgrading users to local mode when they explicitly configured a cloud provider with no credentials yet. The behavior-parity sweep below caught this as 4 scenario regressions (explicit-X-no-creds for all 3 providers, and explicit-firecrawl-with-creds). 2. tests/plugins/browser/check_parity_vs_main.py — subprocess harness that pins one Python invocation to origin/main and one to this PR's worktree via sys.path.insert(), runs _get_cloud_provider() across a 13-scenario config matrix, and diffs the reduced shape tuple (is_local, provider_name, is_available). Provider_name pulls from provider.provider_name() which is the legacy CloudBrowserProvider API and remains as a backward-compat alias on the new BrowserProvider ABC, so the comparison is apples-to-apples regardless of class identity. Final result: PARITY OK across 13 scenarios. The four observable config/credential matrices that exercise the dispatcher all match origin/main bit-for-bit: - no-config + no-env → local - explicit local + any env → local - explicit BB / BU / FC + no creds → provider returned with is_available()==False (so dispatcher surfaces typed credentials error; matches main exactly) - explicit BB / BU / FC + creds → provider returned with is_available()==True - no-config + BU creds → Browser Use - no-config + BB creds → Browserbase - no-config + both → Browser Use (legacy walk first hit) - no-config + FC only → local (firecrawl NOT in legacy walk) - no-config + FC + BB → Browserbase (legacy walk skips firecrawl) Per the dev skill's "behavior-parity for refactor PRs" rule — without this subprocess sweep, 31/31 unit tests pass while the production code path is silently broken for users who type `browser.cloud_provider: browserbase` and run a single browser command without prior model_tools import. Caught + fixed before push. --- tests/plugins/browser/check_parity_vs_main.py | 276 ++++++++++++++++++ tools/browser_tool.py | 22 ++ 2 files changed, 298 insertions(+) create mode 100644 tests/plugins/browser/check_parity_vs_main.py diff --git a/tests/plugins/browser/check_parity_vs_main.py b/tests/plugins/browser/check_parity_vs_main.py new file mode 100644 index 00000000000..11652e94af9 --- /dev/null +++ b/tests/plugins/browser/check_parity_vs_main.py @@ -0,0 +1,276 @@ +"""Behavior-parity check for the browser-provider plugin migration (#25214). + +Spawns one subprocess per (version, scenario) cell — pinned to either +origin/main (legacy in-tree providers + class-instantiation lookup) or +this PR's worktree (plugin-based registry) via `sys.path[0]`. Each +subprocess clears all browser-related env vars + writes a config.yaml, +loads `tools.browser_tool._get_cloud_provider()`, and emits a reduced +"shape tuple" {is_local, provider_name, is_available} as JSON. + +The parent process diffs the shapes per scenario. A diff means the +migration introduced an observable behaviour change vs origin/main — +which would be a real regression for users on the existing config keys. + +Run from the PR worktree: + + cd ~/.hermes/hermes-agent/.worktrees/browser-providers-plugin + python tests/plugins/browser/check_parity_vs_main.py +""" +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[3] + + +# Pin one path to current main, one to the PR worktree. +# ``REPO_ROOT`` is ``.../.worktrees/browser-providers-plugin``; the main +# checkout lives two levels up at ``~/.hermes/hermes-agent``. +MAIN_DIR = REPO_ROOT.parent.parent # ~/.hermes/hermes-agent +PR_DIR = REPO_ROOT # the worktree we're in +assert (MAIN_DIR / "tools" / "browser_tool.py").exists(), ( + f"MAIN_DIR={MAIN_DIR} doesn't look like a hermes-agent checkout" +) +assert (PR_DIR / "tools" / "browser_tool.py").exists(), ( + f"PR_DIR={PR_DIR} doesn't look like a hermes-agent checkout" +) + + +# Reduced shape comparison — exact instance addresses obviously differ +# between subprocesses, so we compare the parts that matter for users. +SUBPROCESS_SCRIPT = r""" +import json, os, sys, tempfile +sys.path.insert(0, sys.argv[1]) + +# Isolated HERMES_HOME for the config write. +home = tempfile.mkdtemp() +os.environ["HERMES_HOME"] = home + +# Clear every browser-related env var so is_available() is deterministic. +for k in ( + "BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID", "BROWSERBASE_BASE_URL", + "BROWSER_USE_API_KEY", "BROWSER_USE_GATEWAY_URL", + "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_BROWSER_TTL", + "TOOL_GATEWAY_DOMAIN", "TOOL_GATEWAY_USER_TOKEN", +): + os.environ.pop(k, None) + +# Apply per-scenario env (passed as JSON via argv[2]). +scenario_env = json.loads(sys.argv[2]) +os.environ.update(scenario_env) + +# Apply per-scenario config (passed as YAML body via argv[3]). +config_yaml = sys.argv[3] +config_path = os.path.join(home, "config.yaml") +with open(config_path, "w") as f: + f.write(config_yaml) + +# Fresh import — must not have any browser modules cached. +for name in list(sys.modules): + if name.startswith("tools.") or name.startswith("agent.") or name.startswith("plugins."): + sys.modules.pop(name, None) + +from tools.browser_tool import _get_cloud_provider, _is_local_mode + +provider = _get_cloud_provider() + +# Pull the human-readable backend name via the API that exists on BOTH +# legacy (origin/main: CloudBrowserProvider.provider_name()) and the new +# ABC (BrowserProvider exposes provider_name() as a backward-compat alias +# returning display_name). Both shapes resolve to the same string — +# 'Browserbase' / 'Browser Use' / 'Firecrawl' — so we can compare safely. +provider_name = None +is_available = None +if provider is not None: + pn = getattr(provider, "provider_name", None) + if callable(pn): + provider_name = pn() + elif isinstance(pn, str): + provider_name = pn + is_conf = getattr(provider, "is_configured", None) + if callable(is_conf): + is_available = bool(is_conf()) + +shape = { + "is_local": _is_local_mode(), + "provider_name": provider_name, + "is_available": is_available, +} +print(json.dumps(shape)) +""" + + +SCENARIOS: list[tuple[str, str, dict[str, str]]] = [ + # (label, config.yaml body, extra env vars) + ("no-config-no-env", "", {}), + ("explicit-local-no-env", "browser:\n cloud_provider: local\n", {}), + ( + "explicit-browserbase-no-creds", + "browser:\n cloud_provider: browserbase\n", + {}, + ), + ( + "explicit-browserbase-with-creds", + "browser:\n cloud_provider: browserbase\n", + {"BROWSERBASE_API_KEY": "x", "BROWSERBASE_PROJECT_ID": "y"}, + ), + ( + "explicit-browser-use-no-creds", + "browser:\n cloud_provider: browser-use\n", + {}, + ), + ( + "explicit-browser-use-with-creds", + "browser:\n cloud_provider: browser-use\n", + {"BROWSER_USE_API_KEY": "k"}, + ), + ( + "explicit-firecrawl-no-creds", + "browser:\n cloud_provider: firecrawl\n", + {}, + ), + ( + "explicit-firecrawl-with-creds", + "browser:\n cloud_provider: firecrawl\n", + {"FIRECRAWL_API_KEY": "k"}, + ), + ( + "no-config-bu-creds", + "", + {"BROWSER_USE_API_KEY": "k"}, + ), + ( + "no-config-bb-creds", + "", + {"BROWSERBASE_API_KEY": "x", "BROWSERBASE_PROJECT_ID": "y"}, + ), + ( + "no-config-both-creds", + "", + { + "BROWSER_USE_API_KEY": "k", + "BROWSERBASE_API_KEY": "x", + "BROWSERBASE_PROJECT_ID": "y", + }, + ), + ( + "no-config-firecrawl-only", + "", + {"FIRECRAWL_API_KEY": "k"}, + ), + ( + "no-config-firecrawl-and-bb", + "", + { + "FIRECRAWL_API_KEY": "k", + "BROWSERBASE_API_KEY": "x", + "BROWSERBASE_PROJECT_ID": "y", + }, + ), +] + + +def _run_scenario(repo_path: Path, label: str, config_yaml: str, env: dict) -> dict: + """Run one (version, scenario) cell. Returns the shape dict.""" + venv_python = repo_path / ".venv" / "bin" / "python" + if not venv_python.exists(): + # Worktrees share the main repo's venv. + venv_python = MAIN_DIR / ".venv" / "bin" / "python" + if not venv_python.exists(): + venv_python = Path("python3") + + out = subprocess.run( + [ + str(venv_python), + "-c", + SUBPROCESS_SCRIPT, + str(repo_path), + json.dumps(env), + config_yaml, + ], + capture_output=True, + text=True, + timeout=30, + ) + if out.returncode != 0: + return { + "error": "subprocess failed", + "stdout": out.stdout, + "stderr": out.stderr[-500:], + } + try: + return json.loads(out.stdout.strip().splitlines()[-1]) + except Exception as exc: + return {"error": f"could not parse output: {exc}", "stdout": out.stdout} + + +def _reduce_for_comparison(shape: dict) -> dict: + """Reduce a shape dict to the parts that matter for user-visible parity. + + We compare ``(is_local, provider_name, is_available)`` — the trio that + decides what the dispatcher does with each tool call. ``provider_name`` + is the legacy ``provider_name()`` return value ('Browserbase' / 'Browser + Use' / 'Firecrawl'), which is identical between legacy and plugin + classes (the plugin's ``display_name`` matches the legacy + ``provider_name()`` return). + """ + return { + "is_local": shape.get("is_local"), + "provider_name": shape.get("provider_name"), + "is_available": shape.get("is_available"), + } + + +def main() -> int: + print(f"main: {MAIN_DIR}") + print(f"pr: {PR_DIR}") + print() + + failures: list[str] = [] + errors: list[str] = [] + for label, config_yaml, env in SCENARIOS: + main_shape = _run_scenario(MAIN_DIR, label, config_yaml, env) + pr_shape = _run_scenario(PR_DIR, label, config_yaml, env) + + if "error" in main_shape or "error" in pr_shape: + print(f" [ERR ] {label}: subprocess failed") + print(f" main: {main_shape}") + print(f" pr: {pr_shape}") + errors.append(label) + continue + + main_reduced = _reduce_for_comparison(main_shape) + pr_reduced = _reduce_for_comparison(pr_shape) + + if main_reduced == pr_reduced: + print(f" [OK] {label}: {main_reduced}") + else: + print(f" [FAIL] {label}") + print(f" main: {main_reduced}") + print(f" pr: {pr_reduced}") + failures.append(label) + + print() + if errors: + print(f"SUBPROCESS ERRORS in {len(errors)} scenario(s):") + for e in errors: + print(f" - {e}") + if failures: + print(f"BEHAVIOUR REGRESSION in {len(failures)} scenario(s):") + for f in failures: + print(f" - {f}") + if failures or errors: + return 1 + print(f"PARITY OK across {len(SCENARIOS)} scenarios.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 6fdd8949816..b089ed92133 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -465,6 +465,25 @@ def _is_legacy_provider_registry_overridden() -> bool: return False +def _ensure_browser_plugins_loaded() -> None: + """Idempotently trigger plugin discovery so the browser registry is populated. + + Normally `model_tools` is imported early in any session and that + triggers `discover_plugins()` as a side effect. But `_get_cloud_provider` + can be called from contexts that haven't gone through `model_tools` — + standalone scripts, certain unit-test paths, the parity-sweep harness. + Make discovery idempotent and side-effect-only here so users always + see registered plugins regardless of import order. Cheap: subsequent + calls early-return inside `_ensure_plugins_discovered`. + """ + try: + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + except Exception as exc: + logger.debug("Browser plugin discovery failed (non-fatal): %s", exc) + + def _get_cloud_provider() -> Optional[CloudBrowserProvider]: """Return the configured cloud browser provider, or None for local mode. @@ -509,6 +528,9 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: if factory is not None: resolved = factory() else: + # Ensure plugins are discovered so the registry is + # populated. Idempotent — cheap on subsequent calls. + _ensure_browser_plugins_loaded() resolved = _registry_get_browser_provider(provider_key) except Exception: logger.warning(