fix: harden website blocklist — default off, TTL cache, fail-open, guarded imports

- Default enabled: false (zero overhead when not configured)
- Fast path: cached disabled state skips all work immediately
- TTL cache (30s) for parsed policy — avoids re-reading config.yaml
  on every URL check
- Missing shared files warn + skip instead of crashing all web tools
- Lazy yaml import — missing PyYAML doesn't break browser toolset
- Guarded browser_tool import — fail-open lambda fallback
- check_website_access never raises for default path (fail-open with
  warning log); only raises with explicit config_path (test mode)
- Simplified enforcement code in web_tools/browser_tool — no more
  try/except wrappers since errors are handled internally
This commit is contained in:
teknium1 2026-03-17 03:11:21 -07:00
parent d132a3dfbb
commit 6fc76ef954
5 changed files with 136 additions and 53 deletions

View file

@ -65,7 +65,11 @@ import requests
from typing import Dict, Any, Optional, List
from pathlib import Path
from agent.auxiliary_client import call_llm
from tools.website_policy import check_website_access
try:
from tools.website_policy import check_website_access
except Exception:
check_website_access = lambda url: None # noqa: E731 — fail-open if policy module unavailable
from tools.browser_providers.base import CloudBrowserProvider
from tools.browser_providers.browserbase import BrowserbaseProvider
from tools.browser_providers.browser_use import BrowserUseProvider
@ -903,12 +907,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
JSON string with navigation result (includes stealth features info on first nav)
"""
# Website policy check — block before navigating
try:
blocked = check_website_access(url)
except Exception as _policy_err:
return json.dumps({"success": False, "error": f"Website policy error: {_policy_err}"})
blocked = check_website_access(url)
if blocked:
logger.info("Blocked browser_navigate to %s by rule %s", blocked["host"], blocked["rule"])
return json.dumps({
"success": False,
"error": blocked["message"],