mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add website blocklist enforcement for web/browser tools (#1064)
Adds security.website_blocklist config for user-managed domain blocking
across URL-capable tools. Enforced at the tool level (not monkey-patching)
so it's safe and predictable.
- tools/website_policy.py: shared policy loader with domain normalization,
wildcard support (*.tracking.example), shared file imports, and
structured block metadata
- web_extract: pre-fetch URL check + post-redirect recheck
- web_crawl: pre-crawl URL check + per-page URL recheck
- browser_navigate: pre-navigation URL check
- Blocked responses include blocked_by_policy metadata so the agent
can explain exactly what was denied
Config:
security:
website_blocklist:
enabled: true
domains: ["evil.com", "*.tracking.example"]
shared_files: ["team-blocklist.txt"]
Salvaged from PR #1086 by @kshitijk4poor. Browser post-redirect checks
deferred (browser_tool was fully rewritten since the PR branched).
Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
This commit is contained in:
parent
6020db0243
commit
30c417fe70
5 changed files with 758 additions and 2 deletions
193
tools/website_policy.py
Normal file
193
tools/website_policy.py
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
"""Website access policy helpers for URL-capable tools.
|
||||
|
||||
This module loads a user-managed website blocklist from ~/.hermes/config.yaml
|
||||
and optional shared list files. It is intentionally lightweight so web/browser
|
||||
tools can enforce URL policy without pulling in the heavier CLI config stack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import fnmatch
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
_DEFAULT_WEBSITE_BLOCKLIST = {
|
||||
"enabled": True,
|
||||
"domains": [],
|
||||
"shared_files": [],
|
||||
}
|
||||
|
||||
|
||||
def _get_hermes_home() -> Path:
|
||||
return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
|
||||
|
||||
def _get_default_config_path() -> Path:
|
||||
return _get_hermes_home() / "config.yaml"
|
||||
|
||||
|
||||
class WebsitePolicyError(Exception):
|
||||
"""Raised when a website policy file is malformed."""
|
||||
|
||||
|
||||
def _normalize_host(host: str) -> str:
|
||||
return (host or "").strip().lower().rstrip(".")
|
||||
|
||||
|
||||
def _normalize_rule(rule: Any) -> Optional[str]:
|
||||
if not isinstance(rule, str):
|
||||
return None
|
||||
value = rule.strip().lower()
|
||||
if not value or value.startswith("#"):
|
||||
return None
|
||||
if "://" in value:
|
||||
parsed = urlparse(value)
|
||||
value = parsed.netloc or parsed.path
|
||||
value = value.split("/", 1)[0].strip().rstrip(".")
|
||||
if value.startswith("www."):
|
||||
value = value[4:]
|
||||
return value or None
|
||||
|
||||
|
||||
def _iter_blocklist_file_rules(path: Path) -> List[str]:
|
||||
try:
|
||||
raw = path.read_text(encoding="utf-8")
|
||||
except FileNotFoundError as exc:
|
||||
raise WebsitePolicyError(f"Shared blocklist file not found: {path}") from exc
|
||||
except (OSError, UnicodeDecodeError) as exc:
|
||||
raise WebsitePolicyError(f"Failed to read shared blocklist file {path}: {exc}") from exc
|
||||
|
||||
rules: List[str] = []
|
||||
for line in raw.splitlines():
|
||||
stripped = line.strip()
|
||||
if not stripped or stripped.startswith("#"):
|
||||
continue
|
||||
normalized = _normalize_rule(stripped)
|
||||
if normalized:
|
||||
rules.append(normalized)
|
||||
return rules
|
||||
|
||||
|
||||
def _load_policy_config(config_path: Optional[Path] = None) -> Dict[str, Any]:
|
||||
config_path = config_path or _get_default_config_path()
|
||||
if not config_path.exists():
|
||||
return dict(_DEFAULT_WEBSITE_BLOCKLIST)
|
||||
try:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f) or {}
|
||||
except yaml.YAMLError as exc:
|
||||
raise WebsitePolicyError(f"Invalid config YAML at {config_path}: {exc}") from exc
|
||||
except OSError as exc:
|
||||
raise WebsitePolicyError(f"Failed to read config file {config_path}: {exc}") from exc
|
||||
if not isinstance(config, dict):
|
||||
raise WebsitePolicyError("config root must be a mapping")
|
||||
|
||||
security = config.get("security", {})
|
||||
if security is None:
|
||||
security = {}
|
||||
if not isinstance(security, dict):
|
||||
raise WebsitePolicyError("security must be a mapping")
|
||||
|
||||
website_blocklist = security.get("website_blocklist", {})
|
||||
if website_blocklist is None:
|
||||
website_blocklist = {}
|
||||
if not isinstance(website_blocklist, dict):
|
||||
raise WebsitePolicyError("security.website_blocklist must be a mapping")
|
||||
|
||||
policy = dict(_DEFAULT_WEBSITE_BLOCKLIST)
|
||||
policy.update(website_blocklist)
|
||||
return policy
|
||||
|
||||
|
||||
def load_website_blocklist(config_path: Optional[Path] = None) -> Dict[str, Any]:
|
||||
config_path = config_path or _get_default_config_path()
|
||||
policy = _load_policy_config(config_path)
|
||||
|
||||
raw_domains = policy.get("domains", []) or []
|
||||
if not isinstance(raw_domains, list):
|
||||
raise WebsitePolicyError("security.website_blocklist.domains must be a list")
|
||||
|
||||
raw_shared_files = policy.get("shared_files", []) or []
|
||||
if not isinstance(raw_shared_files, list):
|
||||
raise WebsitePolicyError("security.website_blocklist.shared_files must be a list")
|
||||
|
||||
enabled = policy.get("enabled", True)
|
||||
if not isinstance(enabled, bool):
|
||||
raise WebsitePolicyError("security.website_blocklist.enabled must be a boolean")
|
||||
|
||||
rules: List[Dict[str, str]] = []
|
||||
seen: set[Tuple[str, str]] = set()
|
||||
|
||||
for raw_rule in raw_domains:
|
||||
normalized = _normalize_rule(raw_rule)
|
||||
if normalized and ("config", normalized) not in seen:
|
||||
rules.append({"pattern": normalized, "source": "config"})
|
||||
seen.add(("config", normalized))
|
||||
|
||||
for shared_file in raw_shared_files:
|
||||
if not isinstance(shared_file, str) or not shared_file.strip():
|
||||
continue
|
||||
path = Path(shared_file).expanduser()
|
||||
if not path.is_absolute():
|
||||
path = (_get_hermes_home() / path).resolve()
|
||||
for normalized in _iter_blocklist_file_rules(path):
|
||||
key = (str(path), normalized)
|
||||
if key in seen:
|
||||
continue
|
||||
rules.append({"pattern": normalized, "source": str(path)})
|
||||
seen.add(key)
|
||||
|
||||
return {"enabled": enabled, "rules": rules}
|
||||
|
||||
|
||||
def _match_host_against_rule(host: str, pattern: str) -> bool:
|
||||
if not host or not pattern:
|
||||
return False
|
||||
if pattern.startswith("*."):
|
||||
return fnmatch.fnmatch(host, pattern)
|
||||
return host == pattern or host.endswith(f".{pattern}")
|
||||
|
||||
|
||||
def _extract_host_from_urlish(url: str) -> str:
|
||||
parsed = urlparse(url)
|
||||
host = _normalize_host(parsed.hostname or parsed.netloc)
|
||||
if host:
|
||||
return host
|
||||
|
||||
if "://" not in url:
|
||||
schemeless = urlparse(f"//{url}")
|
||||
host = _normalize_host(schemeless.hostname or schemeless.netloc)
|
||||
if host:
|
||||
return host
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def check_website_access(url: str, config_path: Optional[Path] = None) -> Optional[Dict[str, str]]:
|
||||
host = _extract_host_from_urlish(url)
|
||||
if not host:
|
||||
return None
|
||||
|
||||
policy = load_website_blocklist(config_path)
|
||||
if not policy.get("enabled"):
|
||||
return None
|
||||
|
||||
for rule in policy.get("rules", []):
|
||||
pattern = rule.get("pattern", "")
|
||||
if _match_host_against_rule(host, pattern):
|
||||
return {
|
||||
"url": url,
|
||||
"host": host,
|
||||
"rule": pattern,
|
||||
"source": rule.get("source", "config"),
|
||||
"message": (
|
||||
f"Blocked by website policy: '{host}' matched rule '{pattern}'"
|
||||
f" from {rule.get('source', 'config')}"
|
||||
),
|
||||
}
|
||||
return None
|
||||
Loading…
Add table
Add a link
Reference in a new issue