Merge branch 'main' into rewbs/tool-use-charge-to-subscription

This commit is contained in:
Robin Fernandes 2026-03-31 08:48:54 +09:00
commit 6e4598ce1e
269 changed files with 33678 additions and 2273 deletions

View file

@ -1,262 +1,25 @@
#!/usr/bin/env python3
"""
Tools Package
"""Tools package namespace.
This package contains all the specific tool implementations for the Hermes Agent.
Each module provides specialized functionality for different capabilities:
Keep package import side effects minimal. Importing ``tools`` should not
eagerly import the full tool stack, because several subsystems load tools while
``hermes_cli.config`` is still initializing.
- web_tools: Web search, content extraction, and crawling
- terminal_tool: Command execution (local/docker/modal/daytona/ssh/singularity backends)
- vision_tools: Image analysis and understanding
- mixture_of_agents_tool: Multi-model collaborative reasoning
- image_generation_tool: Text-to-image generation with upscaling
Callers should import concrete submodules directly, for example:
The tools are imported into model_tools.py which provides a unified interface
for the AI agent to access all capabilities.
import tools.web_tools
from tools import browser_tool
Python will resolve those submodules via the package path without needing them
to be re-exported here.
"""
# Export all tools for easy importing
from .web_tools import (
web_search_tool,
web_extract_tool,
web_crawl_tool,
check_firecrawl_api_key
)
# Primary terminal tool (local/docker/singularity/modal/daytona/ssh)
from .terminal_tool import (
terminal_tool,
check_terminal_requirements,
cleanup_vm,
cleanup_all_environments,
get_active_environments_info,
register_task_env_overrides,
clear_task_env_overrides,
TERMINAL_TOOL_DESCRIPTION
)
from .vision_tools import (
vision_analyze_tool,
check_vision_requirements
)
from .mixture_of_agents_tool import (
mixture_of_agents_tool,
check_moa_requirements
)
from .image_generation_tool import (
image_generate_tool,
check_image_generation_requirements
)
from .skills_tool import (
skills_list,
skill_view,
check_skills_requirements,
SKILLS_TOOL_DESCRIPTION
)
from .skill_manager_tool import (
skill_manage,
check_skill_manage_requirements,
SKILL_MANAGE_SCHEMA
)
# Browser automation tools (agent-browser + Browserbase)
from .browser_tool import (
browser_navigate,
browser_snapshot,
browser_click,
browser_type,
browser_scroll,
browser_back,
browser_press,
browser_close,
browser_get_images,
browser_vision,
cleanup_browser,
cleanup_all_browsers,
get_active_browser_sessions,
check_browser_requirements,
BROWSER_TOOL_SCHEMAS
)
# Cronjob management tools (CLI-only, hermes-cli toolset)
from .cronjob_tools import (
cronjob,
schedule_cronjob,
list_cronjobs,
remove_cronjob,
check_cronjob_requirements,
get_cronjob_tool_definitions,
CRONJOB_SCHEMA,
)
# RL Training tools (Tinker-Atropos)
from .rl_training_tool import (
rl_list_environments,
rl_select_environment,
rl_get_current_config,
rl_edit_config,
rl_start_training,
rl_check_status,
rl_stop_training,
rl_get_results,
rl_list_runs,
rl_test_inference,
check_rl_api_keys,
get_missing_keys,
)
# File manipulation tools (read, write, patch, search)
from .file_tools import (
read_file_tool,
write_file_tool,
patch_tool,
search_tool,
get_file_tools,
clear_file_ops_cache,
)
# Text-to-speech tools (Edge TTS / ElevenLabs / OpenAI)
from .tts_tool import (
text_to_speech_tool,
check_tts_requirements,
)
# Planning & task management tool
from .todo_tool import (
todo_tool,
check_todo_requirements,
TODO_SCHEMA,
TodoStore,
)
# Clarifying questions tool (interactive Q&A with the user)
from .clarify_tool import (
clarify_tool,
check_clarify_requirements,
CLARIFY_SCHEMA,
)
# Code execution sandbox (programmatic tool calling)
from .code_execution_tool import (
execute_code,
check_sandbox_requirements,
EXECUTE_CODE_SCHEMA,
)
# Subagent delegation (spawn child agents with isolated context)
from .delegate_tool import (
delegate_task,
check_delegate_requirements,
DELEGATE_TASK_SCHEMA,
)
# File tools have no external requirements - they use the terminal backend
def check_file_requirements():
"""File tools only require terminal backend to be available."""
"""File tools only require terminal backend availability."""
from .terminal_tool import check_terminal_requirements
return check_terminal_requirements()
__all__ = [
# Web tools
'web_search_tool',
'web_extract_tool',
'web_crawl_tool',
'check_firecrawl_api_key',
# Terminal tools
'terminal_tool',
'check_terminal_requirements',
'cleanup_vm',
'cleanup_all_environments',
'get_active_environments_info',
'register_task_env_overrides',
'clear_task_env_overrides',
'TERMINAL_TOOL_DESCRIPTION',
# Vision tools
'vision_analyze_tool',
'check_vision_requirements',
# MoA tools
'mixture_of_agents_tool',
'check_moa_requirements',
# Image generation tools
'image_generate_tool',
'check_image_generation_requirements',
# Skills tools
'skills_list',
'skill_view',
'check_skills_requirements',
'SKILLS_TOOL_DESCRIPTION',
# Skill management
'skill_manage',
'check_skill_manage_requirements',
'SKILL_MANAGE_SCHEMA',
# Browser automation tools
'browser_navigate',
'browser_snapshot',
'browser_click',
'browser_type',
'browser_scroll',
'browser_back',
'browser_press',
'browser_close',
'browser_get_images',
'browser_vision',
'cleanup_browser',
'cleanup_all_browsers',
'get_active_browser_sessions',
'check_browser_requirements',
'BROWSER_TOOL_SCHEMAS',
# Cronjob management tools (CLI-only)
'cronjob',
'schedule_cronjob',
'list_cronjobs',
'remove_cronjob',
'check_cronjob_requirements',
'get_cronjob_tool_definitions',
'CRONJOB_SCHEMA',
# RL Training tools
'rl_list_environments',
'rl_select_environment',
'rl_get_current_config',
'rl_edit_config',
'rl_start_training',
'rl_check_status',
'rl_stop_training',
'rl_get_results',
'rl_list_runs',
'rl_test_inference',
'check_rl_api_keys',
'get_missing_keys',
# File manipulation tools
'read_file_tool',
'write_file_tool',
'patch_tool',
'search_tool',
'get_file_tools',
'clear_file_ops_cache',
'check_file_requirements',
# Text-to-speech tools
'text_to_speech_tool',
'check_tts_requirements',
# Planning & task management tool
'todo_tool',
'check_todo_requirements',
'TODO_SCHEMA',
'TodoStore',
# Clarifying questions tool
'clarify_tool',
'check_clarify_requirements',
'CLARIFY_SCHEMA',
# Code execution sandbox
'execute_code',
'check_sandbox_requirements',
'EXECUTE_CODE_SCHEMA',
# Subagent delegation
'delegate_task',
'check_delegate_requirements',
'DELEGATE_TASK_SCHEMA',
]
__all__ = ["check_file_requirements"]

View file

@ -18,6 +18,21 @@ from typing import Optional
logger = logging.getLogger(__name__)
# Sensitive write targets that should trigger approval even when referenced
# via shell expansions like $HOME or $HERMES_HOME.
_SSH_SENSITIVE_PATH = r'(?:~|\$home|\$\{home\})/\.ssh(?:/|$)'
_HERMES_ENV_PATH = (
r'(?:~\/\.hermes/|'
r'(?:\$home|\$\{home\})/\.hermes/|'
r'(?:\$hermes_home|\$\{hermes_home\})/)'
r'\.env\b'
)
_SENSITIVE_WRITE_TARGET = (
r'(?:/etc/|/dev/sd|'
rf'{_SSH_SENSITIVE_PATH}|'
rf'{_HERMES_ENV_PATH})'
)
# =========================================================================
# Dangerous command patterns
# =========================================================================
@ -46,13 +61,16 @@ DANGEROUS_PATTERNS = [
(r'\b(python[23]?|perl|ruby|node)\s+-[ec]\s+', "script execution via -e/-c flag"),
(r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"),
(r'\b(bash|sh|zsh|ksh)\s+<\s*<?\s*\(\s*(curl|wget)\b', "execute remote script via process substitution"),
(r'\btee\b.*(/etc/|/dev/sd|\.ssh/|\.hermes/\.env)', "overwrite system file via tee"),
(rf'\btee\b.*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via tee"),
(rf'>>?\s*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via redirection"),
(r'\bxargs\s+.*\brm\b', "xargs with rm"),
(r'\bfind\b.*-exec\s+(/\S*/)?rm\b', "find -exec rm"),
(r'\bfind\b.*-delete\b', "find -delete"),
# Gateway protection: never start gateway outside systemd management
(r'gateway\s+run\b.*(&\s*$|&\s*;|\bdisown\b|\bsetsid\b)', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
(r'\bnohup\b.*gateway\s+run\b', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
# Self-termination protection: prevent agent from killing its own process
(r'\b(pkill|killall)\b.*\b(hermes|gateway|cli\.py)\b', "kill hermes/gateway process (self-termination)"),
]
@ -456,6 +474,33 @@ def check_dangerous_command(command: str, env_type: str,
# Combined pre-exec guard (tirith + dangerous command detection)
# =========================================================================
def _format_tirith_description(tirith_result: dict) -> str:
"""Build a human-readable description from tirith findings.
Includes severity, title, and description for each finding so users
can make an informed approval decision.
"""
findings = tirith_result.get("findings") or []
if not findings:
summary = tirith_result.get("summary") or "security issue detected"
return f"Security scan: {summary}"
parts = []
for f in findings:
severity = f.get("severity", "")
title = f.get("title", "")
desc = f.get("description", "")
if title and desc:
parts.append(f"[{severity}] {title}: {desc}" if severity else f"{title}: {desc}")
elif title:
parts.append(f"[{severity}] {title}" if severity else title)
if not parts:
summary = tirith_result.get("summary") or "security issue detected"
return f"Security scan: {summary}"
return "Security scan — " + "; ".join(parts)
def check_all_command_guards(command: str, env_type: str,
approval_callback=None) -> dict:
"""Run all pre-exec security checks and return a single approval decision.
@ -499,24 +544,20 @@ def check_all_command_guards(command: str, env_type: str,
# --- Phase 2: Decide ---
# If tirith blocks, block immediately (no approval possible)
if tirith_result["action"] == "block":
summary = tirith_result.get("summary") or "security issue detected"
return {
"approved": False,
"message": f"BLOCKED: Command blocked by security scan ({summary}). Do NOT retry.",
}
# Collect warnings that need approval
warnings = [] # list of (pattern_key, description, is_tirith)
session_key = os.getenv("HERMES_SESSION_KEY", "default")
if tirith_result["action"] == "warn":
# Tirith block/warn → approvable warning with rich findings.
# Previously, tirith "block" was a hard block with no approval prompt.
# Now both block and warn go through the approval flow so users can
# inspect the explanation and approve if they understand the risk.
if tirith_result["action"] in ("block", "warn"):
findings = tirith_result.get("findings") or []
rule_id = findings[0].get("rule_id", "unknown") if findings else "unknown"
tirith_key = f"tirith:{rule_id}"
tirith_desc = f"Security scan: {tirith_result.get('summary') or 'security warning detected'}"
tirith_desc = _format_tirith_description(tirith_result)
if not is_approved(session_key, tirith_key):
warnings.append((tirith_key, tirith_desc, True))

View file

@ -1027,7 +1027,7 @@ def _extract_relevant_content(
if model:
call_kwargs["model"] = model
response = call_llm(**call_kwargs)
return response.choices[0].message.content
return (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
except Exception:
return _truncate_snapshot(snapshot_text)
@ -1557,8 +1557,8 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
effective_task_id = task_id or "default"
# Save screenshot to persistent location so it can be shared with users
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
screenshots_dir = hermes_home / "browser_screenshots"
from hermes_constants import get_hermes_dir
screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
try:
@ -1657,10 +1657,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
call_kwargs["model"] = vision_model
response = call_llm(**call_kwargs)
analysis = response.choices[0].message.content
analysis = (response.choices[0].message.content or "").strip()
response_data = {
"success": True,
"analysis": analysis,
"analysis": analysis or "Vision analysis returned no content.",
"screenshot_path": str(screenshot_path),
}
# Include annotation data if annotated screenshot was taken

163
tools/credential_files.py Normal file
View file

@ -0,0 +1,163 @@
"""Credential file passthrough registry for remote terminal backends.
Skills that declare ``required_credential_files`` in their frontmatter need
those files available inside sandboxed execution environments (Modal, Docker).
By default remote backends create bare containers with no host files.
This module provides a session-scoped registry so skill-declared credential
files (and user-configured overrides) are mounted into remote sandboxes.
Two sources feed the registry:
1. **Skill declarations** when a skill is loaded via ``skill_view``, its
``required_credential_files`` entries are registered here if the files
exist on the host.
2. **User config** ``terminal.credential_files`` in config.yaml lets users
explicitly list additional files to mount.
Remote backends (``tools/environments/modal.py``, ``docker.py``) call
:func:`get_credential_file_mounts` at sandbox creation time.
Each registered entry is a dict::
{
"host_path": "/home/user/.hermes/google_token.json",
"container_path": "/root/.hermes/google_token.json",
}
"""
from __future__ import annotations
import logging
import os
from pathlib import Path
from typing import Dict, List
logger = logging.getLogger(__name__)
# Session-scoped list of credential files to mount.
# Key: container_path (deduplicated), Value: host_path
_registered_files: Dict[str, str] = {}
# Cache for config-based file list (loaded once per process).
_config_files: List[Dict[str, str]] | None = None
def _resolve_hermes_home() -> Path:
return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
def register_credential_file(
relative_path: str,
container_base: str = "/root/.hermes",
) -> bool:
"""Register a credential file for mounting into remote sandboxes.
*relative_path* is relative to ``HERMES_HOME`` (e.g. ``google_token.json``).
Returns True if the file exists on the host and was registered.
"""
hermes_home = _resolve_hermes_home()
host_path = hermes_home / relative_path
if not host_path.is_file():
logger.debug("credential_files: skipping %s (not found)", host_path)
return False
container_path = f"{container_base.rstrip('/')}/{relative_path}"
_registered_files[container_path] = str(host_path)
logger.debug("credential_files: registered %s -> %s", host_path, container_path)
return True
def register_credential_files(
entries: list,
container_base: str = "/root/.hermes",
) -> List[str]:
"""Register multiple credential files from skill frontmatter entries.
Each entry is either a string (relative path) or a dict with a ``path``
key. Returns the list of relative paths that were NOT found on the host
(i.e. missing files).
"""
missing = []
for entry in entries:
if isinstance(entry, str):
rel_path = entry.strip()
elif isinstance(entry, dict):
rel_path = (entry.get("path") or "").strip()
else:
continue
if not rel_path:
continue
if not register_credential_file(rel_path, container_base):
missing.append(rel_path)
return missing
def _load_config_files() -> List[Dict[str, str]]:
"""Load ``terminal.credential_files`` from config.yaml (cached)."""
global _config_files
if _config_files is not None:
return _config_files
result: List[Dict[str, str]] = []
try:
hermes_home = _resolve_hermes_home()
config_path = hermes_home / "config.yaml"
if config_path.exists():
import yaml
with open(config_path) as f:
cfg = yaml.safe_load(f) or {}
cred_files = cfg.get("terminal", {}).get("credential_files")
if isinstance(cred_files, list):
for item in cred_files:
if isinstance(item, str) and item.strip():
host_path = hermes_home / item.strip()
if host_path.is_file():
container_path = f"/root/.hermes/{item.strip()}"
result.append({
"host_path": str(host_path),
"container_path": container_path,
})
except Exception as e:
logger.debug("Could not read terminal.credential_files from config: %s", e)
_config_files = result
return _config_files
def get_credential_file_mounts() -> List[Dict[str, str]]:
"""Return all credential files that should be mounted into remote sandboxes.
Each item has ``host_path`` and ``container_path`` keys.
Combines skill-registered files and user config.
"""
mounts: Dict[str, str] = {}
# Skill-registered files
for container_path, host_path in _registered_files.items():
# Re-check existence (file may have been deleted since registration)
if Path(host_path).is_file():
mounts[container_path] = host_path
# Config-based files
for entry in _load_config_files():
cp = entry["container_path"]
if cp not in mounts and Path(entry["host_path"]).is_file():
mounts[cp] = entry["host_path"]
return [
{"host_path": hp, "container_path": cp}
for cp, hp in mounts.items()
]
def clear_credential_files() -> None:
"""Reset the skill-scoped registry (e.g. on session reset)."""
_registered_files.clear()
def reset_config_cache() -> None:
"""Force re-read of config on next access (for testing)."""
global _config_files
_config_files = None

View file

@ -372,7 +372,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
},
"deliver": {
"type": "string",
"description": "Delivery target: origin, local, telegram, discord, slack, whatsapp, signal, matrix, mattermost, homeassistant, dingtalk, email, sms, or platform:chat_id or platform:chat_id:thread_id for Telegram topics. Examples: 'origin', 'local', 'telegram', 'telegram:-1001234567890:17585', 'discord:#engineering'"
"description": "Delivery target: origin, local, telegram, discord, slack, whatsapp, signal, matrix, mattermost, homeassistant, dingtalk, feishu, wecom, email, sms, or platform:chat_id or platform:chat_id:thread_id for Telegram topics. Examples: 'origin', 'local', 'telegram', 'telegram:-1001234567890:17585', 'discord:#engineering'"
},
"model": {
"type": "string",

View file

@ -289,7 +289,10 @@ def _run_single_child(
if interrupted:
status = "interrupted"
elif completed and summary:
elif summary:
# A summary means the subagent produced usable output.
# exit_reason ("completed" vs "max_iterations") already
# tells the parent *how* the task ended.
status = "completed"
else:
status = "failed"

View file

@ -5,7 +5,7 @@ import os
import subprocess
from pathlib import Path
from hermes_cli.config import get_hermes_home
from hermes_constants import get_hermes_home
def get_sandbox_dir() -> Path:

View file

@ -312,6 +312,24 @@ class DockerEnvironment(BaseEnvironment):
elif workspace_explicitly_mounted:
logger.debug("Skipping docker cwd mount: /workspace already mounted by user config")
# Mount credential files (OAuth tokens, etc.) declared by skills.
# Read-only so the container can authenticate but not modify host creds.
try:
from tools.credential_files import get_credential_file_mounts
for mount_entry in get_credential_file_mounts():
volume_args.extend([
"-v",
f"{mount_entry['host_path']}:{mount_entry['container_path']}:ro",
])
logger.info(
"Docker: mounting credential %s -> %s",
mount_entry["host_path"],
mount_entry["container_path"],
)
except Exception as e:
logger.debug("Docker: could not load credential file mounts: %s", e)
logger.info(f"Docker volume_args: {volume_args}")
all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
logger.info(f"Docker run_args: {all_run_args}")
@ -406,8 +424,17 @@ class DockerEnvironment(BaseEnvironment):
if effective_stdin is not None:
cmd.append("-i")
cmd.extend(["-w", work_dir])
hermes_env = _load_hermes_env_vars() if self._forward_env else {}
for key in self._forward_env:
# Combine explicit docker_forward_env with skill-declared env_passthrough
# vars so skills that declare required_environment_variables (e.g. Notion)
# have their keys forwarded into the container automatically.
forward_keys = set(self._forward_env)
try:
from tools.env_passthrough import get_all_passthrough
forward_keys |= get_all_passthrough()
except Exception:
pass
hermes_env = _load_hermes_env_vars() if forward_keys else {}
for key in sorted(forward_keys):
value = os.getenv(key)
if value is None:
value = hermes_env.get(key)

View file

@ -391,12 +391,17 @@ class LocalEnvironment(PersistentShellMixin, BaseEnvironment):
effective_stdin = stdin_data
user_shell = _find_bash()
# Newline-separated wrapper (not `cmd; __hermes_rc=...` on one line).
# A trailing `; __hermes_rc` glued to `<<EOF` / a closing `EOF` line breaks
# heredoc parsing: the delimiter must be alone on its line, otherwise the
# rest of this script becomes heredoc body and leaks into stdout (e.g. gh
# issue/PR flows that use here-documents for bodies).
fenced_cmd = (
f"printf '{_OUTPUT_FENCE}';"
f" {exec_command};"
f" __hermes_rc=$?;"
f" printf '{_OUTPUT_FENCE}';"
f" exit $__hermes_rc"
f"printf '{_OUTPUT_FENCE}'\n"
f"{exec_command}\n"
f"__hermes_rc=$?\n"
f"printf '{_OUTPUT_FENCE}'\n"
f"exit $__hermes_rc\n"
)
run_env = _make_run_env(self.env)

View file

@ -1,19 +1,19 @@
"""Modal cloud execution environment using SWE-ReX directly.
"""Modal cloud execution environment using the native Modal SDK directly.
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
is snapshotted on cleanup and restored on next creation, so installed packages,
project files, and config changes survive across sessions.
Uses ``Sandbox.create()`` + ``Sandbox.exec()`` instead of the older runtime
wrapper, while preserving Hermes' persistent snapshot behavior across sessions.
"""
import asyncio
import json
import logging
import shlex
import threading
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_cli.config import get_hermes_home
from hermes_constants import get_hermes_home
from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted
@ -44,7 +44,7 @@ def _direct_snapshot_key(task_id: str) -> str:
def _get_snapshot_restore_candidate(task_id: str) -> tuple[str | None, bool]:
"""Return a snapshot id for direct Modal restore and whether the key is legacy."""
"""Return a snapshot id and whether it came from the legacy key format."""
snapshots = _load_snapshots()
namespaced_key = _direct_snapshot_key(task_id)
@ -104,7 +104,7 @@ def _resolve_modal_image(image_spec: Any) -> Any:
class _AsyncWorker:
"""Background thread with its own event loop for async-safe swe-rex calls."""
"""Background thread with its own event loop for async-safe Modal calls."""
def __init__(self):
self._loop: Optional[asyncio.AbstractEventLoop] = None
@ -136,12 +136,7 @@ class _AsyncWorker:
class ModalEnvironment(BaseEnvironment):
"""Modal cloud execution via SWE-ReX.
Uses swe-rex's ModalDeployment directly for sandbox management.
Adds sudo -S support, configurable resources (CPU, memory, disk),
and optional filesystem persistence via Modal's snapshot API.
"""
"""Modal cloud execution via native Modal sandboxes."""
def __init__(
self,
@ -157,39 +152,73 @@ class ModalEnvironment(BaseEnvironment):
self._persistent = persistent_filesystem
self._task_id = task_id
self._base_image = image
self._deployment = None
self._sandbox = None
self._app = None
self._worker = _AsyncWorker()
self._synced_creds: Dict[str, tuple] = {}
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
restored_snapshot_id = None
restored_from_legacy_key = False
if self._persistent:
restored_snapshot_id, restored_from_legacy_key = _get_snapshot_restore_candidate(self._task_id)
restored_snapshot_id, restored_from_legacy_key = _get_snapshot_restore_candidate(
self._task_id
)
if restored_snapshot_id:
logger.info("Modal: restoring from snapshot %s", restored_snapshot_id[:20])
import modal as _modal
cred_mounts = []
try:
from tools.credential_files import get_credential_file_mounts
for mount_entry in get_credential_file_mounts():
cred_mounts.append(
_modal.Mount.from_local_file(
mount_entry["host_path"],
remote_path=mount_entry["container_path"],
)
)
logger.info(
"Modal: mounting credential %s -> %s",
mount_entry["host_path"],
mount_entry["container_path"],
)
except Exception as e:
logger.debug("Modal: could not load credential file mounts: %s", e)
self._worker.start()
from swerex.deployment.modal import ModalDeployment
async def _create_and_start(image_spec: Any):
deployment = ModalDeployment(
async def _create_sandbox(image_spec: Any):
app = await _modal.App.lookup.aio("hermes-agent", create_if_missing=True)
create_kwargs = dict(sandbox_kwargs)
if cred_mounts:
existing_mounts = list(create_kwargs.pop("mounts", []))
existing_mounts.extend(cred_mounts)
create_kwargs["mounts"] = existing_mounts
sandbox = await _modal.Sandbox.create.aio(
"sleep",
"infinity",
image=image_spec,
startup_timeout=180.0,
runtime_timeout=3600.0,
deployment_timeout=3600.0,
install_pipx=True,
modal_sandbox_kwargs=sandbox_kwargs,
app=app,
timeout=int(create_kwargs.pop("timeout", 3600)),
**create_kwargs,
)
await deployment.start()
return deployment
return app, sandbox
try:
target_image_spec = restored_snapshot_id or image
try:
# _resolve_modal_image keeps the Modal bootstrap fix together:
# it applies ensurepip via setup_dockerfile_commands before
# Modal builds or restores the image.
effective_image = _resolve_modal_image(target_image_spec)
self._deployment = self._worker.run_coroutine(_create_and_start(effective_image))
self._app, self._sandbox = self._worker.run_coroutine(
_create_sandbox(effective_image),
timeout=300,
)
except Exception as exc:
if not restored_snapshot_id:
raise
@ -201,18 +230,83 @@ class ModalEnvironment(BaseEnvironment):
)
_delete_direct_snapshot(self._task_id, restored_snapshot_id)
base_image = _resolve_modal_image(image)
self._deployment = self._worker.run_coroutine(_create_and_start(base_image))
self._app, self._sandbox = self._worker.run_coroutine(
_create_sandbox(base_image),
timeout=300,
)
else:
if restored_snapshot_id and restored_from_legacy_key:
_store_direct_snapshot(self._task_id, restored_snapshot_id)
logger.info("Modal: migrated legacy snapshot entry for task %s", self._task_id)
logger.info(
"Modal: migrated legacy snapshot entry for task %s",
self._task_id,
)
except Exception:
self._worker.stop()
raise
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
logger.info("Modal: sandbox created (task=%s)", self._task_id)
def _sync_credential_files(self) -> None:
"""Push credential files into the running sandbox."""
try:
from tools.credential_files import get_credential_file_mounts
mounts = get_credential_file_mounts()
if not mounts:
return
for entry in mounts:
host_path = entry["host_path"]
container_path = entry["container_path"]
hp = Path(host_path)
try:
stat = hp.stat()
file_key = (stat.st_mtime, stat.st_size)
except OSError:
continue
if self._synced_creds.get(container_path) == file_key:
continue
try:
content = hp.read_text(encoding="utf-8")
except Exception:
continue
import base64
b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
container_dir = str(Path(container_path).parent)
cmd = (
f"mkdir -p {shlex.quote(container_dir)} && "
f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
)
async def _write():
proc = await self._sandbox.exec.aio("bash", "-c", cmd)
await proc.wait.aio()
self._worker.run_coroutine(_write(), timeout=15)
self._synced_creds[container_path] = file_key
logger.debug(
"Modal: synced credential %s -> %s",
host_path,
container_path,
)
except Exception as e:
logger.debug("Modal: credential file sync failed: %s", e)
def execute(
self,
command: str,
cwd: str = "",
*,
timeout: int | None = None,
stdin_data: str | None = None,
) -> dict:
self._sync_credential_files()
if stdin_data is not None:
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
while marker in stdin_data:
@ -221,42 +315,48 @@ class ModalEnvironment(BaseEnvironment):
exec_command, sudo_stdin = self._prepare_command(command)
# Modal sandboxes execute commands via the Modal SDK and cannot pipe
# subprocess stdin directly the way a local Popen can. When a sudo
# password is present, use a shell-level pipe from printf so that the
# password feeds sudo -S without appearing as an echo argument embedded
# in the shell string.
# Modal sandboxes execute commands via exec() and cannot pipe
# subprocess stdin directly. When a sudo password is present,
# use a shell-level pipe from printf.
if sudo_stdin is not None:
import shlex
exec_command = (
f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
)
from swerex.runtime.abstract import Command as RexCommand
effective_cwd = cwd or self.cwd
effective_timeout = timeout or self.timeout
full_command = f"cd {shlex.quote(effective_cwd)} && {exec_command}"
result_holder = {"value": None, "error": None}
def _run():
try:
async def _do_execute():
return await self._deployment.runtime.execute(
RexCommand(
command=exec_command,
shell=True,
check=False,
cwd=effective_cwd,
timeout=effective_timeout,
merge_output_streams=True,
)
process = await self._sandbox.exec.aio(
"bash",
"-c",
full_command,
timeout=effective_timeout,
)
stdout = await process.stdout.read.aio()
stderr = await process.stderr.read.aio()
exit_code = await process.wait.aio()
if isinstance(stdout, bytes):
stdout = stdout.decode("utf-8", errors="replace")
if isinstance(stderr, bytes):
stderr = stderr.decode("utf-8", errors="replace")
output = stdout
if stderr:
output = f"{stdout}\n{stderr}" if stdout else stderr
return output, exit_code
output = self._worker.run_coroutine(_do_execute())
output, exit_code = self._worker.run_coroutine(
_do_execute(),
timeout=effective_timeout + 30,
)
result_holder["value"] = {
"output": output.stdout,
"returncode": output.exit_code,
"output": output,
"returncode": exit_code,
}
except Exception as e:
result_holder["error"] = e
@ -268,7 +368,7 @@ class ModalEnvironment(BaseEnvironment):
if is_interrupted():
try:
self._worker.run_coroutine(
asyncio.wait_for(self._deployment.stop(), timeout=10),
self._sandbox.terminate.aio(),
timeout=15,
)
except Exception:
@ -279,44 +379,46 @@ class ModalEnvironment(BaseEnvironment):
}
if result_holder["error"]:
return {"output": f"Modal execution error: {result_holder['error']}", "returncode": 1}
return {
"output": f"Modal execution error: {result_holder['error']}",
"returncode": 1,
}
return result_holder["value"]
def cleanup(self):
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
if self._deployment is None:
if self._sandbox is None:
return
if self._persistent:
try:
sandbox = getattr(self._deployment, "_sandbox", None)
if sandbox:
async def _snapshot():
img = await sandbox.snapshot_filesystem.aio()
return img.object_id
async def _snapshot():
img = await self._sandbox.snapshot_filesystem.aio()
return img.object_id
try:
snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
except Exception:
snapshot_id = None
try:
snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
except Exception:
snapshot_id = None
if snapshot_id:
_store_direct_snapshot(self._task_id, snapshot_id)
logger.info(
"Modal: saved filesystem snapshot %s for task %s",
snapshot_id[:20],
self._task_id,
)
if snapshot_id:
_store_direct_snapshot(self._task_id, snapshot_id)
logger.info(
"Modal: saved filesystem snapshot %s for task %s",
snapshot_id[:20],
self._task_id,
)
except Exception as e:
logger.warning("Modal: filesystem snapshot failed: %s", e)
try:
self._worker.run_coroutine(
asyncio.wait_for(self._deployment.stop(), timeout=10),
self._sandbox.terminate.aio(),
timeout=15,
)
except Exception:
pass
finally:
self._worker.stop()
self._deployment = None
self._sandbox = None
self._app = None

View file

@ -16,7 +16,7 @@ import uuid
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_cli.config import get_hermes_home
from hermes_constants import get_hermes_home
from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted

View file

@ -32,6 +32,7 @@ from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from pathlib import Path
from hermes_constants import get_hermes_home
# ---------------------------------------------------------------------------
@ -46,7 +47,7 @@ WRITE_DENIED_PATHS = {
os.path.join(_HOME, ".ssh", "id_rsa"),
os.path.join(_HOME, ".ssh", "id_ed25519"),
os.path.join(_HOME, ".ssh", "config"),
os.path.join(_HOME, ".hermes", ".env"),
str(get_hermes_home() / ".env"),
os.path.join(_HOME, ".bashrc"),
os.path.join(_HOME, ".zshrc"),
os.path.join(_HOME, ".profile"),

View file

@ -171,8 +171,9 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
# Security: block direct reads of internal Hermes cache/index files
# to prevent prompt injection via catalog or hub metadata files.
import pathlib as _pathlib
from hermes_constants import get_hermes_home as _get_hh
_resolved = _pathlib.Path(path).expanduser().resolve()
_hermes_home = _pathlib.Path("~/.hermes").expanduser().resolve()
_hermes_home = _get_hh().resolve()
_blocked_dirs = [
_hermes_home / "skills" / ".hub" / "index-cache",
_hermes_home / "skills" / ".hub",

View file

@ -45,8 +45,23 @@ def clear_session_context() -> None:
# ── Availability check ──
def _check_honcho_available() -> bool:
"""Tool is only available when Honcho is active."""
return _session_manager is not None and _session_key is not None
"""Tool is available when Honcho is active OR configured.
At banner time the session context hasn't been injected yet, but if
a valid config exists the tools *will* activate once the agent starts.
Returning True for "configured" prevents the banner from marking
honcho tools as red/disabled when they're actually going to work.
"""
# Fast path: session already active (mid-conversation)
if _session_manager is not None and _session_key is not None:
return True
# Slow path: check if Honcho is configured (banner time)
try:
from honcho_integration.client import HonchoClientConfig
cfg = HonchoClientConfig.from_global_config()
return cfg.enabled and bool(cfg.api_key or cfg.base_url)
except Exception:
return False
def _resolve_session_context(**kwargs):

View file

@ -8,7 +8,7 @@ from datetime import datetime, timezone
from dataclasses import dataclass
from typing import Callable, Optional
from hermes_cli.config import get_hermes_home
from hermes_constants import get_hermes_home
from tools.tool_backend_helpers import managed_nous_tools_enabled
_DEFAULT_TOOL_GATEWAY_DOMAIN = "nousresearch.com"

View file

@ -70,6 +70,7 @@ Thread safety:
"""
import asyncio
import inspect
import json
import logging
import math
@ -89,6 +90,8 @@ logger = logging.getLogger(__name__)
_MCP_AVAILABLE = False
_MCP_HTTP_AVAILABLE = False
_MCP_SAMPLING_TYPES = False
_MCP_NOTIFICATION_TYPES = False
_MCP_MESSAGE_HANDLER_SUPPORTED = False
try:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
@ -98,6 +101,13 @@ try:
_MCP_HTTP_AVAILABLE = True
except ImportError:
_MCP_HTTP_AVAILABLE = False
# Prefer the non-deprecated API (mcp >= 1.24.0); fall back to the
# deprecated wrapper for older SDK versions.
try:
from mcp.client.streamable_http import streamable_http_client
_MCP_NEW_HTTP = True
except ImportError:
_MCP_NEW_HTTP = False
# Sampling types -- separated so older SDK versions don't break MCP support
try:
from mcp.types import (
@ -112,9 +122,39 @@ try:
_MCP_SAMPLING_TYPES = True
except ImportError:
logger.debug("MCP sampling types not available -- sampling disabled")
# Notification types for dynamic tool discovery (tools/list_changed)
try:
from mcp.types import (
ServerNotification,
ToolListChangedNotification,
PromptListChangedNotification,
ResourceListChangedNotification,
)
_MCP_NOTIFICATION_TYPES = True
except ImportError:
logger.debug("MCP notification types not available -- dynamic tool discovery disabled")
except ImportError:
logger.debug("mcp package not installed -- MCP tool support disabled")
def _check_message_handler_support() -> bool:
"""Check if ClientSession accepts ``message_handler`` kwarg.
Inspects the constructor signature for backward compatibility with older
MCP SDK versions that don't support notification handlers.
"""
if not _MCP_AVAILABLE:
return False
try:
return "message_handler" in inspect.signature(ClientSession).parameters
except (TypeError, ValueError):
return False
_MCP_MESSAGE_HANDLER_SUPPORTED = _check_message_handler_support()
if _MCP_AVAILABLE and not _MCP_MESSAGE_HANDLER_SUPPORTED:
logger.debug("MCP SDK does not support message_handler -- dynamic tool discovery disabled")
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
@ -690,7 +730,7 @@ class MCPServerTask:
__slots__ = (
"name", "session", "tool_timeout",
"_task", "_ready", "_shutdown_event", "_tools", "_error", "_config",
"_sampling", "_registered_tool_names", "_auth_type",
"_sampling", "_registered_tool_names", "_auth_type", "_refresh_lock",
)
def __init__(self, name: str):
@ -706,11 +746,80 @@ class MCPServerTask:
self._sampling: Optional[SamplingHandler] = None
self._registered_tool_names: list[str] = []
self._auth_type: str = ""
self._refresh_lock = asyncio.Lock()
def _is_http(self) -> bool:
"""Check if this server uses HTTP transport."""
return "url" in self._config
# ----- Dynamic tool discovery (notifications/tools/list_changed) -----
def _make_message_handler(self):
"""Build a ``message_handler`` callback for ``ClientSession``.
Dispatches on notification type. Only ``ToolListChangedNotification``
triggers a refresh; prompt and resource change notifications are
logged as stubs for future work.
"""
async def _handler(message):
try:
if isinstance(message, Exception):
logger.debug("MCP message handler (%s): exception: %s", self.name, message)
return
if _MCP_NOTIFICATION_TYPES and isinstance(message, ServerNotification):
match message.root:
case ToolListChangedNotification():
logger.info(
"MCP server '%s': received tools/list_changed notification",
self.name,
)
await self._refresh_tools()
case PromptListChangedNotification():
logger.debug("MCP server '%s': prompts/list_changed (ignored)", self.name)
case ResourceListChangedNotification():
logger.debug("MCP server '%s': resources/list_changed (ignored)", self.name)
case _:
pass
except Exception:
logger.exception("Error in MCP message handler for '%s'", self.name)
return _handler
async def _refresh_tools(self):
"""Re-fetch tools from the server and update the registry.
Called when the server sends ``notifications/tools/list_changed``.
The lock prevents overlapping refreshes from rapid-fire notifications.
After the initial ``await`` (list_tools), all mutations are synchronous
atomic from the event loop's perspective.
"""
from tools.registry import registry
from toolsets import TOOLSETS
async with self._refresh_lock:
# 1. Fetch current tool list from server
tools_result = await self.session.list_tools()
new_mcp_tools = tools_result.tools if hasattr(tools_result, "tools") else []
# 2. Remove old tools from hermes-* umbrella toolsets
for ts_name, ts in TOOLSETS.items():
if ts_name.startswith("hermes-"):
ts["tools"] = [t for t in ts["tools"] if t not in self._registered_tool_names]
# 3. Deregister old tools from the central registry
for prefixed_name in self._registered_tool_names:
registry.deregister(prefixed_name)
# 4. Re-register with fresh tool list
self._tools = new_mcp_tools
self._registered_tool_names = _register_server_tools(
self.name, self, self._config
)
logger.info(
"MCP server '%s': dynamically refreshed %d tool(s)",
self.name, len(self._registered_tool_names),
)
async def _run_stdio(self, config: dict):
"""Run the server using stdio transport."""
command = config.get("command")
@ -731,6 +840,8 @@ class MCPServerTask:
)
sampling_kwargs = self._sampling.session_kwargs() if self._sampling else {}
if _MCP_NOTIFICATION_TYPES and _MCP_MESSAGE_HANDLER_SUPPORTED:
sampling_kwargs["message_handler"] = self._make_message_handler()
async with stdio_client(server_params) as (read_stream, write_stream):
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
await session.initialize()
@ -762,21 +873,52 @@ class MCPServerTask:
logger.warning("MCP OAuth setup failed for '%s': %s", self.name, exc)
sampling_kwargs = self._sampling.session_kwargs() if self._sampling else {}
_http_kwargs: dict = {
"headers": headers,
"timeout": float(connect_timeout),
}
if _oauth_auth is not None:
_http_kwargs["auth"] = _oauth_auth
async with streamablehttp_client(url, **_http_kwargs) as (
read_stream, write_stream, _get_session_id,
):
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
await session.initialize()
self.session = session
await self._discover_tools()
self._ready.set()
await self._shutdown_event.wait()
if _MCP_NOTIFICATION_TYPES and _MCP_MESSAGE_HANDLER_SUPPORTED:
sampling_kwargs["message_handler"] = self._make_message_handler()
if _MCP_NEW_HTTP:
# New API (mcp >= 1.24.0): build an explicit httpx.AsyncClient
# matching the SDK's own create_mcp_http_client defaults.
import httpx
client_kwargs: dict = {
"follow_redirects": True,
"timeout": httpx.Timeout(float(connect_timeout), read=300.0),
}
if headers:
client_kwargs["headers"] = headers
if _oauth_auth is not None:
client_kwargs["auth"] = _oauth_auth
# Caller owns the client lifecycle — the SDK skips cleanup when
# http_client is provided, so we wrap in async-with.
async with httpx.AsyncClient(**client_kwargs) as http_client:
async with streamable_http_client(url, http_client=http_client) as (
read_stream, write_stream, _get_session_id,
):
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
await session.initialize()
self.session = session
await self._discover_tools()
self._ready.set()
await self._shutdown_event.wait()
else:
# Deprecated API (mcp < 1.24.0): manages httpx client internally.
_http_kwargs: dict = {
"headers": headers,
"timeout": float(connect_timeout),
}
if _oauth_auth is not None:
_http_kwargs["auth"] = _oauth_auth
async with streamablehttp_client(url, **_http_kwargs) as (
read_stream, write_stream, _get_session_id,
):
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
await session.initialize()
self.session = session
await self._discover_tools()
self._ready.set()
await self._shutdown_event.wait()
async def _discover_tools(self):
"""Discover tools from the connected session."""
@ -797,7 +939,7 @@ class MCPServerTask:
"""
self._config = config
self.tool_timeout = config.get("timeout", _DEFAULT_TOOL_TIMEOUT)
self._auth_type = config.get("auth", "").lower().strip()
self._auth_type = (config.get("auth") or "").lower().strip()
# Set up sampling handler if enabled and SDK types are available
sampling_config = config.get("sampling", {})
@ -1486,24 +1628,19 @@ def _existing_tool_names() -> List[str]:
return names
async def _discover_and_register_server(name: str, config: dict) -> List[str]:
"""Connect to a single MCP server, discover tools, and register them.
def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> List[str]:
"""Register tools from an already-connected server into the registry.
Also registers utility tools for MCP Resources and Prompts support
(list_resources, read_resource, list_prompts, get_prompt).
Handles include/exclude filtering, utility tools, toolset creation,
and hermes-* umbrella toolset injection.
Returns list of registered tool names.
Used by both initial discovery and dynamic refresh (list_changed).
Returns:
List of registered prefixed tool names.
"""
from tools.registry import registry
from toolsets import create_custom_toolset
connect_timeout = config.get("connect_timeout", _DEFAULT_CONNECT_TIMEOUT)
server = await asyncio.wait_for(
_connect_server(name, config),
timeout=connect_timeout,
)
with _lock:
_servers[name] = server
from toolsets import create_custom_toolset, TOOLSETS
registered_names: List[str] = []
toolset_name = f"mcp-{name}"
@ -1589,8 +1726,6 @@ async def _discover_and_register_server(name: str, config: dict) -> List[str]:
)
registered_names.append(util_name)
server._registered_tool_names = list(registered_names)
# Create a custom toolset so these tools are discoverable
if registered_names:
create_custom_toolset(
@ -1598,6 +1733,31 @@ async def _discover_and_register_server(name: str, config: dict) -> List[str]:
description=f"MCP tools from {name} server",
tools=registered_names,
)
# Inject into hermes-* umbrella toolsets for default behavior
for ts_name, ts in TOOLSETS.items():
if ts_name.startswith("hermes-"):
for tool_name in registered_names:
if tool_name not in ts["tools"]:
ts["tools"].append(tool_name)
return registered_names
async def _discover_and_register_server(name: str, config: dict) -> List[str]:
"""Connect to a single MCP server, discover tools, and register them.
Returns list of registered tool names.
"""
connect_timeout = config.get("connect_timeout", _DEFAULT_CONNECT_TIMEOUT)
server = await asyncio.wait_for(
_connect_server(name, config),
timeout=connect_timeout,
)
with _lock:
_servers[name] = server
registered_names = _register_server_tools(name, server, config)
server._registered_tool_names = list(registered_names)
transport_type = "HTTP" if "url" in config else "stdio"
logger.info(

View file

@ -339,7 +339,7 @@ class MemoryStore:
entries = self._entries_for(target)
current = self._char_count(target)
limit = self._char_limit(target)
pct = int((current / limit) * 100) if limit > 0 else 0
pct = min(100, int((current / limit) * 100)) if limit > 0 else 0
resp = {
"success": True,
@ -360,7 +360,7 @@ class MemoryStore:
limit = self._char_limit(target)
content = ENTRY_DELIMITER.join(entries)
current = len(content)
pct = int((current / limit) * 100) if limit > 0 else 0
pct = min(100, int((current / limit) * 100)) if limit > 0 else 0
if target == "user":
header = f"USER PROFILE (who the user is) [{pct}% — {current:,}/{limit:,} chars]"

View file

@ -52,6 +52,7 @@ import asyncio
import datetime
from typing import Dict, Any, List, Optional
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
from agent.auxiliary_client import extract_content_or_reasoning
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@ -143,7 +144,13 @@ async def _run_reference_model_safe(
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
content = extract_content_or_reasoning(response)
if not content:
# Reasoning-only response — let the retry loop handle it
logger.warning("%s returned empty content (attempt %s/%s), retrying", model, attempt + 1, max_retries)
if attempt < max_retries - 1:
await asyncio.sleep(min(2 ** (attempt + 1), 60))
continue
logger.info("%s responded (%s characters)", model, len(content))
return model, content, True
@ -211,7 +218,14 @@ async def _run_aggregator_model(
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
content = extract_content_or_reasoning(response)
# Retry once on empty content (reasoning-only response)
if not content:
logger.warning("Aggregator returned empty content, retrying once")
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = extract_content_or_reasoning(response)
logger.info("Aggregation complete (%s characters)", len(content))
return content

View file

@ -419,6 +419,23 @@ def _apply_update(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]:
if error:
return False, f"Could not apply hunk: {error}"
else:
# Addition-only hunk (no context or removed lines).
# Insert at the location indicated by the context hint, or at end of file.
insert_text = '\n'.join(replace_lines)
if hunk.context_hint:
hint_pos = new_content.find(hunk.context_hint)
if hint_pos != -1:
# Insert after the line containing the context hint
eol = new_content.find('\n', hint_pos)
if eol != -1:
new_content = new_content[:eol + 1] + insert_text + '\n' + new_content[eol + 1:]
else:
new_content = new_content + '\n' + insert_text
else:
new_content = new_content.rstrip('\n') + '\n' + insert_text + '\n'
else:
new_content = new_content.rstrip('\n') + '\n' + insert_text + '\n'
# Write new content
write_result = file_ops.write_file(op.file_path, new_content)

View file

@ -87,6 +87,23 @@ class ToolRegistry:
if check_fn and toolset not in self._toolset_checks:
self._toolset_checks[toolset] = check_fn
def deregister(self, name: str) -> None:
"""Remove a tool from the registry.
Also cleans up the toolset check if no other tools remain in the
same toolset. Used by MCP dynamic tool discovery to nuke-and-repave
when a server sends ``notifications/tools/list_changed``.
"""
entry = self._tools.pop(name, None)
if entry is None:
return
# Drop the toolset check if this was the last tool in that toolset
if entry.toolset in self._toolset_checks and not any(
e.toolset == entry.toolset for e in self._tools.values()
):
self._toolset_checks.pop(entry.toolset, None)
logger.debug("Deregistered tool: %s", name)
# ------------------------------------------------------------------
# Schema retrieval
# ------------------------------------------------------------------
@ -98,21 +115,26 @@ class ToolRegistry:
are included.
"""
result = []
check_results: Dict[Callable, bool] = {}
for name in sorted(tool_names):
entry = self._tools.get(name)
if not entry:
continue
if entry.check_fn:
try:
if not entry.check_fn():
if entry.check_fn not in check_results:
try:
check_results[entry.check_fn] = bool(entry.check_fn())
except Exception:
check_results[entry.check_fn] = False
if not quiet:
logger.debug("Tool %s unavailable (check failed)", name)
continue
except Exception:
logger.debug("Tool %s check raised; skipping", name)
if not check_results[entry.check_fn]:
if not quiet:
logger.debug("Tool %s check raised; skipping", name)
logger.debug("Tool %s unavailable (check failed)", name)
continue
result.append({"type": "function", "function": entry.schema})
# Ensure schema always has a "name" field — use entry.name as fallback
schema_with_name = {**entry.schema, "name": entry.name}
result.append({"type": "function", "function": schema_with_name})
return result
# ------------------------------------------------------------------
@ -146,6 +168,15 @@ class ToolRegistry:
"""Return sorted list of all registered tool names."""
return sorted(self._tools.keys())
def get_schema(self, name: str) -> Optional[dict]:
"""Return a tool's raw schema dict, bypassing check_fn filtering.
Useful for token estimation and introspection where availability
doesn't matter — only the schema content does.
"""
entry = self._tools.get(name)
return entry.schema if entry else None
def get_toolset_for_tool(self, name: str) -> Optional[str]:
"""Return the toolset a tool belongs to, or None."""
entry = self._tools.get(name)

View file

@ -15,6 +15,7 @@ import time
logger = logging.getLogger(__name__)
_TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$")
_FEISHU_TARGET_RE = re.compile(r"^\s*((?:oc|ou|on|chat|open)_[-A-Za-z0-9]+)(?::([-A-Za-z0-9_]+))?\s*$")
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"}
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
@ -128,6 +129,8 @@ def _handle_send(args):
"mattermost": Platform.MATTERMOST,
"homeassistant": Platform.HOMEASSISTANT,
"dingtalk": Platform.DINGTALK,
"feishu": Platform.FEISHU,
"wecom": Platform.WECOM,
"email": Platform.EMAIL,
"sms": Platform.SMS,
}
@ -198,6 +201,10 @@ def _parse_target_ref(platform_name: str, target_ref: str):
match = _TELEGRAM_TOPIC_TARGET_RE.fullmatch(target_ref)
if match:
return match.group(1), match.group(2), True
if platform_name == "feishu":
match = _FEISHU_TARGET_RE.fullmatch(target_ref)
if match:
return match.group(1), match.group(2), True
if target_ref.lstrip("-").isdigit():
return target_ref, None, True
return None, None, False
@ -280,6 +287,13 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
from gateway.platforms.discord import DiscordAdapter
from gateway.platforms.slack import SlackAdapter
# Feishu adapter import is optional (requires lark-oapi)
try:
from gateway.platforms.feishu import FeishuAdapter
_feishu_available = True
except ImportError:
_feishu_available = False
media_files = media_files or []
# Platform message length limits (from adapter class attributes)
@ -288,6 +302,8 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
Platform.DISCORD: DiscordAdapter.MAX_MESSAGE_LENGTH,
Platform.SLACK: SlackAdapter.MAX_MESSAGE_LENGTH,
}
if _feishu_available:
_MAX_LENGTHS[Platform.FEISHU] = FeishuAdapter.MAX_MESSAGE_LENGTH
# Smart-chunk the message to fit within platform limits.
# For short messages or platforms without a known limit this is a no-op.
@ -343,6 +359,18 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
result = await _send_email(pconfig.extra, chat_id, chunk)
elif platform == Platform.SMS:
result = await _send_sms(pconfig.api_key, chat_id, chunk)
elif platform == Platform.MATTERMOST:
result = await _send_mattermost(pconfig.token, pconfig.extra, chat_id, chunk)
elif platform == Platform.MATRIX:
result = await _send_matrix(pconfig.token, pconfig.extra, chat_id, chunk)
elif platform == Platform.HOMEASSISTANT:
result = await _send_homeassistant(pconfig.token, pconfig.extra, chat_id, chunk)
elif platform == Platform.DINGTALK:
result = await _send_dingtalk(pconfig.extra, chat_id, chunk)
elif platform == Platform.FEISHU:
result = await _send_feishu(pconfig, chat_id, chunk, thread_id=thread_id)
elif platform == Platform.WECOM:
result = await _send_wecom(pconfig.extra, chat_id, chunk)
else:
result = {"error": f"Direct sending not yet implemented for {platform.value}"}
@ -666,6 +694,193 @@ async def _send_sms(auth_token, chat_id, message):
return {"error": f"SMS send failed: {e}"}
async def _send_mattermost(token, extra, chat_id, message):
"""Send via Mattermost REST API."""
try:
import aiohttp
except ImportError:
return {"error": "aiohttp not installed. Run: pip install aiohttp"}
try:
base_url = (extra.get("url") or os.getenv("MATTERMOST_URL", "")).rstrip("/")
token = token or os.getenv("MATTERMOST_TOKEN", "")
if not base_url or not token:
return {"error": "Mattermost not configured (MATTERMOST_URL, MATTERMOST_TOKEN required)"}
url = f"{base_url}/api/v4/posts"
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30)) as session:
async with session.post(url, headers=headers, json={"channel_id": chat_id, "message": message}) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return {"error": f"Mattermost API error ({resp.status}): {body}"}
data = await resp.json()
return {"success": True, "platform": "mattermost", "chat_id": chat_id, "message_id": data.get("id")}
except Exception as e:
return {"error": f"Mattermost send failed: {e}"}
async def _send_matrix(token, extra, chat_id, message):
"""Send via Matrix Client-Server API."""
try:
import aiohttp
except ImportError:
return {"error": "aiohttp not installed. Run: pip install aiohttp"}
try:
homeserver = (extra.get("homeserver") or os.getenv("MATRIX_HOMESERVER", "")).rstrip("/")
token = token or os.getenv("MATRIX_ACCESS_TOKEN", "")
if not homeserver or not token:
return {"error": "Matrix not configured (MATRIX_HOMESERVER, MATRIX_ACCESS_TOKEN required)"}
txn_id = f"hermes_{int(time.time() * 1000)}"
url = f"{homeserver}/_matrix/client/v3/rooms/{chat_id}/send/m.room.message/{txn_id}"
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30)) as session:
async with session.put(url, headers=headers, json={"msgtype": "m.text", "body": message}) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return {"error": f"Matrix API error ({resp.status}): {body}"}
data = await resp.json()
return {"success": True, "platform": "matrix", "chat_id": chat_id, "message_id": data.get("event_id")}
except Exception as e:
return {"error": f"Matrix send failed: {e}"}
async def _send_homeassistant(token, extra, chat_id, message):
"""Send via Home Assistant notify service."""
try:
import aiohttp
except ImportError:
return {"error": "aiohttp not installed. Run: pip install aiohttp"}
try:
hass_url = (extra.get("url") or os.getenv("HASS_URL", "")).rstrip("/")
token = token or os.getenv("HASS_TOKEN", "")
if not hass_url or not token:
return {"error": "Home Assistant not configured (HASS_URL, HASS_TOKEN required)"}
url = f"{hass_url}/api/services/notify/notify"
headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30)) as session:
async with session.post(url, headers=headers, json={"message": message, "target": chat_id}) as resp:
if resp.status not in (200, 201):
body = await resp.text()
return {"error": f"Home Assistant API error ({resp.status}): {body}"}
return {"success": True, "platform": "homeassistant", "chat_id": chat_id}
except Exception as e:
return {"error": f"Home Assistant send failed: {e}"}
async def _send_dingtalk(extra, chat_id, message):
"""Send via DingTalk robot webhook.
Note: The gateway's DingTalk adapter uses per-session webhook URLs from
incoming messages (dingtalk-stream SDK). For cross-platform send_message
delivery we use a static robot webhook URL instead, which must be
configured via ``DINGTALK_WEBHOOK_URL`` env var or ``webhook_url`` in the
platform's extra config.
"""
try:
import httpx
except ImportError:
return {"error": "httpx not installed"}
try:
webhook_url = extra.get("webhook_url") or os.getenv("DINGTALK_WEBHOOK_URL", "")
if not webhook_url:
return {"error": "DingTalk not configured. Set DINGTALK_WEBHOOK_URL env var or webhook_url in dingtalk platform extra config."}
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
webhook_url,
json={"msgtype": "text", "text": {"content": message}},
)
resp.raise_for_status()
data = resp.json()
if data.get("errcode", 0) != 0:
return {"error": f"DingTalk API error: {data.get('errmsg', 'unknown')}"}
return {"success": True, "platform": "dingtalk", "chat_id": chat_id}
except Exception as e:
return {"error": f"DingTalk send failed: {e}"}
async def _send_wecom(extra, chat_id, message):
"""Send via WeCom using the adapter's WebSocket send pipeline."""
try:
from gateway.platforms.wecom import WeComAdapter, check_wecom_requirements
if not check_wecom_requirements():
return {"error": "WeCom requirements not met. Need aiohttp + WECOM_BOT_ID/SECRET."}
except ImportError:
return {"error": "WeCom adapter not available."}
try:
from gateway.config import PlatformConfig
pconfig = PlatformConfig(extra=extra)
adapter = WeComAdapter(pconfig)
connected = await adapter.connect()
if not connected:
return {"error": f"WeCom: failed to connect — {adapter.fatal_error_message or 'unknown error'}"}
try:
result = await adapter.send(chat_id, message)
if not result.success:
return {"error": f"WeCom send failed: {result.error}"}
return {"success": True, "platform": "wecom", "chat_id": chat_id, "message_id": result.message_id}
finally:
await adapter.disconnect()
except Exception as e:
return {"error": f"WeCom send failed: {e}"}
async def _send_feishu(pconfig, chat_id, message, media_files=None, thread_id=None):
"""Send via Feishu/Lark using the adapter's send pipeline."""
try:
from gateway.platforms.feishu import FeishuAdapter, FEISHU_AVAILABLE
if not FEISHU_AVAILABLE:
return {"error": "Feishu dependencies not installed. Run: pip install 'hermes-agent[feishu]'"}
from gateway.platforms.feishu import FEISHU_DOMAIN, LARK_DOMAIN
except ImportError:
return {"error": "Feishu dependencies not installed. Run: pip install 'hermes-agent[feishu]'"}
media_files = media_files or []
try:
adapter = FeishuAdapter(pconfig)
domain_name = getattr(adapter, "_domain_name", "feishu")
domain = FEISHU_DOMAIN if domain_name != "lark" else LARK_DOMAIN
adapter._client = adapter._build_lark_client(domain)
metadata = {"thread_id": thread_id} if thread_id else None
last_result = None
if message.strip():
last_result = await adapter.send(chat_id, message, metadata=metadata)
if not last_result.success:
return {"error": f"Feishu send failed: {last_result.error}"}
for media_path, is_voice in media_files:
if not os.path.exists(media_path):
return {"error": f"Media file not found: {media_path}"}
ext = os.path.splitext(media_path)[1].lower()
if ext in _IMAGE_EXTS:
last_result = await adapter.send_image_file(chat_id, media_path, metadata=metadata)
elif ext in _VIDEO_EXTS:
last_result = await adapter.send_video(chat_id, media_path, metadata=metadata)
elif ext in _VOICE_EXTS and is_voice:
last_result = await adapter.send_voice(chat_id, media_path, metadata=metadata)
elif ext in _AUDIO_EXTS:
last_result = await adapter.send_voice(chat_id, media_path, metadata=metadata)
else:
last_result = await adapter.send_document(chat_id, media_path, metadata=metadata)
if not last_result.success:
return {"error": f"Feishu media send failed: {last_result.error}"}
if last_result is None:
return {"error": "No deliverable text or media remained after processing MEDIA tags"}
return {
"success": True,
"platform": "feishu",
"chat_id": chat_id,
"message_id": last_result.message_id,
}
except Exception as e:
return {"error": f"Feishu send failed: {e}"}
def _check_send_message():
"""Gate send_message on gateway running (always available on messaging platforms)."""
platform = os.getenv("HERMES_SESSION_PLATFORM", "")

View file

@ -21,7 +21,7 @@ import json
import logging
from typing import Dict, Any, List, Optional, Union
from agent.auxiliary_client import async_call_llm
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 10000
@ -161,7 +161,15 @@ async def _summarize_session(
temperature=0.1,
max_tokens=MAX_SUMMARY_TOKENS,
)
return response.choices[0].message.content.strip()
content = extract_content_or_reasoning(response)
if content:
return content
# Reasoning-only / empty — let the retry loop handle it
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
if attempt < max_retries - 1:
await asyncio.sleep(1 * (attempt + 1))
continue
return content
except RuntimeError:
logging.warning("No auxiliary model available for session summarization")
return None
@ -384,23 +392,30 @@ def session_search(
}, ensure_ascii=False)
summaries = []
for (session_id, match_info, _, _), result in zip(tasks, results):
for (session_id, match_info, conversation_text, _), result in zip(tasks, results):
if isinstance(result, Exception):
logging.warning(
"Failed to summarize session %s: %s",
session_id,
result,
exc_info=True,
session_id, result, exc_info=True,
)
continue
result = None
entry = {
"session_id": session_id,
"when": _format_timestamp(match_info.get("session_started")),
"source": match_info.get("source", "unknown"),
"model": match_info.get("model"),
}
if result:
summaries.append({
"session_id": session_id,
"when": _format_timestamp(match_info.get("session_started")),
"source": match_info.get("source", "unknown"),
"model": match_info.get("model"),
"summary": result,
})
entry["summary"] = result
else:
# Fallback: raw preview so matched sessions aren't silently
# dropped when the summarizer is unavailable (fixes #3409).
preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"
summaries.append(entry)
return json.dumps({
"success": True,

View file

@ -113,6 +113,31 @@ def _validate_name(name: str) -> Optional[str]:
return None
def _validate_category(category: Optional[str]) -> Optional[str]:
"""Validate an optional category name used as a single directory segment."""
if category is None:
return None
if not isinstance(category, str):
return "Category must be a string."
category = category.strip()
if not category:
return None
if "/" in category or "\\" in category:
return (
f"Invalid category '{category}'. Use lowercase letters, numbers, "
"hyphens, dots, and underscores. Categories must be a single directory name."
)
if len(category) > MAX_NAME_LENGTH:
return f"Category exceeds {MAX_NAME_LENGTH} characters."
if not VALID_NAME_RE.match(category):
return (
f"Invalid category '{category}'. Use lowercase letters, numbers, "
"hyphens, dots, and underscores. Categories must be a single directory name."
)
return None
def _validate_frontmatter(content: str) -> Optional[str]:
"""
Validate that SKILL.md content has proper frontmatter with required fields.
@ -241,6 +266,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
if err:
return {"success": False, "error": err}
err = _validate_category(category)
if err:
return {"success": False, "error": err}
# Validate content
err = _validate_frontmatter(content)
if err:
@ -547,6 +576,13 @@ def skill_manage(
else:
result = {"success": False, "error": f"Unknown action '{action}'. Use: create, edit, patch, delete, write_file, remove_file"}
if result.get("success"):
try:
from agent.prompt_builder import clear_skills_system_prompt_cache
clear_skills_system_prompt_cache(clear_snapshot=True)
except Exception:
pass
return json.dumps(result, ensure_ascii=False)

View file

@ -948,9 +948,9 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
# Call the LLM via the centralized provider router
try:
from agent.auxiliary_client import call_llm
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
response = call_llm(
call_kwargs = dict(
provider="openrouter",
model=model,
messages=[{
@ -960,7 +960,13 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
temperature=0,
max_tokens=1000,
)
llm_text = response.choices[0].message.content.strip()
response = call_llm(**call_kwargs)
llm_text = extract_content_or_reasoning(response)
# Retry once on empty content (reasoning-only response)
if not llm_text:
response = call_llm(**call_kwargs)
llm_text = extract_content_or_reasoning(response)
except Exception:
# LLM audit is best-effort — don't block install if the call fails
return static_result

View file

@ -251,6 +251,7 @@ class GitHubSource(SkillSource):
{"repo": "openai/skills", "path": "skills/"},
{"repo": "anthropics/skills", "path": "skills/"},
{"repo": "VoltAgent/awesome-agent-skills", "path": "skills/"},
{"repo": "garrytan/gstack", "path": ""},
]
def __init__(self, auth: GitHubAuth, extra_taps: Optional[List[Dict]] = None):
@ -395,7 +396,8 @@ class GitHubSource(SkillSource):
if dir_name.startswith(".") or dir_name.startswith("_"):
continue
skill_identifier = f"{repo}/{path.rstrip('/')}/{dir_name}"
prefix = path.rstrip("/")
skill_identifier = f"{repo}/{prefix}/{dir_name}" if prefix else f"{repo}/{dir_name}"
meta = self.inspect(skill_identifier)
if meta:
skills.append(meta)

View file

@ -78,7 +78,6 @@ from pathlib import Path
from typing import Dict, Any, List, Optional, Set, Tuple
import yaml
from hermes_cli.config import load_env, _ENV_VAR_NAME_RE
from tools.registry import registry
logger = logging.getLogger(__name__)
@ -101,11 +100,28 @@ _PLATFORM_MAP = {
"linux": "linux",
"windows": "win32",
}
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
_REMOTE_ENV_BACKENDS = frozenset({"docker", "singularity", "modal", "ssh", "daytona"})
_secret_capture_callback = None
def load_env() -> Dict[str, str]:
"""Load profile-scoped environment variables from HERMES_HOME/.env."""
env_path = get_hermes_home() / ".env"
env_vars: Dict[str, str] = {}
if not env_path.exists():
return env_vars
with env_path.open() as f:
for line in f:
line = line.strip()
if line and not line.startswith("#") and "=" in line:
key, _, value = line.partition("=")
env_vars[key.strip()] = value.strip().strip("\"'")
return env_vars
class SkillReadinessStatus(str, Enum):
AVAILABLE = "available"
SETUP_NEEDED = "setup_needed"
@ -120,28 +136,11 @@ def set_secret_capture_callback(callback) -> None:
def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
"""Check if a skill is compatible with the current OS platform.
Skills declare platform requirements via a top-level ``platforms`` list
in their YAML frontmatter::
platforms: [macos] # macOS only
platforms: [macos, linux] # macOS and Linux
Valid values: ``macos``, ``linux``, ``windows``.
If the field is absent or empty the skill is compatible with **all**
platforms (backward-compatible default).
Delegates to ``agent.skill_utils.skill_matches_platform`` kept here
as a public re-export so existing callers don't need updating.
"""
platforms = frontmatter.get("platforms")
if not platforms:
return True # No restriction → loads everywhere
if not isinstance(platforms, list):
platforms = [platforms]
current = sys.platform
for p in platforms:
mapped = _PLATFORM_MAP.get(str(p).lower().strip(), str(p).lower().strip())
if current.startswith(mapped):
return True
return False
from agent.skill_utils import skill_matches_platform as _impl
return _impl(frontmatter)
def _normalize_prerequisite_values(value: Any) -> List[str]:
@ -372,13 +371,8 @@ def _remaining_required_environment_names(
capture_result: Dict[str, Any],
*,
env_snapshot: Dict[str, str] | None = None,
backend: str | None = None,
) -> List[str]:
if backend is None:
backend = _get_terminal_backend_name()
missing_names = set(capture_result["missing_names"])
if backend in _REMOTE_ENV_BACKENDS:
return [entry["name"] for entry in required_env_vars]
if env_snapshot is None:
env_snapshot = load_env()
@ -419,40 +413,13 @@ def check_skills_requirements() -> bool:
def _parse_frontmatter(content: str) -> Tuple[Dict[str, Any], str]:
"""Parse YAML frontmatter from markdown content.
Delegates to ``agent.skill_utils.parse_frontmatter`` kept here
as a public re-export so existing callers don't need updating.
"""
Parse YAML frontmatter from markdown content.
Uses yaml.safe_load for full YAML support (nested metadata, lists, etc.)
with a fallback to simple key:value splitting for robustness.
Args:
content: Full markdown file content
Returns:
Tuple of (frontmatter dict, remaining content)
"""
frontmatter = {}
body = content
if content.startswith("---"):
end_match = re.search(r"\n---\s*\n", content[3:])
if end_match:
yaml_content = content[3 : end_match.start() + 3]
body = content[end_match.end() + 3 :]
try:
parsed = yaml.safe_load(yaml_content)
if isinstance(parsed, dict):
frontmatter = parsed
# yaml.safe_load returns None for empty frontmatter
except yaml.YAMLError:
# Fallback: simple key:value parsing for malformed YAML
for line in yaml_content.strip().split("\n"):
if ":" in line:
key, value = line.split(":", 1)
frontmatter[key.strip()] = value.strip()
return frontmatter, body
from agent.skill_utils import parse_frontmatter
return parse_frontmatter(content)
def _get_category_from_path(skill_path: Path) -> Optional[str]:
@ -516,24 +483,13 @@ def _parse_tags(tags_value) -> List[str]:
def _get_disabled_skill_names() -> Set[str]:
"""Load disabled skill names from config (once per call).
"""Load disabled skill names from config.
Resolves platform from ``HERMES_PLATFORM`` env var, falls back to
the global disabled list.
Delegates to ``agent.skill_utils.get_disabled_skill_names`` kept here
as a public re-export so existing callers don't need updating.
"""
import os
try:
from hermes_cli.config import load_config
config = load_config()
skills_cfg = config.get("skills", {})
resolved_platform = os.getenv("HERMES_PLATFORM")
if resolved_platform:
platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
if platform_disabled is not None:
return set(platform_disabled)
return set(skills_cfg.get("disabled", []))
except Exception:
return set()
from agent.skill_utils import get_disabled_skill_names
return get_disabled_skill_names()
def _is_skill_disabled(name: str, platform: str = None) -> bool:
@ -554,7 +510,7 @@ def _is_skill_disabled(name: str, platform: str = None) -> bool:
def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]:
"""Recursively find all skills in ~/.hermes/skills/.
"""Recursively find all skills in ~/.hermes/skills/ and external dirs.
Args:
skip_disabled: If True, return ALL skills regardless of disabled
@ -564,59 +520,68 @@ def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]:
Returns:
List of skill metadata dicts (name, description, category).
"""
skills = []
from agent.skill_utils import get_external_skills_dirs
if not SKILLS_DIR.exists():
return skills
skills = []
seen_names: set = set()
# Load disabled set once (not per-skill)
disabled = set() if skip_disabled else _get_disabled_skill_names()
# Scan local dir first, then external dirs (local takes precedence)
dirs_to_scan = []
if SKILLS_DIR.exists():
dirs_to_scan.append(SKILLS_DIR)
dirs_to_scan.extend(get_external_skills_dirs())
for skill_md in SKILLS_DIR.rglob("SKILL.md"):
if any(part in _EXCLUDED_SKILL_DIRS for part in skill_md.parts):
continue
skill_dir = skill_md.parent
try:
content = skill_md.read_text(encoding="utf-8")[:4000]
frontmatter, body = _parse_frontmatter(content)
if not skill_matches_platform(frontmatter):
for scan_dir in dirs_to_scan:
for skill_md in scan_dir.rglob("SKILL.md"):
if any(part in _EXCLUDED_SKILL_DIRS for part in skill_md.parts):
continue
name = frontmatter.get("name", skill_dir.name)[:MAX_NAME_LENGTH]
if name in disabled:
skill_dir = skill_md.parent
try:
content = skill_md.read_text(encoding="utf-8")[:4000]
frontmatter, body = _parse_frontmatter(content)
if not skill_matches_platform(frontmatter):
continue
name = frontmatter.get("name", skill_dir.name)[:MAX_NAME_LENGTH]
if name in seen_names:
continue
if name in disabled:
continue
description = frontmatter.get("description", "")
if not description:
for line in body.strip().split("\n"):
line = line.strip()
if line and not line.startswith("#"):
description = line
break
if len(description) > MAX_DESCRIPTION_LENGTH:
description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
category = _get_category_from_path(skill_md)
seen_names.add(name)
skills.append({
"name": name,
"description": description,
"category": category,
})
except (UnicodeDecodeError, PermissionError) as e:
logger.debug("Failed to read skill file %s: %s", skill_md, e)
continue
except Exception as e:
logger.debug(
"Skipping skill at %s: failed to parse: %s", skill_md, e, exc_info=True
)
continue
description = frontmatter.get("description", "")
if not description:
for line in body.strip().split("\n"):
line = line.strip()
if line and not line.startswith("#"):
description = line
break
if len(description) > MAX_DESCRIPTION_LENGTH:
description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
category = _get_category_from_path(skill_md)
skills.append({
"name": name,
"description": description,
"category": category,
})
except (UnicodeDecodeError, PermissionError) as e:
logger.debug("Failed to read skill file %s: %s", skill_md, e)
continue
except Exception as e:
logger.debug(
"Skipping skill at %s: failed to parse: %s", skill_md, e, exc_info=True
)
continue
return skills
@ -816,7 +781,15 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
JSON string with skill content or error message
"""
try:
if not SKILLS_DIR.exists():
from agent.skill_utils import get_external_skills_dirs
# Build list of all skill directories to search
all_dirs = []
if SKILLS_DIR.exists():
all_dirs.append(SKILLS_DIR)
all_dirs.extend(get_external_skills_dirs())
if not all_dirs:
return json.dumps(
{
"success": False,
@ -828,27 +801,37 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
skill_dir = None
skill_md = None
# Try direct path first (e.g., "mlops/axolotl")
direct_path = SKILLS_DIR / name
if direct_path.is_dir() and (direct_path / "SKILL.md").exists():
skill_dir = direct_path
skill_md = direct_path / "SKILL.md"
elif direct_path.with_suffix(".md").exists():
skill_md = direct_path.with_suffix(".md")
# Search all dirs: local first, then external (first match wins)
for search_dir in all_dirs:
# Try direct path first (e.g., "mlops/axolotl")
direct_path = search_dir / name
if direct_path.is_dir() and (direct_path / "SKILL.md").exists():
skill_dir = direct_path
skill_md = direct_path / "SKILL.md"
break
elif direct_path.with_suffix(".md").exists():
skill_md = direct_path.with_suffix(".md")
break
# Search by directory name
# Search by directory name across all dirs
if not skill_md:
for found_skill_md in SKILLS_DIR.rglob("SKILL.md"):
if found_skill_md.parent.name == name:
skill_dir = found_skill_md.parent
skill_md = found_skill_md
for search_dir in all_dirs:
for found_skill_md in search_dir.rglob("SKILL.md"):
if found_skill_md.parent.name == name:
skill_dir = found_skill_md.parent
skill_md = found_skill_md
break
if skill_md:
break
# Legacy: flat .md files
if not skill_md:
for found_md in SKILLS_DIR.rglob(f"{name}.md"):
if found_md.name != "SKILL.md":
skill_md = found_md
for search_dir in all_dirs:
for found_md in search_dir.rglob(f"{name}.md"):
if found_md.name != "SKILL.md":
skill_md = found_md
break
if skill_md:
break
if not skill_md or not skill_md.exists():
@ -875,12 +858,21 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
ensure_ascii=False,
)
# Security: warn if skill is loaded from outside the trusted skills directory
# Security: warn if skill is loaded from outside trusted directories
# (local skills dir + configured external_dirs are all trusted)
_outside_skills_dir = True
_trusted_dirs = [SKILLS_DIR.resolve()]
try:
skill_md.resolve().relative_to(SKILLS_DIR.resolve())
_outside_skills_dir = False
except ValueError:
_outside_skills_dir = True
_trusted_dirs.extend(d.resolve() for d in all_dirs[1:])
except Exception:
pass
for _td in _trusted_dirs:
try:
skill_md.resolve().relative_to(_td)
_outside_skills_dir = False
break
except ValueError:
continue
# Security: detect common prompt injection patterns
_INJECTION_PATTERNS = [
@ -1118,7 +1110,11 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
if script_files:
linked_files["scripts"] = script_files
rel_path = str(skill_md.relative_to(SKILLS_DIR))
try:
rel_path = str(skill_md.relative_to(SKILLS_DIR))
except ValueError:
# External skill — use path relative to the skill's own parent dir
rel_path = str(skill_md.relative_to(skill_md.parent.parent)) if skill_md.parent.parent else skill_md.name
skill_name = frontmatter.get(
"name", skill_md.stem if not skill_dir else skill_dir.name
)
@ -1131,8 +1127,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
missing_required_env_vars = [
e
for e in required_env_vars
if backend in _REMOTE_ENV_BACKENDS
or not _is_env_var_persisted(e["name"], env_snapshot)
if not _is_env_var_persisted(e["name"], env_snapshot)
]
capture_result = _capture_required_environment_variables(
skill_name,
@ -1144,7 +1139,6 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
required_env_vars,
capture_result,
env_snapshot=env_snapshot,
backend=backend,
)
setup_needed = bool(remaining_missing_required_envs)
@ -1168,6 +1162,27 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
exc_info=True,
)
# Register credential files for mounting into remote sandboxes
# (Modal, Docker). Files that exist on the host are registered;
# missing ones are added to the setup_needed indicators.
required_cred_files_raw = frontmatter.get("required_credential_files", [])
if not isinstance(required_cred_files_raw, list):
required_cred_files_raw = []
missing_cred_files: list = []
if required_cred_files_raw:
try:
from tools.credential_files import register_credential_files
missing_cred_files = register_credential_files(required_cred_files_raw)
if missing_cred_files:
setup_needed = True
except Exception:
logger.debug(
"Could not register credential files for skill %s",
skill_name,
exc_info=True,
)
result = {
"success": True,
"name": skill_name,
@ -1183,6 +1198,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
"required_environment_variables": required_env_vars,
"required_commands": [],
"missing_required_environment_variables": remaining_missing_required_envs,
"missing_credential_files": missing_cred_files,
"missing_required_commands": [],
"setup_needed": setup_needed,
"setup_skipped": capture_result["setup_skipped"],
@ -1201,6 +1217,8 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
if setup_needed:
missing_items = [
f"env ${env_name}" for env_name in remaining_missing_required_envs
] + [
f"file {path}" for path in missing_cred_files
]
setup_note = _build_setup_note(
SkillReadinessStatus.SETUP_NEEDED,

View file

@ -52,6 +52,7 @@ logger = logging.getLogger(__name__)
# long-running subprocesses immediately instead of blocking until timeout.
# ---------------------------------------------------------------------------
from tools.interrupt import is_interrupted, _interrupt_event # noqa: F401 — re-exported
# display_hermes_home imported lazily at call site (stale-module safety during hermes update)
def ensure_minisweagent_on_path(_repo_root: Path | None = None) -> None:
@ -69,7 +70,7 @@ from tools.tool_backend_helpers import (
coerce_modal_mode,
has_direct_modal_credentials,
managed_nous_tools_enabled,
normalize_modal_mode,
resolve_modal_backend_state,
)
@ -172,7 +173,8 @@ def _handle_sudo_failure(output: str, env_type: str) -> str:
for failure in sudo_failures:
if failure in output:
return output + "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
from hermes_constants import display_hermes_home as _dhh
return output + f"\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to {_dhh()}/.env on the agent machine."
return output
@ -546,30 +548,12 @@ def _get_env_config() -> Dict[str, Any]:
def _get_modal_backend_state(modal_mode: object | None) -> Dict[str, Any]:
"""Resolve direct vs managed Modal backend selection."""
requested_mode = coerce_modal_mode(modal_mode)
normalized_mode = normalize_modal_mode(modal_mode)
has_direct = has_direct_modal_credentials()
managed_ready = is_managed_tool_gateway_ready("modal")
managed_mode_blocked = (
requested_mode == "managed" and not managed_nous_tools_enabled()
return resolve_modal_backend_state(
modal_mode,
has_direct=has_direct_modal_credentials(),
managed_ready=is_managed_tool_gateway_ready("modal"),
)
if normalized_mode == "managed":
selected_backend = "managed" if managed_ready else None
elif normalized_mode == "direct":
selected_backend = "direct" if has_direct else None
else:
selected_backend = "direct" if has_direct else "managed" if managed_ready else None
return {
"requested_mode": requested_mode,
"mode": normalized_mode,
"has_direct": has_direct,
"managed_ready": managed_ready,
"managed_mode_blocked": managed_mode_blocked,
"selected_backend": selected_backend,
}
def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
ssh_config: dict = None, container_config: dict = None,
@ -1347,8 +1331,8 @@ def check_terminal_requirements() -> bool:
)
return False
if importlib.util.find_spec("swerex") is None:
logger.error("swe-rex is required for direct modal terminal backend: pip install 'swe-rex[modal]'")
if importlib.util.find_spec("modal") is None:
logger.error("modal is required for direct modal terminal backend: pip install modal")
return False
return True
@ -1406,7 +1390,8 @@ if __name__ == "__main__":
print(f" TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
print(f" TERMINAL_DAYTONA_IMAGE: {os.getenv('TERMINAL_DAYTONA_IMAGE', default_img)}")
print(f" TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
print(f" TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', '~/.hermes/sandboxes')}")
from hermes_constants import display_hermes_home as _dhh
print(f" TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', f'{_dhh()}/sandboxes')}")
print(f" TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
print(f" TERMINAL_LIFETIME_SECONDS: {os.getenv('TERMINAL_LIFETIME_SECONDS', '300')}")

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import os
from pathlib import Path
from typing import Any, Dict
from utils import env_var_enabled
@ -33,10 +34,7 @@ def coerce_modal_mode(value: object | None) -> str:
def normalize_modal_mode(value: object | None) -> str:
"""Return a normalized modal execution mode."""
mode = coerce_modal_mode(value)
if mode == "managed" and not managed_nous_tools_enabled():
return "direct"
return mode
return coerce_modal_mode(value)
def has_direct_modal_credentials() -> bool:
@ -47,6 +45,42 @@ def has_direct_modal_credentials() -> bool:
)
def resolve_modal_backend_state(
modal_mode: object | None,
*,
has_direct: bool,
managed_ready: bool,
) -> Dict[str, Any]:
"""Resolve direct vs managed Modal backend selection.
Semantics:
- ``direct`` means direct-only
- ``managed`` means managed-only
- ``auto`` prefers managed when available, then falls back to direct
"""
requested_mode = coerce_modal_mode(modal_mode)
normalized_mode = normalize_modal_mode(modal_mode)
managed_mode_blocked = (
requested_mode == "managed" and not managed_nous_tools_enabled()
)
if normalized_mode == "managed":
selected_backend = "managed" if managed_nous_tools_enabled() and managed_ready else None
elif normalized_mode == "direct":
selected_backend = "direct" if has_direct else None
else:
selected_backend = "managed" if managed_nous_tools_enabled() and managed_ready else "direct" if has_direct else None
return {
"requested_mode": requested_mode,
"mode": normalized_mode,
"has_direct": has_direct,
"managed_ready": managed_ready,
"managed_mode_blocked": managed_mode_blocked,
"selected_backend": selected_backend,
}
def resolve_openai_audio_api_key() -> str:
"""Prefer the voice-tools key, but fall back to the normal OpenAI key."""
return (

View file

@ -12,7 +12,7 @@ Provides speech-to-text transcription with three providers:
Used by the messaging gateway to automatically transcribe voice messages
sent by users on Telegram, Discord, WhatsApp, Slack, and Signal.
Supported input formats: mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg
Supported input formats: mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg, aac
Usage::
@ -74,7 +74,7 @@ COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin")
GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1")
OPENAI_BASE_URL = os.getenv("STT_OPENAI_BASE_URL", "https://api.openai.com/v1")
SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg"}
SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac"}
LOCAL_NATIVE_AUDIO_FORMATS = {".wav", ".aiff", ".aif"}
MAX_FILE_SIZE = 25 * 1024 * 1024 # 25 MB

View file

@ -34,7 +34,6 @@ import tempfile
import threading
import uuid
from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Callable, Dict, Any, Optional
from urllib.parse import urljoin
@ -79,7 +78,12 @@ DEFAULT_ELEVENLABS_STREAMING_MODEL_ID = "eleven_flash_v2_5"
DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts"
DEFAULT_OPENAI_VOICE = "alloy"
DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"
DEFAULT_OUTPUT_DIR = str(get_hermes_home() / "audio_cache")
def _get_default_output_dir() -> str:
from hermes_constants import get_hermes_dir
return str(get_hermes_dir("cache/audio", "audio_cache"))
DEFAULT_OUTPUT_DIR = _get_default_output_dir()
MAX_TEXT_LENGTH = 4000
@ -107,7 +111,7 @@ def _load_tts_config() -> Dict[str, Any]:
def _get_provider(tts_config: Dict[str, Any]) -> str:
"""Get the configured TTS provider name."""
return tts_config.get("provider", DEFAULT_PROVIDER).lower().strip()
return (tts_config.get("provider") or DEFAULT_PROVIDER).lower().strip()
# ===========================================================================

View file

@ -37,8 +37,9 @@ from pathlib import Path
from typing import Any, Awaitable, Dict, Optional
from urllib.parse import urlparse
import httpx
from agent.auxiliary_client import async_call_llm
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
from tools.debug_helpers import DebugSession
from tools.website_policy import check_website_access
logger = logging.getLogger(__name__)
@ -76,6 +77,28 @@ def _validate_image_url(url: str) -> bool:
return True
def _detect_image_mime_type(image_path: Path) -> Optional[str]:
"""Return a MIME type when the file looks like a supported image."""
with image_path.open("rb") as f:
header = f.read(64)
if header.startswith(b"\x89PNG\r\n\x1a\n"):
return "image/png"
if header.startswith(b"\xff\xd8\xff"):
return "image/jpeg"
if header.startswith((b"GIF87a", b"GIF89a")):
return "image/gif"
if header.startswith(b"BM"):
return "image/bmp"
if len(header) >= 12 and header[:4] == b"RIFF" and header[8:12] == b"WEBP":
return "image/webp"
if image_path.suffix.lower() == ".svg":
head = image_path.read_text(encoding="utf-8", errors="ignore")[:4096].lower()
if "<svg" in head:
return "image/svg+xml"
return None
async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
"""
Download an image from a URL to a local destination (async) with retry logic.
@ -115,6 +138,10 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
last_error = None
for attempt in range(max_retries):
try:
blocked = check_website_access(image_url)
if blocked:
raise PermissionError(blocked["message"])
# Download the image with appropriate headers using async httpx
# Enable follow_redirects to handle image CDNs that redirect (e.g., Imgur, Picsum)
# SSRF: event_hooks validates each redirect target against private IP ranges
@ -131,6 +158,11 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
},
)
response.raise_for_status()
final_url = str(response.url)
blocked = check_website_access(final_url)
if blocked:
raise PermissionError(blocked["message"])
# Save the image content
destination.write_bytes(response.content)
@ -257,6 +289,7 @@ async def vision_analyze_tool(
# Track whether we should clean up the file after processing.
# Local files (e.g. from the image cache) should NOT be deleted.
should_cleanup = True
detected_mime_type = None
try:
from tools.interrupt import is_interrupted
@ -275,6 +308,9 @@ async def vision_analyze_tool(
should_cleanup = False # Don't delete cached/local files
elif _validate_image_url(image_url):
# Remote URL -- download to a temporary location
blocked = check_website_access(image_url)
if blocked:
raise PermissionError(blocked["message"])
logger.info("Downloading image from URL...")
temp_dir = Path("./temp_vision_images")
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
@ -289,10 +325,14 @@ async def vision_analyze_tool(
image_size_bytes = temp_image_path.stat().st_size
image_size_kb = image_size_bytes / 1024
logger.info("Image ready (%.1f KB)", image_size_kb)
detected_mime_type = _detect_image_mime_type(temp_image_path)
if not detected_mime_type:
raise ValueError("Only real image files are supported for vision analysis.")
# Convert image to base64 data URL
logger.info("Converting image to base64...")
image_data_url = _image_to_base64_data_url(temp_image_path)
image_data_url = _image_to_base64_data_url(temp_image_path, mime_type=detected_mime_type)
# Calculate size in KB for better readability
data_size_kb = len(image_data_url) / 1024
logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
@ -346,8 +386,15 @@ async def vision_analyze_tool(
call_kwargs["model"] = model
response = await async_call_llm(**call_kwargs)
# Extract the analysis
analysis = response.choices[0].message.content.strip()
# Extract the analysis — fall back to reasoning if content is empty
analysis = extract_content_or_reasoning(response)
# Retry once on empty content (reasoning-only response)
if not analysis:
logger.warning("Vision LLM returned empty content, retrying once")
response = await async_call_llm(**call_kwargs)
analysis = extract_content_or_reasoning(response)
analysis_length = len(analysis)
logger.info("Image analysis completed (%s characters)", analysis_length)

View file

@ -13,6 +13,7 @@ Available tools:
- web_crawl_tool: Crawl websites with specific instructions
Backend compatibility:
- Exa: https://exa.ai (search, extract)
- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl; direct or derived firecrawl-gateway.<domain> for Nous Subscribers)
- Parallel: https://docs.parallel.ai (search, extract)
- Tavily: https://tavily.com (search, extract, crawl)
@ -47,7 +48,11 @@ import asyncio
from typing import List, Dict, Any, Optional
import httpx
from firecrawl import Firecrawl
from agent.auxiliary_client import get_async_text_auxiliary_client
from agent.auxiliary_client import (
async_call_llm,
extract_content_or_reasoning,
get_async_text_auxiliary_client,
)
from tools.debug_helpers import DebugSession
from tools.managed_tool_gateway import (
build_vendor_gateway_url,
@ -82,8 +87,8 @@ def _get_backend() -> str:
Falls back to whichever API key is present for users who configured
keys manually without running setup.
"""
configured = _load_web_config().get("backend", "").lower().strip()
if configured in ("parallel", "firecrawl", "tavily"):
configured = (_load_web_config().get("backend") or "").lower().strip()
if configured in ("parallel", "firecrawl", "tavily", "exa"):
return configured
# Fallback for manual / legacy config — use whichever key is present.
@ -94,6 +99,9 @@ def _get_backend() -> str:
)
has_parallel = _has_env("PARALLEL_API_KEY")
has_tavily = _has_env("TAVILY_API_KEY")
has_exa = _has_env("EXA_API_KEY")
if has_exa and not has_firecrawl and not has_parallel and not has_tavily:
return "exa"
if has_tavily and not has_firecrawl and not has_parallel:
return "tavily"
if has_parallel and not has_firecrawl:
@ -105,6 +113,8 @@ def _get_backend() -> str:
def _is_backend_available(backend: str) -> bool:
"""Return True when the selected backend is currently usable."""
if backend == "exa":
return _has_env("EXA_API_KEY")
if backend == "parallel":
return _has_env("PARALLEL_API_KEY")
if backend == "firecrawl":
@ -178,6 +188,7 @@ def _firecrawl_backend_help_suffix() -> str:
def _web_requires_env() -> list[str]:
"""Return tool metadata env vars for the currently enabled web backends."""
requires = [
"EXA_API_KEY",
"PARALLEL_API_KEY",
"TAVILY_API_KEY",
"FIRECRAWL_API_KEY",
@ -621,18 +632,32 @@ Create a markdown summary that captures all key information in a well-organized,
if aux_client is None or not effective_model:
logger.warning("No auxiliary model available for web content processing")
return None
from agent.auxiliary_client import auxiliary_max_tokens_param
response = await aux_client.chat.completions.create(
model=effective_model,
messages=[
call_kwargs = {
"task": "web_extract",
"model": effective_model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
{"role": "user", "content": user_prompt},
],
temperature=0.1,
**auxiliary_max_tokens_param(max_tokens),
**({} if not extra_body else {"extra_body": extra_body}),
)
return response.choices[0].message.content.strip()
"temperature": 0.1,
"max_tokens": max_tokens,
}
if extra_body:
call_kwargs["extra_body"] = extra_body
response = await async_call_llm(**call_kwargs)
content = extract_content_or_reasoning(response)
if content:
return content
# Reasoning-only / empty response — let the retry loop handle it
logger.warning("LLM returned empty content (attempt %d/%d), retrying", attempt + 1, max_retries)
if attempt < max_retries - 1:
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60)
continue
return content # Return whatever we got after exhausting retries
except RuntimeError:
logger.warning("No auxiliary model available for web content processing")
return None
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
@ -744,19 +769,26 @@ Create a single, unified markdown summary."""
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
from agent.auxiliary_client import auxiliary_max_tokens_param
response = await aux_client.chat.completions.create(
model=effective_model,
messages=[
call_kwargs = {
"task": "web_extract",
"model": effective_model,
"messages": [
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
{"role": "user", "content": synthesis_prompt}
{"role": "user", "content": synthesis_prompt},
],
temperature=0.1,
**auxiliary_max_tokens_param(20000),
**({} if not extra_body else {"extra_body": extra_body}),
)
final_summary = response.choices[0].message.content.strip()
"temperature": 0.1,
"max_tokens": 20000,
}
if extra_body:
call_kwargs["extra_body"] = extra_body
response = await async_call_llm(**call_kwargs)
final_summary = extract_content_or_reasoning(response)
# Retry once on empty content (reasoning-only response)
if not final_summary:
logger.warning("Synthesis LLM returned empty content, retrying once")
response = await async_call_llm(**call_kwargs)
final_summary = extract_content_or_reasoning(response)
# Enforce hard cap
if len(final_summary) > max_output_size:
final_summary = final_summary[:max_output_size] + "\n\n[... summary truncated for context management ...]"
@ -810,6 +842,91 @@ def clean_base64_images(text: str) -> str:
return cleaned_text
# ─── Exa Client ──────────────────────────────────────────────────────────────
_exa_client = None
def _get_exa_client():
"""Get or create the Exa client (lazy initialization).
Requires EXA_API_KEY environment variable.
"""
from exa_py import Exa
global _exa_client
if _exa_client is None:
api_key = os.getenv("EXA_API_KEY")
if not api_key:
raise ValueError(
"EXA_API_KEY environment variable not set. "
"Get your API key at https://exa.ai"
)
_exa_client = Exa(api_key=api_key)
_exa_client.headers["x-exa-integration"] = "hermes-agent"
return _exa_client
# ─── Exa Search & Extract Helpers ─────────────────────────────────────────────
def _exa_search(query: str, limit: int = 10) -> dict:
"""Search using the Exa SDK and return results as a dict."""
from tools.interrupt import is_interrupted
if is_interrupted():
return {"error": "Interrupted", "success": False}
logger.info("Exa search: '%s' (limit=%d)", query, limit)
response = _get_exa_client().search(
query,
num_results=limit,
contents={
"highlights": True,
},
)
web_results = []
for i, result in enumerate(response.results or []):
highlights = result.highlights or []
web_results.append({
"url": result.url or "",
"title": result.title or "",
"description": " ".join(highlights) if highlights else "",
"position": i + 1,
})
return {"success": True, "data": {"web": web_results}}
def _exa_extract(urls: List[str]) -> List[Dict[str, Any]]:
"""Extract content from URLs using the Exa SDK.
Returns a list of result dicts matching the structure expected by the
LLM post-processing pipeline (url, title, content, metadata).
"""
from tools.interrupt import is_interrupted
if is_interrupted():
return [{"url": u, "error": "Interrupted", "title": ""} for u in urls]
logger.info("Exa extract: %d URL(s)", len(urls))
response = _get_exa_client().get_contents(
urls,
text=True,
)
results = []
for result in response.results or []:
content = result.text or ""
url = result.url or ""
title = result.title or ""
results.append({
"url": url,
"title": title,
"content": content,
"raw_content": content,
"metadata": {"sourceURL": url, "title": title},
})
return results
# ─── Parallel Search & Extract Helpers ────────────────────────────────────────
def _parallel_search(query: str, limit: int = 5) -> dict:
@ -947,6 +1064,15 @@ def web_search_tool(query: str, limit: int = 5) -> str:
_debug.save()
return result_json
if backend == "exa":
response_data = _exa_search(query, limit)
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
debug_call_data["final_response_size"] = len(result_json)
_debug.log_call("web_search_tool", debug_call_data)
_debug.save()
return result_json
if backend == "tavily":
logger.info("Tavily search: '%s' (limit: %d)", query, limit)
raw = _tavily_request("search", {
@ -1074,6 +1200,8 @@ async def web_extract_tool(
if backend == "parallel":
results = await _parallel_extract(safe_urls)
elif backend == "exa":
results = _exa_extract(safe_urls)
elif backend == "tavily":
logger.info("Tavily extract: %d URL(s)", len(safe_urls))
raw = _tavily_request("extract", {
@ -1737,9 +1865,9 @@ def check_firecrawl_api_key() -> bool:
def check_web_api_key() -> bool:
"""Check whether the configured web backend is available."""
configured = _load_web_config().get("backend", "").lower().strip()
if configured in ("parallel", "firecrawl", "tavily"):
if configured in ("exa", "parallel", "firecrawl", "tavily"):
return _is_backend_available(configured)
return any(_is_backend_available(backend) for backend in ("parallel", "firecrawl", "tavily"))
return any(_is_backend_available(backend) for backend in ("exa", "parallel", "firecrawl", "tavily"))
def check_auxiliary_model() -> bool:
@ -1771,7 +1899,9 @@ if __name__ == "__main__":
if web_available:
backend = _get_backend()
print(f"✅ Web backend: {backend}")
if backend == "parallel":
if backend == "exa":
print(" Using Exa API (https://exa.ai)")
elif backend == "parallel":
print(" Using Parallel API (https://parallel.ai)")
elif backend == "tavily":
print(" Using Tavily API (https://tavily.com)")
@ -1787,7 +1917,7 @@ if __name__ == "__main__":
else:
print("❌ No web search backend configured")
print(
"Set PARALLEL_API_KEY, TAVILY_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL"
"Set EXA_API_KEY, PARALLEL_API_KEY, TAVILY_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL"
f"{_firecrawl_backend_help_suffix()}"
)