mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(skills-guard): gate agent-created scanner on config.skills.guard_agent_created (default off)
Replaces the blanket 'always allow' change from the previous commit with
an opt-in config flag so users who want belt-and-suspenders security can
still get the keyword scan on skill_manage output.
## Default behavior (flag off)
skill_manage(action='create'|'edit'|'patch') no longer runs the keyword
scanner. The agent can write skills that mention risky keywords in prose
(documenting what reviewers should watch for, describing cache-bust
semantics in a PR-review skill, referencing AGENTS.md, etc.) without
getting blocked.
Rationale: the agent can already execute the same code paths via
terminal() with no gate, so the scan adds friction without meaningful
security against a compromised or malicious agent.
## Opt-in behavior (flag on)
Set skills.guard_agent_created: true in config.yaml to get the original
behavior back. Scanner runs on every skill_manage write; dangerous
verdicts surface as a tool error the agent can react to (retry without
the flagged content).
## External hub installs unaffected
Skills from trusted/community sources (installed via `hermes skills install`)
are always scanned, regardless of this flag. The gate applies only to
skill_manage, which only agents call.
## Changes
- hermes_cli/config.py: add skills.guard_agent_created: False to DEFAULT_CONFIG
- tools/skill_manager_tool.py: _guard_agent_created_enabled() reads the flag;
_security_scan_skill() short-circuits to None when the flag is off
- tools/skills_guard.py: restore INSTALL_POLICY['agent-created'] =
('allow', 'allow', 'ask') so the scan remains strict when it does run
- tests/tools/test_skills_guard.py: restore original ask/force tests
- tests/tools/test_skill_manager_tool.py: new TestSecurityScanGate class
covering both flag states + config error handling
## Validation
- tests/tools/test_skills_guard.py + test_skill_manager_tool.py: 115/115 pass
- E2E: flagged-keyword skill creates with default config, blocks with flag on
This commit is contained in:
parent
e3c0084140
commit
ce089169d5
5 changed files with 134 additions and 22 deletions
|
|
@ -44,8 +44,8 @@ from typing import Dict, Any, Optional, Tuple
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import security scanner — agent-created skills get the same scrutiny as
|
||||
# community hub installs.
|
||||
# Import security scanner — external hub installs always get scanned;
|
||||
# agent-created skills only get scanned when skills.guard_agent_created is on.
|
||||
try:
|
||||
from tools.skills_guard import scan_skill, should_allow_install, format_scan_report
|
||||
_GUARD_AVAILABLE = True
|
||||
|
|
@ -53,10 +53,31 @@ except ImportError:
|
|||
_GUARD_AVAILABLE = False
|
||||
|
||||
|
||||
def _guard_agent_created_enabled() -> bool:
|
||||
"""Read skills.guard_agent_created from config (default False).
|
||||
|
||||
Off by default because the agent can already execute the same code
|
||||
paths via terminal() with no gate, so the scan adds friction without
|
||||
meaningful security. Users who want belt-and-suspenders can turn it
|
||||
on via `hermes config set skills.guard_agent_created true`.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
return bool(cfg.get("skills", {}).get("guard_agent_created", False))
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _security_scan_skill(skill_dir: Path) -> Optional[str]:
|
||||
"""Scan a skill directory after write. Returns error string if blocked, else None."""
|
||||
"""Scan a skill directory after write. Returns error string if blocked, else None.
|
||||
|
||||
No-op when skills.guard_agent_created is disabled (the default).
|
||||
"""
|
||||
if not _GUARD_AVAILABLE:
|
||||
return None
|
||||
if not _guard_agent_created_enabled():
|
||||
return None
|
||||
try:
|
||||
result = scan_skill(skill_dir, source="agent-created")
|
||||
allowed, reason = should_allow_install(result)
|
||||
|
|
@ -65,7 +86,8 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]:
|
|||
return f"Security scan blocked this skill ({reason}):\n{report}"
|
||||
if allowed is None:
|
||||
# "ask" verdict — for agent-created skills this means dangerous
|
||||
# findings were detected. Block the skill and include the report.
|
||||
# findings were detected. Surface as an error so the agent can
|
||||
# retry with the flagged content removed.
|
||||
report = format_scan_report(result)
|
||||
logger.warning("Agent-created skill blocked (dangerous findings): %s", reason)
|
||||
return f"Security scan blocked this skill ({reason}):\n{report}"
|
||||
|
|
|
|||
|
|
@ -43,11 +43,11 @@ INSTALL_POLICY = {
|
|||
"builtin": ("allow", "allow", "allow"),
|
||||
"trusted": ("allow", "allow", "block"),
|
||||
"community": ("allow", "block", "block"),
|
||||
# Agent-created skills run in the same process as the agent that
|
||||
# wrote them — the agent could already execute the same code via
|
||||
# terminal(), so a dangerous-pattern gate on skill_manage adds
|
||||
# friction without meaningful security. Allow all verdicts.
|
||||
"agent-created": ("allow", "allow", "allow"),
|
||||
# Agent-created: "ask" on dangerous surfaces as an error to the agent,
|
||||
# which can retry without the flagged content. This gate only runs when
|
||||
# skills.guard_agent_created is enabled (off by default) — see
|
||||
# tools/skill_manager_tool.py::_guard_agent_created_enabled.
|
||||
"agent-created": ("allow", "allow", "ask"),
|
||||
}
|
||||
|
||||
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue