mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
feat(skills-guard): gate agent-created scanner on config.skills.guard_agent_created (default off)
Replaces the blanket 'always allow' change from the previous commit with
an opt-in config flag so users who want belt-and-suspenders security can
still get the keyword scan on skill_manage output.
## Default behavior (flag off)
skill_manage(action='create'|'edit'|'patch') no longer runs the keyword
scanner. The agent can write skills that mention risky keywords in prose
(documenting what reviewers should watch for, describing cache-bust
semantics in a PR-review skill, referencing AGENTS.md, etc.) without
getting blocked.
Rationale: the agent can already execute the same code paths via
terminal() with no gate, so the scan adds friction without meaningful
security against a compromised or malicious agent.
## Opt-in behavior (flag on)
Set skills.guard_agent_created: true in config.yaml to get the original
behavior back. Scanner runs on every skill_manage write; dangerous
verdicts surface as a tool error the agent can react to (retry without
the flagged content).
## External hub installs unaffected
Trusted/community sources (hermes skills install) are always scanned
regardless of this flag. The gate is specifically for skill_manage,
which only agents call.
## Changes
- hermes_cli/config.py: add skills.guard_agent_created: False to DEFAULT_CONFIG
- tools/skill_manager_tool.py: _guard_agent_created_enabled() reads the flag;
_security_scan_skill() short-circuits to None when the flag is off
- tools/skills_guard.py: restore INSTALL_POLICY['agent-created'] =
('allow', 'allow', 'ask') so the scan remains strict when it does run
- tests/tools/test_skills_guard.py: restore original ask/force tests
- tests/tools/test_skill_manager_tool.py: new TestSecurityScanGate class
covering both flag states + config error handling
## Validation
- tests/tools/test_skills_guard.py + test_skill_manager_tool.py: 115/115 pass
- E2E: flagged-keyword skill creates with default config, blocks with flag on
This commit is contained in:
parent
e3c0084140
commit
ce089169d5
5 changed files with 134 additions and 22 deletions
|
|
@@ -484,3 +484,85 @@ class TestSkillManageDispatcher:
|
|||
raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT)
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
class TestSecurityScanGate:
    """Exercise the skills.guard_agent_created gate around _security_scan_skill.

    The first three tests stub out ``_guard_agent_created_enabled`` and
    ``scan_skill`` to check how ``_security_scan_skill`` reacts to the flag.
    The last three stub out ``hermes_cli.config.load_config`` to check how
    ``_guard_agent_created_enabled`` derives the flag from configuration.
    """

    def test_scan_noop_when_flag_off(self, tmp_path):
        """Flag off: the function returns None and scan_skill is never called."""
        from tools.skill_manager_tool import _security_scan_skill

        with patch(
            "tools.skill_manager_tool._guard_agent_created_enabled",
            return_value=False,
        ):
            with patch("tools.skill_manager_tool.scan_skill") as scan_spy:
                outcome = _security_scan_skill(tmp_path)

        assert outcome is None
        # The scanner must not have been touched at all.
        scan_spy.assert_not_called()

    def test_scan_runs_when_flag_on(self, tmp_path):
        """Flag on + safe verdict: scan_skill runs once and None is returned."""
        from tools.skill_manager_tool import _security_scan_skill
        from tools.skills_guard import ScanResult

        # A clean scan result; the function under test should let it through.
        safe_fields = {
            "skill_name": "test",
            "source": "agent-created",
            "trust_level": "agent-created",
            "verdict": "safe",
            "findings": [],
            "summary": "ok",
        }
        safe_scan = ScanResult(**safe_fields)

        with patch(
            "tools.skill_manager_tool._guard_agent_created_enabled",
            return_value=True,
        ):
            with patch(
                "tools.skill_manager_tool.scan_skill",
                return_value=safe_scan,
            ) as scan_spy:
                outcome = _security_scan_skill(tmp_path)

        assert outcome is None
        scan_spy.assert_called_once()

    def test_scan_blocks_dangerous_when_flag_on(self, tmp_path):
        """Flag on + dangerous verdict: a 'Security scan blocked' message comes back."""
        from tools.skill_manager_tool import _security_scan_skill
        from tools.skills_guard import ScanResult, Finding

        critical_hit = Finding(
            **{
                "pattern_id": "test",
                "severity": "critical",
                "category": "exfiltration",
                "file": "SKILL.md",
                "line": 1,
                "match": "curl $TOKEN",
                "description": "test",
            }
        )
        dangerous_scan = ScanResult(
            **{
                "skill_name": "test",
                "source": "agent-created",
                "trust_level": "agent-created",
                "verdict": "dangerous",
                "findings": [critical_hit],
                "summary": "dangerous",
            }
        )

        with patch(
            "tools.skill_manager_tool._guard_agent_created_enabled",
            return_value=True,
        ):
            with patch(
                "tools.skill_manager_tool.scan_skill",
                return_value=dangerous_scan,
            ):
                outcome = _security_scan_skill(tmp_path)

        assert outcome is not None
        assert "Security scan blocked" in outcome

    def test_guard_flag_reads_config_default_false(self):
        """An empty ``skills`` section makes the guard flag resolve to False."""
        from tools.skill_manager_tool import _guard_agent_created_enabled

        empty_skills_config = {"skills": {}}
        with patch("hermes_cli.config.load_config", return_value=empty_skills_config):
            assert _guard_agent_created_enabled() is False

    def test_guard_flag_reads_config_when_set(self):
        """``guard_agent_created: True`` in config makes the guard flag True."""
        from tools.skill_manager_tool import _guard_agent_created_enabled

        enabled_config = {"skills": {"guard_agent_created": True}}
        with patch("hermes_cli.config.load_config", return_value=enabled_config):
            assert _guard_agent_created_enabled() is True

    def test_guard_flag_handles_config_error(self):
        """A load_config failure is swallowed and the guard flag stays False."""
        from tools.skill_manager_tool import _guard_agent_created_enabled

        config_failure = RuntimeError("boom")
        with patch("hermes_cli.config.load_config", side_effect=config_failure):
            assert _guard_agent_created_enabled() is False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue