diff --git a/tests/tools/test_skills_guard.py b/tests/tools/test_skills_guard.py
index 6fcd05b31c..12c527ca78 100644
--- a/tests/tools/test_skills_guard.py
+++ b/tests/tools/test_skills_guard.py
@@ -174,20 +174,27 @@ class TestShouldAllowInstall:
         assert allowed is True
         assert "agent-created" in reason
 
-    def test_dangerous_agent_created_asks(self):
-        """Agent-created skills with dangerous verdict return None (ask for confirmation)."""
+    def test_dangerous_agent_created_allowed(self):
+        """Agent-created skills bypass verdict gating — agent can already
+        execute the same code via terminal(), so skill_manage allows all
+        verdicts. This prevents friction when the agent writes skills that
+        mention risky keywords in prose (e.g. describing cache-busting or
+        persistence semantics in a PR-review skill)."""
         f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")]
         allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f))
-        assert allowed is None
-        assert "Requires confirmation" in reason
+        assert allowed is True
+        assert "agent-created" in reason
 
-    def test_force_overrides_dangerous_for_agent_created(self):
+    def test_force_noop_for_agent_created_dangerous(self):
+        """With agent-created dangerous mapped to 'allow', force becomes a
+        no-op — the allow branch returns first. Force still works for any
+        trust level that maps to block (community/trusted)."""
         f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
         allowed, reason = should_allow_install(
             self._result("agent-created", "dangerous", f), force=True
         )
         assert allowed is True
-        assert "Force-installed" in reason
+        assert "agent-created" in reason
 
 
 # ---------------------------------------------------------------------------
diff --git a/tools/skills_guard.py b/tools/skills_guard.py
index 3513f46f04..fadbb8173a 100644
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -43,7 +43,11 @@ INSTALL_POLICY = {
     "builtin": ("allow", "allow", "allow"),
     "trusted": ("allow", "allow", "block"),
     "community": ("allow", "block", "block"),
-    "agent-created": ("allow", "allow", "ask"),
+    # Agent-created skills run in the same process as the agent that
+    # wrote them — the agent could already execute the same code via
+    # terminal(), so a dangerous-pattern gate on skill_manage adds
+    # friction without meaningful security. Allow all verdicts.
+    "agent-created": ("allow", "allow", "allow"),
 }
 
 VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}