fix(skills_guard): agent-created dangerous skills ask instead of block

Changes the policy for agent-created skills with critical security
findings from 'block' (rejected outright) to 'ask' (allowed, with a
warning logged). The agent created the skill itself, so blocking it
entirely is too aggressive — let it through, but log the findings.

- Policy: agent-created + dangerous verdict changed from 'block' to 'ask'
- should_allow_install returns (None, reason) for 'ask' (vs. True/False for allow/block)
- format_scan_report shows 'NEEDS CONFIRMATION' for 'ask'
- skill_manager_tool.py caller handles None (allows the skill and logs a warning)
- force=True still overrides as before

Based on PR #2271 by redhelix (closed — 3200 lines of unrelated
Mission Control code excluded).
This commit is contained in:
Teknium 2026-03-22 03:56:02 -07:00
parent 887e8a8d84
commit 0b370f2dd9
No known key found for this signature in database
3 changed files with 26 additions and 8 deletions

View file

@ -167,12 +167,12 @@ class TestShouldAllowInstall:
assert allowed is True
assert "agent-created" in reason
def test_dangerous_agent_created_blocked(self):
"""Agent-created skills with dangerous verdict (critical findings) stay blocked."""
def test_dangerous_agent_created_asks(self):
"""Agent-created skills with dangerous verdict return None (ask for confirmation)."""
f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")]
allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f))
assert allowed is False
assert "Blocked" in reason
assert allowed is None
assert "Requires confirmation" in reason
def test_force_overrides_dangerous_for_agent_created(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]

View file

@ -59,9 +59,15 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]:
try:
result = scan_skill(skill_dir, source="agent-created")
allowed, reason = should_allow_install(result)
if not allowed:
if allowed is False:
report = format_scan_report(result)
return f"Security scan blocked this skill ({reason}):\n{report}"
if allowed is None:
# "ask" — allow but include the warning so the user sees the findings
report = format_scan_report(result)
logger.warning("Agent-created skill has security findings: %s", reason)
# Don't block — return None to allow, but log the warning
return None
except Exception as e:
logger.warning("Security scan failed for %s: %s", skill_dir, e, exc_info=True)
return None

View file

@ -43,7 +43,7 @@ INSTALL_POLICY = {
"builtin": ("allow", "allow", "allow"),
"trusted": ("allow", "allow", "block"),
"community": ("allow", "block", "block"),
"agent-created": ("allow", "allow", "block"),
"agent-created": ("allow", "allow", "ask"),
}
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
@ -659,10 +659,17 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
if force:
return True, (
f"Force-installed despite blocked {result.verdict} verdict "
f"Force-installed despite {result.verdict} verdict "
f"({len(result.findings)} findings)"
)
if decision == "ask":
# Return None to signal "needs user confirmation"
return None, (
f"Requires confirmation ({result.trust_level} source + {result.verdict} verdict, "
f"{len(result.findings)} findings)"
)
return False, (
f"Blocked ({result.trust_level} source + {result.verdict} verdict, "
f"{len(result.findings)} findings). Use --force to override."
@ -694,7 +701,12 @@ def format_scan_report(result: ScanResult) -> str:
lines.append("")
allowed, reason = should_allow_install(result)
status = "ALLOWED" if allowed else "BLOCKED"
if allowed is True:
status = "ALLOWED"
elif allowed is None:
status = "NEEDS CONFIRMATION"
else:
status = "BLOCKED"
lines.append(f"Decision: {status}{reason}")
return "\n".join(lines)