security: supply chain hardening — CI pinning, dep pinning, and code fixes (#9801)

CI/CD Hardening:
- Pin all 12 GitHub Actions to full commit SHAs (was mutable @vN tags)
- Add explicit permissions: {contents: read} to 4 workflows
- Pin CI pip installs to exact versions (pyyaml==6.0.2, httpx==0.28.1)
- Extend supply-chain-audit.yml to scan workflow, Dockerfile, dependency
  manifest, and Actions version changes

Dependency Pinning:
- Pin git-based Python deps to commit SHAs (atroposlib, tinker, yc-bench)
- Pin WhatsApp Baileys from mutable branch to commit SHA

Tool Registry:
- Reject tool name shadowing from different tool families (plugins/MCP
  cannot overwrite built-in tools). MCP-to-MCP overwrites still allowed.

MCP Security:
- Add tool description content scanning for prompt injection patterns
- Log detailed change diff on dynamic tool refresh at WARNING level

Skill Manager:
- Fix dangerous verdict bug: agent-created skills with dangerous
  findings were silently allowed (ask->None->allow). Now blocked.
This commit is contained in:
Teknium 2026-04-14 14:23:37 -07:00 committed by GitHub
parent 9bbf7659e9
commit eed891f1bb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 214 additions and 53 deletions

View file

@ -219,6 +219,58 @@ def _sanitize_error(text: str) -> str:
return _CREDENTIAL_PATTERN.sub("[REDACTED]", text)
# ---------------------------------------------------------------------------
# MCP tool description content scanning
# ---------------------------------------------------------------------------
# Patterns that indicate potential prompt injection in MCP tool descriptions.
# These are WARNING-level — we log but don't block, since false positives
# would break legitimate MCP servers.
_MCP_INJECTION_PATTERNS = [
(re.compile(r"ignore\s+(all\s+)?previous\s+instructions", re.I),
"prompt override attempt ('ignore previous instructions')"),
(re.compile(r"you\s+are\s+now\s+a", re.I),
"identity override attempt ('you are now a...')"),
(re.compile(r"your\s+new\s+(task|role|instructions?)\s+(is|are)", re.I),
"task override attempt"),
(re.compile(r"system\s*:\s*", re.I),
"system prompt injection attempt"),
(re.compile(r"<\s*(system|human|assistant)\s*>", re.I),
"role tag injection attempt"),
(re.compile(r"do\s+not\s+(tell|inform|mention|reveal)", re.I),
"concealment instruction"),
(re.compile(r"(curl|wget|fetch)\s+https?://", re.I),
"network command in description"),
(re.compile(r"base64\.(b64decode|decodebytes)", re.I),
"base64 decode reference"),
(re.compile(r"exec\s*\(|eval\s*\(", re.I),
"code execution reference"),
(re.compile(r"import\s+(subprocess|os|shutil|socket)", re.I),
"dangerous import reference"),
]
def _scan_mcp_description(server_name: str, tool_name: str, description: str) -> List[str]:
"""Scan an MCP tool description for prompt injection patterns.
Returns a list of finding strings (empty = clean).
"""
findings = []
if not description:
return findings
for pattern, reason in _MCP_INJECTION_PATTERNS:
if pattern.search(description):
findings.append(reason)
if findings:
logger.warning(
"MCP server '%s' tool '%s': suspicious description content — %s. "
"Description: %.200s",
server_name, tool_name, "; ".join(findings),
description,
)
return findings
def _prepend_path(env: dict, directory: str) -> dict:
"""Prepend *directory* to env PATH if it is not already present."""
updated = dict(env or {})
@ -798,6 +850,9 @@ class MCPServerTask:
from toolsets import TOOLSETS
async with self._refresh_lock:
# Capture old tool names for change diff
old_tool_names = set(self._registered_tool_names)
# 1. Fetch current tool list from server
tools_result = await self.session.list_tools()
new_mcp_tools = tools_result.tools if hasattr(tools_result, "tools") else []
@ -817,10 +872,26 @@ class MCPServerTask:
self.name, self, self._config
)
logger.info(
"MCP server '%s': dynamically refreshed %d tool(s)",
self.name, len(self._registered_tool_names),
)
# 5. Log what changed (user-visible notification)
new_tool_names = set(self._registered_tool_names)
added = new_tool_names - old_tool_names
removed = old_tool_names - new_tool_names
changes = []
if added:
changes.append(f"added: {', '.join(sorted(added))}")
if removed:
changes.append(f"removed: {', '.join(sorted(removed))}")
if changes:
logger.warning(
"MCP server '%s': tools changed dynamically — %s. "
"Verify these changes are expected.",
self.name, "; ".join(changes),
)
else:
logger.info(
"MCP server '%s': dynamically refreshed %d tool(s) (no changes)",
self.name, len(self._registered_tool_names),
)
async def _run_stdio(self, config: dict):
"""Run the server using stdio transport."""
@ -1838,6 +1909,10 @@ def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> Li
if not _should_register(mcp_tool.name):
logger.debug("MCP server '%s': skipping tool '%s' (filtered by config)", name, mcp_tool.name)
continue
# Scan tool description for prompt injection patterns
_scan_mcp_description(name, mcp_tool.name, mcp_tool.description or "")
schema = _convert_mcp_schema(name, mcp_tool)
tool_name_prefixed = schema["name"]