security: supply chain hardening — CI pinning, dep pinning, and code fixes (#9801)

CI/CD Hardening: - Pin all 12 GitHub Actions to full commit SHAs (was mutable @vN tags) - Add explicit permissions: {contents: read} to 4 workflows - Pin CI pip installs to exact versions (pyyaml==6.0.2, httpx==0.28.1) - Extend supply-chain-audit.yml to scan workflow, Dockerfile, dependency manifest, and Actions version changes Dependency Pinning: - Pin git-based Python deps to commit SHAs (atroposlib, tinker, yc-bench) - Pin WhatsApp Baileys from mutable branch to commit SHA Tool Registry: - Reject tool name shadowing from different tool families (plugins/MCP cannot overwrite built-in tools). MCP-to-MCP overwrites still allowed. MCP Security: - Add tool description content scanning for prompt injection patterns - Log detailed change diff on dynamic tool refresh at WARNING level Skill Manager: - Fix dangerous verdict bug: agent-created skills with dangerous findings were silently allowed (ask->None->allow). Now blocked.
2026-05-04 02:21:47 +00:00 · 2026-04-14 14:23:37 -07:00 · 2026-04-14 14:23:37 -07:00 · eed891f1bb
commit eed891f1bb
parent 9bbf7659e9
14 changed files with 214 additions and 53 deletions
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@ -219,6 +219,58 @@ def _sanitize_error(text: str) -> str:
    return _CREDENTIAL_PATTERN.sub("[REDACTED]", text)


+# ---------------------------------------------------------------------------
+# MCP tool description content scanning
+# ---------------------------------------------------------------------------
+
+# Patterns that indicate potential prompt injection in MCP tool descriptions.
+# These are WARNING-level — we log but don't block, since false positives
+# would break legitimate MCP servers.
+_MCP_INJECTION_PATTERNS = [
+    (re.compile(r"ignore\s+(all\s+)?previous\s+instructions", re.I),
+     "prompt override attempt ('ignore previous instructions')"),
+    (re.compile(r"you\s+are\s+now\s+a", re.I),
+     "identity override attempt ('you are now a...')"),
+    (re.compile(r"your\s+new\s+(task|role|instructions?)\s+(is|are)", re.I),
+     "task override attempt"),
+    (re.compile(r"system\s*:\s*", re.I),
+     "system prompt injection attempt"),
+    (re.compile(r"<\s*(system|human|assistant)\s*>", re.I),
+     "role tag injection attempt"),
+    (re.compile(r"do\s+not\s+(tell|inform|mention|reveal)", re.I),
+     "concealment instruction"),
+    (re.compile(r"(curl|wget|fetch)\s+https?://", re.I),
+     "network command in description"),
+    (re.compile(r"base64\.(b64decode|decodebytes)", re.I),
+     "base64 decode reference"),
+    (re.compile(r"exec\s*\(|eval\s*\(", re.I),
+     "code execution reference"),
+    (re.compile(r"import\s+(subprocess|os|shutil|socket)", re.I),
+     "dangerous import reference"),
+]
+
+
+def _scan_mcp_description(server_name: str, tool_name: str, description: str) -> List[str]:
+    """Scan an MCP tool description for prompt injection patterns.
+
+    Returns a list of finding strings (empty = clean).
+    """
+    findings = []
+    if not description:
+        return findings
+    for pattern, reason in _MCP_INJECTION_PATTERNS:
+        if pattern.search(description):
+            findings.append(reason)
+    if findings:
+        logger.warning(
+            "MCP server '%s' tool '%s': suspicious description content — %s. "
+            "Description: %.200s",
+            server_name, tool_name, "; ".join(findings),
+            description,
+        )
+    return findings
+
+
 def _prepend_path(env: dict, directory: str) -> dict:
    """Prepend *directory* to env PATH if it is not already present."""
    updated = dict(env or {})
@ -798,6 +850,9 @@ class MCPServerTask:
        from toolsets import TOOLSETS

        async with self._refresh_lock:
+            # Capture old tool names for change diff
+            old_tool_names = set(self._registered_tool_names)
+
            # 1. Fetch current tool list from server
            tools_result = await self.session.list_tools()
            new_mcp_tools = tools_result.tools if hasattr(tools_result, "tools") else []
@ -817,10 +872,26 @@ class MCPServerTask:
                self.name, self, self._config
            )

-            logger.info(
-                "MCP server '%s': dynamically refreshed %d tool(s)",
-                self.name, len(self._registered_tool_names),
-            )
+            # 5. Log what changed (user-visible notification)
+            new_tool_names = set(self._registered_tool_names)
+            added = new_tool_names - old_tool_names
+            removed = old_tool_names - new_tool_names
+            changes = []
+            if added:
+                changes.append(f"added: {', '.join(sorted(added))}")
+            if removed:
+                changes.append(f"removed: {', '.join(sorted(removed))}")
+            if changes:
+                logger.warning(
+                    "MCP server '%s': tools changed dynamically — %s. "
+                    "Verify these changes are expected.",
+                    self.name, "; ".join(changes),
+                )
+            else:
+                logger.info(
+                    "MCP server '%s': dynamically refreshed %d tool(s) (no changes)",
+                    self.name, len(self._registered_tool_names),
+                )

    async def _run_stdio(self, config: dict):
        """Run the server using stdio transport."""
@ -1838,6 +1909,10 @@ def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> Li
        if not _should_register(mcp_tool.name):
            logger.debug("MCP server '%s': skipping tool '%s' (filtered by config)", name, mcp_tool.name)
            continue
+
+        # Scan tool description for prompt injection patterns
+        _scan_mcp_description(name, mcp_tool.name, mcp_tool.description or "")
+
        schema = _convert_mcp_schema(name, mcp_tool)
        tool_name_prefixed = schema["name"]