diff --git a/.github/workflows/contributor-check.yml b/.github/workflows/contributor-check.yml index f8d65a3ea4..3ca4991c61 100644 --- a/.github/workflows/contributor-check.yml +++ b/.github/workflows/contributor-check.yml @@ -9,11 +9,14 @@ on: - '**/*.py' - '.github/workflows/contributor-check.yml' +permissions: + contents: read + jobs: check-attribution: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 # Full history needed for git log diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index c55a62908d..480b236f84 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -28,20 +28,20 @@ jobs: name: github-pages url: ${{ steps.deploy.outputs.page_url }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: 20 cache: npm cache-dependency-path: website/package-lock.json - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: '3.11' - name: Install PyYAML for skill extraction - run: pip install pyyaml httpx + run: pip install pyyaml==6.0.2 httpx==0.28.1 - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py @@ -73,10 +73,10 @@ jobs: echo "hermes-agent.nousresearch.com" > _site/CNAME - name: Upload artifact - uses: actions/upload-pages-artifact@v3 + uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3 with: path: _site - name: Deploy to GitHub Pages id: deploy - uses: actions/deploy-pages@v4 + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4 diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 6b360b8c64..f9e846e68c 
100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -23,21 +23,21 @@ jobs: timeout-minutes: 60 steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: submodules: recursive - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 # Build amd64 only so we can `load` the image for smoke testing. # `load: true` cannot export a multi-arch manifest to the local daemon. # The multi-arch build follows on push to main / release. - name: Build image (amd64, smoke test) - uses: docker/build-push-action@v6 + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 with: context: . file: Dockerfile @@ -56,14 +56,14 @@ jobs: - name: Log in to Docker Hub if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release' - uses: docker/login-action@v3 + uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Push multi-arch image (main branch) if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: docker/build-push-action@v6 + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 with: context: . file: Dockerfile @@ -75,7 +75,7 @@ jobs: - name: Push multi-arch image (release) if: github.event_name == 'release' - uses: docker/build-push-action@v6 + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 with: context: . 
file: Dockerfile diff --git a/.github/workflows/docs-site-checks.yml b/.github/workflows/docs-site-checks.yml index ea05d28046..2f985122cb 100644 --- a/.github/workflows/docs-site-checks.yml +++ b/.github/workflows/docs-site-checks.yml @@ -7,13 +7,16 @@ on: - '.github/workflows/docs-site-checks.yml' workflow_dispatch: +permissions: + contents: read + jobs: docs-site-checks: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: actions/setup-node@v4 + - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: 20 cache: npm @@ -23,7 +26,7 @@ jobs: run: npm ci working-directory: website - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: '3.11' diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index dba33bfffc..387c9e5d13 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -14,6 +14,9 @@ on: - 'run_agent.py' - 'acp_adapter/**' +permissions: + contents: read + concurrency: group: nix-${{ github.ref }} cancel-in-progress: true @@ -26,7 +29,7 @@ jobs: runs-on: ${{ matrix.os }} timeout-minutes: 30 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13 - name: Check flake diff --git a/.github/workflows/skills-index.yml b/.github/workflows/skills-index.yml index 6c03e40746..8beda195c6 100644 --- a/.github/workflows/skills-index.yml +++ b/.github/workflows/skills-index.yml @@ -20,14 +20,14 @@ jobs: if: github.repository == 'NousResearch/hermes-agent' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: 
actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: '3.11' - name: Install dependencies - run: pip install httpx pyyaml + run: pip install httpx==0.28.1 pyyaml==6.0.2 - name: Build skills index env: @@ -35,7 +35,7 @@ jobs: run: python scripts/build_skills_index.py - name: Upload index artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: skills-index path: website/static/api/skills-index.json @@ -53,25 +53,25 @@ jobs: # Only deploy on schedule or manual trigger (not on every push to the script) if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: skills-index path: website/static/api/ - - uses: actions/setup-node@v4 + - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: 20 cache: npm cache-dependency-path: website/package-lock.json - - uses: actions/setup-python@v5 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: '3.11' - name: Install PyYAML for skill extraction - run: pip install pyyaml + run: pip install pyyaml==6.0.2 - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py @@ -92,10 +92,10 @@ jobs: echo "hermes-agent.nousresearch.com" > _site/CNAME - name: Upload artifact - uses: actions/upload-pages-artifact@v3 + uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3 with: path: _site - name: Deploy to GitHub Pages id: deploy - uses: actions/deploy-pages@v4 + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4 diff --git a/.github/workflows/supply-chain-audit.yml 
b/.github/workflows/supply-chain-audit.yml index 1cee4564dd..4aa0fd321a 100644 --- a/.github/workflows/supply-chain-audit.yml +++ b/.github/workflows/supply-chain-audit.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: fetch-depth: 0 @@ -149,6 +149,62 @@ jobs: " fi + # --- CI/CD workflow files modified --- + WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true) + if [ -n "$WORKFLOW_HITS" ]; then + FINDINGS="${FINDINGS} + ### ⚠️ WARNING: CI/CD workflow files modified + Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added. + + **Files:** + \`\`\` + ${WORKFLOW_HITS} + \`\`\` + " + fi + + # --- Dockerfile / container build files modified --- + DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true) + if [ -n "$DOCKER_HITS" ]; then + FINDINGS="${FINDINGS} + ### ⚠️ WARNING: Container build files modified + Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands. + + **Files:** + \`\`\` + ${DOCKER_HITS} + \`\`\` + " + fi + + # --- Dependency manifest files modified --- + DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true) + if [ -n "$DEP_HITS" ]; then + FINDINGS="${FINDINGS} + ### ⚠️ WARNING: Dependency manifest files modified + Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources. 
+ + **Files:** + \`\`\` + ${DEP_HITS} + \`\`\` + " + fi + + # --- GitHub Actions version unpinning (mutable tags instead of SHAs) --- + ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep -vE '^[0-9]+:\+[[:space:]]*#' | grep -E 'uses:[[:space:]]*[^[:space:]#]+@v[0-9]' | head -10 || true) + if [ -n "$ACTIONS_UNPIN" ]; then + FINDINGS="${FINDINGS} + ### ⚠️ WARNING: GitHub Actions with mutable version tags + Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised. + + **Matches:** + \`\`\` + ${ACTIONS_UNPIN} + \`\`\` + " + fi + # --- Output results --- if [ -n "$FINDINGS" ]; then echo "found=true" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1e45193b8d..7d0822690a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [main] +permissions: + contents: read + # Cancel in-progress runs for the same PR/branch concurrency: group: tests-${{ github.ref }} @@ -17,13 +20,13 @@ jobs: timeout-minutes: 10 steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install system dependencies run: sudo apt-get update && sudo apt-get install -y ripgrep - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 - name: Set up Python 3.11 run: uv python install 3.11 @@ -49,10 +52,10 @@ jobs: timeout-minutes: 10 steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5 - name: Set up Python 3.11 run: uv python install 3.11 diff --git a/pyproject.toml b/pyproject.toml index f1cd158d4b..fa3fd48227 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,13 +78,13 @@ dingtalk = 
["dingtalk-stream>=0.1.0,<1"] feishu = ["lark-oapi>=1.5.3,<2"] web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"] rl = [ - "atroposlib @ git+https://github.com/NousResearch/atropos.git", - "tinker @ git+https://github.com/thinking-machines-lab/tinker.git", + "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30", + "tinker @ git+https://github.com/thinking-machines-lab/tinker.git@30517b667f18a3dfb7ef33fb56cf686d5820ba2b", "fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1", "wandb>=0.15.0,<1", ] -yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git ; python_version >= '3.12'"] +yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"] all = [ "hermes-agent[modal]", "hermes-agent[daytona]", diff --git a/scripts/whatsapp-bridge/package.json b/scripts/whatsapp-bridge/package.json index 2d32560f44..cb2f6b22ed 100644 --- a/scripts/whatsapp-bridge/package.json +++ b/scripts/whatsapp-bridge/package.json @@ -8,7 +8,7 @@ "start": "node bridge.js" }, "dependencies": { - "@whiskeysockets/baileys": "WhiskeySockets/Baileys#fix/abprops-abt-fetch", + "@whiskeysockets/baileys": "WhiskeySockets/Baileys#01047debd81beb20da7b7779b08edcb06aa03770", "express": "^4.21.0", "qrcode-terminal": "^0.12.0", "pino": "^9.0.0" diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index 883bbe318e..43049c2c18 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -2837,7 +2837,7 @@ class TestRegistryCollisionWarning: """registry.register() warns when a tool name is overwritten by a different toolset.""" def test_overwrite_different_toolset_logs_warning(self, caplog): - """Overwriting a tool from a different toolset emits a warning.""" + """Overwriting a tool from a different toolset is REJECTED with an error.""" from tools.registry import ToolRegistry import logging @@ -2847,11 +2847,13 @@ 
class TestRegistryCollisionWarning: reg.register(name="my_tool", toolset="builtin", schema=schema, handler=handler) - with caplog.at_level(logging.WARNING, logger="tools.registry"): + with caplog.at_level(logging.ERROR, logger="tools.registry"): reg.register(name="my_tool", toolset="mcp-ext", schema=schema, handler=handler) - assert any("collision" in r.message.lower() for r in caplog.records) + assert any("rejected" in r.message.lower() for r in caplog.records) assert any("builtin" in r.message and "mcp-ext" in r.message for r in caplog.records) + # The original tool should still be from 'builtin', not overwritten + assert reg.get_toolset_for_tool("my_tool") == "builtin" def test_overwrite_same_toolset_no_warning(self, caplog): """Re-registering within the same toolset is silent (e.g. reconnect).""" diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 2356830c46..d6bdc89faf 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -219,6 +219,58 @@ def _sanitize_error(text: str) -> str: return _CREDENTIAL_PATTERN.sub("[REDACTED]", text) +# --------------------------------------------------------------------------- +# MCP tool description content scanning +# --------------------------------------------------------------------------- + +# Patterns that indicate potential prompt injection in MCP tool descriptions. +# These are WARNING-level — we log but don't block, since false positives +# would break legitimate MCP servers. 
+_MCP_INJECTION_PATTERNS = [ + (re.compile(r"ignore\s+(all\s+)?previous\s+instructions", re.I), + "prompt override attempt ('ignore previous instructions')"), + (re.compile(r"you\s+are\s+now\s+an?\b", re.I), + "identity override attempt ('you are now a...')"), + (re.compile(r"your\s+new\s+(task|role|instructions?)\s+(is|are)", re.I), + "task override attempt"), + (re.compile(r"(?<![\w-])(?<![\w-][ \t])system\s*:", re.I), + "system prompt injection attempt"), + (re.compile(r"<\s*(system|human|assistant)\s*>", re.I), + "role tag injection attempt"), + (re.compile(r"do\s+not\s+(tell|inform|mention|reveal)", re.I), + "concealment instruction"), + (re.compile(r"(curl|wget|fetch)\s+https?://", re.I), + "network command in description"), + (re.compile(r"base64\.(b64decode|decodebytes)", re.I), + "base64 decode reference"), + (re.compile(r"\b(?:exec|eval)\s*\(", re.I), + "code execution reference"), + (re.compile(r"\bimport\s+(subprocess|os|shutil|socket)\b", re.I), + "dangerous import reference"), +] + + +def _scan_mcp_description(server_name: str, tool_name: str, description: str) -> List[str]: + """Scan an MCP tool description for prompt injection patterns. + + Returns a list of finding strings (empty = clean). + """ + findings = [] + if not description: + return findings + for pattern, reason in _MCP_INJECTION_PATTERNS: + if pattern.search(description): + findings.append(reason) + if findings: + logger.warning( + "MCP server '%s' tool '%s': suspicious description content — %s. " + "Description: %.200r", + server_name, tool_name, "; ".join(findings), + description, + ) + return findings + + def _prepend_path(env: dict, directory: str) -> dict: """Prepend *directory* to env PATH if it is not already present.""" updated = dict(env or {}) @@ -798,6 +850,9 @@ class MCPServerTask: from toolsets import TOOLSETS async with self._refresh_lock: + # Capture old tool names for change diff + old_tool_names = set(self._registered_tool_names) + # 1. 
Fetch current tool list from server tools_result = await self.session.list_tools() new_mcp_tools = tools_result.tools if hasattr(tools_result, "tools") else [] @@ -817,10 +872,26 @@ class MCPServerTask: self.name, self, self._config ) - logger.info( - "MCP server '%s': dynamically refreshed %d tool(s)", - self.name, len(self._registered_tool_names), - ) + # 5. Log what changed (user-visible notification) + new_tool_names = set(self._registered_tool_names) + added = new_tool_names - old_tool_names + removed = old_tool_names - new_tool_names + changes = [] + if added: + changes.append(f"added: {', '.join(sorted(added))}") + if removed: + changes.append(f"removed: {', '.join(sorted(removed))}") + if changes: + logger.warning( + "MCP server '%s': tools changed dynamically — %s. " + "Verify these changes are expected.", + self.name, "; ".join(changes), + ) + else: + logger.info( + "MCP server '%s': dynamically refreshed %d tool(s) (no changes)", + self.name, len(self._registered_tool_names), + ) async def _run_stdio(self, config: dict): """Run the server using stdio transport.""" @@ -1838,6 +1909,10 @@ def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> Li if not _should_register(mcp_tool.name): logger.debug("MCP server '%s': skipping tool '%s' (filtered by config)", name, mcp_tool.name) continue + + # Scan tool description for prompt injection patterns + _scan_mcp_description(name, mcp_tool.name, mcp_tool.description or "") + schema = _convert_mcp_schema(name, mcp_tool) tool_name_prefixed = schema["name"] diff --git a/tools/registry.py b/tools/registry.py index d6aff83486..b7351cb162 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -117,11 +117,27 @@ class ToolRegistry: with self._lock: existing = self._tools.get(name) if existing and existing.toolset != toolset: - logger.warning( - "Tool name collision: '%s' (toolset '%s') is being " - "overwritten by toolset '%s'", - name, existing.toolset, toolset, + # Allow MCP-to-MCP overwrites 
(legitimate: server refresh, + # or two MCP servers with overlapping tool names). + both_mcp = ( + existing.toolset.startswith("mcp-") + and toolset.startswith("mcp-") ) + if both_mcp: + logger.debug( + "Tool '%s': MCP toolset '%s' overwriting MCP toolset '%s'", + name, toolset, existing.toolset, + ) + else: + # Reject shadowing — prevent plugins/MCP from overwriting + # built-in tools or vice versa. + logger.error( + "Tool registration REJECTED: '%s' (toolset '%s') would " + "shadow existing tool from toolset '%s'. Deregister the " + "existing tool first if this is intentional.", + name, toolset, existing.toolset, + ) + return self._tools[name] = ToolEntry( name=name, toolset=toolset, diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 2b2625fa0d..6c73072593 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -64,11 +64,11 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]: report = format_scan_report(result) return f"Security scan blocked this skill ({reason}):\n{report}" if allowed is None: - # "ask" — allow but include the warning so the user sees the findings + # "ask" verdict — for agent-created skills this means dangerous + # findings were detected. Block the skill and include the report. report = format_scan_report(result) - logger.warning("Agent-created skill has security findings: %s", reason) - # Don't block — return None to allow, but log the warning - return None + logger.warning("Agent-created skill blocked (dangerous findings): %s", reason) + return f"Security scan blocked this skill ({reason}):\n{report}" except Exception as e: logger.warning("Security scan failed for %s: %s", skill_dir, e, exc_info=True) return None