From 0a5762c78d11f4d6626dbf99da5f62cc34cfe2c4 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 10 Jun 2026 15:13:42 -0700 Subject: [PATCH] fix(web): genericize free-MCP client identity per telemetry policy Replace the hermes-identifying clientInfo/User-Agent/session-id prefix on the keyless Parallel Search MCP path with a neutral 'mcp-web-client' identity. Project policy forbids third-party usage attribution without an explicit user opt-in (see telemetry PR policy); MCP requires a clientInfo, so a generic one satisfies the spec without attributing traffic. Also adds the contributor AUTHOR_MAP entry and refreshes uv.lock against current main (parallel-web 0.6.0). --- plugins/web/parallel/provider.py | 15 +++++++----- scripts/release.py | 1 + .../plugins/web/test_parallel_keyless_mcp.py | 23 ++++++++++--------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/plugins/web/parallel/provider.py b/plugins/web/parallel/provider.py index 20c4291d77..7a15b3d3f8 100644 --- a/plugins/web/parallel/provider.py +++ b/plugins/web/parallel/provider.py @@ -55,11 +55,13 @@ logger = logging.getLogger(__name__) # configured. Docs: https://docs.parallel.ai/integrations/mcp/search-mcp _MCP_SEARCH_URL = "https://search.parallel.ai/mcp" _MCP_PROTOCOL_VERSION = "2025-06-18" -_MCP_CLIENT_NAME = "hermes-agent" +# Deliberately generic client identity. Project policy (see the telemetry PR +# policy in AGENTS.md) forbids third-party usage attribution without an +# explicit user opt-in, so neither clientInfo nor the User-Agent names +# hermes. MCP requires *a* clientInfo; a neutral one satisfies the spec +# without attributing traffic. +_MCP_CLIENT_NAME = "mcp-web-client" _MCP_CLIENT_VERSION = "1.0.0" -# Identify free-tier traffic at the HTTP layer. Without this, httpx sends a -# generic ``python-httpx/`` User-Agent and hermes usage is only visible -# via the JSON-RPC ``clientInfo`` payload. _MCP_USER_AGENT = f"{_MCP_CLIENT_NAME}/{_MCP_CLIENT_VERSION}" _MCP_TIMEOUT_SECONDS = 30.0 @@ -76,9 +78,10 @@ def _new_session_id() -> str: Per-call rather than process-global: one process serves many unrelated chats in the gateway/batch runners, and a shared id would pool their - searches into one Parallel session. + searches into one Parallel session. The prefix is deliberately generic + (no hermes attribution — telemetry policy). """ - return f"hermes-agent-{uuid.uuid4().hex}" + return f"{_MCP_CLIENT_NAME}-{uuid.uuid4().hex}" # Module-level note: the canonical cache slots ``_parallel_client`` and # ``_async_parallel_client`` live on :mod:`tools.web_tools` so tests that do diff --git a/scripts/release.py b/scripts/release.py index cd0fe475d5..68ad134d6c 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -75,6 +75,7 @@ AUTHOR_MAP = { "129007007+HeLLGURD@users.noreply.github.com": "HeLLGURD", "290859878+synapsesx@users.noreply.github.com": "synapsesx", "dirtyren@users.noreply.github.com": "dirtyren", + "mharris@parallel.ai": "NormallyGaussian", "ted.malone@outlook.com": "temalo", "adityamalik2833@gmail.com": "alarcritty", "islam666@users.noreply.github.com": "islam666", diff --git a/tests/plugins/web/test_parallel_keyless_mcp.py b/tests/plugins/web/test_parallel_keyless_mcp.py index 49975c47f2..8495df144b 100644 --- a/tests/plugins/web/test_parallel_keyless_mcp.py +++ b/tests/plugins/web/test_parallel_keyless_mcp.py @@ -30,15 +30,16 @@ class TestMcpHeaders: assert h["Accept"] == "application/json, text/event-stream" assert "Mcp-Session-Id" not in h - def test_identifies_hermes_via_user_agent(self): - # Free-tier traffic is attributable at the HTTP layer (not just via the - # JSON-RPC clientInfo payload), on both the anonymous and keyed paths. - assert pp._mcp_headers(session_id=None, api_key=None)["User-Agent"].startswith( - "hermes-agent/" - ) - assert pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"].startswith( - "hermes-agent/" - ) + def test_user_agent_is_generic_not_hermes(self): + # Telemetry policy: no third-party usage attribution without opt-in. + # The UA must be set (not python-httpx default) but must not name + # hermes, on both the anonymous and keyed paths. + for ua in ( + pp._mcp_headers(session_id=None, api_key=None)["User-Agent"], + pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"], + ): + assert ua == f"{pp._MCP_CLIENT_NAME}/{pp._MCP_CLIENT_VERSION}" + assert "hermes" not in ua.lower() def test_session_id_and_bearer_when_present(self): h = pp._mcp_headers(session_id="sid-123", api_key="pk-live") @@ -280,7 +281,7 @@ class TestMcpWebFetch: assert args["name"] == "web_fetch" assert args["arguments"]["urls"] == urls assert args["arguments"]["full_content"] is True - assert args["arguments"]["session_id"].startswith("hermes-agent-") + assert args["arguments"]["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-") def test_prefers_full_content_over_excerpts(self): payload = {"results": [ @@ -354,7 +355,7 @@ class TestKeyedV1Search: # honors the caller's limit via advanced_settings.max_results assert captured["advanced_settings"] == {"max_results": 7} assert captured["mode"] == "advanced" # v1 default - assert captured["session_id"].startswith("hermes-agent-") # per-call id + assert captured["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-") # per-call id assert len(out["data"]["web"]) == 7 # client-side slice # paid path: no free-tier attribution, no [Parallel] label signal assert "attribution" not in out