fix(web): genericize free-MCP client identity per telemetry policy

Replace the hermes-identifying clientInfo/User-Agent/session-id prefix on
the keyless Parallel Search MCP path with a neutral 'mcp-web-client'
identity. Project policy forbids third-party usage attribution without an
explicit user opt-in (see telemetry PR policy); MCP requires a clientInfo,
so a generic one satisfies the spec without attributing traffic.

Also adds the contributor AUTHOR_MAP entry and refreshes uv.lock against
current main (parallel-web 0.6.0).
This commit is contained in:
Teknium 2026-06-10 15:13:42 -07:00
parent e0e2571711
commit 0a5762c78d
3 changed files with 22 additions and 17 deletions

View file

@ -55,11 +55,13 @@ logger = logging.getLogger(__name__)
# configured. Docs: https://docs.parallel.ai/integrations/mcp/search-mcp
_MCP_SEARCH_URL = "https://search.parallel.ai/mcp"
_MCP_PROTOCOL_VERSION = "2025-06-18"
_MCP_CLIENT_NAME = "hermes-agent"
# Deliberately generic client identity. Project policy (see the telemetry PR
# policy in AGENTS.md) forbids third-party usage attribution without an
# explicit user opt-in, so neither clientInfo nor the User-Agent names
# hermes. MCP requires *a* clientInfo; a neutral one satisfies the spec
# without attributing traffic.
_MCP_CLIENT_NAME = "mcp-web-client"
_MCP_CLIENT_VERSION = "1.0.0"
# Identify free-tier traffic at the HTTP layer. Without this, httpx sends a
# generic ``python-httpx/<version>`` User-Agent and hermes usage is only visible
# via the JSON-RPC ``clientInfo`` payload.
_MCP_USER_AGENT = f"{_MCP_CLIENT_NAME}/{_MCP_CLIENT_VERSION}"
_MCP_TIMEOUT_SECONDS = 30.0
@ -76,9 +78,10 @@ def _new_session_id() -> str:
Per-call rather than process-global: one process serves many unrelated
chats in the gateway/batch runners, and a shared id would pool their
searches into one Parallel session.
searches into one Parallel session. The prefix is deliberately generic
(no hermes attribution, per the telemetry policy).
"""
return f"hermes-agent-{uuid.uuid4().hex}"
return f"{_MCP_CLIENT_NAME}-{uuid.uuid4().hex}"
# Module-level note: the canonical cache slots ``_parallel_client`` and
# ``_async_parallel_client`` live on :mod:`tools.web_tools` so tests that do

View file

@ -75,6 +75,7 @@ AUTHOR_MAP = {
"129007007+HeLLGURD@users.noreply.github.com": "HeLLGURD",
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
"dirtyren@users.noreply.github.com": "dirtyren",
"mharris@parallel.ai": "NormallyGaussian",
"ted.malone@outlook.com": "temalo",
"adityamalik2833@gmail.com": "alarcritty",
"islam666@users.noreply.github.com": "islam666",

View file

@ -30,15 +30,16 @@ class TestMcpHeaders:
assert h["Accept"] == "application/json, text/event-stream"
assert "Mcp-Session-Id" not in h
def test_identifies_hermes_via_user_agent(self):
# Free-tier traffic is attributable at the HTTP layer (not just via the
# JSON-RPC clientInfo payload), on both the anonymous and keyed paths.
assert pp._mcp_headers(session_id=None, api_key=None)["User-Agent"].startswith(
"hermes-agent/"
)
assert pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"].startswith(
"hermes-agent/"
)
def test_user_agent_is_generic_not_hermes(self):
# Telemetry policy: no third-party usage attribution without opt-in.
# The UA must be set (not python-httpx default) but must not name
# hermes, on both the anonymous and keyed paths.
for ua in (
pp._mcp_headers(session_id=None, api_key=None)["User-Agent"],
pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"],
):
assert ua == f"{pp._MCP_CLIENT_NAME}/{pp._MCP_CLIENT_VERSION}"
assert "hermes" not in ua.lower()
def test_session_id_and_bearer_when_present(self):
h = pp._mcp_headers(session_id="sid-123", api_key="pk-live")
@ -280,7 +281,7 @@ class TestMcpWebFetch:
assert args["name"] == "web_fetch"
assert args["arguments"]["urls"] == urls
assert args["arguments"]["full_content"] is True
assert args["arguments"]["session_id"].startswith("hermes-agent-")
assert args["arguments"]["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")
def test_prefers_full_content_over_excerpts(self):
payload = {"results": [
@ -354,7 +355,7 @@ class TestKeyedV1Search:
# honors the caller's limit via advanced_settings.max_results
assert captured["advanced_settings"] == {"max_results": 7}
assert captured["mode"] == "advanced" # v1 default
assert captured["session_id"].startswith("hermes-agent-") # per-call id
assert captured["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-") # per-call id
assert len(out["data"]["web"]) == 7 # client-side slice
# paid path: no free-tier attribution, no [Parallel] label signal
assert "attribution" not in out