From ea1372d2afd3ab60a76be8ec6ca984719498b15e Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Mon, 29 Jun 2026 04:08:45 -0700 Subject: [PATCH] fix(security): wire session-id sanitizer into artifact paths + API boundary Defense-in-depth on top of _safe_session_filename_component (#5958): Sink (makes the bad write impossible regardless of entry point): - run_agent._save_session_log: sanitize session_id before building the session_{sid}.json snapshot path. - agent_runtime_helpers.dump_api_request_debug: sanitize before building the request_dump_{sid}_{ts}.json path. Boundary (clean 400 instead of a silently-hashed filename): - api_server rejects path-traversal-shaped X-Hermes-Session-Id on the session-continuation path and the explicit /api/sessions create path, reusing gateway.session._is_path_unsafe (mirrors the native gateway's entry-boundary guard). Also enforces the session-header length cap on the continuation path. Tests: traversal session_id stays contained at the write site; sanitizer always yields a traversal-free segment; the API header rejects ../, absolute, and Windows-traversal IDs with 400. --- agent/agent_runtime_helpers.py | 6 +++++- gateway/platforms/api_server.py | 17 ++++++++++++++--- run_agent.py | 8 ++++++-- tests/gateway/test_api_server.py | 18 ++++++++++++++++++ tests/run_agent/test_run_agent.py | 28 ++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 6 deletions(-) diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index 21a14c97708..7a65d3f2e5e 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1281,7 +1281,11 @@ def dump_api_request_debug( dump_payload["error"] = error_info timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f") - dump_file = agent.logs_dir / f"request_dump_{agent.session_id}_{timestamp}.json" + # Sanitize the session ID into a traversal-free path segment — it can + # originate from untrusted input (X-Hermes-Session-Id header), and an + # unsanitized "../"-shaped ID would write the dump outside logs_dir. + safe_sid = _ra()._safe_session_filename_component(agent.session_id) + dump_file = agent.logs_dir / f"request_dump_{safe_sid}_{timestamp}.json" # Redact secrets before persisting/printing. This dump captures the # full request body (system prompt, tool defs, context-embedded diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index ddedeb78706..ea91aea4329 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -1490,7 +1490,8 @@ class APIServerAdapter(BasePlatformAdapter): raw_id = body.get("id") or body.get("session_id") session_id = str(raw_id).strip() if raw_id else f"api_{int(time.time())}_{uuid.uuid4().hex[:8]}" - if not session_id or re.search(r'[\r\n\x00]', session_id): + from gateway.session import _is_path_unsafe + if not session_id or re.search(r'[\r\n\x00]', session_id) or _is_path_unsafe(session_id): return web.json_response(_openai_error("Invalid session ID", code="invalid_session_id"), status=400) if len(session_id) > self._MAX_SESSION_HEADER_LEN: return web.json_response(_openai_error("Session ID too long", code="invalid_session_id"), status=400) @@ -1905,12 +1906,22 @@ class APIServerAdapter(BasePlatformAdapter): ), status=403, ) - # Sanitize: reject control characters that could enable header injection. - if re.search(r'[\r\n\x00]', provided_session_id): + # Sanitize: reject control characters that could enable header + # injection, and path-traversal-shaped IDs that would escape the + # sessions directory when interpolated into on-disk artifact + # filenames (session snapshots, request dumps). Mirrors the native + # gateway's entry-boundary guard (gateway.session._is_path_unsafe). + from gateway.session import _is_path_unsafe + if re.search(r'[\r\n\x00]', provided_session_id) or _is_path_unsafe(provided_session_id): return web.json_response( {"error": {"message": "Invalid session ID", "type": "invalid_request_error"}}, status=400, ) + if len(provided_session_id) > self._MAX_SESSION_HEADER_LEN: + return web.json_response( + {"error": {"message": "Session ID too long", "type": "invalid_request_error"}}, + status=400, + ) session_id = provided_session_id try: db = self._ensure_session_db() diff --git a/run_agent.py b/run_agent.py index 1cb1f91d92d..bca815da3c6 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2387,9 +2387,13 @@ class AIAgent: # Re-derive the target path each call so /branch and /compress # session-id changes land in the right file without any re-point - # bookkeeping at the call sites. + # bookkeeping at the call sites. Sanitize the session ID into a + # single traversal-free path segment — session IDs can come from + # untrusted input (X-Hermes-Session-Id header) and must not escape + # the sessions directory. try: - log_file = self.logs_dir / f"session_{self.session_id}.json" + safe_sid = _safe_session_filename_component(self.session_id) + log_file = self.logs_dir / f"session_{safe_sid}.json" except Exception: return diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 3df7bac1dea..c0a2f52d6c7 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -3454,6 +3454,24 @@ class TestSessionIdHeader: call_kwargs = mock_run.call_args.kwargs assert call_kwargs["session_id"] == "my-session-123" + @pytest.mark.asyncio + async def test_traversal_session_id_header_rejected(self, auth_adapter): + """Security (#5958): a path-traversal X-Hermes-Session-Id must be + rejected with 400 so it can't reach the filesystem artifact paths + (session snapshot / request dump) and escape the sessions dir.""" + app = _create_app(auth_adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + for bad in ("../../../../etc/pwned", "/abs/path", "..\\win"): + resp = await cli.post( + "/v1/chat/completions", + headers={"X-Hermes-Session-Id": bad, "Authorization": "Bearer sk-secret"}, + json={"model": "hermes-agent", "messages": [{"role": "user", "content": "hi"}]}, + ) + assert resp.status == 400, f"{bad!r} should be rejected" + # The agent is never invoked for a rejected ID. + assert mock_run.call_count == 0 + @pytest.mark.asyncio async def test_provided_session_id_loads_history_from_db(self, auth_adapter): """When X-Hermes-Session-Id is provided, history comes from SessionDB not request body.""" diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 6e9970df793..ad9204223c5 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -629,6 +629,34 @@ class TestSessionJsonSnapshotOptIn: # the session JSON opt-in. assert hasattr(agent, "logs_dir") + def test_traversal_session_id_cannot_escape_logs_dir(self, agent, tmp_path): + # Security regression (#5958): a traversal-shaped session ID (which can + # originate from the untrusted X-Hermes-Session-Id API header) must not + # redirect the session snapshot outside the sessions directory. + agent._session_json_enabled = True + agent.logs_dir = tmp_path + agent.session_id = "../../../../outside_dir/pwned" + agent._save_session_log([{"role": "user", "content": "hello"}]) + + # Exactly one snapshot, and it lives directly under logs_dir. + written = list(tmp_path.glob("session_*.json")) + assert len(written) == 1, "writer must produce a single contained snapshot" + assert written[0].resolve().parent == tmp_path.resolve() + # Nothing escaped to the traversal target. + assert not (tmp_path.parent.parent / "outside_dir").exists() + + def test_safe_session_filename_component_contains_traversal(self): + # The sanitizer is the chokepoint: every session-ID-derived artifact + # path goes through it, so it must always yield a single, traversal-free + # path segment while leaving legitimate IDs untouched. + f = run_agent._safe_session_filename_component + for raw in ("../../etc/passwd", "/abs/path", "..\\win\\trav", "a/b/c"): + out = f(raw) + assert "/" not in out and "\\" not in out and ".." not in out, out + # Legit IDs pass through unchanged; distinct IDs never collide. + assert f("api-abc123def456") == "api-abc123def456" + assert f("../a") != f("../b") + class TestSaveSessionLogRedactsSecrets: """Regression: session_*.json must not contain plaintext credentials (#19798, #19845)."""