From 4d4ba0831ef2f5315c166c65eef3d0ffb6f29a5b Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 21 Jun 2026 14:00:19 -0700 Subject: [PATCH] refactor(session): simplify traversal guard to a helper + logger, harden non-leading separators Follow-up to the salvaged #9560 fix: - Replace the _TRAVERSAL_RE regex with an explicit _is_path_unsafe() helper (drops the now-unused `import re`); catches a path separator ANYWHERE, not just leading, so a non-leading Windows backslash can't slip through. - Switch the per-entry skip in _ensure_loaded_locked from print() to logger.warning to match the module's logging conventions. - Add AUTHOR_MAP entry for the contributor. - Add regression tests for the non-leading-separator case. --- gateway/session.py | 30 +++++++++++++++++++++++------- scripts/release.py | 1 + tests/gateway/test_session.py | 9 +++++++++ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/gateway/session.py b/gateway/session.py index 941722e4d96..68df8f2955d 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -12,7 +12,6 @@ import hashlib import logging import os import json -import re import threading import uuid from pathlib import Path @@ -67,10 +66,27 @@ from .whatsapp_identity import ( ) from utils import atomic_replace -# Matches any value that could escape the sessions directory as a file path. -# Covers: directory traversal (..), Unix/Windows absolute paths (/ \), -# and Windows drive-letter paths (C:/ D:\\ etc.). -_TRAVERSAL_RE = re.compile(r'\.\.|^[/\\]|^[A-Za-z]:') +# Session keys/ids flow into filesystem paths downstream (e.g. +# ``sessions_dir / f"{session_id}.json"`` in hermes_state, request-dump +# filenames in agent_runtime_helpers). Any value that could escape the +# sessions directory as a path must be rejected at the entry boundary. +# Rejects: parent traversal (``..``), a path separator anywhere (``/`` or +# ``\``, so a non-leading Windows separator can't slip through), and a +# leading Windows drive letter (``C:``). Legitimate session keys are +# colon-delimited multi-segment ids (``agent:main::...``) and +# never contain these, so there are no false positives in practice. +def _is_path_unsafe(value: object) -> bool: + """Return True if ``value`` could traverse outside the sessions dir.""" + if not value: + return False + s = str(value) + if ".." in s or "/" in s or "\\" in s: + return True + # Leading Windows drive path, e.g. "C:\..." or "d:/...". A bare "x:" + # with no following separator isn't a usable absolute path, and the + # separator forms are already caught above — but keep an explicit guard + # for the drive-letter prefix in case a separator was normalized away. + return len(s) >= 2 and s[0].isalpha() and s[1] == ":" @dataclass @@ -584,7 +600,7 @@ class SessionEntry: # Validate path-sensitive fields to prevent directory traversal (CWE-22) for _field, _val in (("session_key", session_key), ("session_id", session_id)): - if _val and _TRAVERSAL_RE.search(str(_val)): + if _is_path_unsafe(_val): raise ValueError( f"Invalid {_field}: potential directory traversal detected" ) @@ -796,7 +812,7 @@ class SessionStore: try: self._entries[key] = SessionEntry.from_dict(entry_data) except (ValueError, KeyError) as e: - print(f"[gateway] Warning: Skipping invalid session entry {key!r}: {e}") + logger.warning("Skipping invalid session entry %r: %s", key, e) except Exception as e: print(f"[gateway] Warning: Failed to load sessions: {e}") diff --git a/scripts/release.py b/scripts/release.py index 1101c15da68..6007248db24 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { + "mediratta01.pally@gmail.com": "orbisai0security", # PR #9560 salvage (session.py path-traversal guard, V-009) "panghuer023@users.noreply.github.com": "panghuer023", # PR #37994 salvage (interrupt unblocks pending gateway approval; #8697) "w.a.t.s.o.n.mk10@gmail.com": "natehale", # PR #48678 salvage (typing indicator lingers after final reply) "0x0sec@gmail.com": "kn8-codes", # PR #48422 salvage (rich messages opt-in default off) diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 55611b8c0c5..c7f82b2d8c2 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -1095,6 +1095,15 @@ class TestSessionEntryFromDictTraversalValidation: with pytest.raises(ValueError, match="session_id"): SessionEntry.from_dict(self._entry(session_id="D:\\path\\to\\file")) + def test_session_id_non_leading_separator_raises(self): + """A path separator anywhere — not just leading — must be rejected, + since a non-leading backslash is still a Windows traversal vector.""" + from gateway.session import SessionEntry + with pytest.raises(ValueError, match="session_id"): + SessionEntry.from_dict(self._entry(session_id="good\\..\\bad")) + with pytest.raises(ValueError, match="session_key"): + SessionEntry.from_dict(self._entry(session_key="agent:main:good/sub")) + class TestEnsureLoadedSkipsInvalidEntries: """Regression: one bad sessions.json entry must not block valid entries from loading."""