refactor(session): simplify traversal guard to a helper + logger, harden non-leading separators

Follow-up to the salvaged #9560 fix:
- Replace the _TRAVERSAL_RE regex with an explicit _is_path_unsafe() helper
  (drops the now-unused `import re`); catches a path separator ANYWHERE,
  not just leading, so a non-leading Windows backslash can't slip through.
- Switch the per-entry skip in _ensure_loaded_locked from print() to
  logger.warning to match the module's logging conventions.
- Add AUTHOR_MAP entry for the contributor.
- Add regression tests for the non-leading-separator case.
This commit is contained in:
teknium1 2026-06-21 14:00:19 -07:00 committed by Teknium
parent aa2aac68b0
commit 4d4ba0831e
3 changed files with 33 additions and 7 deletions

View file

@ -12,7 +12,6 @@ import hashlib
import logging
import os
import json
import re
import threading
import uuid
from pathlib import Path
@ -67,10 +66,27 @@ from .whatsapp_identity import (
)
from utils import atomic_replace
# Matches any value that could escape the sessions directory as a file path.
# Covers: directory traversal (..), Unix/Windows absolute paths (/ \),
# and Windows drive-letter paths (C:/ D:\\ etc.).
_TRAVERSAL_RE = re.compile(r'\.\.|^[/\\]|^[A-Za-z]:')
# Session keys/ids flow into filesystem paths downstream (e.g.
# ``sessions_dir / f"{session_id}.json"`` in hermes_state, request-dump
# filenames in agent_runtime_helpers). Any value that could escape the
# sessions directory as a path must be rejected at the entry boundary.
# Rejects: parent traversal (``..``), a path separator anywhere (``/`` or
# ``\``, so a non-leading Windows separator can't slip through), and a
# leading Windows drive letter (``C:``). Legitimate session keys are
# colon-delimited multi-segment ids (``agent:main:<platform>:...``) and
# never contain these, so there are no false positives in practice.
def _is_path_unsafe(value: object) -> bool:
"""Return True if ``value`` could traverse outside the sessions dir."""
if not value:
return False
s = str(value)
if ".." in s or "/" in s or "\\" in s:
return True
# Leading Windows drive path, e.g. "C:\..." or "d:/...". A bare "x:"
# with no following separator isn't a usable absolute path, and the
# separator forms are already caught above — but keep an explicit guard
# for the drive-letter prefix in case a separator was normalized away.
return len(s) >= 2 and s[0].isalpha() and s[1] == ":"
@dataclass
@ -584,7 +600,7 @@ class SessionEntry:
# Validate path-sensitive fields to prevent directory traversal (CWE-22)
for _field, _val in (("session_key", session_key), ("session_id", session_id)):
if _val and _TRAVERSAL_RE.search(str(_val)):
if _is_path_unsafe(_val):
raise ValueError(
f"Invalid {_field}: potential directory traversal detected"
)
@ -796,7 +812,7 @@ class SessionStore:
try:
self._entries[key] = SessionEntry.from_dict(entry_data)
except (ValueError, KeyError) as e:
print(f"[gateway] Warning: Skipping invalid session entry {key!r}: {e}")
logger.warning("Skipping invalid session entry %r: %s", key, e)
except Exception as e:
print(f"[gateway] Warning: Failed to load sessions: {e}")

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"mediratta01.pally@gmail.com": "orbisai0security", # PR #9560 salvage (session.py path-traversal guard, V-009)
"panghuer023@users.noreply.github.com": "panghuer023", # PR #37994 salvage (interrupt unblocks pending gateway approval; #8697)
"w.a.t.s.o.n.mk10@gmail.com": "natehale", # PR #48678 salvage (typing indicator lingers after final reply)
"0x0sec@gmail.com": "kn8-codes", # PR #48422 salvage (rich messages opt-in default off)

View file

@ -1095,6 +1095,15 @@ class TestSessionEntryFromDictTraversalValidation:
with pytest.raises(ValueError, match="session_id"):
SessionEntry.from_dict(self._entry(session_id="D:\\path\\to\\file"))
def test_session_id_non_leading_separator_raises(self):
"""A path separator anywhere — not just leading — must be rejected,
since a non-leading backslash is still a Windows traversal vector."""
from gateway.session import SessionEntry
with pytest.raises(ValueError, match="session_id"):
SessionEntry.from_dict(self._entry(session_id="good\\..\\bad"))
with pytest.raises(ValueError, match="session_key"):
SessionEntry.from_dict(self._entry(session_key="agent:main:good/sub"))
class TestEnsureLoadedSkipsInvalidEntries:
"""Regression: one bad sessions.json entry must not block valid entries from loading."""