mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(terminal): sanitize host/relative cwd OVERRIDE before it reaches docker run -w (#50636)
terminal_tool() resolves a per-task cwd override that WINS over config["cwd"]:
cwd = overrides.get("cwd") or config["cwd"]
config["cwd"] is sanitized for container backends in _get_env_config() (host
prefixes /Users//home//C:\\/C:/ and relative paths are replaced with the
backend default /root). But the override was applied RAW — it was never run
through that guard. The gateway/TUI registers the host launch dir as a cwd
override for workspace tracking (tui_gateway/server.py _register_session_cwd
-> _terminal_task_cwd -> _session_cwd -> os.getcwd()), so on a container
backend a host path leaked straight to `docker run -w <host-path>`:
- Windows desktop: -w C:\Users\<user> -> container fails to start (exit 125)
- POSIX: -w /home/<user> -> same
The ACP adapter translates its override cwd (acp_adapter/session.py
_translate_acp_cwd), but the gateway path did neither translation nor
sanitization, so the override bypassed the one guard that would have caught it.
Fix: extract the host/relative-path predicate into a shared
_is_unusable_container_cwd() helper (so the existing _get_env_config()
sanitizer and the new guard can't drift), and re-apply it to the *resolved*
cwd at the override-resolution site. Valid in-container override paths
(RL/benchmark sandboxes that set cwd to /workspace, /root, ...) are absolute
non-host paths and pass through untouched.
Tests: unit-pin the predicate (Windows backslash/forwardslash, POSIX home,
macOS /Users, relative, valid container paths) AND an E2E call-site pin that
drives terminal_tool() with a host-path override registered and asserts the
cwd reaching _create_environment is sanitized. Mutation-verified: reverting
the call-site guard makes the two host-path E2E tests fail (showing the raw
host path leaking) while the valid-/workspace-override test stays green.
This commit is contained in:
parent
411faf08bd
commit
c15945655f
3 changed files with 222 additions and 17 deletions
146
tests/tools/test_container_cwd_sanitize.py
Normal file
146
tests/tools/test_container_cwd_sanitize.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Regression tests for host-path cwd sanitization on container backends.
|
||||
|
||||
Two code paths in ``tools/terminal_tool.py`` must reject a host (or relative)
|
||||
working directory before it reaches ``docker run -w``:
|
||||
|
||||
1. ``_get_env_config()`` sanitizes the ``TERMINAL_CWD``-derived ``config["cwd"]``.
|
||||
2. ``terminal_tool()`` resolves a *per-task cwd override* that WINS over
|
||||
``config["cwd"]`` (registered by the gateway/TUI for workspace tracking,
|
||||
and by RL/benchmark envs). That override was applied RAW — never sanitized
|
||||
— so a host cwd (e.g. a Windows desktop session's ``C:\\Users\\<user>``)
|
||||
leaked straight to ``docker run -w C:\\Users\\<user>``, which fails to start
|
||||
the container (exit 125). The sanitizer at path #1 lists ``C:\\``/``C:/`` as
|
||||
host prefixes but only ever ran against ``config["cwd"]``, so the override
|
||||
bypassed the one guard that would have caught it.
|
||||
|
||||
Both paths now share ``_is_unusable_container_cwd()``; these tests pin its
|
||||
behaviour so neither path can regress.
|
||||
"""
|
||||
|
||||
import tools.terminal_tool as tt
|
||||
|
||||
|
||||
class TestIsUnusableContainerCwd:
|
||||
def test_windows_backslash_host_path_rejected(self):
|
||||
# The exact shape from the bug report: a Windows host cwd reaching a
|
||||
# Linux container's -w flag.
|
||||
assert tt._is_unusable_container_cwd(r"C:\Users\someuser") is True
|
||||
|
||||
def test_windows_forwardslash_host_path_rejected(self):
|
||||
assert tt._is_unusable_container_cwd("C:/Users/someuser") is True
|
||||
|
||||
def test_posix_home_host_path_rejected(self):
|
||||
assert tt._is_unusable_container_cwd("/home/ben/projects") is True
|
||||
|
||||
def test_macos_users_host_path_rejected(self):
|
||||
assert tt._is_unusable_container_cwd("/Users/ben/projects") is True
|
||||
|
||||
def test_relative_path_rejected(self):
|
||||
assert tt._is_unusable_container_cwd(".") is True
|
||||
assert tt._is_unusable_container_cwd("src/app") is True
|
||||
|
||||
def test_valid_container_workspace_accepted(self):
|
||||
# In-container paths that RL/benchmark overrides legitimately set must
|
||||
# pass through untouched.
|
||||
assert tt._is_unusable_container_cwd("/workspace") is False
|
||||
assert tt._is_unusable_container_cwd("/root") is False
|
||||
assert tt._is_unusable_container_cwd("/app") is False
|
||||
assert tt._is_unusable_container_cwd("/opt/project") is False
|
||||
|
||||
def test_empty_is_not_flagged(self):
|
||||
# Empty/None-ish cwd is handled by the caller's `or config["cwd"]`
|
||||
# fallback, not by flagging it here.
|
||||
assert tt._is_unusable_container_cwd("") is False
|
||||
|
||||
def test_host_prefixes_include_windows_and_posix(self):
|
||||
# Guard the constant itself — the Windows entries are the ones that
|
||||
# were load-bearing for the reported desktop bug.
|
||||
assert r"C:\\"[:2] in tt._HOST_CWD_PREFIXES or "C:\\" in tt._HOST_CWD_PREFIXES
|
||||
assert "C:/" in tt._HOST_CWD_PREFIXES
|
||||
assert "/home/" in tt._HOST_CWD_PREFIXES
|
||||
assert "/Users/" in tt._HOST_CWD_PREFIXES
|
||||
|
||||
def test_container_backends_set(self):
|
||||
assert tt._CONTAINER_BACKENDS == frozenset(
|
||||
{"docker", "singularity", "modal", "daytona"}
|
||||
)
|
||||
|
||||
|
||||
class TestOverrideCwdSanitizedAtCallSite:
|
||||
"""E2E pin: a per-task cwd OVERRIDE that is a host path must NOT reach the
|
||||
container builder. This is the actual reported bug — the gateway/TUI
|
||||
registers the host launch dir as a cwd override, which previously won over
|
||||
the (sanitized) config["cwd"] and flowed raw into `docker run -w`.
|
||||
"""
|
||||
|
||||
def _run_and_capture_cwd(self, monkeypatch, override_cwd, config_cwd="/root"):
|
||||
"""Drive terminal_tool() on the docker backend with a host-path cwd
|
||||
override registered, and return the cwd that reached _create_environment
|
||||
(i.e. the cwd that would be passed to `docker run -w`).
|
||||
"""
|
||||
captured = {}
|
||||
|
||||
config = {
|
||||
"env_type": "docker",
|
||||
"docker_image": "pytorch/pytorch:latest",
|
||||
"cwd": config_cwd,
|
||||
"host_cwd": None,
|
||||
"timeout": 180,
|
||||
"lifetime_seconds": 300,
|
||||
"container_cpu": 1,
|
||||
"container_memory": 5120,
|
||||
"container_disk": 51200,
|
||||
"container_persistent": True,
|
||||
"docker_volumes": [],
|
||||
"docker_env": {},
|
||||
"docker_extra_args": [],
|
||||
"docker_mount_cwd_to_workspace": False,
|
||||
"docker_run_as_host_user": False,
|
||||
"docker_forward_env": [],
|
||||
"modal_mode": "auto",
|
||||
}
|
||||
|
||||
class _DummyEnv:
|
||||
cwd = config_cwd
|
||||
|
||||
def execute(self, *a, **k):
|
||||
return {"output": "", "exit_code": 0}
|
||||
|
||||
def fake_create_environment(env_type, image, cwd, timeout, **kwargs):
|
||||
captured["cwd"] = cwd
|
||||
return _DummyEnv()
|
||||
|
||||
monkeypatch.setattr(tt, "_get_env_config", lambda: config)
|
||||
monkeypatch.setattr(tt, "_start_cleanup_thread", lambda: None)
|
||||
monkeypatch.setattr(tt, "_check_all_guards", lambda *a, **k: {"approved": True})
|
||||
monkeypatch.setattr(tt, "_create_environment", fake_create_environment)
|
||||
# Force a fresh environment build so _create_environment is invoked.
|
||||
monkeypatch.setattr(tt, "_active_environments", {})
|
||||
monkeypatch.setattr(tt, "_last_activity", {})
|
||||
|
||||
task_id = "sess-host-cwd"
|
||||
tt.register_task_env_overrides(task_id, {"cwd": override_cwd})
|
||||
try:
|
||||
tt.terminal_tool(command="pwd", task_id=task_id)
|
||||
finally:
|
||||
tt.clear_task_env_overrides(task_id)
|
||||
tt._active_environments.pop(task_id, None)
|
||||
tt._active_environments.pop("default", None)
|
||||
return captured.get("cwd")
|
||||
|
||||
def test_windows_host_override_does_not_reach_container(self, monkeypatch):
|
||||
# The bug: C:\Users\<user> registered as override → docker run -w C:\Users\<user> → exit 125.
|
||||
cwd = self._run_and_capture_cwd(monkeypatch, r"C:\Users\someuser")
|
||||
assert cwd == "/root", (
|
||||
f"Host-path cwd override leaked to the container builder: {cwd!r}. "
|
||||
"It must be sanitized back to config['cwd']."
|
||||
)
|
||||
|
||||
def test_posix_host_override_does_not_reach_container(self, monkeypatch):
|
||||
cwd = self._run_and_capture_cwd(monkeypatch, "/home/someuser/project")
|
||||
assert cwd == "/root"
|
||||
|
||||
def test_valid_container_override_is_preserved(self, monkeypatch):
|
||||
# RL/benchmark envs set an in-container path; it must pass through.
|
||||
cwd = self._run_and_capture_cwd(monkeypatch, "/workspace/task42")
|
||||
assert cwd == "/workspace/task42"
|
||||
|
|
@ -274,17 +274,30 @@ class TestEnsurepipFix:
|
|||
# =========================================================================
|
||||
|
||||
class TestHostPrefixList:
|
||||
"""Verify the host prefix list catches common host-only paths."""
|
||||
"""Verify the host prefix list catches common host-only paths.
|
||||
|
||||
def test_all_common_host_prefixes_caught(self):
|
||||
"""The host prefix check should catch /Users/, /home/, C:\\, C:/."""
|
||||
# Read the actual source to verify the prefixes
|
||||
import inspect
|
||||
source = inspect.getsource(_tt_mod._get_env_config)
|
||||
for prefix in ["/Users/", "/home/", 'C:\\\\"', "C:/"]:
|
||||
# Normalize for source comparison
|
||||
check = prefix.rstrip('"')
|
||||
assert check in source or prefix in source, (
|
||||
f"Host prefix {prefix!r} not found in _get_env_config. "
|
||||
The prefixes used to live as an inline literal inside ``_get_env_config``;
|
||||
they now live in the module-level ``_HOST_CWD_PREFIXES`` constant shared by
|
||||
both the ``_get_env_config`` sanitizer and the override-resolution guard
|
||||
(``_is_unusable_container_cwd``). Assert the *behavior* (each common host
|
||||
prefix is flagged as unusable inside a container) rather than grepping a
|
||||
function's source — the latter is a change-detector that breaks on any
|
||||
refactor that moves the constant.
|
||||
"""
|
||||
|
||||
def test_all_common_host_prefixes_present_in_constant(self):
|
||||
"""The shared prefix constant must list the common host-only roots."""
|
||||
for prefix in ("/Users/", "/home/", "C:\\", "C:/"):
|
||||
assert prefix in _tt_mod._HOST_CWD_PREFIXES, (
|
||||
f"Host prefix {prefix!r} missing from _HOST_CWD_PREFIXES. "
|
||||
"Container backends need this to avoid using host paths."
|
||||
)
|
||||
|
||||
def test_all_common_host_paths_flagged_unusable(self):
|
||||
"""A host path under each prefix must be rejected as a container cwd."""
|
||||
for host_path in ("/Users/me/proj", "/home/me/proj",
|
||||
"C:\\Users\\me", "C:/Users/me"):
|
||||
assert _tt_mod._is_unusable_container_cwd(host_path) is True, (
|
||||
f"Host path {host_path!r} should be rejected as a container "
|
||||
"cwd but was accepted."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1086,6 +1086,36 @@ def _safe_getcwd() -> str:
|
|||
return os.getenv("TERMINAL_CWD") or os.path.expanduser("~")
|
||||
|
||||
|
||||
# Path prefixes that identify a *host* working directory which cannot exist
|
||||
# inside a container sandbox. Covers POSIX user dirs and Windows drive paths
|
||||
# (``C:\Users\...`` / ``C:/Users/...``) — the latter is how a Windows host's
|
||||
# cwd looks when it leaks toward a Linux container's ``-w`` flag.
|
||||
_HOST_CWD_PREFIXES = ("/Users/", "/home/", "C:\\", "C:/")
|
||||
|
||||
_CONTAINER_BACKENDS = frozenset({"docker", "singularity", "modal", "daytona"})
|
||||
|
||||
|
||||
def _is_unusable_container_cwd(cwd: str) -> bool:
|
||||
"""Return True if *cwd* is a host/relative path that won't work as the
|
||||
working directory inside a container sandbox.
|
||||
|
||||
A container's cwd must be an absolute path that exists *inside* the
|
||||
sandbox (e.g. ``/workspace`` or ``/root``). A host path (``/home/user``,
|
||||
``C:\\Users\\me``) or a relative path (``.``, ``src/``) is meaningless to
|
||||
``docker run -w`` and makes the container fail to start (exit 125).
|
||||
"""
|
||||
if not cwd:
|
||||
return False
|
||||
if any(cwd.startswith(p) for p in _HOST_CWD_PREFIXES):
|
||||
return True
|
||||
# Relative paths (".", "src/") can't be a container workdir either. Windows
|
||||
# drive paths are absolute on Windows but os.path.isabs() is False on a
|
||||
# POSIX host, so they're already caught by the prefix check above.
|
||||
if not os.path.isabs(cwd):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _get_env_config() -> Dict[str, Any]:
|
||||
"""Get terminal environment configuration from environment variables."""
|
||||
# Default image with Python and Node.js for maximum compatibility
|
||||
|
|
@ -1138,21 +1168,18 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
if cwd:
|
||||
cwd = os.path.expanduser(cwd)
|
||||
host_cwd = None
|
||||
host_prefixes = ("/Users/", "/home/", "C:\\", "C:/")
|
||||
if env_type == "docker" and mount_docker_cwd:
|
||||
docker_cwd_source = os.getenv("TERMINAL_CWD") or _safe_getcwd()
|
||||
candidate = os.path.abspath(os.path.expanduser(docker_cwd_source))
|
||||
if (
|
||||
any(candidate.startswith(p) for p in host_prefixes)
|
||||
any(candidate.startswith(p) for p in _HOST_CWD_PREFIXES)
|
||||
or (os.path.isabs(candidate) and os.path.isdir(candidate) and not candidate.startswith(("/workspace", "/root")))
|
||||
):
|
||||
host_cwd = candidate
|
||||
cwd = "/workspace"
|
||||
elif env_type in {"modal", "docker", "singularity", "daytona"} and cwd:
|
||||
elif env_type in _CONTAINER_BACKENDS and cwd:
|
||||
# Host paths and relative paths that won't work inside containers
|
||||
is_host_path = any(cwd.startswith(p) for p in host_prefixes)
|
||||
is_relative = not os.path.isabs(cwd) # e.g. "." or "src/"
|
||||
if (is_host_path or is_relative) and cwd != default_cwd:
|
||||
if _is_unusable_container_cwd(cwd) and cwd != default_cwd:
|
||||
logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
|
||||
"(host/relative path won't work in sandbox). Using %r instead.",
|
||||
cwd, env_type, default_cwd)
|
||||
|
|
@ -1925,6 +1952,25 @@ def terminal_tool(
|
|||
image = ""
|
||||
|
||||
cwd = overrides.get("cwd") or config["cwd"]
|
||||
# A per-task cwd override (registered by the gateway/TUI for workspace
|
||||
# tracking, or by RL/benchmark envs) wins over config["cwd"] — but
|
||||
# config["cwd"] was already sanitized for container backends in
|
||||
# _get_env_config() while the override is raw. On a container backend a
|
||||
# raw host path (e.g. a Windows desktop session's C:\Users\<user>, or a
|
||||
# POSIX /home/<user>) reaches `docker run -w <host-path>` and the
|
||||
# container fails to start (exit 125). Re-apply the same host/relative
|
||||
# path guard to the *resolved* cwd so the override can't bypass it.
|
||||
# Valid in-container override paths (RL/benchmark sandboxes that set
|
||||
# cwd to /workspace, /root, etc.) are absolute non-host paths and pass
|
||||
# through untouched.
|
||||
if env_type in _CONTAINER_BACKENDS and _is_unusable_container_cwd(cwd):
|
||||
if cwd != config["cwd"]:
|
||||
logger.info(
|
||||
"Ignoring host/relative cwd override %r for %s backend "
|
||||
"(won't exist in sandbox). Using %r instead.",
|
||||
cwd, env_type, config["cwd"],
|
||||
)
|
||||
cwd = config["cwd"]
|
||||
default_timeout = config["timeout"]
|
||||
effective_timeout = timeout or default_timeout
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue