From 08ec602770c4451f0e095ad3a288934dae6f98a6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 9 May 2026 18:44:58 -0700 Subject: [PATCH] fix(tool-result-storage): persist via stdin to bypass 128 KB exec-arg cap (#22913) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux's MAX_ARG_STRLEN caps any single argv element at 128 KB (32 * PAGE_SIZE). The previous heredoc-in-the-command-string approach in _write_to_sandbox put the entire tool result inside the 'bash -c' arg, so any result over ~128 KB raised OSError [Errno 7] 'Argument list too long' before the heredoc ever ran. The caller logged a warning, but quiet_mode (CLI default) sets tools.* to ERROR — so the warning never reached agent.log either, and the agent saw a 1.5 KB preview tagged 'Full output could not be saved to sandbox'. Hits delegate_task with 3+ subagent outputs routinely now. Switch to passing content via env.execute(stdin_data=...). cmd is now just 'mkdir -p X && cat > Y' (under 1 KB), and the heavyweight payload travels through stdin where there is no argv-element limit. E2E reproduced the user's exact 144,778-char delegate_task envelope: old code OSError'd, new code round-trips cleanly to disk with all three task summaries intact. 
--- tests/tools/test_tool_result_storage.py | 27 ++++++++++++++----------- tools/tool_result_storage.py | 22 ++++++++++++-------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 3cea3b59ffa..17b6815c1d1 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -90,8 +90,11 @@ class TestWriteToSandbox: env.execute.assert_called_once() cmd = env.execute.call_args[0][0] assert "mkdir -p" in cmd - assert "hello world" in cmd - assert HEREDOC_MARKER in cmd + # Content travels through stdin, NOT inside the command string — + # otherwise large content would hit Linux's 128 KB MAX_ARG_STRLEN + # ceiling on `bash -c <cmd>` (#22906). + assert "hello world" not in cmd + assert env.execute.call_args[1]["stdin_data"] == "hello world" def test_failure_returns_false(self): env = MagicMock() @@ -99,16 +102,16 @@ class TestWriteToSandbox: result = _write_to_sandbox("content", "/tmp/hermes-results/abc.txt", env) assert result is False - def test_heredoc_collision_uses_uuid_marker(self): + def test_large_content_via_stdin(self): + """Regression: 200 KB content exceeds Linux MAX_ARG_STRLEN (128 KB). 
+ It must travel via stdin, never inside the command string.""" env = MagicMock() env.execute.return_value = {"output": "", "returncode": 0} - content = f"text with {HEREDOC_MARKER} inside" - _write_to_sandbox(content, "/tmp/hermes-results/abc.txt", env) + big = "x" * 200_000 + _write_to_sandbox(big, "/tmp/hermes-results/big.txt", env) cmd = env.execute.call_args[0][0] - # The default marker should NOT be used as the delimiter - lines = cmd.split("\n") - # The first and last lines contain the actual delimiter - assert HEREDOC_MARKER not in lines[0].split("<<")[1] + assert len(cmd) < 1_000 # cmd is just `mkdir -p X && cat > Y` + assert env.execute.call_args[1]["stdin_data"] == big def test_timeout_passed(self): env = MagicMock() @@ -247,9 +250,9 @@ class TestMaybePersistToolResult: threshold=30_000, ) assert PERSISTED_OUTPUT_TAG in result - # The heredoc written to sandbox should contain the full JSON blob - cmd = env.execute.call_args[0][0] - assert '"exit_code"' in cmd + # Content is delivered through stdin (no longer embedded in the + # command string — see test_large_content_via_stdin for why). + assert env.execute.call_args[1]["stdin_data"] == content def test_above_threshold_no_env_truncates_inline(self): content = "x" * 60_000 diff --git a/tools/tool_result_storage.py b/tools/tool_result_storage.py index 43422644825..fed8621eee4 100644 --- a/tools/tool_result_storage.py +++ b/tools/tool_result_storage.py @@ -76,15 +76,21 @@ def _heredoc_marker(content: str) -> str: def _write_to_sandbox(content: str, remote_path: str, env) -> bool: - """Write content into the sandbox via env.execute(). Returns True on success.""" - marker = _heredoc_marker(content) + """Write content into the sandbox via env.execute(). Returns True on success. + + Pushes ``content`` through stdin rather than embedding it in the command + string. 
Linux's ``MAX_ARG_STRLEN`` caps any single argv element at 128 KB + (32 * PAGE_SIZE), so the previous heredoc-in-the-command-string approach + silently failed with ``OSError: [Errno 7] Argument list too long`` for any + tool result over ~128 KB — exactly the case persistence exists to handle. + Routing through stdin removes that ceiling on local + ssh (``_stdin_mode + == "pipe"``); remote backends with ``_stdin_mode == "heredoc"`` keep their + existing API-body sized limit, which is orders of magnitude larger than + the exec-arg ceiling. + """ storage_dir = os.path.dirname(remote_path) - cmd = ( - f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)} << '{marker}'\n" - f"{content}\n" - f"{marker}" - ) - result = env.execute(cmd, timeout=30) + cmd = f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)}" + result = env.execute(cmd, timeout=30, stdin_data=content) return result.get("returncode", 1) == 0