fix(tool-result-storage): persist via stdin to bypass 128 KB exec-arg cap (#22913)

Linux's MAX_ARG_STRLEN caps any single argv element at 128 KB
(32 * PAGE_SIZE). The previous heredoc-in-the-command-string approach
in _write_to_sandbox put the entire tool result inside the 'bash -c'
arg, so any result over ~128 KB raised OSError [Errno 7] 'Argument
list too long' before the heredoc ever ran. The caller logged a
warning, but quiet_mode (CLI default) sets tools.* to ERROR — so the
warning never reached agent.log either, and the agent saw a 1.5 KB
preview tagged 'Full output could not be saved to sandbox'. Hits
delegate_task with 3+ subagent outputs routinely now.

Switch to passing content via env.execute(stdin_data=...). cmd is
now just 'mkdir -p X && cat > Y' (under 1 KB), and the heavyweight
payload travels through stdin where there is no argv-element limit.

E2E reproduced the user's exact 144,778-char delegate_task envelope:
old code OSError'd, new code round-trips cleanly to disk with all
three task summaries intact.
This commit is contained in:
Teknium 2026-05-09 18:44:58 -07:00 committed by GitHub
parent ded194eb6a
commit 08ec602770
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 29 additions and 20 deletions

View file

@ -90,8 +90,11 @@ class TestWriteToSandbox:
env.execute.assert_called_once()
cmd = env.execute.call_args[0][0]
assert "mkdir -p" in cmd
assert "hello world" in cmd
assert HEREDOC_MARKER in cmd
# Content travels through stdin, NOT inside the command string —
# otherwise large content would hit Linux's 128 KB MAX_ARG_STRLEN
# ceiling on `bash -c <cmd>` (#22906).
assert "hello world" not in cmd
assert env.execute.call_args[1]["stdin_data"] == "hello world"
def test_failure_returns_false(self):
env = MagicMock()
@ -99,16 +102,16 @@ class TestWriteToSandbox:
result = _write_to_sandbox("content", "/tmp/hermes-results/abc.txt", env)
assert result is False
def test_heredoc_collision_uses_uuid_marker(self):
def test_large_content_via_stdin(self):
"""Regression: 200 KB content exceeds Linux MAX_ARG_STRLEN (128 KB).
It must travel via stdin, never inside the command string."""
env = MagicMock()
env.execute.return_value = {"output": "", "returncode": 0}
content = f"text with {HEREDOC_MARKER} inside"
_write_to_sandbox(content, "/tmp/hermes-results/abc.txt", env)
big = "x" * 200_000
_write_to_sandbox(big, "/tmp/hermes-results/big.txt", env)
cmd = env.execute.call_args[0][0]
# The default marker should NOT be used as the delimiter
lines = cmd.split("\n")
# The first and last lines contain the actual delimiter
assert HEREDOC_MARKER not in lines[0].split("<<")[1]
assert len(cmd) < 1_000 # cmd is just `mkdir -p X && cat > Y`
assert env.execute.call_args[1]["stdin_data"] == big
def test_timeout_passed(self):
env = MagicMock()
@ -247,9 +250,9 @@ class TestMaybePersistToolResult:
threshold=30_000,
)
assert PERSISTED_OUTPUT_TAG in result
# The heredoc written to sandbox should contain the full JSON blob
cmd = env.execute.call_args[0][0]
assert '"exit_code"' in cmd
# Content is delivered through stdin (no longer embedded in the
# command string — see test_large_content_via_stdin for why).
assert env.execute.call_args[1]["stdin_data"] == content
def test_above_threshold_no_env_truncates_inline(self):
content = "x" * 60_000

View file

@ -76,15 +76,21 @@ def _heredoc_marker(content: str) -> str:
def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
"""Write content into the sandbox via env.execute(). Returns True on success."""
marker = _heredoc_marker(content)
"""Write content into the sandbox via env.execute(). Returns True on success.
Pushes ``content`` through stdin rather than embedding it in the command
string. Linux's ``MAX_ARG_STRLEN`` caps any single argv element at 128 KB
(32 * PAGE_SIZE), so the previous heredoc-in-the-command-string approach
silently failed with ``OSError: [Errno 7] Argument list too long`` for any
tool result over ~128 KB exactly the case persistence exists to handle.
Routing through stdin removes that ceiling on local + ssh (``_stdin_mode
== "pipe"``); remote backends with ``_stdin_mode == "heredoc"`` keep their
existing API-body sized limit, which is orders of magnitude larger than
the exec-arg ceiling.
"""
storage_dir = os.path.dirname(remote_path)
cmd = (
f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)} << '{marker}'\n"
f"{content}\n"
f"{marker}"
)
result = env.execute(cmd, timeout=30)
cmd = f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)}"
result = env.execute(cmd, timeout=30, stdin_data=content)
return result.get("returncode", 1) == 0