mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
fix(tool-result-storage): persist via stdin to bypass 128 KB exec-arg cap (#22913)
Linux's MAX_ARG_STRLEN caps any single argv element at 128 KB (32 * PAGE_SIZE). The previous heredoc-in-the-command-string approach in _write_to_sandbox put the entire tool result inside the 'bash -c' arg, so any result over ~128 KB raised OSError [Errno 7] 'Argument list too long' before the heredoc ever ran. The caller logged a warning, but quiet_mode (CLI default) sets tools.* to ERROR — so the warning never reached agent.log either, and the agent saw a 1.5 KB preview tagged 'Full output could not be saved to sandbox'. Hits delegate_task with 3+ subagent outputs routinely now. Switch to passing content via env.execute(stdin_data=...). cmd is now just 'mkdir -p X && cat > Y' (under 1 KB), and the heavyweight payload travels through stdin where there is no argv-element limit. E2E reproduced the user's exact 144,778-char delegate_task envelope: old code OSError'd, new code round-trips cleanly to disk with all three task summaries intact.
This commit is contained in:
parent
ded194eb6a
commit
08ec602770
2 changed files with 29 additions and 20 deletions
|
|
@ -90,8 +90,11 @@ class TestWriteToSandbox:
|
||||||
env.execute.assert_called_once()
|
env.execute.assert_called_once()
|
||||||
cmd = env.execute.call_args[0][0]
|
cmd = env.execute.call_args[0][0]
|
||||||
assert "mkdir -p" in cmd
|
assert "mkdir -p" in cmd
|
||||||
assert "hello world" in cmd
|
# Content travels through stdin, NOT inside the command string —
|
||||||
assert HEREDOC_MARKER in cmd
|
# otherwise large content would hit Linux's 128 KB MAX_ARG_STRLEN
|
||||||
|
# ceiling on `bash -c <cmd>` (#22906).
|
||||||
|
assert "hello world" not in cmd
|
||||||
|
assert env.execute.call_args[1]["stdin_data"] == "hello world"
|
||||||
|
|
||||||
def test_failure_returns_false(self):
|
def test_failure_returns_false(self):
|
||||||
env = MagicMock()
|
env = MagicMock()
|
||||||
|
|
@ -99,16 +102,16 @@ class TestWriteToSandbox:
|
||||||
result = _write_to_sandbox("content", "/tmp/hermes-results/abc.txt", env)
|
result = _write_to_sandbox("content", "/tmp/hermes-results/abc.txt", env)
|
||||||
assert result is False
|
assert result is False
|
||||||
|
|
||||||
def test_heredoc_collision_uses_uuid_marker(self):
|
def test_large_content_via_stdin(self):
|
||||||
|
"""Regression: 200 KB content exceeds Linux MAX_ARG_STRLEN (128 KB).
|
||||||
|
It must travel via stdin, never inside the command string."""
|
||||||
env = MagicMock()
|
env = MagicMock()
|
||||||
env.execute.return_value = {"output": "", "returncode": 0}
|
env.execute.return_value = {"output": "", "returncode": 0}
|
||||||
content = f"text with {HEREDOC_MARKER} inside"
|
big = "x" * 200_000
|
||||||
_write_to_sandbox(content, "/tmp/hermes-results/abc.txt", env)
|
_write_to_sandbox(big, "/tmp/hermes-results/big.txt", env)
|
||||||
cmd = env.execute.call_args[0][0]
|
cmd = env.execute.call_args[0][0]
|
||||||
# The default marker should NOT be used as the delimiter
|
assert len(cmd) < 1_000 # cmd is just `mkdir -p X && cat > Y`
|
||||||
lines = cmd.split("\n")
|
assert env.execute.call_args[1]["stdin_data"] == big
|
||||||
# The first and last lines contain the actual delimiter
|
|
||||||
assert HEREDOC_MARKER not in lines[0].split("<<")[1]
|
|
||||||
|
|
||||||
def test_timeout_passed(self):
|
def test_timeout_passed(self):
|
||||||
env = MagicMock()
|
env = MagicMock()
|
||||||
|
|
@ -247,9 +250,9 @@ class TestMaybePersistToolResult:
|
||||||
threshold=30_000,
|
threshold=30_000,
|
||||||
)
|
)
|
||||||
assert PERSISTED_OUTPUT_TAG in result
|
assert PERSISTED_OUTPUT_TAG in result
|
||||||
# The heredoc written to sandbox should contain the full JSON blob
|
# Content is delivered through stdin (no longer embedded in the
|
||||||
cmd = env.execute.call_args[0][0]
|
# command string — see test_large_content_via_stdin for why).
|
||||||
assert '"exit_code"' in cmd
|
assert env.execute.call_args[1]["stdin_data"] == content
|
||||||
|
|
||||||
def test_above_threshold_no_env_truncates_inline(self):
|
def test_above_threshold_no_env_truncates_inline(self):
|
||||||
content = "x" * 60_000
|
content = "x" * 60_000
|
||||||
|
|
|
||||||
|
|
@ -76,15 +76,21 @@ def _heredoc_marker(content: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
|
def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
|
||||||
"""Write content into the sandbox via env.execute(). Returns True on success."""
|
"""Write content into the sandbox via env.execute(). Returns True on success.
|
||||||
marker = _heredoc_marker(content)
|
|
||||||
|
Pushes ``content`` through stdin rather than embedding it in the command
|
||||||
|
string. Linux's ``MAX_ARG_STRLEN`` caps any single argv element at 128 KB
|
||||||
|
(32 * PAGE_SIZE), so the previous heredoc-in-the-command-string approach
|
||||||
|
silently failed with ``OSError: [Errno 7] Argument list too long`` for any
|
||||||
|
tool result over ~128 KB — exactly the case persistence exists to handle.
|
||||||
|
Routing through stdin removes that ceiling on local + ssh (``_stdin_mode
|
||||||
|
== "pipe"``); remote backends with ``_stdin_mode == "heredoc"`` keep their
|
||||||
|
existing API-body sized limit, which is orders of magnitude larger than
|
||||||
|
the exec-arg ceiling.
|
||||||
|
"""
|
||||||
storage_dir = os.path.dirname(remote_path)
|
storage_dir = os.path.dirname(remote_path)
|
||||||
cmd = (
|
cmd = f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)}"
|
||||||
f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)} << '{marker}'\n"
|
result = env.execute(cmd, timeout=30, stdin_data=content)
|
||||||
f"{content}\n"
|
|
||||||
f"{marker}"
|
|
||||||
)
|
|
||||||
result = env.execute(cmd, timeout=30)
|
|
||||||
return result.get("returncode", 1) == 0
|
return result.get("returncode", 1) == 0
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue