diff --git a/tests/tools/test_terminal_foreground_timeout_cap.py b/tests/tools/test_terminal_foreground_timeout_cap.py index 9e7edd3325..5f95e15571 100644 --- a/tests/tools/test_terminal_foreground_timeout_cap.py +++ b/tests/tools/test_terminal_foreground_timeout_cap.py @@ -1,7 +1,7 @@ -"""Tests for foreground timeout clamping in terminal_tool. +"""Tests for foreground timeout cap in terminal_tool. -Ensures that foreground commands have a hard timeout cap to prevent -a single tool call from blocking the entire agent session. +Ensures that foreground commands with timeout > FOREGROUND_MAX_TIMEOUT +are rejected with an error suggesting background=true. """ import json import os @@ -29,36 +29,27 @@ def _make_env_config(**overrides): class TestForegroundTimeoutCap: - """FOREGROUND_MAX_TIMEOUT prevents foreground commands from blocking too long.""" + """FOREGROUND_MAX_TIMEOUT rejects foreground commands that exceed it.""" - def test_foreground_timeout_clamped_to_max(self): - """When model requests timeout > FOREGROUND_MAX_TIMEOUT, it's clamped.""" + def test_foreground_timeout_rejected_above_max(self): + """When model requests timeout > FOREGROUND_MAX_TIMEOUT, return error.""" from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \ patch("tools.terminal_tool._start_cleanup_thread"): - mock_env = MagicMock() - mock_env.execute.return_value = {"output": "done", "returncode": 0} + result = json.loads(terminal_tool( + command="echo hello", + timeout=9999, # Way above max + )) - with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \ - patch("tools.terminal_tool._last_activity", {"default": 0}), \ - patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}): - result = json.loads(terminal_tool( - command="echo hello", - timeout=9999, # Way above max - )) + assert "error" in result + assert "9999" in result["error"] + assert str(FOREGROUND_MAX_TIMEOUT) in result["error"] + assert "background=true" in result["error"] - # Verify the timeout was clamped - call_kwargs = mock_env.execute.call_args - assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT - assert result.get("timeout_note") is not None - assert "clamped" in result["timeout_note"] - assert "9999" in result["timeout_note"] - assert "background=true" in result["timeout_note"] - - def test_foreground_timeout_within_max_not_clamped(self): - """When model requests timeout <= FOREGROUND_MAX_TIMEOUT, no clamping.""" + def test_foreground_timeout_within_max_executes(self): + """When model requests timeout <= FOREGROUND_MAX_TIMEOUT, execute normally.""" from tools.terminal_tool import terminal_tool with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \ @@ -75,12 +66,16 @@ class TestForegroundTimeoutCap: timeout=300, # Within max )) - call_kwargs = mock_env.execute.call_args - assert call_kwargs[1]["timeout"] == 300 - assert "timeout_note" not in result + call_kwargs = mock_env.execute.call_args + assert call_kwargs[1]["timeout"] == 300 + assert "error" not in result or result["error"] is None - def test_config_default_exceeds_cap_no_model_timeout(self): - """When config default timeout > cap and model passes no timeout, clamping fires.""" + def test_config_default_above_cap_not_rejected(self): + """When config default timeout > cap but model passes no timeout, execute normally. + + Only the model's explicit timeout parameter triggers rejection, + not the user's configured default. + """ from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT # User configured TERMINAL_TIMEOUT=900 in their env @@ -96,16 +91,12 @@ class TestForegroundTimeoutCap: patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}): result = json.loads(terminal_tool(command="make build")) - # Should be clamped - call_kwargs = mock_env.execute.call_args - assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT - # Note should reference the original 900s, NOT "None" - note = result.get("timeout_note", "") - assert "900" in note, f"Expected '900' in timeout_note but got: {note!r}" - assert "None" not in note, f"timeout_note contains 'None': {note!r}" - assert "clamped" in note + # Should execute with the config default, NOT be rejected + call_kwargs = mock_env.execute.call_args + assert call_kwargs[1]["timeout"] == 900 + assert "error" not in result or result["error"] is None - def test_background_not_clamped(self): + def test_background_not_rejected(self): """Background commands should NOT be subject to foreground timeout cap.""" from tools.terminal_tool import terminal_tool @@ -132,14 +123,14 @@ class TestForegroundTimeoutCap: timeout=9999, )) - # Background should NOT be clamped - assert result.get("timeout_note") is None + # Background should NOT be rejected + assert "error" not in result or result["error"] is None - def test_default_timeout_not_clamped(self): - """Default timeout (180s) should not trigger clamping.""" + def test_default_timeout_not_rejected(self): + """Default timeout (180s) should not trigger rejection.""" from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT - # 180 < 600, so no clamping + # 180 < 600, so no rejection assert 180 < FOREGROUND_MAX_TIMEOUT with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \ @@ -153,9 +144,31 @@ class TestForegroundTimeoutCap: patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}): result = json.loads(terminal_tool(command="echo hello")) - call_kwargs = mock_env.execute.call_args - assert call_kwargs[1]["timeout"] == 180 - assert "timeout_note" not in result + call_kwargs = mock_env.execute.call_args + assert call_kwargs[1]["timeout"] == 180 + assert "error" not in result or result["error"] is None + + def test_exactly_at_max_not_rejected(self): + """Timeout exactly at FOREGROUND_MAX_TIMEOUT should execute normally.""" + from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT + + with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \ + patch("tools.terminal_tool._start_cleanup_thread"): + + mock_env = MagicMock() + mock_env.execute.return_value = {"output": "done", "returncode": 0} + + with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \ + patch("tools.terminal_tool._last_activity", {"default": 0}), \ + patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}): + result = json.loads(terminal_tool( + command="echo hello", + timeout=FOREGROUND_MAX_TIMEOUT, # Exactly at limit + )) + + call_kwargs = mock_env.execute.call_args + assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT + assert "error" not in result or result["error"] is None class TestForegroundMaxTimeoutConstant: @@ -164,9 +177,6 @@ class TestForegroundMaxTimeoutConstant: def test_default_value_is_600(self): """Default FOREGROUND_MAX_TIMEOUT is 600 when env var is not set.""" from tools.terminal_tool import FOREGROUND_MAX_TIMEOUT - # Module-level constant should be 600 in a clean test environment. - # If TERMINAL_MAX_FOREGROUND_TIMEOUT is set, it may differ — but the - # conftest _isolate_hermes_home fixture ensures a clean env for tests. assert FOREGROUND_MAX_TIMEOUT == 600 def test_schema_mentions_max(self): @@ -174,4 +184,4 @@ class TestForegroundMaxTimeoutConstant: from tools.terminal_tool import TERMINAL_SCHEMA, FOREGROUND_MAX_TIMEOUT timeout_desc = TERMINAL_SCHEMA["parameters"]["properties"]["timeout"]["description"] assert str(FOREGROUND_MAX_TIMEOUT) in timeout_desc - assert "max" in timeout_desc.lower() + assert "background=true" in timeout_desc diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 7f128bc88e..d57078f528 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1210,16 +1210,17 @@ def terminal_tool( cwd = overrides.get("cwd") or config["cwd"] default_timeout = config["timeout"] effective_timeout = timeout or default_timeout - unclamped_timeout = effective_timeout - # Clamp foreground commands to FOREGROUND_MAX_TIMEOUT to prevent - # a single tool call from blocking the entire agent session. - if not background and effective_timeout > FOREGROUND_MAX_TIMEOUT: - logger.info( - "Clamping foreground timeout from %ds to %ds (max: TERMINAL_MAX_FOREGROUND_TIMEOUT=%d)", - effective_timeout, FOREGROUND_MAX_TIMEOUT, FOREGROUND_MAX_TIMEOUT, - ) - effective_timeout = FOREGROUND_MAX_TIMEOUT + # Reject foreground commands where the model explicitly requests + # a timeout above FOREGROUND_MAX_TIMEOUT — nudge it toward background. + if not background and timeout and timeout > FOREGROUND_MAX_TIMEOUT: + return json.dumps({ + "error": ( + f"Foreground timeout {timeout}s exceeds the maximum of " + f"{FOREGROUND_MAX_TIMEOUT}s. Use background=true with " + f"notify_on_complete=true for long-running commands." + ), + }, ensure_ascii=False) # Start cleanup thread _start_cleanup_thread() @@ -1485,18 +1486,11 @@ def terminal_tool( except Exception as e: error_str = str(e).lower() if "timeout" in error_str: - timeout_result = { + return json.dumps({ "output": "", "exit_code": 124, "error": f"Command timed out after {effective_timeout} seconds" - } - if unclamped_timeout != effective_timeout: - timeout_result["timeout_note"] = ( - f"Timeout of {unclamped_timeout}s was clamped to " - f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. " - f"Use background=true for long-running processes." - ) - return json.dumps(timeout_result, ensure_ascii=False) + }, ensure_ascii=False) # Retry on transient errors if retry_count < max_retries: @@ -1559,12 +1553,6 @@ def terminal_tool( result_dict["approval"] = approval_note if exit_note: result_dict["exit_code_meaning"] = exit_note - if unclamped_timeout != effective_timeout: - result_dict["timeout_note"] = ( - f"Timeout of {unclamped_timeout}s was clamped to " - f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. " - f"Use background=true for long-running processes." - ) return json.dumps(result_dict, ensure_ascii=False) @@ -1751,7 +1739,7 @@ TERMINAL_SCHEMA = { }, "timeout": { "type": "integer", - "description": f"Max seconds to wait (default: 180, max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.", + "description": f"Max seconds to wait (default: 180, foreground max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily. Foreground timeout above {FOREGROUND_MAX_TIMEOUT}s is rejected; use background=true for longer commands.", "minimum": 1 }, "workdir": {