mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
terminal: steer long-lived server commands to background mode
This commit is contained in:
parent
a3a4932405
commit
d50a9b20d2
2 changed files with 120 additions and 0 deletions
|
|
@ -48,6 +48,53 @@ class TestForegroundTimeoutCap:
|
||||||
assert str(FOREGROUND_MAX_TIMEOUT) in result["error"]
|
assert str(FOREGROUND_MAX_TIMEOUT) in result["error"]
|
||||||
assert "background=true" in result["error"]
|
assert "background=true" in result["error"]
|
||||||
|
|
||||||
|
def test_foreground_rejects_shell_level_background_wrappers(self):
    """A foreground command wrapped in nohup/disown/setsid is refused.

    The returned payload must carry exit code -1 and an error that both
    names the wrapper and points the caller at ``background=true``.
    """
    from tools.terminal_tool import terminal_tool

    env_config_patch = patch(
        "tools.terminal_tool._get_env_config",
        return_value=_make_env_config(),
    )
    cleanup_patch = patch("tools.terminal_tool._start_cleanup_thread")

    with env_config_patch, cleanup_patch:
        raw_response = terminal_tool(
            command="nohup pnpm dev > /tmp/sg-server.log 2>&1 &",
        )
        payload = json.loads(raw_response)

    error_text = payload["error"]
    assert payload["exit_code"] == -1
    assert "background=true" in error_text
    assert "nohup" in error_text.lower()
|
||||||
|
|
||||||
|
def test_foreground_rejects_long_lived_server_command(self):
    """A plain dev-server command in foreground mode is refused.

    ``pnpm dev`` must come back with exit code -1 and an error that
    describes the command as long-lived and suggests ``background=true``.
    """
    from tools.terminal_tool import terminal_tool

    env_config_patch = patch(
        "tools.terminal_tool._get_env_config",
        return_value=_make_env_config(),
    )
    cleanup_patch = patch("tools.terminal_tool._start_cleanup_thread")

    with env_config_patch, cleanup_patch:
        raw_response = terminal_tool(command="pnpm dev")
        payload = json.loads(raw_response)

    error_text = payload["error"]
    assert payload["exit_code"] == -1
    assert "long-lived" in error_text.lower()
    assert "background=true" in error_text
|
||||||
|
|
||||||
|
def test_foreground_allows_help_variant_for_server_command(self):
    """An informational variant such as ``--help`` passes the guard.

    The command must reach the (mocked) environment unchanged and the
    response must report no error.
    """
    from tools.terminal_tool import terminal_tool

    # Stand-in environment that records the command it is asked to run.
    fake_env = MagicMock()
    fake_env.execute.return_value = {"output": "usage", "returncode": 0}

    env_config_patch = patch(
        "tools.terminal_tool._get_env_config",
        return_value=_make_env_config(),
    )
    cleanup_patch = patch("tools.terminal_tool._start_cleanup_thread")
    environments_patch = patch(
        "tools.terminal_tool._active_environments", {"default": fake_env}
    )
    activity_patch = patch("tools.terminal_tool._last_activity", {"default": 0})
    guards_patch = patch(
        "tools.terminal_tool._check_all_guards",
        return_value={"approved": True},
    )

    with env_config_patch, cleanup_patch:
        with environments_patch, activity_patch, guards_patch:
            raw_response = terminal_tool(command="pnpm dev --help")
            payload = json.loads(raw_response)

    assert payload["error"] is None
    positional_args = fake_env.execute.call_args[0]
    assert positional_args[0] == "pnpm dev --help"
|
||||||
|
|
||||||
def test_foreground_timeout_within_max_executes(self):
|
def test_foreground_timeout_within_max_executes(self):
|
||||||
"""When model requests timeout <= FOREGROUND_MAX_TIMEOUT, execute normally."""
|
"""When model requests timeout <= FOREGROUND_MAX_TIMEOUT, execute normally."""
|
||||||
from tools.terminal_tool import terminal_tool
|
from tools.terminal_tool import terminal_tool
|
||||||
|
|
|
||||||
|
|
@ -523,6 +523,8 @@ Foreground (default): Commands return INSTANTLY when done, even if the timeout i
|
||||||
Background: Set background=true to get a session_id. Two patterns:
|
Background: Set background=true to get a session_id. Two patterns:
|
||||||
(1) Long-lived processes that never exit (servers, watchers).
|
(1) Long-lived processes that never exit (servers, watchers).
|
||||||
(2) Long-running tasks with notify_on_complete=true — you can keep working on other things and the system auto-notifies you when the task finishes. Great for test suites, builds, deployments, or anything that takes more than a minute.
|
(2) Long-running tasks with notify_on_complete=true — you can keep working on other things and the system auto-notifies you when the task finishes. Great for test suites, builds, deployments, or anything that takes more than a minute.
|
||||||
|
For servers/watchers, do NOT use shell-level background wrappers (nohup/disown/setsid/trailing '&') in foreground mode. Use background=true so Hermes can track lifecycle and output.
|
||||||
|
After starting a server, verify readiness with a health check or log signal, then run tests in a separate terminal() call. Avoid blind sleep loops.
|
||||||
Use process(action="poll") for progress checks, process(action="wait") to block until done.
|
Use process(action="poll") for progress checks, process(action="wait") to block until done.
|
||||||
Working directory: Use 'workdir' for per-command cwd.
|
Working directory: Use 'workdir' for per-command cwd.
|
||||||
PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL).
|
PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL).
|
||||||
|
|
@ -1103,6 +1105,65 @@ def _command_requires_pipe_stdin(command: str) -> bool:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_SHELL_LEVEL_BACKGROUND_RE = re.compile(r"\b(?:nohup|disown|setsid)\b", re.IGNORECASE)
|
||||||
|
_INLINE_BACKGROUND_AMP_RE = re.compile(r"\s&\s")
|
||||||
|
_TRAILING_BACKGROUND_AMP_RE = re.compile(r"\s&\s*(?:#.*)?$")
|
||||||
|
_LONG_LIVED_FOREGROUND_PATTERNS = (
|
||||||
|
re.compile(r"\b(?:npm|pnpm|yarn|bun)\s+(?:run\s+)?(?:dev|start|serve|watch)\b", re.IGNORECASE),
|
||||||
|
re.compile(r"\bdocker\s+compose\s+up\b", re.IGNORECASE),
|
||||||
|
re.compile(r"\bnext\s+dev\b", re.IGNORECASE),
|
||||||
|
re.compile(r"\bvite(?:\s|$)", re.IGNORECASE),
|
||||||
|
re.compile(r"\bnodemon\b", re.IGNORECASE),
|
||||||
|
re.compile(r"\buvicorn\b", re.IGNORECASE),
|
||||||
|
re.compile(r"\bgunicorn\b", re.IGNORECASE),
|
||||||
|
re.compile(r"\bpython(?:3)?\s+-m\s+http\.server\b", re.IGNORECASE),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_help_or_version_command(command: str) -> bool:
|
||||||
|
"""Return True for informational invocations that should never be blocked."""
|
||||||
|
normalized = " ".join(command.lower().split())
|
||||||
|
return (
|
||||||
|
" --help" in normalized
|
||||||
|
or normalized.endswith(" -h")
|
||||||
|
or " --version" in normalized
|
||||||
|
or normalized.endswith(" -v")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# A single- or double-quoted shell word. The optional ``shell_flag`` group
# captures a preceding ``-c``-style flag (``-c``, ``-lc``, ``-ic`` ...) so the
# payload of ``bash -c "..."`` can be told apart from ordinary quoted data.
_QUOTED_ARGUMENT_RE = re.compile(
    r"""(?P<shell_flag>(?<!\S)-[A-Za-z]*c\s+)?(?:'[^']*'|"(?:\\.|[^"\\])*")"""
)


def _mask_quoted_arguments(command: str) -> str:
    """Replace quoted literals with ``""`` unless they follow a ``-c`` flag."""
    return _QUOTED_ARGUMENT_RE.sub(
        lambda match: match.group(0) if match.group("shell_flag") else '""',
        command,
    )


def _foreground_background_guidance(command: str) -> str | None:
    """Suggest background mode when a foreground command looks long-lived.

    Prevents workflows that start a server/watch process and then stall before
    follow-up checks or test commands run.

    Quoted arguments are blanked out before the patterns are applied, so
    literal text such as a commit message (``git commit -m "fix A & B"``) or a
    search pattern (``grep "pnpm dev" README.md``) is not mistaken for shell
    backgrounding or a server start. Quoted payloads that follow a ``-c``
    style flag are kept, because they are shell code.

    Args:
        command: Raw shell command requested for foreground execution.

    Returns:
        A message explaining why the command should be run with
        ``background=true``, or None when the command may run in the
        foreground.
    """
    # Informational invocations (--help / --version) exit immediately.
    if _looks_like_help_or_version_command(command):
        return None

    shell_text = _mask_quoted_arguments(command)

    if _SHELL_LEVEL_BACKGROUND_RE.search(shell_text):
        return (
            "Foreground command uses shell-level background wrappers (nohup/disown/setsid). "
            "Use terminal(background=true) so Hermes can track the process, then run "
            "readiness checks and tests in separate commands."
        )

    if _INLINE_BACKGROUND_AMP_RE.search(shell_text) or _TRAILING_BACKGROUND_AMP_RE.search(shell_text):
        return (
            "Foreground command uses '&' backgrounding. Use terminal(background=true) for long-lived "
            "processes, then run health checks and tests in follow-up terminal calls."
        )

    if any(pattern.search(shell_text) for pattern in _LONG_LIVED_FOREGROUND_PATTERNS):
        return (
            "This foreground command appears to start a long-lived server/watch process. "
            "Run it with background=true, verify readiness (health endpoint/log signal), "
            "then execute tests in a separate command."
        )

    return None
|
||||||
|
|
||||||
|
|
||||||
def terminal_tool(
|
def terminal_tool(
|
||||||
command: str,
|
command: str,
|
||||||
background: bool = False,
|
background: bool = False,
|
||||||
|
|
@ -1195,6 +1256,18 @@ def terminal_tool(
|
||||||
),
|
),
|
||||||
}, ensure_ascii=False)
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
|
# Guardrail: long-lived server/watch commands should run as managed
|
||||||
|
# background sessions, not foreground shell hacks.
|
||||||
|
if not background:
|
||||||
|
guidance = _foreground_background_guidance(command)
|
||||||
|
if guidance:
|
||||||
|
return json.dumps({
|
||||||
|
"output": "",
|
||||||
|
"exit_code": -1,
|
||||||
|
"error": guidance,
|
||||||
|
"status": "error",
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
# Start cleanup thread
|
# Start cleanup thread
|
||||||
_start_cleanup_thread()
|
_start_cleanup_thread()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue