mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
feat(terminal): collapse subagent task_ids to shared container (#16177)
Before: delegate_task children each allocated their own terminal
sandbox keyed by child task_id. Starting extra containers (or Modal
sandboxes / Daytona workspaces) is expensive, and the subagent's work
is invisible to the parent — files written by the child in its
container don't exist in the parent's when the subagent returns.
After: a single `_resolve_container_task_id` helper maps any
tool-call task_id to "default" UNLESS an env override is registered
for it. The parent agent and all delegate_task children therefore
share one long-lived sandbox — installed packages, cwd, /workspace
files, and /tmp scratch carry over freely between them.
RL and benchmark environments (TerminalBench2, HermesSweEnv, ...)
opt in to isolation via `register_task_env_overrides(task_id, {...})`;
those task_ids survive the collapse and get their own sandbox,
preserving the per-task Docker image behavior these benchmarks rely on.
file_state / active-subagents registry / TUI events still key off the
original child task_id, so the 'subagent wrote a file the parent read'
warning and UI per-subagent panels keep working.
Tradeoff: parallel delegate_task children (tasks=[...]) now share one
bash/container. Concurrent cd, env-var mutations, and writes to the
same path will collide. If that bites a specific workflow, the
subagent can opt back into isolation via register_task_env_overrides.
Applied at five lookup sites across three files:
- tools/terminal_tool.py terminal_tool() and get_active_env()
- tools/file_tools.py _get_file_ops() and _get_live_tracking_cwd()
- tools/code_execution_tool.py _get_or_create_environment()
Docs: website/docs/user-guide/configuration.md updated to reflect the
shared-container reality and document the RL/benchmark carve-out.
Tests: tests/tools/test_shared_container_task_id.py (9 cases).
This commit is contained in:
parent
087e74d4d7
commit
5b2c59559a
5 changed files with 159 additions and 8 deletions
|
|
@ -803,6 +803,31 @@ def clear_task_env_overrides(task_id: str):
|
|||
"""
|
||||
_task_env_overrides.pop(task_id, None)
|
||||
|
||||
|
||||
def _resolve_container_task_id(task_id: Optional[str]) -> str:
    """
    Resolve the key under which a tool call's sandbox is stored in
    ``_active_environments``.

    Almost every caller ends up on the shared ``"default"`` container:

    * the top-level agent, which supplies ``task_id=None``;
    * ``delegate_task`` children, whose subagent IDs remain in use for
      file-state tracking, the active-subagents registry and TUI events,
      but are intentionally folded into ``"default"`` here so that parent
      and children work in the same long-lived container (one bash, one
      /workspace, one set of installed packages).

    The single exception is a task_id that has an entry in
    ``_task_env_overrides``. RL / benchmark environments (TerminalBench2,
    HermesSweEnv, ...) create such entries through
    ``register_task_env_overrides(task_id, {...})`` to ask for a per-task
    Docker/Modal image, so those IDs are returned untouched and keep an
    isolated sandbox of their own.
    """
    if not task_id:
        # None / empty ID: nothing to look up, use the shared container.
        return "default"
    # Only IDs with a registered override keep their own sandbox key.
    return task_id if task_id in _task_env_overrides else "default"
|
||||
|
||||
|
||||
# Configuration from environment variables
|
||||
|
||||
def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"):
|
||||
|
|
@ -1139,8 +1164,9 @@ def _stop_cleanup_thread():
|
|||
|
||||
def get_active_env(task_id: str):
    """Return the active BaseEnvironment for *task_id*, or None.

    The id is first mapped to its container key with
    ``_resolve_container_task_id`` so that subagent task_ids find the
    shared container. If nothing is stored under the resolved key, the
    raw *task_id* is tried as well.
    """
    lookup = _resolve_container_task_id(task_id)
    with _env_lock:
        # Both reads happen under the lock so they see one consistent
        # snapshot of the registry.
        # NOTE(review): the raw-id fallback presumably covers environments
        # that were stored under the un-collapsed task_id -- confirm.
        return _active_environments.get(lookup) or _active_environments.get(task_id)
|
||||
|
||||
|
||||
def is_persistent_env(task_id: str) -> bool:
|
||||
|
|
@ -1473,8 +1499,11 @@ def terminal_tool(
|
|||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
# Use task_id for environment isolation
|
||||
effective_task_id = task_id or "default"
|
||||
# Use task_id for environment isolation. By default all subagent
|
||||
# task_ids collapse back to "default" so the top-level agent and
|
||||
# every delegate_task child share one container; only task_ids with
|
||||
# a registered env override (RL benchmarks) get isolated sandboxes.
|
||||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Check per-task overrides (set by environments like TerminalBench2Env)
|
||||
# before falling back to global env var config
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue