mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-14 09:11:54 +00:00
fix(terminal): read cwd overrides under raw task_id after container collapse
PR #41822 collapsed CWD-only overrides to the shared 'default' container via _resolve_container_task_id, but three call sites kept routing the *env/override lookup* through that collapsed id: - the foreground exec path read _task_env_overrides[effective_task_id], yet register_task_env_overrides writes under the raw task_id, so a CWD-only override's cwd was silently dropped (env spun up at the wrong root, exit 126); - the get-or-create env lookup keyed solely on effective_task_id, so an env cached under the raw task_id was missed and duplicated; - register_task_env_overrides synced the new cwd onto the env under the collapsed id, missing a live env cached under the raw task_id. Container *identity* still collapses to 'default' (sharing preserved); only the per-session env/override *lookup* now prefers the raw task_id and falls back to the collapsed id. Fixes the 3 regressions in test_terminal_task_cwd.py left red by #41822.
This commit is contained in:
parent
d759c13c09
commit
329c33dac3
1 changed files with 39 additions and 9 deletions
|
|
@ -972,9 +972,14 @@ def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]):
|
|||
# while letting an explicit ACP cwd change win, as the client expects.
|
||||
new_cwd = overrides.get("cwd")
|
||||
if isinstance(new_cwd, str) and new_cwd.strip():
|
||||
# The live env is cached under the raw task_id for per-session surfaces
|
||||
# (ACP/gateway/dashboard) and under the collapsed container id for
|
||||
# isolation-keyed rollouts. Try the raw id first, then the container id,
|
||||
# so a CWD-only override (which collapses to "default") still finds and
|
||||
# updates the originating session's env.
|
||||
container_id = _resolve_container_task_id(task_id)
|
||||
with _env_lock:
|
||||
env = _active_environments.get(container_id)
|
||||
env = _active_environments.get(task_id) or _active_environments.get(container_id)
|
||||
if env is not None and getattr(env, "cwd", None) is not None:
|
||||
env.cwd = new_cwd
|
||||
|
||||
|
|
@ -1848,8 +1853,20 @@ def terminal_tool(
|
|||
effective_task_id = _resolve_container_task_id(task_id)
|
||||
|
||||
# Check per-task overrides (set by environments like TerminalBench2Env)
|
||||
# before falling back to global env var config
|
||||
overrides = _task_env_overrides.get(effective_task_id, {})
|
||||
# before falling back to global env var config.
|
||||
#
|
||||
# Overrides are keyed by the *raw* task_id (that's the key
|
||||
# ``register_task_env_overrides`` writes under), NOT by the collapsed
|
||||
# container id. A CWD-only override collapses ``effective_task_id`` to
|
||||
# ``"default"`` for container sharing, but its cwd must still be read
|
||||
# back here under the originating task_id, or the override is silently
|
||||
# dropped. Fall back to the collapsed id so isolation-keyed RL/benchmark
|
||||
# overrides (registered under an id that equals their container id) keep
|
||||
# resolving as before.
|
||||
overrides = (
|
||||
(_task_env_overrides.get(task_id) if task_id else None)
|
||||
or _task_env_overrides.get(effective_task_id, {})
|
||||
)
|
||||
|
||||
# Select image based on env type, with per-task override support
|
||||
if env_type == "docker":
|
||||
|
|
@ -1898,9 +1915,18 @@ def terminal_tool(
|
|||
# task_id wait for the first one to finish creating the sandbox,
|
||||
# instead of each creating their own (wasting Modal resources).
|
||||
with _env_lock:
|
||||
if effective_task_id in _active_environments:
|
||||
_last_activity[effective_task_id] = time.time()
|
||||
env = _active_environments[effective_task_id]
|
||||
# Prefer the collapsed container id, but fall back to an env cached
|
||||
# under the raw task_id. Per-session surfaces (ACP/gateway/dashboard)
|
||||
# with a CWD-only override collapse to "default" for container
|
||||
# sharing, yet an env may already be cached under the originating
|
||||
# task_id; honor it instead of spawning a duplicate.
|
||||
_existing_key = (
|
||||
effective_task_id if effective_task_id in _active_environments
|
||||
else (task_id if task_id and task_id in _active_environments else None)
|
||||
)
|
||||
if _existing_key is not None:
|
||||
_last_activity[_existing_key] = time.time()
|
||||
env = _active_environments[_existing_key]
|
||||
needs_creation = False
|
||||
else:
|
||||
needs_creation = True
|
||||
|
|
@ -1915,9 +1941,13 @@ def terminal_tool(
|
|||
with task_lock:
|
||||
# Double-check after acquiring the per-task lock
|
||||
with _env_lock:
|
||||
if effective_task_id in _active_environments:
|
||||
_last_activity[effective_task_id] = time.time()
|
||||
env = _active_environments[effective_task_id]
|
||||
_existing_key = (
|
||||
effective_task_id if effective_task_id in _active_environments
|
||||
else (task_id if task_id and task_id in _active_environments else None)
|
||||
)
|
||||
if _existing_key is not None:
|
||||
_last_activity[_existing_key] = time.time()
|
||||
env = _active_environments[_existing_key]
|
||||
needs_creation = False
|
||||
|
||||
if needs_creation:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue