refactor(tools): consolidate task-override lookup into one helper

The raw-key-first-then-collapsed override lookup was hand-rolled in three
places with subtly different spellings: terminal_tool's command setup, and
both file_tools._registered_task_cwd_override and _get_file_ops. Since that
exact raw-vs-collapsed invariant is what the session-cwd fix depends on,
keeping three copies invites the drift that caused the original bug.

Add terminal_tool.resolve_task_overrides(task_id) as the single source and
route all three sites through it. Behaviour is unchanged (verified
byte-equivalent across raw/collapsed/isolation/None/subagent inputs).
This commit is contained in:
kshitijk4poor 2026-06-15 14:02:17 +05:30
parent d6a8d9dcab
commit ddf7c7af81
2 changed files with 30 additions and 28 deletions

View file

@ -123,15 +123,9 @@ def _registered_task_cwd_override(task_id: str = "default") -> str | None:
read that raw override before falling back to the collapsed container key.
"""
try:
from tools.terminal_tool import _resolve_container_task_id, _task_env_overrides
from tools.terminal_tool import resolve_task_overrides
raw_task_id = task_id or "default"
container_key = _resolve_container_task_id(raw_task_id)
overrides = (
_task_env_overrides.get(raw_task_id)
or _task_env_overrides.get(container_key)
or {}
)
overrides = resolve_task_overrides(task_id)
except Exception:
return None
@ -693,15 +687,11 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
terminal_env = None
if terminal_env is None:
from tools.terminal_tool import _task_env_overrides
from tools.terminal_tool import resolve_task_overrides
config = _get_env_config()
env_type = config["env_type"]
overrides = (
_task_env_overrides.get(raw_task_id)
or _task_env_overrides.get(task_id)
or {}
)
overrides = resolve_task_overrides(raw_task_id)
if env_type == "docker":
image = overrides.get("docker_image") or config["docker_image"]

View file

@ -1034,6 +1034,26 @@ def _resolve_container_task_id(task_id: Optional[str]) -> str:
return "default"
def resolve_task_overrides(task_id: Optional[str]) -> Dict[str, Any]:
"""Return the env overrides for *task_id*, raw key first then collapsed.
``register_task_env_overrides`` writes under the *raw* task/session id, but
a CWD-only override collapses (:func:`_resolve_container_task_id`) to the
shared ``"default"`` container so per-session surfaces (ACP/gateway/
dashboard) don't each spin up their own sandbox. Callers that need the
override (terminal command setup, file-tool cwd resolution) must therefore
read the raw id FIRST and only fall back to the collapsed container id, or
the originating session's override is silently dropped. This is the single
source of that lookup so the terminal and file layers can't drift apart.
"""
raw = task_id or "default"
return (
_task_env_overrides.get(raw)
or _task_env_overrides.get(_resolve_container_task_id(raw))
or {}
)
# Configuration from environment variables
def _parse_env_var(name: str, default: str, converter: Any = int, type_label: str = "integer"):
@ -1885,20 +1905,12 @@ def terminal_tool(
effective_task_id = _resolve_container_task_id(task_id)
# Check per-task overrides (set by environments like TerminalBench2Env)
# before falling back to global env var config.
#
# Overrides are keyed by the *raw* task_id (that's the key
# ``register_task_env_overrides`` writes under), NOT by the collapsed
# container id. A CWD-only override collapses ``effective_task_id`` to
# ``"default"`` for container sharing, but its cwd must still be read
# back here under the originating task_id, or the override is silently
# dropped. Fall back to the collapsed id so isolation-keyed RL/benchmark
# overrides (registered under an id that equals their container id) keep
# resolving as before.
overrides = (
(_task_env_overrides.get(task_id) if task_id else None)
or _task_env_overrides.get(effective_task_id, {})
)
# before falling back to global env var config. ``resolve_task_overrides``
# reads the raw task id first then the collapsed container id, so a
# CWD-only override (which collapses ``effective_task_id`` to
# ``"default"``) is still found under its originating session id while
# isolation-keyed RL/benchmark overrides keep resolving as before.
overrides = resolve_task_overrides(task_id)
# Select image based on env type, with per-task override support
if env_type == "docker":