From ab06ef8ed615a6d57ad01930794f9b40b467c489 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 11 Jun 2026 19:50:08 -0500 Subject: [PATCH] fix(coding): teach agents terminal env state persists Tell coding agents to activate shell setup once per session instead of re-sourcing it before every command, and pin the existing LocalEnvironment env-snapshot behavior with regression tests. --- agent/coding_context.py | 4 +++ tests/agent/test_coding_context.py | 6 ++++ tests/tools/test_local_shell_init.py | 43 ++++++++++++++++++++++++++++ tests/tools/test_terminal_tool.py | 8 ++++++ tools/terminal_tool.py | 3 +- 5 files changed, 63 insertions(+), 1 deletion(-) diff --git a/agent/coding_context.py b/agent/coding_context.py index 79bc0c92e3c..ede0dc1528a 100644 --- a/agent/coding_context.py +++ b/agent/coding_context.py @@ -190,6 +190,10 @@ CODING_AGENT_GUIDANCE = ( "Verify, and know when to stop:\n" "- Use `terminal` for git, builds, tests, and inspection. Run the relevant " "tests/linter/build and confirm they pass before claiming the work is done.\n" + "- Terminal state persists across calls: current directory and exported " + "environment variables carry forward. Activate a virtualenv or export setup " + "vars once, then reuse that state instead of re-sourcing it before every " + "test command.\n" "- Fix root causes, not symptoms: when you find a bug, check sibling call " "paths for the same flaw and fix the class, not just the reported site.\n" "- When fixing linter/type errors on a file, stop after about three " diff --git a/tests/agent/test_coding_context.py b/tests/agent/test_coding_context.py index b16b1737999..00d1eaa3e51 100644 --- a/tests/agent/test_coding_context.py +++ b/tests/agent/test_coding_context.py @@ -11,6 +11,12 @@ import pytest from agent import coding_context as cc +def test_coding_guidance_advertises_persistent_terminal_state(): + assert "Terminal state persists across calls" in cc.CODING_AGENT_GUIDANCE + assert "Activate a virtualenv" in cc.CODING_AGENT_GUIDANCE + assert "instead of re-sourcing it before every test command" in cc.CODING_AGENT_GUIDANCE + + def _git_init(path): env = { "GIT_AUTHOR_NAME": "t", "GIT_AUTHOR_EMAIL": "t@t", diff --git a/tests/tools/test_local_shell_init.py b/tests/tools/test_local_shell_init.py index 1bdaeeeb67a..f1e4f5b0451 100644 --- a/tests/tools/test_local_shell_init.py +++ b/tests/tools/test_local_shell_init.py @@ -190,6 +190,49 @@ class TestSnapshotEndToEnd: """Spin up a real LocalEnvironment and confirm the snapshot sources extra init files.""" + def test_exported_env_changes_persist_between_commands(self, tmp_path): + env = LocalEnvironment(cwd=str(tmp_path), timeout=15) + try: + first = env.execute( + 'export HERMES_SESSION_ENV_PROBE="sticky"; ' + 'export PATH="/tmp/hermes-session-bin:$PATH"; ' + 'echo "first=$HERMES_SESSION_ENV_PROBE"' + ) + second = env.execute( + 'echo "second=$HERMES_SESSION_ENV_PROBE"; echo "PATH=$PATH"' + ) + finally: + env.cleanup() + + assert first["returncode"] == 0 + assert second["returncode"] == 0 + assert "first=sticky" in first.get("output", "") + output = second.get("output", "") + assert "second=sticky" in output + assert "/tmp/hermes-session-bin" in output + + def test_venv_style_activation_persists_between_commands(self, tmp_path): + venv_bin = tmp_path / ".venv" / "bin" + venv_bin.mkdir(parents=True) + activate = venv_bin / "activate" + activate.write_text( + f'export VIRTUAL_ENV="{tmp_path / ".venv"}"\n' + f'export PATH="{venv_bin}:$PATH"\n' + ) + + env = LocalEnvironment(cwd=str(tmp_path), timeout=15) + try: + first = env.execute('source .venv/bin/activate; echo "venv=$VIRTUAL_ENV"') + second = env.execute('echo "venv=$VIRTUAL_ENV"; echo "PATH=$PATH"') + finally: + env.cleanup() + + assert first["returncode"] == 0 + assert second["returncode"] == 0 + output = second.get("output", "") + assert f"venv={tmp_path / '.venv'}" in output + assert str(venv_bin) in output + def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch): init_file = tmp_path / "custom-init.sh" init_file.write_text( diff --git a/tests/tools/test_terminal_tool.py b/tests/tools/test_terminal_tool.py index ea113e63c27..84af6fc7633 100644 --- a/tests/tools/test_terminal_tool.py +++ b/tests/tools/test_terminal_tool.py @@ -22,6 +22,14 @@ def test_searching_for_sudo_does_not_trigger_rewrite(monkeypatch): assert sudo_stdin is None +def test_terminal_schema_advertises_persistent_env_state(): + description = terminal_tool.TERMINAL_TOOL_DESCRIPTION + + assert "exported environment variables persist between calls" in description + assert "activate a virtualenv" in description + assert "do not re-source the same environment before every command" in description + + def test_printf_literal_sudo_does_not_trigger_rewrite(monkeypatch): monkeypatch.delenv("SUDO_PASSWORD", raising=False) monkeypatch.delenv("HERMES_INTERACTIVE", raising=False) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 2ad882fba25..f20f2abcbb5 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -834,7 +834,7 @@ import sys # Tool description for LLM -TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem usually persists between calls. +TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem, current working directory, and exported environment variables persist between calls. Do NOT use cat/head/tail to read files — use read_file instead. Do NOT use grep/rg/find to search — use search_files instead. @@ -842,6 +842,7 @@ Do NOT use ls to list directories — use search_files(target='files') instead. Do NOT use sed/awk to edit files — use patch instead. Do NOT use echo/cat heredoc to create files — use write_file instead. Reserve terminal for: builds, installs, git, processes, scripts, network, package managers, and anything that needs a shell. +Because exported environment state persists, activate a virtualenv or export setup variables once per session; do not re-source the same environment before every command unless a command proves the shell state was reset. Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for short commands. Background: Set background=true to get a session_id. Almost always pair with notify_on_complete=true — bg without notify runs SILENTLY and you have no way to learn it finished short of calling process(action='poll') yourself. Two legitimate uses: