From 07e785d60ae1967ad1d7d901175368d1843d2e61 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 18 Jun 2026 13:22:12 -0500 Subject: [PATCH] fix(prompt): dedupe parallel-tool-call steer; correct its rationale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The universal PARALLEL_TOOL_CALL_GUIDANCE block already lives on main, but it shipped with two rough edges this change cleans up: - It duplicated the batching steer for Google models. The GOOGLE_MODEL_OPERATIONAL_GUIDANCE block still carried its own "Parallel tool calls" bullet, so Gemini/Gemma received the instruction twice in one prompt. Drop the redundant bullet — the universal block is now the single source. - Its comment claimed "nothing in the open-source system prompt encouraged batching," which was wrong: the steer existed for Google models only. Reword to say the gap was that every *other* model got nothing. - Tighten the test that asserts the steer (precedence-correct), and add an invariant guarding against re-introducing the Google duplicate. --- agent/prompt_builder.py | 15 +++++++++------ tests/agent/test_prompt_builder.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b8e60722168..97836f27b05 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -320,9 +320,11 @@ TASK_COMPLETION_GUIDANCE = ( # concurrently when they are independent (read-only tools always; path-scoped # file ops when their targets don't overlap — see # run_agent._execute_tool_calls / tool_dispatch_helpers). The missing piece -# was telling the *model* to emit those calls together in the first place; -# nothing in the open-source system prompt encouraged batching. This block -# closes that gap. +# was telling the *model* to emit those calls together in the first place. +# Until now the only batching steer in the prompt lived in +# GOOGLE_MODEL_OPERATIONAL_GUIDANCE — Gemini/Gemma got it, every other model +# got nothing. This block makes the steer universal; the now-redundant +# Google-only bullet has been dropped so no model receives it twice. # # Short on purpose — shipped in the cached system prompt to every user, every # session. Token cost is paid once at install and amortised across all @@ -425,9 +427,10 @@ GOOGLE_MODEL_OPERATIONAL_GUIDANCE = ( "package.json, requirements.txt, Cargo.toml, etc. before importing.\n" "- **Conciseness:** Keep explanatory text brief — a few sentences, not " "paragraphs. Focus on actions and results over narration.\n" - "- **Parallel tool calls:** When you need to perform multiple independent " - "operations (e.g. reading several files), make all the tool calls in a " - "single response rather than sequentially.\n" + # Parallel-tool-call steering now lives in the universal + # PARALLEL_TOOL_CALL_GUIDANCE block (injected for all models), so it is no + # longer duplicated here — keeping it would send Gemini/Gemma the same + # instruction twice. "- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive " "to prevent CLI tools from hanging on prompts.\n" "- **Keep going:** Work autonomously until the task is fully resolved. " diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index e98c26e319f..6f0206dfbcb 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -28,6 +28,7 @@ from agent.prompt_builder import ( TOOL_USE_ENFORCEMENT_MODELS, OPENAI_MODEL_EXECUTION_GUIDANCE, PARALLEL_TOOL_CALL_GUIDANCE, + GOOGLE_MODEL_OPERATIONAL_GUIDANCE, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, PLATFORM_HINTS, @@ -1512,8 +1513,9 @@ class TestParallelToolCallGuidance: def test_steers_batching_into_one_response(self): text = PARALLEL_TOOL_CALL_GUIDANCE.lower() - # Must tell the model to group independent calls together. - assert "single response" in text or "same" in text and "turn" in text + # Must tell the model to group independent calls together — accept any + # phrasing that means "one turn" without freezing exact wording. + assert "single response" in text or ("same" in text and "turn" in text) assert "independent" in text def test_carves_out_dependent_calls(self): @@ -1533,6 +1535,11 @@ class TestParallelToolCallGuidance: # Heading delimits it as its own section in the assembled prompt. assert PARALLEL_TOOL_CALL_GUIDANCE.lstrip().startswith("#") + def test_not_duplicated_in_google_guidance(self): + # The universal block is now the single source of parallel-batching + # steer. The Google-only block must NOT carry its own copy, otherwise + # Gemini/Gemma would receive the instruction twice in one prompt. + assert "parallel tool call" not in GOOGLE_MODEL_OPERATIONAL_GUIDANCE.lower() # =========================================================================