From a4f179c5099a2d74eca0fa81d69ea2f9dad82829 Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Thu, 11 Jun 2026 12:52:52 -0500
Subject: [PATCH] fix(agent): steer GPT/Codex family to V4A for single-file
 edits too (#44411)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The coding-posture brief told GPT/Codex models to use the `patch` tool with
mode='patch' (V4A) for structured/multi-file changes but mode='replace'
"for a single small swap". That second nudge points those models at a
format their first-party harness never taught them.

Verified against openai/codex (current main): apply_patch is the ONLY file
editor in codex-rs — zero occurrences of str_replace/old_string anywhere in
the repo; the grammar (core/src/tools/handlers/apply_patch.lark) is exactly
the V4A dialect our patch_parser implements; the shipped model prompts
(gpt_5_codex, gpt-5.2-codex, gpt-5.1-codex-max + instruction templates)
explicitly say to use apply_patch "for single file edits"; and the tool is
gated per model via ModelInfo.apply_patch_tool_type, i.e. OpenAI ships
V4A-for-everything as model metadata.

The GPT-family line now steers to mode='patch' for all edits, single-file
included. The replace-family line (Claude + open-weight) is unchanged —
Claude Code's FileEdit is old_string/new_string/replace_all exact string
replacement (confirmed from Anthropic's shipped sdk-tools.d.ts, where it is
the only file editor in the tool union), matching our mode='replace'.
---
 agent/coding_context.py            | 12 +++++++++---
 tests/agent/test_coding_context.py |  4 ++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/agent/coding_context.py b/agent/coding_context.py
index b534b6da322..9e6f67e5249 100644
--- a/agent/coding_context.py
+++ b/agent/coding_context.py
@@ -106,13 +106,19 @@ _GIT_TIMEOUT = 2.5
 # multi-file) and mode="replace" (find-and-swap). We nudge each family toward
 # its native format. Unknown families get nothing (the brief's neutral wording
 # stands). Substrings match the model id; aligned with TOOL_USE_ENFORCEMENT_MODELS.
+#
+# GPT/Codex get V4A for ALL edits, single-file included: in codex-rs,
+# apply_patch (V4A — apply_patch.lark) is the ONLY file editor, no
+# str_replace-style tool exists, and the shipped model prompts say to use
+# apply_patch even "for single file edits" — so a replace-mode nudge would
+# steer those models toward a format their first-party harness never taught
+# them.
 _EDIT_FORMAT_GUIDANCE: dict[str, tuple[tuple[str, ...], str]] = {
     "patch": (
         ("gpt", "codex"),
         "- Edit format: author new files with `write_file`; for edits to "
-        "existing code prefer `patch` with `mode='patch'` (V4A multi-file diff) "
-        "for structured or multi-file changes — it's the diff format you handle "
-        "most reliably. Use `mode='replace'` for a single small swap.",
+        "existing code use `patch` with `mode='patch'` (V4A diff) — including "
+        "single-file edits. It's the edit format you handle most reliably.",
     ),
     "replace": (
         ("claude", "sonnet", "opus", "haiku",
diff --git a/tests/agent/test_coding_context.py b/tests/agent/test_coding_context.py
index 36937dc8716..75749cb8fe9 100644
--- a/tests/agent/test_coding_context.py
+++ b/tests/agent/test_coding_context.py
@@ -312,6 +312,10 @@ class TestEditFormatSteering:
         assert "mode='patch'" in brief
         assert "V4A" in brief
         assert "write_file" in brief  # new files authored, not patched
+        # Codex-family harnesses ship apply_patch (V4A) as the ONLY editor and
+        # instruct it even for single-file edits — never nudge replace mode.
+        assert "single-file" in brief
+        assert "mode='replace'" not in brief
 
     def test_anthropic_family_gets_replace_nudge(self, tmp_path):
         _git_init(tmp_path)