diff --git a/agent/trajectory.py b/agent/trajectory.py
index 90696eb8a..0f586bab4 100644
--- a/agent/trajectory.py
+++ b/agent/trajectory.py
@@ -7,24 +7,39 @@ the file-write logic live here.
import json
import logging
+import re
from datetime import datetime
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
+_SCRATCHPAD_OPEN_TAG = "<REASONING_SCRATCHPAD>"
+_SCRATCHPAD_CLOSE_TAG = "</REASONING_SCRATCHPAD>"
+_FENCED_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL)
+_INLINE_CODE_RE = re.compile(r"`[^`\n]*`")
+_BLOCKQUOTE_RE = re.compile(r"(?m)^[ \t]*>.*(?:\n|$)")
+
def convert_scratchpad_to_think(content: str) -> str:
"""Convert <REASONING_SCRATCHPAD> tags to <think> tags."""
- if not content or "<REASONING_SCRATCHPAD>" not in content:
+ if not content or _SCRATCHPAD_OPEN_TAG not in content:
return content
- return content.replace("<REASONING_SCRATCHPAD>", "<think>").replace("</REASONING_SCRATCHPAD>", "</think>")
+ return content.replace(_SCRATCHPAD_OPEN_TAG, "<think>").replace(_SCRATCHPAD_CLOSE_TAG, "</think>")
+
+
+def _strip_markdown_context_for_scratchpad_check(content: str) -> str:
+ """Remove markdown contexts where scratchpad tags should be treated as literal text."""
+ stripped = _FENCED_CODE_BLOCK_RE.sub("", content)
+ stripped = _INLINE_CODE_RE.sub("", stripped)
+ return _BLOCKQUOTE_RE.sub("", stripped)
def has_incomplete_scratchpad(content: str) -> bool:
"""Check if content has an opening <REASONING_SCRATCHPAD> without a closing tag."""
if not content:
return False
- return "<REASONING_SCRATCHPAD>" in content and "</REASONING_SCRATCHPAD>" not in content
+ visible_content = _strip_markdown_context_for_scratchpad_check(content)
+ return visible_content.count(_SCRATCHPAD_OPEN_TAG) > visible_content.count(_SCRATCHPAD_CLOSE_TAG)
def save_trajectory(trajectory: List[Dict[str, Any]], model: str,
diff --git a/tests/agent/test_trajectory.py b/tests/agent/test_trajectory.py
new file mode 100644
index 000000000..495af1d90
--- /dev/null
+++ b/tests/agent/test_trajectory.py
@@ -0,0 +1,36 @@
+from agent.trajectory import convert_scratchpad_to_think, has_incomplete_scratchpad
+
+
+def test_convert_scratchpad_to_think_rewrites_tags():
+ content = "<REASONING_SCRATCHPAD>think</REASONING_SCRATCHPAD> done"
+ assert convert_scratchpad_to_think(content) == "<think>think</think> done"
+
+
+def test_has_incomplete_scratchpad_detects_real_unclosed_tag():
+ content = "Answering...\n<REASONING_SCRATCHPAD>still thinking"
+ assert has_incomplete_scratchpad(content) is True
+
+
+def test_has_incomplete_scratchpad_ignores_fenced_code_block_mentions():
+ content = """Here is the grep output:
+
+```text
+<REASONING_SCRATCHPAD>
+```
+"""
+ assert has_incomplete_scratchpad(content) is False
+
+
+def test_has_incomplete_scratchpad_ignores_blockquote_mentions():
+ content = "> literal token <REASONING_SCRATCHPAD>\n\nFinal answer."
+ assert has_incomplete_scratchpad(content) is False
+
+
+def test_has_incomplete_scratchpad_ignores_inline_code_mentions():
+ content = "The user literally typed `<REASONING_SCRATCHPAD>` in the prompt."
+ assert has_incomplete_scratchpad(content) is False
+
+
+def test_has_incomplete_scratchpad_still_flags_real_tag_after_quote():
+ content = "> quoted literal <REASONING_SCRATCHPAD>\n\n<REASONING_SCRATCHPAD>real"
+ assert has_incomplete_scratchpad(content) is True