diff --git a/agent/trajectory.py b/agent/trajectory.py index 90696eb8a..0f586bab4 100644 --- a/agent/trajectory.py +++ b/agent/trajectory.py @@ -7,24 +7,39 @@ the file-write logic live here. import json import logging +import re from datetime import datetime from typing import Any, Dict, List logger = logging.getLogger(__name__) +_SCRATCHPAD_OPEN_TAG = "" +_SCRATCHPAD_CLOSE_TAG = "" +_FENCED_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]*`") +_BLOCKQUOTE_RE = re.compile(r"(?m)^[ \t]*>.*(?:\n|$)") + def convert_scratchpad_to_think(content: str) -> str: """Convert tags to tags.""" - if not content or "" not in content: + if not content or _SCRATCHPAD_OPEN_TAG not in content: return content - return content.replace("", "").replace("", "") + return content.replace(_SCRATCHPAD_OPEN_TAG, "").replace(_SCRATCHPAD_CLOSE_TAG, "") + + +def _strip_markdown_context_for_scratchpad_check(content: str) -> str: + """Remove markdown contexts where scratchpad tags should be treated as literal text.""" + stripped = _FENCED_CODE_BLOCK_RE.sub("", content) + stripped = _INLINE_CODE_RE.sub("", stripped) + return _BLOCKQUOTE_RE.sub("", stripped) def has_incomplete_scratchpad(content: str) -> bool: """Check if content has an opening without a closing tag.""" if not content: return False - return "" in content and "" not in content + visible_content = _strip_markdown_context_for_scratchpad_check(content) + return visible_content.count(_SCRATCHPAD_OPEN_TAG) > visible_content.count(_SCRATCHPAD_CLOSE_TAG) def save_trajectory(trajectory: List[Dict[str, Any]], model: str, diff --git a/tests/agent/test_trajectory.py b/tests/agent/test_trajectory.py new file mode 100644 index 000000000..495af1d90 --- /dev/null +++ b/tests/agent/test_trajectory.py @@ -0,0 +1,36 @@ +from agent.trajectory import convert_scratchpad_to_think, has_incomplete_scratchpad + + +def test_convert_scratchpad_to_think_rewrites_tags(): + content = "think done" + assert convert_scratchpad_to_think(content) == "think done" + + +def test_has_incomplete_scratchpad_detects_real_unclosed_tag(): + content = "Answering...\nstill thinking" + assert has_incomplete_scratchpad(content) is True + + +def test_has_incomplete_scratchpad_ignores_fenced_code_block_mentions(): + content = """Here is the grep output: + +```text + +``` +""" + assert has_incomplete_scratchpad(content) is False + + +def test_has_incomplete_scratchpad_ignores_blockquote_mentions(): + content = "> literal token \n\nFinal answer." + assert has_incomplete_scratchpad(content) is False + + +def test_has_incomplete_scratchpad_ignores_inline_code_mentions(): + content = "The user literally typed `` in the prompt." + assert has_incomplete_scratchpad(content) is False + + +def test_has_incomplete_scratchpad_still_flags_real_tag_after_quote(): + content = "> quoted literal \n\nreal" + assert has_incomplete_scratchpad(content) is True