This commit is contained in:
sgaofen 2026-04-24 18:23:33 -05:00 committed by GitHub
commit b7f511d5dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 54 additions and 3 deletions

View file

@@ -7,24 +7,39 @@ the file-write logic live here.
import json
import logging
import re
from datetime import datetime
from typing import Any, Dict, List
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Literal opening and closing markers of a reasoning scratchpad section.
_SCRATCHPAD_OPEN_TAG = "<REASONING_SCRATCHPAD>"
_SCRATCHPAD_CLOSE_TAG = "</REASONING_SCRATCHPAD>"
# Triple-backtick fenced block: shortest span between two ``` runs; DOTALL
# lets "." cross newlines so multi-line fences match.
_FENCED_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL)
# Inline code span: a pair of single backticks with no backtick or newline
# between them.
_INLINE_CODE_RE = re.compile(r"`[^`\n]*`")
# Blockquote line: optional leading spaces/tabs, then ">", through the end of
# the line (including its newline when present). (?m) anchors "^" per line.
_BLOCKQUOTE_RE = re.compile(r"(?m)^[ \t]*>.*(?:\n|$)")
def convert_scratchpad_to_think(content: str) -> str:
    """Convert <REASONING_SCRATCHPAD> tags to <think> tags.

    Args:
        content: Text that may contain scratchpad tags.

    Returns:
        ``content`` with every opening scratchpad tag replaced by ``<think>``
        and every closing scratchpad tag replaced by ``</think>``. The input
        is returned unchanged when it is falsy (e.g. an empty string) or does
        not contain an opening tag.
    """
    # Without an opening tag there is nothing to rewrite; the same guard
    # hands falsy input straight back to the caller.
    if not content or _SCRATCHPAD_OPEN_TAG not in content:
        return content
    return content.replace(_SCRATCHPAD_OPEN_TAG, "<think>").replace(
        _SCRATCHPAD_CLOSE_TAG, "</think>"
    )
def _strip_markdown_context_for_scratchpad_check(content: str) -> str:
    """Return ``content`` with markdown spans that hold tags only as literal text removed.

    Fenced code blocks are deleted first, then inline code spans, then
    blockquote lines; each pass runs on the output of the previous one.
    """
    remaining = content
    # Order matters: fences are removed before inline spans so that the
    # backticks of a fence are not consumed by the inline-code pattern.
    for pattern in (_FENCED_CODE_BLOCK_RE, _INLINE_CODE_RE, _BLOCKQUOTE_RE):
        remaining = pattern.sub("", remaining)
    return remaining
def has_incomplete_scratchpad(content: str) -> bool:
    """Check if content has an opening <REASONING_SCRATCHPAD> without a closing tag.

    The text is first passed through
    ``_strip_markdown_context_for_scratchpad_check`` so that tags appearing
    only inside fenced code blocks, inline code spans or blockquote lines are
    not counted.

    Args:
        content: Text to inspect.

    Returns:
        ``True`` when the remaining text contains more opening tags than
        closing tags; ``False`` otherwise, including for falsy input.
    """
    if not content:
        return False
    visible_content = _strip_markdown_context_for_scratchpad_check(content)
    # Compare counts rather than mere presence, so an earlier closed
    # scratchpad does not hide a later unclosed one.
    opened = visible_content.count(_SCRATCHPAD_OPEN_TAG)
    closed = visible_content.count(_SCRATCHPAD_CLOSE_TAG)
    return opened > closed
def save_trajectory(trajectory: List[Dict[str, Any]], model: str,

View file

@@ -0,0 +1,36 @@
from agent.trajectory import convert_scratchpad_to_think, has_incomplete_scratchpad
def test_convert_scratchpad_to_think_rewrites_tags():
    """Both the opening and the closing scratchpad tag become think tags."""
    raw_reply = "<REASONING_SCRATCHPAD>think</REASONING_SCRATCHPAD> done"
    converted = convert_scratchpad_to_think(raw_reply)
    assert converted == "<think>think</think> done"
def test_has_incomplete_scratchpad_detects_real_unclosed_tag():
    """An opening tag in plain text with no closing tag is reported."""
    unclosed_reply = "Answering...\n<REASONING_SCRATCHPAD>still thinking"
    verdict = has_incomplete_scratchpad(unclosed_reply)
    assert verdict is True
def test_has_incomplete_scratchpad_ignores_fenced_code_block_mentions():
    """An opening tag that appears only inside a fenced block is not reported."""
    fenced_reply = """Here is the grep output:
```text
<REASONING_SCRATCHPAD>
```
"""
    verdict = has_incomplete_scratchpad(fenced_reply)
    assert verdict is False
def test_has_incomplete_scratchpad_ignores_blockquote_mentions():
    """An opening tag that appears only on a blockquote line is not reported."""
    quoted_reply = "> literal token <REASONING_SCRATCHPAD>\n\nFinal answer."
    verdict = has_incomplete_scratchpad(quoted_reply)
    assert verdict is False
def test_has_incomplete_scratchpad_ignores_inline_code_mentions():
    """An opening tag that appears only inside an inline code span is not reported."""
    inline_reply = "The user literally typed `<REASONING_SCRATCHPAD>` in the prompt."
    verdict = has_incomplete_scratchpad(inline_reply)
    assert verdict is False
def test_has_incomplete_scratchpad_still_flags_real_tag_after_quote():
    """A quoted mention does not mask a genuine unclosed tag later in the text."""
    mixed_reply = "> quoted literal <REASONING_SCRATCHPAD>\n\n<REASONING_SCRATCHPAD>real"
    verdict = has_incomplete_scratchpad(mixed_reply)
    assert verdict is True