fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session, with an explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries
   (see the sketch below).
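
   Part 2 lives outside file_tools.py and its diff is not shown below.
   As a rough sketch only (the helper name, message role, and module path
   are assumptions; only get_read_files_summary() comes from this commit),
   the injected message could be built like this:

       # Hypothetical sketch of the post-compression injection (part 2).
       # build_read_history_message and the "user" role are assumptions.
       from file_tools import get_read_files_summary

       def build_read_history_message(task_id: str) -> dict:
           lines = ["Files already read in this session. Do NOT re-read them; "
                    "use the information you already have:"]
           for entry in get_read_files_summary(task_id):
               lines.append(f"- {entry['path']} ({', '.join(entry['regions'])})")
           return {"role": "user", "content": "\n".join(lines)}

       # After compression, append build_read_history_message(task_id) to the
       # summarized history before handing it back to the model.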

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.
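
The warning-detection case could look roughly like this (an illustrative
sketch, not the actual test code from this commit; the fixture names and
the assumption that read_file_tool can read a local temp file are mine):

    # Illustrative pytest-style sketch; the real tests may differ.
    import json
    from file_tools import read_file_tool, clear_read_tracker

    def test_warn_on_second_identical_read(tmp_path):
        target = tmp_path / "example.txt"
        target.write_text("hello\n")
        clear_read_tracker("task-1")
        first = json.loads(read_file_tool(str(target), task_id="task-1"))
        second = json.loads(read_file_tool(str(target), task_id="task-1"))
        assert "_warning" not in first
        assert "_warning" in second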
0xbyt4 committed 2026-03-08 20:44:42 +03:00
commit 9eee529a7f (parent cd77c7100c)
3 changed files with 349 additions and 6 deletions

file_tools.py

@@ -13,6 +13,11 @@ logger = logging.getLogger(__name__)
 _file_ops_lock = threading.Lock()
 _file_ops_cache: dict = {}
 
+# Track files read per task to detect re-read loops after context compression.
+# Key: task_id, Value: dict mapping (path, offset, limit) -> read count
+_read_tracker_lock = threading.Lock()
+_read_tracker: dict = {}
+
 
 def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
     """Get or create ShellFileOperations for a terminal environment.
@@ -128,11 +133,55 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
     try:
         file_ops = _get_file_ops(task_id)
         result = file_ops.read_file(path, offset, limit)
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        result_dict = result.to_dict()
+        # Track reads to detect re-read loops (e.g. after context compression)
+        read_key = (path, offset, limit)
+        with _read_tracker_lock:
+            task_reads = _read_tracker.setdefault(task_id, {})
+            task_reads[read_key] = task_reads.get(read_key, 0) + 1
+            count = task_reads[read_key]
+        if count > 1:
+            result_dict["_warning"] = (
+                f"You have already read this exact file region {count} times in this session. "
+                "The content has not changed. Use the information you already have instead of re-reading. "
+                "If you are stuck in a loop, stop reading and proceed with writing or responding."
+            )
+        return json.dumps(result_dict, ensure_ascii=False)
     except Exception as e:
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
 
+def get_read_files_summary(task_id: str = "default") -> list:
+    """Return a list of files read in this session for the given task.
+
+    Used by context compression to preserve file-read history across
+    compression boundaries.
+    """
+    with _read_tracker_lock:
+        task_reads = _read_tracker.get(task_id, {})
+        seen_paths = {}
+        for (path, offset, limit), count in task_reads.items():
+            if path not in seen_paths:
+                seen_paths[path] = []
+            seen_paths[path].append(f"lines {offset}-{offset + limit - 1}")
+        return [
+            {"path": p, "regions": regions}
+            for p, regions in sorted(seen_paths.items())
+        ]
+
+
+def clear_read_tracker(task_id: str = None):
+    """Clear the read tracker. Called when starting a new conversation."""
+    with _read_tracker_lock:
+        if task_id:
+            _read_tracker.pop(task_id, None)
+        else:
+            _read_tracker.clear()
+
+
 def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
     """Write content to a file."""
     try:
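
A note on the tracker's data shape, derived from the code above (the path
and offsets are made up for illustration): get_read_files_summary() groups
regions by path and keys reads on the exact (path, offset, limit) tuple, so
repeated reads of the same region raise the warning count but appear only
once in the summary.

    # Given these reads for task "t1" (illustrative path and offsets):
    #   read_file_tool("src/app.py", offset=1,   limit=500, task_id="t1")
    #   read_file_tool("src/app.py", offset=501, limit=500, task_id="t1")
    #
    # get_read_files_summary("t1") returns:
    #   [{"path": "src/app.py", "regions": ["lines 1-500", "lines 501-1000"]}]
    #
    # clear_read_tracker("t1") drops only task "t1"; clear_read_tracker()
    # with no argument wipes the tracker for all tasks.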