From 06e94223242e844e892a5c9c67fcfa700a8f82ed Mon Sep 17 00:00:00 2001
From: Shannon Sands <shannon.sands.1979@gmail.com>
Date: Sat, 14 Feb 2026 09:33:22 +1000
Subject: [PATCH] Keep full trajectory; truncate prompt on per-turn copy

Previously the agent loop replaced `messages` with the result of
_truncate_context() on every turn, so older turns could be dropped from
the trajectory, breaking reward computation and debugging.

Now we keep messages as the full trajectory and apply truncation to a copy (prompt_messages) for each model call.
---
 environments/agent_loop.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/environments/agent_loop.py b/environments/agent_loop.py
index 3fd277efa5..952922a448 100644
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -295,7 +295,12 @@ class HermesAgentLoop:
 
         Args:
             messages: Initial conversation messages (system + user).
-                      Modified in-place as the conversation progresses.
+                      This list is treated as the FULL trajectory and is
+                      appended to as the conversation progresses.
+
+                      Prompt truncation (to avoid context overflow) is applied
+                      to a per-turn copy of this list, so earlier messages are
+                      kept for reward computation/debugging.
 
         Returns:
             AgentResult with full conversation history, managed state, and metadata
@@ -310,12 +315,14 @@ class HermesAgentLoop:
         tool_calls_exec_error = 0
 
         for turn in range(self.max_turns):
-            # Truncate context if approaching limit
-            messages = self._truncate_context(messages)
+            # Truncate context if approaching limit.
+            # IMPORTANT: do this on a copy so we keep the full trajectory in `messages`
+            # for reward computation + debugging, while only trimming the prompt view.
+            prompt_messages = self._truncate_context(list(messages))
 
             # Build the chat_completion kwargs
             chat_kwargs = {
-                "messages": messages,
+                "messages": prompt_messages,
                 "n": 1,
                 "temperature": self.temperature,
             }