Keep full trajectory; truncate prompt on per-turn copy

Previously, `_truncate_context()` was applied directly to the shared `messages` list on every turn, which could drop older turns and break reward computation and debugging. Now `messages` is kept as the full trajectory, and truncation is applied to a per-turn copy (`prompt_messages`) that is used only as the prompt for each model call.
2026-05-03 02:11:48 +00:00 · 2026-02-14 09:33:22 +10:00 · 2026-02-14 09:33:22 +10:00 · 06e9422324
commit 06e9422324
parent 907616a692
1 changed file with 11 additions and 4 deletions
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -295,7 +295,12 @@ class HermesAgentLoop:

        Args:
            messages: Initial conversation messages (system + user).
-                      Modified in-place as the conversation progresses.
+                      This list is treated as the FULL trajectory and is
+                      appended to as the conversation progresses.
+
+                      Prompt truncation (to avoid context overflow) is applied
+                      on a copy of this list per turn, so we do not lose
+                      earlier messages for reward computation/debugging.

        Returns:
            AgentResult with full conversation history, managed state, and metadata
@@ -310,12 +315,14 @@ class HermesAgentLoop:
        tool_calls_exec_error = 0

        for turn in range(self.max_turns):
-            # Truncate context if approaching limit
-            messages = self._truncate_context(messages)
+            # Truncate context if approaching limit.
+            # IMPORTANT: do this on a copy so we keep the full trajectory in `messages`
+            # for reward computation + debugging, while only trimming the prompt view.
+            prompt_messages = self._truncate_context(list(messages))

            # Build the chat_completion kwargs
            chat_kwargs = {
-                "messages": messages,
+                "messages": prompt_messages,
                "n": 1,
                "temperature": self.temperature,
            }