From 06e94223242e844e892a5c9c67fcfa700a8f82ed Mon Sep 17 00:00:00 2001 From: Shannon Sands Date: Sat, 14 Feb 2026 09:33:22 +1000 Subject: [PATCH] Keep full trajectory; truncate prompt on per-turn copy Previously _truncate_context() mutated the shared messages list, which could drop older turns and break reward computation/debugging. Now we keep messages as the full trajectory and apply truncation to a copy (prompt_messages) for each model call. --- environments/agent_loop.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/environments/agent_loop.py b/environments/agent_loop.py index 3fd277efa5..952922a448 100644 --- a/environments/agent_loop.py +++ b/environments/agent_loop.py @@ -295,7 +295,12 @@ class HermesAgentLoop: Args: messages: Initial conversation messages (system + user). - Modified in-place as the conversation progresses. + This list is treated as the FULL trajectory and is + appended to as the conversation progresses. + + Prompt truncation (to avoid context overflow) is applied + on a copy of this list per turn, so we do not lose + earlier messages for reward computation/debugging. Returns: AgentResult with full conversation history, managed state, and metadata @@ -310,12 +315,14 @@ class HermesAgentLoop: tool_calls_exec_error = 0 for turn in range(self.max_turns): - # Truncate context if approaching limit - messages = self._truncate_context(messages) + # Truncate context if approaching limit. + # IMPORTANT: do this on a copy so we keep the full trajectory in `messages` + # for reward computation + debugging, while only trimming the prompt view. + prompt_messages = self._truncate_context(list(messages)) # Build the chat_completion kwargs chat_kwargs = { - "messages": messages, + "messages": prompt_messages, "n": 1, "temperature": self.temperature, }