final changes from successful run

This commit is contained in:
dmahan93 2026-04-22 14:57:57 -05:00
parent 721e0b96cd
commit be43bee11a

View file

@@ -8275,28 +8275,34 @@ class AIAgent:
self._response_was_previewed = True self._response_was_previewed = True
break break
# No fallback -- if reasoning_text exists, the model put its # No fallback -- the model kept emitting <think>...</think>
# entire response inside <think> tags; use that as the content. # with empty content for 3 retries. Preserve token IDs from
# the last API attempt (reasoning-only generation) so RL can
# train on this trajectory instead of dropping it entirely.
# Using _build_assistant_message ensures prompt_token_ids,
# generation_token_ids, and generation_log_probs are attached
# when present on the assistant_message object.
if reasoning_text: if reasoning_text:
self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True) self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True)
final_response = reasoning_text final_response = reasoning_text
empty_msg = {
# Preserve token IDs from the last API attempt by building the
# assistant message from the live API response object. This
# avoids the all-empty-output-items ValueError in NeMo RL's
# nemo_gym postprocessor when every turn was reasoning-only.
try:
_last_msg = self._build_assistant_message(assistant_message, finish_reason)
messages.append(_last_msg)
except Exception:
# If assistant_message is out of scope or _build fails,
# fall back to a message without token IDs (matches
# original behavior).
messages.append({
"role": "assistant", "role": "assistant",
"content": final_response, "content": final_response,
"reasoning": reasoning_text, "reasoning": reasoning_text,
"finish_reason": finish_reason, "finish_reason": finish_reason,
} })
messages.append(empty_msg)
break
# Truly empty -- no reasoning and no content
empty_msg = {
"role": "assistant",
"content": final_response,
"reasoning": reasoning_text,
"finish_reason": finish_reason,
}
messages.append(empty_msg)
self._cleanup_task_resources(effective_task_id) self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history) self._persist_session(messages, conversation_history)