From 3756d84dfe4bc7e68e0b7bea36608deb50283a27 Mon Sep 17 00:00:00 2001
From: Basit Mustafa <basit.mustafa@gmail.com>
Date: Fri, 24 Apr 2026 15:21:03 -0700
Subject: [PATCH] fix(run_agent): strip temperature from flush_memories Codex
 fallback path

When the auxiliary client failed for any reason (_call_llm raised), the
codex_responses fallback path set codex_kwargs["temperature"] =
_flush_temperature (0.3) whenever that value was not None.
_run_codex_stream then sent it to the Codex Responses endpoint
(chatgpt.com/backend-api/codex), which rejects temperature with HTTP 400
"Unsupported parameter: temperature".

The error propagated to the outer except block and surfaced as
"Auxiliary memory flush failed: HTTP 400 - Unsupported parameter:
temperature", misleadingly pointing at temperature rather than the
original aux failure.

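Fix: strip temperature unconditionally from codex_kwargs before calling
_run_codex_stream, consistent with how _CodexCompletionsAdapter already
omits temperature from Responses API calls. The patch also drops a stray
blank line after the local httpx import in _run_codex_stream (whitespace
only, no behavior change).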

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 run_agent.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 56da8f2aae5..ee9f7d787d6 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5084,7 +5084,6 @@ class AIAgent:
     def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
         """Execute one streaming Responses API request and return the final response."""
         import httpx as _httpx
-
         active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct")
         max_stream_retries = 1
         has_tool_calls = False
@@ -7938,10 +7937,8 @@ class AIAgent:
                     codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
                 elif not codex_kwargs.get("tools"):
                     codex_kwargs["tools"] = [memory_tool_def]
-                if _flush_temperature is not None:
-                    codex_kwargs["temperature"] = _flush_temperature
-                else:
-                    codex_kwargs.pop("temperature", None)
+                # Codex Responses endpoint does not accept temperature — strip it
+                codex_kwargs.pop("temperature", None)
                 if "max_output_tokens" in codex_kwargs:
                     codex_kwargs["max_output_tokens"] = 5120
                 response = self._run_codex_stream(codex_kwargs)