fix(compression): 3-line fix for infinite compression loop (#29335)

Three compounding root causes:

A) run_conversation()'s result dict was missing session_id, so the
   gateway's guard at gateway/run.py:8700 was dead code and never triggered
B) preflight compression bypassed the should_compress() anti-thrashing
   check, so it re-triggered every turn when tool schemas dominate the
   token budget
C) gateway updated session_entry.session_id in memory but didn't
   persist the change via session_store._save()

Fixes: #29335
This commit is contained in:
Radical Edward 2026-05-25 01:49:41 +02:00 committed by Teknium
parent 222a3a9c19
commit 3914089d52
2 changed files with 4 additions and 1 deletions

View file

@ -484,7 +484,8 @@ def run_conversation(
tools=agent.tools or None,
)
if _preflight_tokens >= agent.context_compressor.threshold_tokens:
if _preflight_tokens >= agent.context_compressor.threshold_tokens \
and agent.context_compressor.should_compress(_preflight_tokens):
logger.info(
"Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
f"{_preflight_tokens:,}",
@ -4180,6 +4181,7 @@ def run_conversation(
"estimated_cost_usd": agent.session_estimated_cost_usd,
"cost_status": agent.session_cost_status,
"cost_source": agent.session_cost_source,
"session_id": agent.session_id,
}
if agent._tool_guardrail_halt_decision is not None:
result["guardrail"] = agent._tool_guardrail_halt_decision.to_metadata()

View file

@ -8692,6 +8692,7 @@ class GatewayRunner:
# session_entry so transcript writes below go to the right session.
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
session_entry.session_id = agent_result["session_id"]
self.session_store._save()
# Prepend reasoning/thinking if display is enabled (per-platform)
try: