fix: guard aux LLM calls against None content + reasoning fallback + retry (salvage #3389) (#3449)

Salvage of #3389 by @binhnt92 with reasoning fallback and retry logic added on top.

All 7 auxiliary LLM call sites now use extract_content_or_reasoning() which mirrors the main agent loop's behavior: extract content, strip think blocks, fall back to structured reasoning fields, retry on empty.

Closes #3389.
This commit is contained in:
Teknium 2026-03-27 15:28:19 -07:00 committed by GitHub
parent ab09f6b568
commit 658692799d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 414 additions and 14 deletions

View file

@@ -44,7 +44,7 @@ import asyncio
from typing import List, Dict, Any, Optional
import httpx
from firecrawl import Firecrawl
from agent.auxiliary_client import async_call_llm
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
from tools.debug_helpers import DebugSession
from tools.url_safety import is_safe_url
from tools.website_policy import check_website_access
@@ -416,7 +416,16 @@ Create a markdown summary that captures all key information in a well-organized,
if model:
call_kwargs["model"] = model
response = await async_call_llm(**call_kwargs)
return response.choices[0].message.content.strip()
content = extract_content_or_reasoning(response)
if content:
return content
# Reasoning-only / empty response — let the retry loop handle it
logger.warning("LLM returned empty content (attempt %d/%d), retrying", attempt + 1, max_retries)
if attempt < max_retries - 1:
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60)
continue
return content # Return whatever we got after exhausting retries
except RuntimeError:
logger.warning("No auxiliary model available for web content processing")
return None
@@ -535,8 +544,14 @@ Create a single, unified markdown summary."""
if model:
call_kwargs["model"] = model
response = await async_call_llm(**call_kwargs)
final_summary = response.choices[0].message.content.strip()
final_summary = extract_content_or_reasoning(response)
# Retry once on empty content (reasoning-only response)
if not final_summary:
logger.warning("Synthesis LLM returned empty content, retrying once")
response = await async_call_llm(**call_kwargs)
final_summary = extract_content_or_reasoning(response)
# Enforce hard cap
if len(final_summary) > max_output_size:
final_summary = final_summary[:max_output_size] + "\n\n[... summary truncated for context management ...]"