mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-19 04:52:06 +00:00
fix(tools): add return_exceptions to asyncio.gather in web_tools
Three asyncio.gather() calls in tools/web_tools.py ran without return_exceptions=True. A single failing task (e.g. LLM rate limit on one URL) would raise out of gather() and discard every other successfully fetched/summarized result. Pass return_exceptions=True and filter BaseException entries with a warning log before unpacking. Affects: - chunk summarization gather (large web_extract pages) - firecrawl per-result LLM post-processing - tavily crawl per-result LLM post-processing Closes #2744
This commit is contained in:
parent
5301cc212b
commit
eacb398f75
1 changed files with 30 additions and 9 deletions
|
|
@ -586,11 +586,20 @@ async def _process_large_content_chunked(
|
|||
|
||||
# Run all chunk summarizations in parallel
|
||||
tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)]
|
||||
results = await asyncio.gather(*tasks)
|
||||
|
||||
# Collect successful summaries in order
|
||||
# Use return_exceptions=True so a single task failure does not discard
|
||||
# all other successfully summarized chunks.
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Filter out exceptions, then collect successful summaries in order
|
||||
successful_results = []
|
||||
for result_item in results:
|
||||
if isinstance(result_item, BaseException):
|
||||
logger.warning("Chunk summarization task failed: %s", result_item)
|
||||
continue
|
||||
successful_results.append(result_item)
|
||||
|
||||
summaries = []
|
||||
for chunk_idx, summary in sorted(results, key=lambda x: x[0]):
|
||||
for chunk_idx, summary in sorted(successful_results, key=lambda x: x[0]):
|
||||
if summary:
|
||||
summaries.append(f"## Section {chunk_idx + 1}\n{summary}")
|
||||
|
||||
|
|
@ -1038,10 +1047,16 @@ async def web_extract_tool(
|
|||
# Run all LLM processing in parallel
|
||||
results_list = response.get('results', [])
|
||||
tasks = [process_single_result(result) for result in results_list]
|
||||
processed_results = await asyncio.gather(*tasks)
|
||||
|
||||
# Use return_exceptions=True so a single task failure does not
|
||||
# discard all other successfully processed results.
|
||||
processed_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Collect metrics and print results
|
||||
for result, metrics, status in processed_results:
|
||||
for result_item in processed_results:
|
||||
if isinstance(result_item, BaseException):
|
||||
logger.warning("Web result processing task failed: %s", result_item)
|
||||
continue
|
||||
result, metrics, status = result_item
|
||||
url = result.get('url', 'Unknown URL')
|
||||
if status == "processed":
|
||||
debug_call_data["compression_metrics"].append(metrics)
|
||||
|
|
@ -1285,8 +1300,14 @@ async def web_crawl_tool(
|
|||
return result, metrics, "too_short"
|
||||
|
||||
tasks = [_process_tavily_crawl(r) for r in response.get('results', [])]
|
||||
processed_results = await asyncio.gather(*tasks)
|
||||
for result, metrics, status in processed_results:
|
||||
# Use return_exceptions=True so a single task failure does not
|
||||
# discard all other successfully processed crawl results.
|
||||
processed_results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
for result_item in processed_results:
|
||||
if isinstance(result_item, BaseException):
|
||||
logger.warning("Tavily crawl processing task failed: %s", result_item)
|
||||
continue
|
||||
result, metrics, status = result_item
|
||||
if status == "processed":
|
||||
debug_call_data["compression_metrics"].append(metrics)
|
||||
debug_call_data["pages_processed_with_llm"] += 1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue