mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(agent): add jittered retry backoff
Adds agent/retry_utils.py with jittered_backoff() — exponential backoff with additive jitter to prevent thundering-herd retry spikes when multiple gateway sessions hit the same rate-limited provider.

Replaces fixed exponential backoff at 4 call sites:
- run_agent.py: None-choices retry path (5s base, 120s cap)
- run_agent.py: API error retry path (2s base, 60s cap)
- trajectory_compressor.py: sync + async summarization retries

Thread-safe jitter counter with overflow guards ensures unique seeds across concurrent retries.

Trimmed from original PR to keep only wired-in functionality.

Co-authored-by: martinp09 <martinp09@users.noreply.github.com>
This commit is contained in:
parent
fff237e111
commit
e1befe5077
4 changed files with 181 additions and 4 deletions
|
|
@@ -44,6 +44,7 @@ import fire
|
|||
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, TimeElapsedColumn, TimeRemainingColumn
|
||||
from rich.console import Console
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from agent.retry_utils import jittered_backoff
|
||||
|
||||
# Load environment variables
|
||||
from dotenv import load_dotenv
|
||||
|
|
@@ -585,7 +586,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
|||
self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}")
|
||||
|
||||
if attempt < self.config.max_retries - 1:
|
||||
time.sleep(self.config.retry_delay * (attempt + 1))
|
||||
time.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0))
|
||||
else:
|
||||
# Fallback: create a basic summary
|
||||
return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]"
|
||||
|
|
@@ -647,7 +648,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
|||
self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}")
|
||||
|
||||
if attempt < self.config.max_retries - 1:
|
||||
await asyncio.sleep(self.config.retry_delay * (attempt + 1))
|
||||
await asyncio.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0))
|
||||
else:
|
||||
# Fallback: create a basic summary
|
||||
return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue