mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
feat(agent): make API retry count configurable via agent.api_max_retries (#14730)
Closes #11616. The agent's API retry loop hardcoded max_retries = 3, so users with fallback providers on flaky primaries burned through ~3 × provider timeout (e.g. 3 × 180s = 9 minutes) before their fallback chain got a chance to kick in. Expose a new config key, `agent.api_max_retries` (default 3, unchanged). Set it to 1 for fast failover when you have fallback providers, or raise it if you prefer longer tolerance on a single provider. Values < 1 are clamped to 1 (single attempt, no retry); values that cannot be converted to an integer fall back to the default. This setting governs the Hermes-level retry loop only — the OpenAI SDK's own low-level retries (max_retries=2 default) still run beneath this for transient network errors. Changes: - hermes_cli/config.py: add agent.api_max_retries default 3 with comment. - run_agent.py: read self._api_max_retries in AIAgent.__init__; replace hardcoded max_retries = 3 in the retry loop with self._api_max_retries. - cli-config.yaml.example: documented example entry. - hermes_cli/tips.py: discoverable tip line. - tests/run_agent/test_api_max_retries_config.py: 4 tests covering default, override, clamp-to-one, and invalid-value fallback.
This commit is contained in:
parent
327b57da91
commit
165b2e481a
5 changed files with 94 additions and 1 deletions
|
|
@ -507,6 +507,13 @@ agent:
|
|||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level attempts per API call, counting the first try (so 1 means
# no retry), on API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
|
|
|
|||
|
|
@ -361,6 +361,15 @@ DEFAULT_CONFIG = {
|
|||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
# (max_retries=2 default) for transient network errors; this is
|
||||
# the Hermes-level retry loop that wraps the whole call. Lower
|
||||
# this to 1 if you use fallback providers and want fast failover
|
||||
# on flaky primaries; raise it if you prefer to tolerate longer
|
||||
# provider hiccups on a single provider.
|
||||
"api_max_retries": 3,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
|
|
|
|||
|
|
@ -289,6 +289,7 @@ TIPS = [
|
|||
"When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
|
||||
"agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
|
||||
"agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
|
||||
"agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
|
||||
"The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
|
||||
"Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
|
||||
"The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
|
||||
|
|
|
|||
13
run_agent.py
13
run_agent.py
|
|
@ -1548,6 +1548,17 @@ class AIAgent:
|
|||
_agent_section = {}
|
||||
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# App-level API retry count (wraps each model API call). Default 3,
|
||||
# overridable via agent.api_max_retries in config.yaml. See #11616.
|
||||
try:
|
||||
_raw_api_retries = _agent_section.get("api_max_retries", 3)
|
||||
_api_retries = int(_raw_api_retries)
|
||||
if _api_retries < 1:
|
||||
_api_retries = 1 # 1 = no retry (single attempt)
|
||||
except (TypeError, ValueError):
|
||||
_api_retries = 3
|
||||
self._api_max_retries = _api_retries
|
||||
|
||||
# Initialize context compressor for automatic context management
|
||||
# Compresses conversation when approaching model's context limit
|
||||
# Configuration via config.yaml (compression section)
|
||||
|
|
@ -9259,7 +9270,7 @@ class AIAgent:
|
|||
|
||||
api_start_time = time.time()
|
||||
retry_count = 0
|
||||
max_retries = 3
|
||||
max_retries = self._api_max_retries
|
||||
primary_recovery_attempted = False
|
||||
max_compression_attempts = 3
|
||||
codex_auth_retry_attempted=False
|
||||
|
|
|
|||
65
tests/run_agent/test_api_max_retries_config.py
Normal file
65
tests/run_agent/test_api_max_retries_config.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
"""Tests for agent.api_max_retries config surface.
|
||||
|
||||
Closes #11616 — make the hardcoded ``max_retries = 3`` in the agent's API
|
||||
retry loop user-configurable so fallback-provider setups can fail over
|
||||
faster on flaky primaries instead of burning ~3x180s on the same stall.
|
||||
"""
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
# Sentinel meaning "leave agent.api_max_retries out of the mocked config".
# A dedicated object is required because None is itself a value worth
# testing (an explicit null under agent.api_max_retries).
_UNSET = object()


def _make_agent(api_max_retries=_UNSET):
    """Build an AIAgent against a mocked ``hermes_cli.config.load_config``.

    Args:
        api_max_retries: Value to store under ``agent.api_max_retries`` in
            the mocked config. When omitted, the key is left out of the
            config altogether. Any other value -- including ``None`` -- is
            written into the config verbatim, so callers can exercise the
            explicit-null case separately from the missing-key case.

    Returns:
        The ``AIAgent`` constructed while the patches were active.
    """
    agent_section = {}
    if api_max_retries is not _UNSET:
        agent_section["api_max_retries"] = api_max_retries
    cfg = {"agent": agent_section}

    # Both patches stay active for the duration of construction:
    # run_agent.OpenAI is replaced by a mock and load_config hands back the
    # in-memory config built above.
    with patch("run_agent.OpenAI"), \
            patch("hermes_cli.config.load_config", return_value=cfg):
        return AIAgent(
            api_key="test-key",
            base_url="https://openrouter.ai/api/v1",
            model="test/model",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
|
||||
|
||||
|
||||
def test_default_api_max_retries_is_three():
    """With no agent.api_max_retries in config, the agent keeps the value 3."""
    assert _make_agent()._api_max_retries == 3
|
||||
|
||||
|
||||
def test_api_max_retries_honors_config_override():
    """A configured agent.api_max_retries value reaches the agent unchanged."""
    # One value below the default and one above it, checked in that order.
    for configured in (1, 5):
        built = _make_agent(api_max_retries=configured)
        assert built._api_max_retries == configured
|
||||
|
||||
|
||||
def test_api_max_retries_clamps_below_one_to_one():
    """Zero and negative settings are raised to 1 (single attempt, no retry).

    A value below 1 would keep the ``while retry_count < max_retries`` guard
    from ever running, so the agent clamps it instead.
    """
    for too_low in (0, -3):
        assert _make_agent(api_max_retries=too_low)._api_max_retries == 1
|
||||
|
||||
|
||||
def test_api_max_retries_falls_back_on_invalid_value():
    """Unusable config values must not crash agent init; the agent uses 3."""
    unparsable = _make_agent(api_max_retries="not-a-number")
    assert unparsable._api_max_retries == 3

    # Passing None must likewise leave the agent on the default of 3.
    from_none = _make_agent(api_max_retries=None)
    assert from_none._api_max_retries == 3
|
||||
Loading…
Add table
Add a link
Reference in a new issue