mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-09 03:11:58 +00:00
feat(agent): configurable timeouts for auxiliary LLM calls via config.yaml (#3597)
Add per-task timeout settings under auxiliary.{task}.timeout in config.yaml
instead of hardcoded values. Users with slow local models (Ollama, llama.cpp)
can now increase timeouts for compression, vision, session search, etc.
Defaults:
- auxiliary.compression.timeout: 120s (was hardcoded 45s)
- auxiliary.vision.timeout: 30s (unchanged)
- all other aux tasks: 30s (value unchanged, but previously hardcoded; now configurable)
- title_generator: 30s (was hardcoded 15s)
call_llm/async_call_llm now auto-resolve timeout from config when not
explicitly passed. Callers can still override with an explicit timeout arg.
Based on PR #3406 by alanfwilliams. Converted from env vars to config.yaml
per project conventions.
Co-authored-by: alanfwilliams <alanfwilliams@users.noreply.github.com>
This commit is contained in:
parent
404a0b823e
commit
839d9d7471
4 changed files with 41 additions and 7 deletions
|
|
@ -1458,6 +1458,29 @@ def _resolve_task_provider_model(
|
||||||
return "auto", resolved_model, None, None
|
return "auto", resolved_model, None, None
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_AUX_TIMEOUT = 30.0
|
||||||
|
|
||||||
|
|
||||||
|
def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
|
||||||
|
"""Read timeout from auxiliary.{task}.timeout in config, falling back to *default*."""
|
||||||
|
if not task:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
from hermes_cli.config import load_config
|
||||||
|
config = load_config()
|
||||||
|
except ImportError:
|
||||||
|
return default
|
||||||
|
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||||
|
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||||
|
raw = task_config.get("timeout")
|
||||||
|
if raw is not None:
|
||||||
|
try:
|
||||||
|
return float(raw)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
def _build_call_kwargs(
|
def _build_call_kwargs(
|
||||||
provider: str,
|
provider: str,
|
||||||
model: str,
|
model: str,
|
||||||
|
|
@ -1515,7 +1538,7 @@ def call_llm(
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
tools: list = None,
|
tools: list = None,
|
||||||
timeout: float = 30.0,
|
timeout: float = None,
|
||||||
extra_body: dict = None,
|
extra_body: dict = None,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
"""Centralized synchronous LLM call.
|
"""Centralized synchronous LLM call.
|
||||||
|
|
@ -1533,7 +1556,7 @@ def call_llm(
|
||||||
temperature: Sampling temperature (None = provider default).
|
temperature: Sampling temperature (None = provider default).
|
||||||
max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
|
max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
|
||||||
tools: Tool definitions (for function calling).
|
tools: Tool definitions (for function calling).
|
||||||
timeout: Request timeout in seconds.
|
timeout: Request timeout in seconds (None = read from auxiliary.{task}.timeout config).
|
||||||
extra_body: Additional request body fields.
|
extra_body: Additional request body fields.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
@ -1598,10 +1621,12 @@ def call_llm(
|
||||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||||
f"Run: hermes setup")
|
f"Run: hermes setup")
|
||||||
|
|
||||||
|
effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
|
||||||
|
|
||||||
kwargs = _build_call_kwargs(
|
kwargs = _build_call_kwargs(
|
||||||
resolved_provider, final_model, messages,
|
resolved_provider, final_model, messages,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=temperature, max_tokens=max_tokens,
|
||||||
tools=tools, timeout=timeout, extra_body=extra_body,
|
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||||
base_url=resolved_base_url)
|
base_url=resolved_base_url)
|
||||||
|
|
||||||
# Handle max_tokens vs max_completion_tokens retry
|
# Handle max_tokens vs max_completion_tokens retry
|
||||||
|
|
@ -1683,7 +1708,7 @@ async def async_call_llm(
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
tools: list = None,
|
tools: list = None,
|
||||||
timeout: float = 30.0,
|
timeout: float = None,
|
||||||
extra_body: dict = None,
|
extra_body: dict = None,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
"""Centralized asynchronous LLM call.
|
"""Centralized asynchronous LLM call.
|
||||||
|
|
@ -1744,10 +1769,12 @@ async def async_call_llm(
|
||||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||||
f"Run: hermes setup")
|
f"Run: hermes setup")
|
||||||
|
|
||||||
|
effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
|
||||||
|
|
||||||
kwargs = _build_call_kwargs(
|
kwargs = _build_call_kwargs(
|
||||||
resolved_provider, final_model, messages,
|
resolved_provider, final_model, messages,
|
||||||
temperature=temperature, max_tokens=max_tokens,
|
temperature=temperature, max_tokens=max_tokens,
|
||||||
tools=tools, timeout=timeout, extra_body=extra_body,
|
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||||
base_url=resolved_base_url)
|
base_url=resolved_base_url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -347,7 +347,7 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
"temperature": 0.3,
|
"temperature": 0.3,
|
||||||
"max_tokens": summary_budget * 2,
|
"max_tokens": summary_budget * 2,
|
||||||
"timeout": 45.0,
|
# timeout resolved from auxiliary.compression.timeout config by call_llm
|
||||||
}
|
}
|
||||||
if self.summary_model:
|
if self.summary_model:
|
||||||
call_kwargs["model"] = self.summary_model
|
call_kwargs["model"] = self.summary_model
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ _TITLE_PROMPT = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def generate_title(user_message: str, assistant_response: str, timeout: float = 15.0) -> Optional[str]:
|
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
|
||||||
"""Generate a session title from the first exchange.
|
"""Generate a session title from the first exchange.
|
||||||
|
|
||||||
Uses the auxiliary LLM client (cheapest/fastest available model).
|
Uses the auxiliary LLM client (cheapest/fastest available model).
|
||||||
|
|
|
||||||
|
|
@ -227,42 +227,49 @@ DEFAULT_CONFIG = {
|
||||||
"model": "",
|
"model": "",
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 30, # seconds — increase for slow local models
|
||||||
},
|
},
|
||||||
"compression": {
|
"compression": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 120, # seconds — compression summarises large contexts; increase for local models
|
||||||
},
|
},
|
||||||
"session_search": {
|
"session_search": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 30,
|
||||||
},
|
},
|
||||||
"skills_hub": {
|
"skills_hub": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 30,
|
||||||
},
|
},
|
||||||
"approval": {
|
"approval": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "", # fast/cheap model recommended (e.g. gemini-flash, haiku)
|
"model": "", # fast/cheap model recommended (e.g. gemini-flash, haiku)
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 30,
|
||||||
},
|
},
|
||||||
"mcp": {
|
"mcp": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 30,
|
||||||
},
|
},
|
||||||
"flush_memories": {
|
"flush_memories": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
"base_url": "",
|
"base_url": "",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
|
"timeout": 30,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue