mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: call_llm/async_call_llm + config slots + migrate all consumers
Add centralized call_llm() and async_call_llm() functions that own the full LLM request lifecycle:

1. Resolve provider + model from task config or explicit args
2. Get or create a cached client for that provider
3. Format request args (max_tokens handling, provider extra_body)
4. Make the API call with max_tokens/max_completion_tokens retry
5. Return the response

Config: expanded auxiliary section with provider:model slots for all tasks (compression, vision, web_extract, session_search, skills_hub, mcp, flush_memories). Config version bumped to 7.

Migrated all auxiliary consumers:

- context_compressor.py: uses call_llm(task='compression')
- vision_tools.py: uses async_call_llm(task='vision')
- web_tools.py: uses async_call_llm(task='web_extract')
- session_search_tool.py: uses async_call_llm(task='session_search')
- browser_tool.py: uses call_llm(task='vision'/'web_extract')
- mcp_tool.py: uses call_llm(task='mcp')
- skills_guard.py: uses call_llm(provider='openrouter')
- run_agent.py flush_memories: uses call_llm(task='flush_memories')

Tests updated for context_compressor and MCP tool. Some test mocks still need updating (15 remaining failures from mock pattern changes, 2 pre-existing).
This commit is contained in:
parent
013cc4d2fc
commit
0aa31cd3cb
13 changed files with 552 additions and 375 deletions
|
|
@ -1828,8 +1828,8 @@ class TestSamplingCallbackText:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
params = _make_sampling_params()
|
||||
result = asyncio.run(self.handler(None, params))
|
||||
|
|
@ -1847,13 +1847,13 @@ class TestSamplingCallbackText:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
):
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
) as mock_call:
|
||||
params = _make_sampling_params(system_prompt="Be helpful")
|
||||
asyncio.run(self.handler(None, params))
|
||||
|
||||
call_args = fake_client.chat.completions.create.call_args
|
||||
call_args = mock_call.call_args
|
||||
messages = call_args.kwargs["messages"]
|
||||
assert messages[0] == {"role": "system", "content": "Be helpful"}
|
||||
|
||||
|
|
@ -1865,8 +1865,8 @@ class TestSamplingCallbackText:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
params = _make_sampling_params()
|
||||
result = asyncio.run(self.handler(None, params))
|
||||
|
|
@ -1889,8 +1889,8 @@ class TestSamplingCallbackToolUse:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
params = _make_sampling_params()
|
||||
result = asyncio.run(self.handler(None, params))
|
||||
|
|
@ -1916,8 +1916,8 @@ class TestSamplingCallbackToolUse:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(self.handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -1939,8 +1939,8 @@ class TestToolLoopGovernance:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
params = _make_sampling_params()
|
||||
# Round 1, 2: allowed
|
||||
|
|
@ -1959,8 +1959,8 @@ class TestToolLoopGovernance:
|
|||
fake_client = MagicMock()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
# Tool response (round 1 of 1 allowed)
|
||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||
|
|
@ -1984,8 +1984,8 @@ class TestToolLoopGovernance:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
assert isinstance(result, ErrorData)
|
||||
|
|
@ -2003,8 +2003,8 @@ class TestSamplingErrors:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
# First call succeeds
|
||||
r1 = asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
|
@ -2017,20 +2017,16 @@ class TestSamplingErrors:
|
|||
|
||||
def test_timeout_error(self):
|
||||
handler = SamplingHandler("to", {"timeout": 0.05})
|
||||
fake_client = MagicMock()
|
||||
|
||||
def slow_call(**kwargs):
|
||||
import threading
|
||||
# Use an event to ensure the thread truly blocks long enough
|
||||
evt = threading.Event()
|
||||
evt.wait(5) # blocks for up to 5 seconds (cancelled by timeout)
|
||||
return _make_llm_response()
|
||||
|
||||
fake_client.chat.completions.create.side_effect = slow_call
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
side_effect=slow_call,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
assert isinstance(result, ErrorData)
|
||||
|
|
@ -2041,12 +2037,11 @@ class TestSamplingErrors:
|
|||
handler = SamplingHandler("np", {})
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(None, None),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
side_effect=RuntimeError("No LLM provider configured"),
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
assert isinstance(result, ErrorData)
|
||||
assert "No LLM provider" in result.message
|
||||
assert handler.metrics["errors"] == 1
|
||||
|
||||
def test_empty_choices_returns_error(self):
|
||||
|
|
@ -2060,8 +2055,8 @@ class TestSamplingErrors:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2080,8 +2075,8 @@ class TestSamplingErrors:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2099,8 +2094,8 @@ class TestSamplingErrors:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2120,8 +2115,8 @@ class TestModelWhitelist:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "test-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
assert isinstance(result, CreateMessageResult)
|
||||
|
|
@ -2131,8 +2126,8 @@ class TestModelWhitelist:
|
|||
fake_client = MagicMock()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "gpt-3.5-turbo"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
assert isinstance(result, ErrorData)
|
||||
|
|
@ -2145,8 +2140,8 @@ class TestModelWhitelist:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "any-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
assert isinstance(result, CreateMessageResult)
|
||||
|
|
@ -2166,8 +2161,8 @@ class TestMalformedToolCallArgs:
|
|||
)
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2194,8 +2189,8 @@ class TestMalformedToolCallArgs:
|
|||
fake_client.chat.completions.create.return_value = response
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2214,8 +2209,8 @@ class TestMetricsTracking:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2229,8 +2224,8 @@ class TestMetricsTracking:
|
|||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "default-model"),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
return_value=fake_client.chat.completions.create.return_value,
|
||||
):
|
||||
asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
@ -2241,8 +2236,8 @@ class TestMetricsTracking:
|
|||
handler = SamplingHandler("met3", {})
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(None, None),
|
||||
"agent.auxiliary_client.call_llm",
|
||||
side_effect=RuntimeError("No LLM provider configured"),
|
||||
):
|
||||
asyncio.run(handler(None, _make_sampling_params()))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue