fix(aux): add session_search extra_body and concurrency controls

Adds auxiliary.<task>.extra_body config passthrough so reasoning-heavy
OpenAI-compatible providers can receive provider-specific request fields
(e.g. enable_thinking: false on GLM) on auxiliary calls, and bounds
session_search summary fan-out with auxiliary.session_search.max_concurrency
(default 3, clamped 1-5) to avoid 429 bursts on small providers.

- agent/auxiliary_client.py: extract _get_auxiliary_task_config helper,
  add _get_task_extra_body, merge config+explicit extra_body with explicit winning
- hermes_cli/config.py: extra_body defaults on all aux tasks +
  session_search.max_concurrency; _config_version 19 -> 20
- tools/session_search_tool.py: semaphore around _summarize_all gather
- tests: coverage in test_auxiliary_client, test_session_search, test_aux_config
- docs: user-guide/configuration.md + fallback-providers.md

Co-authored-by: Teknium <teknium@nousresearch.com>
This commit is contained in:
helix4u 2026-04-20 00:44:32 -07:00 committed by Teknium
parent 904f20d622
commit 6ab78401c9
8 changed files with 207 additions and 26 deletions

View file

@ -946,6 +946,70 @@ class TestStaleBaseUrlWarning:
"Expected a warning about stale OPENAI_BASE_URL"
assert mod._stale_base_url_warned is True
class TestAuxiliaryTaskExtraBody:
def test_sync_call_merges_task_extra_body_from_config(self):
client = MagicMock()
client.base_url = "https://api.example.com/v1"
response = MagicMock()
client.chat.completions.create.return_value = response
config = {
"auxiliary": {
"session_search": {
"extra_body": {
"enable_thinking": False,
"reasoning": {"effort": "none"},
}
}
}
}
with patch("hermes_cli.config.load_config", return_value=config), patch(
"agent.auxiliary_client._get_cached_client",
return_value=(client, "glm-4.5-air"),
):
result = call_llm(
task="session_search",
messages=[{"role": "user", "content": "hello"}],
extra_body={"metadata": {"source": "test"}},
)
assert result is response
kwargs = client.chat.completions.create.call_args.kwargs
assert kwargs["extra_body"]["enable_thinking"] is False
assert kwargs["extra_body"]["reasoning"] == {"effort": "none"}
assert kwargs["extra_body"]["metadata"] == {"source": "test"}
@pytest.mark.asyncio
async def test_async_call_explicit_extra_body_overrides_task_config(self):
client = MagicMock()
client.base_url = "https://api.example.com/v1"
response = MagicMock()
client.chat.completions.create = AsyncMock(return_value=response)
config = {
"auxiliary": {
"session_search": {
"extra_body": {"enable_thinking": False}
}
}
}
with patch("hermes_cli.config.load_config", return_value=config), patch(
"agent.auxiliary_client._get_cached_client",
return_value=(client, "glm-4.5-air"),
):
result = await async_call_llm(
task="session_search",
messages=[{"role": "user", "content": "hello"}],
extra_body={"enable_thinking": True},
)
assert result is response
kwargs = client.chat.completions.create.call_args.kwargs
assert kwargs["extra_body"]["enable_thinking"] is True
def test_no_warning_when_provider_is_custom(self, monkeypatch, caplog):
"""No warning when the provider is 'custom' — OPENAI_BASE_URL is expected."""
import agent.auxiliary_client as mod