mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-07 08:02:23 +00:00
fix(custom): pass custom provider extra body
Allow custom OpenAI-compatible providers declared under `custom_providers:`
to set provider-specific `extra_body` fields and have Hermes merge them into
chat-completions requests when the matching custom endpoint is active.
This is a manual per-provider override rather than a model-name heuristic.
OpenAI-compatible Gemma thinking support is real, but the on-wire payload
shape is backend-specific: some servers want top-level `enable_thinking`,
while vLLM Gemma and NIM-style endpoints expect `chat_template_kwargs`.
A per-provider override is safer than picking one assumed payload.
Example config:
```yaml
custom_providers:
  - name: gemma-local
    base_url: http://localhost:8080/v1
    model: google/gemma-4-31b-it
    extra_body:
      enable_thinking: true
      reasoning_effort: high
```
For vLLM Gemma or NIM-style endpoints, use the nested shape those servers
expect:
```yaml
extra_body:
  chat_template_kwargs:
    enable_thinking: true
```
Changes:
- `hermes_cli/config.py`: preserve `extra_body` in normalized
`custom_providers:` entries and allow it in the validated field set.
- `hermes_cli/runtime_provider.py`: propagate custom-provider `extra_body`
as `request_overrides.extra_body` for named custom runtime resolution,
including credential-pool paths.
- `agent/agent_init.py`: at agent init, locate the matching custom-provider
entry by `base_url` (+ optional model) and merge its `extra_body` into
`AIAgent.request_overrides`, with caller-provided overrides winning on
conflicting top-level keys.
- `plugins/model-providers/custom/__init__.py`: keep existing CustomProfile
behavior (Ollama `num_ctx`, `think=False` when reasoning disabled);
user-configured `extra_body` flows through `request_overrides`.
- `website/docs/integrations/providers.md`: document the explicit
`extra_body` override and the vLLM/Gemma `chat_template_kwargs` variant.
- Tests cover config normalization, runtime propagation, model matching,
trailing-slash equivalence, fallback when no `model` field is set, and
caller-override merging precedence.
Verified end-to-end against `CustomProfile` via `ChatCompletionsTransport`:
the configured `extra_body` reaches `kwargs.extra_body` on the wire request
and coexists with profile-generated entries (Ollama `num_ctx`, `think=False`)
without clobbering them.
Salvaged from #29022 onto current `main`. Cosmetic typing edit in
`plugins/model-providers/custom/__init__.py` and a stale-base docs revert
in `providers.md` were dropped during cherry-pick.
Closes #29022
This commit is contained in:
parent
2fdefca570
commit
ba9964ff0d
7 changed files with 286 additions and 3 deletions
|
|
@ -3017,7 +3017,7 @@ def _normalize_custom_provider_entry(
|
|||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
"discover_models",
|
||||
"discover_models", "extra_body",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
|
|
@ -3112,6 +3112,10 @@ def _normalize_custom_provider_entry(
|
|||
if isinstance(discover_models, bool):
|
||||
normalized["discover_models"] = discover_models
|
||||
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
normalized["extra_body"] = dict(extra_body)
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
|
|
@ -3272,7 +3276,7 @@ _KNOWN_ROOT_KEYS = {
|
|||
# Valid fields inside a custom_providers list entry
|
||||
_VALID_CUSTOM_PROVIDER_FIELDS = {
|
||||
"name", "base_url", "api_key", "api_mode", "model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"context_length", "rate_limit_delay", "extra_body",
|
||||
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
|
||||
# — include it here so the set accurately describes the supported schema.
|
||||
"key_env",
|
||||
|
|
|
|||
|
|
@ -528,6 +528,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
|||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
# The v11→v12 migration writes the API mode under the new
|
||||
# ``transport`` field, but hand-edited configs may still
|
||||
# use the legacy ``api_mode`` spelling. Accept both —
|
||||
|
|
@ -553,6 +556,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
|||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
|
|
@ -596,6 +602,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
|||
result["key_env"] = key_env
|
||||
if provider_key:
|
||||
result["provider_key"] = provider_key
|
||||
extra_body = entry.get("extra_body")
|
||||
if isinstance(extra_body, dict):
|
||||
result["extra_body"] = dict(extra_body)
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
|
|
@ -607,6 +616,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
|||
return None
|
||||
|
||||
|
||||
def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
|
||||
extra_body = custom_provider.get("extra_body")
|
||||
if not isinstance(extra_body, dict) or not extra_body:
|
||||
return {}
|
||||
return {"extra_body": dict(extra_body)}
|
||||
|
||||
|
||||
def _resolve_named_custom_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
|
|
@ -683,6 +699,12 @@ def _resolve_named_custom_runtime(
|
|||
model_name = custom_provider.get("model")
|
||||
if model_name:
|
||||
pool_result["model"] = model_name
|
||||
request_overrides = _custom_provider_request_overrides(custom_provider)
|
||||
if request_overrides:
|
||||
pool_result["request_overrides"] = {
|
||||
**dict(pool_result.get("request_overrides") or {}),
|
||||
**request_overrides,
|
||||
}
|
||||
return pool_result
|
||||
|
||||
_cp_is_openai_url = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
|
||||
|
|
@ -714,6 +736,9 @@ def _resolve_named_custom_runtime(
|
|||
# provider name differs from the actual model string the API expects.
|
||||
if custom_provider.get("model"):
|
||||
result["model"] = custom_provider["model"]
|
||||
request_overrides = _custom_provider_request_overrides(custom_provider)
|
||||
if request_overrides:
|
||||
result["request_overrides"] = request_overrides
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue