fix(custom): pass custom provider extra body

Allow custom OpenAI-compatible providers declared under `custom_providers:` to set provider-specific `extra_body` fields and have Hermes merge them into chat-completions requests when the matching custom endpoint is active.

This is a manual per-provider override rather than a model-name heuristic. OpenAI-compatible Gemma thinking support is real, but the on-wire payload shape is backend-specific: some servers want top-level `enable_thinking`, while vLLM Gemma and NIM-style endpoints expect `chat_template_kwargs`. A per-provider override is safer than picking one assumed payload.

Example config:

```yaml
custom_providers:
  - name: gemma-local
    base_url: http://localhost:8080/v1
    model: google/gemma-4-31b-it
    extra_body:
      enable_thinking: true
      reasoning_effort: high
```

For vLLM Gemma or NIM-style endpoints, use the nested shape those servers expect:

```yaml
extra_body:
  chat_template_kwargs:
    enable_thinking: true
```

Changes:

- `hermes_cli/config.py`: preserve `extra_body` in normalized `custom_providers:` entries and allow it in the validated field set.
- `hermes_cli/runtime_provider.py`: propagate custom-provider `extra_body` as `request_overrides.extra_body` for named custom runtime resolution, including credential-pool paths.
- `agent/agent_init.py`: at agent init, locate the matching custom-provider entry by `base_url` (+ optional model) and merge its `extra_body` into `AIAgent.request_overrides`, with caller-provided overrides winning on conflicting top-level keys.
- `plugins/model-providers/custom/__init__.py`: keep existing CustomProfile behavior (Ollama `num_ctx`, `think=False` when reasoning disabled); user-configured `extra_body` flows through `request_overrides`.
- `website/docs/integrations/providers.md`: document the explicit `extra_body` override and the vLLM/Gemma `chat_template_kwargs` variant.
- Tests cover config normalization, runtime propagation, model matching, trailing-slash equivalence, fallback when no `model` field is set, and caller-override merging precedence.

Verified end-to-end against `CustomProfile` via `ChatCompletionsTransport`: configured `extra_body` reaches `kwargs.extra_body` on the wire request, and coexists with profile-generated entries (Ollama `num_ctx`, `think=False`) without clobbering them.

Salvaged from #29022 onto current `main`. A cosmetic typing edit in `plugins/model-providers/custom/__init__.py` and a stale-base docs revert in `providers.md` were dropped during cherry-pick.

Closes #29022
2026-06-08 08:11:38 +00:00 · 2026-05-21 19:45:15 +05:30 · 2026-05-21 19:45:15 +05:30 · ba9964ff0d
commit ba9964ff0d
parent 2fdefca570
7 changed files with 286 additions and 3 deletions
--- a/tests/agent/test_custom_provider_extra_body.py
+++ b/tests/agent/test_custom_provider_extra_body.py
@ -0,0 +1,93 @@
+from types import SimpleNamespace
+
+from agent.agent_init import _merge_custom_provider_extra_body
+
+
+def test_custom_provider_extra_body_merges_into_request_overrides():  # matching entry's extra_body is added alongside existing overrides
+    agent = SimpleNamespace(  # stand-in object carrying just these four attributes
+        provider="custom",
+        model="google/gemma-4-31b-it",
+        base_url="https://example.test/v1",
+        request_overrides={"service_tier": "priority"},  # pre-existing override; expected to survive the merge
+    )
+
+    _merge_custom_provider_extra_body(  # mutates agent in place; its return value is not used here
+        agent,
+        [
+            {
+                "name": "gemma",
+                "base_url": "https://example.test/v1/",  # trailing slash differs from the agent's base_url
+                "model": "google/gemma-4-31b-it",  # same model string as the agent
+                "extra_body": {
+                    "enable_thinking": True,
+                    "reasoning_effort": "high",
+                },
+            }
+        ],
+    )
+
+    assert agent.request_overrides == {  # entry still matched despite the trailing-slash difference
+        "service_tier": "priority",  # untouched
+        "extra_body": {  # copied from the provider entry
+            "enable_thinking": True,
+            "reasoning_effort": "high",
+        },
+    }
+
+
+def test_custom_provider_extra_body_preserves_caller_override():  # caller-supplied extra_body keys win over the provider entry's
+    agent = SimpleNamespace(  # stand-in object carrying just these four attributes
+        provider="custom",
+        model="google/gemma-4-31b-it",
+        base_url="https://example.test/v1",
+        request_overrides={
+            "extra_body": {  # caller already supplied an extra_body before the merge
+                "reasoning_effort": "low",  # conflicts with the provider entry's "high"
+                "caller_only": True,  # key absent from the provider entry
+            }
+        },
+    )
+
+    _merge_custom_provider_extra_body(  # mutates agent in place; its return value is not used here
+        agent,
+        [
+            {
+                "name": "gemma",
+                "base_url": "https://example.test/v1",
+                "model": "google/gemma-4-31b-it",
+                "extra_body": {
+                    "enable_thinking": True,  # provider-only key
+                    "reasoning_effort": "high",  # conflicting key
+                },
+            }
+        ],
+    )
+
+    assert agent.request_overrides["extra_body"] == {  # union of both dicts, caller value kept on conflict
+        "enable_thinking": True,  # filled in from the provider entry
+        "reasoning_effort": "low",  # caller's value, not the provider's "high"
+        "caller_only": True,  # caller-only key retained
+    }
+
+
+def test_custom_provider_extra_body_ignores_other_custom_models():  # same base_url but a different model must not merge
+    agent = SimpleNamespace(  # stand-in object carrying just these four attributes
+        provider="custom",
+        model="other-model",  # differs from the entry's "model" below
+        base_url="https://example.test/v1",
+        request_overrides={},  # starts empty so any merge would be visible
+    )
+
+    _merge_custom_provider_extra_body(  # mutates agent in place; its return value is not used here
+        agent,
+        [
+            {
+                "name": "gemma",
+                "base_url": "https://example.test/v1",  # identical to the agent's base_url
+                "model": "google/gemma-4-31b-it",
+                "extra_body": {"enable_thinking": True},
+            }
+        ],
+    )
+
+    assert agent.request_overrides == {}  # nothing was merged from the non-matching entry
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@ -1631,6 +1631,33 @@ def test_named_custom_runtime_propagates_model_direct_path(monkeypatch):
    assert resolved["provider"] == "custom"


+def test_named_custom_runtime_propagates_extra_body_direct_path(monkeypatch):  # "direct path": the pool lookup below is stubbed to None
+    """Custom provider extra_body should become runtime request_overrides."""
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-gemma")  # stub: always resolves to the named provider
+    monkeypatch.setattr(
+        rp, "_get_named_custom_provider",
+        lambda p: {  # stubbed provider entry returned for any requested name
+            "name": "my-gemma",
+            "base_url": "http://localhost:8000/v1",
+            "api_key": "test-key",
+            "model": "google/gemma-4-31b-it",
+            "extra_body": {  # the field under test
+                "enable_thinking": True,
+                "reasoning_effort": "high",
+            },
+        },
+    )
+    monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)  # stub: pool lookup yields nothing
+
+    resolved = rp.resolve_runtime_provider(requested="my-gemma")
+    assert resolved["request_overrides"] == {  # extra_body is expected nested under request_overrides, unchanged
+        "extra_body": {
+            "enable_thinking": True,
+            "reasoning_effort": "high",
+        }
+    }
+
+
 def test_named_custom_runtime_propagates_model_pool_path(monkeypatch):
    """Model should propagate even when credential pool handles credentials."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-server")
@ -1662,6 +1689,36 @@ def test_named_custom_runtime_propagates_model_pool_path(monkeypatch):
    assert resolved["api_key"] == "pool-key", "pool credentials should be used"


+def test_named_custom_runtime_propagates_extra_body_pool_path(monkeypatch):  # "pool path": the pool lookup below is stubbed to return a runtime dict
+    """Custom provider extra_body should survive credential-pool resolution."""
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-gemma")  # stub: always resolves to the named provider
+    monkeypatch.setattr(
+        rp, "_get_named_custom_provider",
+        lambda p: {  # stubbed provider entry returned for any requested name
+            "name": "my-gemma",
+            "base_url": "http://localhost:8000/v1",
+            "api_key": "test-key",
+            "model": "google/gemma-4-31b-it",
+            "extra_body": {"enable_thinking": True},  # the field under test
+        },
+    )
+    monkeypatch.setattr(
+        rp, "_try_resolve_from_custom_pool",
+        lambda *a, **k: {  # stubbed pool result; note it carries no request_overrides key
+            "provider": "custom",
+            "api_mode": "chat_completions",
+            "base_url": "http://localhost:8000/v1",
+            "api_key": "pool-key",  # differs from the entry's "test-key"
+            "source": "pool:custom:my-gemma",
+        },
+    )
+
+    resolved = rp.resolve_runtime_provider(requested="my-gemma")
+    assert resolved["request_overrides"] == {  # must come from the provider entry, since the pool stub has none
+        "extra_body": {"enable_thinking": True}
+    }
+
+
 def test_named_custom_runtime_no_model_when_absent(monkeypatch):
    """When custom_providers entry has no model field, runtime should not either."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-server")
@ -2150,6 +2207,24 @@ class TestProviderEntryApiKeyEnvAlias:
        key_env so the set stays in sync with what the runtime actually reads."""
        from hermes_cli.config import _VALID_CUSTOM_PROVIDER_FIELDS
        assert "key_env" in _VALID_CUSTOM_PROVIDER_FIELDS
+
+    def test_extra_body_is_supported_schema(self):  # extra_body is an accepted field and is kept by normalization
+        from hermes_cli.config import (  # imported inside the test, like the key_env test just above
+            _VALID_CUSTOM_PROVIDER_FIELDS,
+            _normalize_custom_provider_entry,
+        )
+        entry = {
+            "name": "vendor",
+            "base_url": "https://api.vendor.example.com/v1",
+            "extra_body": {  # mixes a nested mapping with a flat boolean
+                "chat_template_kwargs": {"enable_thinking": True},
+                "include_reasoning": True,
+            },
+        }
+        normalized = _normalize_custom_provider_entry(dict(entry), provider_key="vendor")  # dict(entry) passes a shallow copy, not entry itself
+        assert normalized is not None  # guards the subscript on the last line
+        assert "extra_body" in _VALID_CUSTOM_PROVIDER_FIELDS  # field is part of the validated set
+        assert normalized["extra_body"] == entry["extra_body"]  # nested payload equals the input after normalization
 # =============================================================================
 # Tencent TokenHub — API-key provider runtime resolution
 # =============================================================================
--- a/tests/providers/test_transport_parity.py
+++ b/tests/providers/test_transport_parity.py
@ -236,7 +236,7 @@ class TestQwenParity:


 class TestCustomOllamaParity:
-    """Custom/Ollama: num_ctx, think=false — now tested via profile."""
+    """Custom/Ollama: num_ctx, thinking controls — now tested via profile."""

    def test_ollama_num_ctx(self, transport):
        kw = transport.build_kwargs(