From 13c3d4b4efa2f39d7bc3178cf3eca77167ff7699 Mon Sep 17 00:00:00 2001 From: kchantharuan Date: Wed, 13 May 2026 12:46:07 -0700 Subject: [PATCH] feat(nvidia): add NIM billing origin header --- agent/auxiliary_client.py | 25 +++++++ run_agent.py | 30 ++++++-- tests/agent/test_auxiliary_client.py | 41 +++++++++++ tests/providers/test_provider_profiles.py | 4 ++ .../test_provider_attribution_headers.py | 68 +++++++++++++++++++ 5 files changed, 162 insertions(+), 6 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index cd655e70e56..1c7dd9f7497 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict: return headers + +# NVIDIA NIM cloud billing attribution. Keep this host-gated because the +# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL. +_NVIDIA_NIM_CLOUD_HEADERS = { + "X-BILLING-INVOKE-ORIGIN": "HermesAgent", +} + + +def build_nvidia_nim_headers(base_url: str | None) -> dict: + """Return NVIDIA NIM cloud attribution headers for build.nvidia.com traffic.""" + if base_url_host_matches(str(base_url or ""), "integrate.api.nvidia.com"): + return dict(_NVIDIA_NIM_CLOUD_HEADERS) + return {} + + # Vercel AI Gateway app attribution headers. HTTP-Referer maps to # referrerUrl and X-Title maps to appName in the gateway's analytics. from hermes_cli import __version__ as _HERMES_VERSION @@ -1372,6 +1387,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + extra["default_headers"] = build_nvidia_nim_headers(base_url) else: try: from providers import get_provider_profile as _gpf_aux @@ -1407,6 +1424,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + extra["default_headers"] = build_nvidia_nim_headers(base_url) else: try: from providers import get_provider_profile as _gpf_aux2 @@ -2690,6 +2709,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False): ) elif base_url_host_matches(sync_base_url, "api.kimi.com"): async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"): + async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url) else: # Fall back to profile.default_headers for providers that declare # client-level headers on their ProviderProfile (e.g. attribution @@ -2951,6 +2972,8 @@ def resolve_provider_client( extra["default_headers"] = copilot_request_headers( is_agent_turn=True, is_vision=is_vision ) + elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"): + extra["default_headers"] = build_nvidia_nim_headers(custom_base) else: # Fall back to profile.default_headers for providers that # declare client-level attribution headers on their profile. @@ -3149,6 +3172,8 @@ def resolve_provider_client( headers.update(copilot_request_headers( is_agent_turn=True, is_vision=is_vision )) + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + headers.update(build_nvidia_nim_headers(base_url)) else: # Fall back to profile.default_headers for providers that declare # client-level attribution headers on their profile (e.g. GMI diff --git a/run_agent.py b/run_agent.py index a82c6417ae1..7e42beb3eba 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1664,6 +1664,9 @@ class AIAgent: if base_url_host_matches(effective_base, "openrouter.ai"): from agent.auxiliary_client import build_or_headers client_kwargs["default_headers"] = build_or_headers() + elif base_url_host_matches(effective_base, "integrate.api.nvidia.com"): + from agent.auxiliary_client import build_nvidia_nim_headers + client_kwargs["default_headers"] = build_nvidia_nim_headers(effective_base) elif base_url_host_matches(effective_base, "api.routermint.com"): client_kwargs["default_headers"] = _routermint_headers() elif base_url_host_matches(effective_base, "api.githubcopilot.com"): @@ -1702,9 +1705,15 @@ class AIAgent: } if _provider_timeout is not None: client_kwargs["timeout"] = _provider_timeout - # Preserve any default_headers the router set - if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers: - client_kwargs["default_headers"] = dict(_routed_client._default_headers) + # Preserve provider-specific headers the router set. The + # OpenAI SDK stores caller-provided default_headers in + # _custom_headers; older/mocked clients may expose + # _default_headers instead. + _routed_headers = getattr(_routed_client, "_custom_headers", None) + if not _routed_headers: + _routed_headers = getattr(_routed_client, "_default_headers", None) + if _routed_headers: + client_kwargs["default_headers"] = dict(_routed_headers) else: # When the user explicitly chose a non-OpenRouter provider # but no credentials were found, fail fast with a clear @@ -1753,8 +1762,11 @@ class AIAgent: } if _provider_timeout is not None: client_kwargs["timeout"] = _provider_timeout - if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers: - client_kwargs["default_headers"] = dict(_fb_client._default_headers) + _fb_headers = getattr(_fb_client, "_custom_headers", None) + if not _fb_headers: + _fb_headers = getattr(_fb_client, "_default_headers", None) + if _fb_headers: + client_kwargs["default_headers"] = dict(_fb_headers) _fb_resolved = True break if not _fb_resolved: @@ -7334,12 +7346,18 @@ class AIAgent: return True def _apply_client_headers_for_base_url(self, base_url: str) -> None: - from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers + from agent.auxiliary_client import ( + _AI_GATEWAY_HEADERS, + build_nvidia_nim_headers, + build_or_headers, + ) if base_url_host_matches(base_url, "openrouter.ai"): self._client_kwargs["default_headers"] = build_or_headers() elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"): self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS) + elif base_url_host_matches(base_url, "integrate.api.nvidia.com"): + self._client_kwargs["default_headers"] = build_nvidia_nim_headers(base_url) elif base_url_host_matches(base_url, "api.routermint.com"): self._client_kwargs["default_headers"] = _routermint_headers() elif base_url_host_matches(base_url, "api.githubcopilot.com"): diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index c25ca219379..9dd85762956 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -2415,10 +2415,51 @@ def _clean_env(monkeypatch): """Strip provider env vars so each test starts clean.""" for key in ( "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", + "NVIDIA_API_KEY", "NVIDIA_BASE_URL", ): monkeypatch.delenv(key, raising=False) +class TestNvidiaBillingHeaders: + """NVIDIA NIM billing-origin headers are scoped to NVIDIA cloud.""" + + def test_resolve_provider_client_cloud_adds_billing_origin_header(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key") + monkeypatch.delenv("NVIDIA_BASE_URL", raising=False) + mock_openai = MagicMock() + mock_openai.return_value = MagicMock(name="nvidia-client") + + with patch("agent.auxiliary_client.OpenAI", mock_openai): + client, model = resolve_provider_client( + provider="nvidia", + model="nvidia/test-model", + ) + + assert client is not None + assert model == "nvidia/test-model" + call_kwargs = mock_openai.call_args[1] + headers = call_kwargs["default_headers"] + assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + def test_resolve_provider_client_local_nim_skips_billing_origin_header(self, monkeypatch): + monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key") + monkeypatch.setenv("NVIDIA_BASE_URL", "http://localhost:8000/v1") + mock_openai = MagicMock() + mock_openai.return_value = MagicMock(name="nvidia-local-client") + + with patch("agent.auxiliary_client.OpenAI", mock_openai): + client, model = resolve_provider_client( + provider="nvidia", + model="nvidia/test-model", + ) + + assert client is not None + assert model == "nvidia/test-model" + call_kwargs = mock_openai.call_args[1] + headers = call_kwargs.get("default_headers", {}) + assert "X-BILLING-INVOKE-ORIGIN" not in headers + + class TestOpenRouterExplicitApiKey: """Test that explicit_api_key is correctly propagated to _try_openrouter().""" diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py index c79ed2aea9b..df96a80fd80 100644 --- a/tests/providers/test_provider_profiles.py +++ b/tests/providers/test_provider_profiles.py @@ -42,6 +42,10 @@ class TestNvidiaProfile: p = get_provider_profile("nvidia") assert "nvidia.com" in p.base_url + def test_billing_header_not_profile_wide(self): + p = get_provider_profile("nvidia") + assert p.default_headers == {} + class TestKimiProfile: def test_temperature_omit(self): diff --git a/tests/run_agent/test_provider_attribution_headers.py b/tests/run_agent/test_provider_attribution_headers.py index 2a1d9088c46..a4ce301a857 100644 --- a/tests/run_agent/test_provider_attribution_headers.py +++ b/tests/run_agent/test_provider_attribution_headers.py @@ -3,6 +3,7 @@ Mirrors the OpenRouter pattern for the Vercel AI Gateway so that referrerUrl / appName / User-Agent flow into gateway analytics. """ +from types import SimpleNamespace from unittest.mock import MagicMock, patch from run_agent import AIAgent @@ -65,6 +66,73 @@ def test_routermint_base_url_applies_user_agent_header(mock_openai): assert headers["User-Agent"].startswith("HermesAgent/") +@patch("run_agent.OpenAI") +def test_nvidia_cloud_base_url_applies_billing_origin_header(mock_openai): + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key="test-key", + base_url="https://integrate.api.nvidia.com/v1", + model="nvidia/test-model", + provider="nvidia", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + assert agent._client_kwargs["default_headers"]["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + agent._apply_client_headers_for_base_url("https://integrate.api.nvidia.com/v1") + + headers = agent._client_kwargs["default_headers"] + assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + +@patch("run_agent.OpenAI") +def test_nvidia_local_base_url_does_not_apply_billing_origin_header(mock_openai): + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key="test-key", + base_url="https://integrate.api.nvidia.com/v1", + model="nvidia/test-model", + provider="nvidia", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + agent._client_kwargs["default_headers"] = { + "X-BILLING-INVOKE-ORIGIN": "HermesAgent", + } + + agent._apply_client_headers_for_base_url("http://localhost:8000/v1") + + assert "default_headers" not in agent._client_kwargs + + +@patch("run_agent.OpenAI") +def test_routed_client_preserves_openai_sdk_custom_headers(mock_openai): + mock_openai.return_value = MagicMock() + routed_client = SimpleNamespace( + api_key="test-key", + base_url="https://integrate.api.nvidia.com/v1", + _custom_headers={"X-BILLING-INVOKE-ORIGIN": "HermesAgent"}, + ) + + with patch("agent.auxiliary_client.resolve_provider_client", return_value=( + routed_client, + "nvidia/test-model", + )): + agent = AIAgent( + provider="nvidia", + model="nvidia/test-model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + headers = agent._client_kwargs["default_headers"] + assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent" + + @patch("run_agent.OpenAI") def test_gmi_base_url_picks_up_profile_user_agent(mock_openai): """GMI declares User-Agent on its ProviderProfile.default_headers.