From 13c3d4b4efa2f39d7bc3178cf3eca77167ff7699 Mon Sep 17 00:00:00 2001
From: kchantharuan <kchantharuan@nvidia.com>
Date: Wed, 13 May 2026 12:46:07 -0700
Subject: [PATCH] feat(nvidia): add NIM billing origin header

---
 agent/auxiliary_client.py                     | 25 +++++++
 run_agent.py                                  | 30 ++++++--
 tests/agent/test_auxiliary_client.py          | 41 +++++++++++
 tests/providers/test_provider_profiles.py     |  4 ++
 .../test_provider_attribution_headers.py      | 68 +++++++++++++++++++
 5 files changed, 162 insertions(+), 6 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index cd655e70e56..1c7dd9f7497 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict:
 
     return headers
 
+
+# NVIDIA NIM cloud billing attribution.  Keep this host-gated because the
+# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL.
+_NVIDIA_NIM_CLOUD_HEADERS = {
+    "X-BILLING-INVOKE-ORIGIN": "HermesAgent",
+}
+
+
+def build_nvidia_nim_headers(base_url: str | None) -> dict:
+    """Return billing headers for NIM cloud (integrate.api.nvidia.com), else an empty dict."""
+    if base_url_host_matches(str(base_url or ""), "integrate.api.nvidia.com"):
+        return dict(_NVIDIA_NIM_CLOUD_HEADERS)
+    return {}
+
+
 # Vercel AI Gateway app attribution headers. HTTP-Referer maps to
 # referrerUrl and X-Title maps to appName in the gateway's analytics.
 from hermes_cli import __version__ as _HERMES_VERSION
@@ -1372,6 +1387,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                 from hermes_cli.models import copilot_default_headers
 
                 extra["default_headers"] = copilot_default_headers()
+            elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+                extra["default_headers"] = build_nvidia_nim_headers(base_url)
             else:
                 try:
                     from providers import get_provider_profile as _gpf_aux
@@ -1407,6 +1424,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
             from hermes_cli.models import copilot_default_headers
 
             extra["default_headers"] = copilot_default_headers()
+        elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+            extra["default_headers"] = build_nvidia_nim_headers(base_url)
         else:
             try:
                 from providers import get_provider_profile as _gpf_aux2
@@ -2690,6 +2709,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
         )
     elif base_url_host_matches(sync_base_url, "api.kimi.com"):
         async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+    elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"):
+        async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url)
     else:
         # Fall back to profile.default_headers for providers that declare
         # client-level headers on their ProviderProfile (e.g. attribution
@@ -2951,6 +2972,8 @@ def resolve_provider_client(
                 extra["default_headers"] = copilot_request_headers(
                     is_agent_turn=True, is_vision=is_vision
                 )
+            elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"):
+                extra["default_headers"] = build_nvidia_nim_headers(custom_base)
             else:
                 # Fall back to profile.default_headers for providers that
                 # declare client-level attribution headers on their profile.
@@ -3149,6 +3172,8 @@ def resolve_provider_client(
             headers.update(copilot_request_headers(
                 is_agent_turn=True, is_vision=is_vision
             ))
+        elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+            headers.update(build_nvidia_nim_headers(base_url))
         else:
             # Fall back to profile.default_headers for providers that declare
             # client-level attribution headers on their profile (e.g. GMI
diff --git a/run_agent.py b/run_agent.py
index a82c6417ae1..7e42beb3eba 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1664,6 +1664,9 @@ class AIAgent:
                 if base_url_host_matches(effective_base, "openrouter.ai"):
                     from agent.auxiliary_client import build_or_headers
                     client_kwargs["default_headers"] = build_or_headers()
+                elif base_url_host_matches(effective_base, "integrate.api.nvidia.com"):
+                    from agent.auxiliary_client import build_nvidia_nim_headers
+                    client_kwargs["default_headers"] = build_nvidia_nim_headers(effective_base)
                 elif base_url_host_matches(effective_base, "api.routermint.com"):
                     client_kwargs["default_headers"] = _routermint_headers()
                 elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
@@ -1702,9 +1705,15 @@ class AIAgent:
                     }
                     if _provider_timeout is not None:
                         client_kwargs["timeout"] = _provider_timeout
-                    # Preserve any default_headers the router set
-                    if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
-                        client_kwargs["default_headers"] = dict(_routed_client._default_headers)
+                    # Preserve provider-specific headers the router set.  The
+                    # OpenAI SDK stores caller-provided default_headers in
+                    # _custom_headers; older/mocked clients may expose
+                    # _default_headers instead.
+                    _routed_headers = getattr(_routed_client, "_custom_headers", None)
+                    if not _routed_headers:
+                        _routed_headers = getattr(_routed_client, "_default_headers", None)
+                    if _routed_headers:
+                        client_kwargs["default_headers"] = dict(_routed_headers)
                 else:
                     # When the user explicitly chose a non-OpenRouter provider
                     # but no credentials were found, fail fast with a clear
@@ -1753,8 +1762,11 @@ class AIAgent:
                                 }
                                 if _provider_timeout is not None:
                                     client_kwargs["timeout"] = _provider_timeout
-                                if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers:
-                                    client_kwargs["default_headers"] = dict(_fb_client._default_headers)
+                                _fb_headers = getattr(_fb_client, "_custom_headers", None)
+                                if not _fb_headers:
+                                    _fb_headers = getattr(_fb_client, "_default_headers", None)
+                                if _fb_headers:
+                                    client_kwargs["default_headers"] = dict(_fb_headers)
                                 _fb_resolved = True
                                 break
                         if not _fb_resolved:
@@ -7334,12 +7346,18 @@ class AIAgent:
         return True
 
     def _apply_client_headers_for_base_url(self, base_url: str) -> None:
-        from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers
+        from agent.auxiliary_client import (
+            _AI_GATEWAY_HEADERS,
+            build_nvidia_nim_headers,
+            build_or_headers,
+        )
 
         if base_url_host_matches(base_url, "openrouter.ai"):
             self._client_kwargs["default_headers"] = build_or_headers()
         elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
             self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
+        elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
+            self._client_kwargs["default_headers"] = build_nvidia_nim_headers(base_url)
         elif base_url_host_matches(base_url, "api.routermint.com"):
             self._client_kwargs["default_headers"] = _routermint_headers()
         elif base_url_host_matches(base_url, "api.githubcopilot.com"):
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index c25ca219379..9dd85762956 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -2415,10 +2415,51 @@ def _clean_env(monkeypatch):
     """Strip provider env vars so each test starts clean."""
     for key in (
         "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
+        "NVIDIA_API_KEY", "NVIDIA_BASE_URL",
     ):
         monkeypatch.delenv(key, raising=False)
 
 
+class TestNvidiaBillingHeaders:
+    """NVIDIA NIM billing-origin headers are scoped to NVIDIA cloud."""
+
+    def test_resolve_provider_client_cloud_adds_billing_origin_header(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key")
+        monkeypatch.delenv("NVIDIA_BASE_URL", raising=False)
+        mock_openai = MagicMock()
+        mock_openai.return_value = MagicMock(name="nvidia-client")
+
+        with patch("agent.auxiliary_client.OpenAI", mock_openai):
+            client, model = resolve_provider_client(
+                provider="nvidia",
+                model="nvidia/test-model",
+            )
+
+        assert client is not None
+        assert model == "nvidia/test-model"
+        call_kwargs = mock_openai.call_args[1]
+        headers = call_kwargs["default_headers"]
+        assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+    def test_resolve_provider_client_local_nim_skips_billing_origin_header(self, monkeypatch):
+        monkeypatch.setenv("NVIDIA_API_KEY", "nvidia-key")
+        monkeypatch.setenv("NVIDIA_BASE_URL", "http://localhost:8000/v1")
+        mock_openai = MagicMock()
+        mock_openai.return_value = MagicMock(name="nvidia-local-client")
+
+        with patch("agent.auxiliary_client.OpenAI", mock_openai):
+            client, model = resolve_provider_client(
+                provider="nvidia",
+                model="nvidia/test-model",
+            )
+
+        assert client is not None
+        assert model == "nvidia/test-model"
+        call_kwargs = mock_openai.call_args[1]
+        headers = call_kwargs.get("default_headers", {})
+        assert "X-BILLING-INVOKE-ORIGIN" not in headers
+
+
 class TestOpenRouterExplicitApiKey:
     """Test that explicit_api_key is correctly propagated to _try_openrouter()."""
 
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index c79ed2aea9b..df96a80fd80 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -42,6 +42,10 @@ class TestNvidiaProfile:
         p = get_provider_profile("nvidia")
         assert "nvidia.com" in p.base_url
 
+    def test_billing_header_not_profile_wide(self):
+        p = get_provider_profile("nvidia")
+        assert p.default_headers == {}
+
 
 class TestKimiProfile:
     def test_temperature_omit(self):
diff --git a/tests/run_agent/test_provider_attribution_headers.py b/tests/run_agent/test_provider_attribution_headers.py
index 2a1d9088c46..a4ce301a857 100644
--- a/tests/run_agent/test_provider_attribution_headers.py
+++ b/tests/run_agent/test_provider_attribution_headers.py
@@ -3,6 +3,7 @@
 Mirrors the OpenRouter pattern for the Vercel AI Gateway so that
 referrerUrl / appName / User-Agent flow into gateway analytics.
 """
+from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 from run_agent import AIAgent
@@ -65,6 +66,73 @@ def test_routermint_base_url_applies_user_agent_header(mock_openai):
     assert headers["User-Agent"].startswith("HermesAgent/")
 
 
+@patch("run_agent.OpenAI")
+def test_nvidia_cloud_base_url_applies_billing_origin_header(mock_openai):
+    mock_openai.return_value = MagicMock()
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://integrate.api.nvidia.com/v1",
+        model="nvidia/test-model",
+        provider="nvidia",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+    assert agent._client_kwargs["default_headers"]["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+    agent._apply_client_headers_for_base_url("https://integrate.api.nvidia.com/v1")
+
+    headers = agent._client_kwargs["default_headers"]
+    assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+
+@patch("run_agent.OpenAI")
+def test_nvidia_local_base_url_does_not_apply_billing_origin_header(mock_openai):
+    mock_openai.return_value = MagicMock()
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://integrate.api.nvidia.com/v1",
+        model="nvidia/test-model",
+        provider="nvidia",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent._client_kwargs["default_headers"] = {
+        "X-BILLING-INVOKE-ORIGIN": "HermesAgent",
+    }
+
+    agent._apply_client_headers_for_base_url("http://localhost:8000/v1")
+
+    assert "default_headers" not in agent._client_kwargs
+
+
+@patch("run_agent.OpenAI")
+def test_routed_client_preserves_openai_sdk_custom_headers(mock_openai):
+    mock_openai.return_value = MagicMock()
+    routed_client = SimpleNamespace(
+        api_key="test-key",
+        base_url="https://integrate.api.nvidia.com/v1",
+        _custom_headers={"X-BILLING-INVOKE-ORIGIN": "HermesAgent"},
+    )
+
+    with patch("agent.auxiliary_client.resolve_provider_client", return_value=(
+        routed_client,
+        "nvidia/test-model",
+    )):
+        agent = AIAgent(
+            provider="nvidia",
+            model="nvidia/test-model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+
+    headers = agent._client_kwargs["default_headers"]
+    assert headers["X-BILLING-INVOKE-ORIGIN"] == "HermesAgent"
+
+
 @patch("run_agent.OpenAI")
 def test_gmi_base_url_picks_up_profile_user_agent(mock_openai):
     """GMI declares User-Agent on its ProviderProfile.default_headers.