fix(copilot): recognize enterprise subdomains in host checks

The earlier enterprise base URL change (proxy-ep parsing) gave us URLs
like `api.enterprise.githubcopilot.com`, but ~15 host-matching call
sites still hard-coded `api.githubcopilot.com`. Clients built for
enterprise endpoints therefore never received the
`Copilot-Integration-Id: vscode-chat` header at client-build time, and
upstream rejected their requests with:

    The requested model is not available for integrator "zed"
    (or "copilot-language-server") — verify the correct
    Copilot-Integration-Id header is being sent.

The header was correct in copilot_default_headers(); it just never
made it into default_headers for non-default hostnames because every
detector compared against the exact string "api.githubcopilot.com".

This commit broadens all those checks to "githubcopilot.com" via
base_url_host_matches (which already does proper subdomain matching),
so api.enterprise.githubcopilot.com, api.business.githubcopilot.com,
etc. all share the same headers, vision routing, max_completion_tokens
selection, and reasoning-effort detection as the default endpoint.

Also adds ".githubcopilot.com" to _URL_TO_PROVIDER so context-window
resolution via models.dev works for enterprise base URLs, and relaxes
_is_github_copilot_url to accept any "*.githubcopilot.com" hostname
(suffix matching) instead of requiring strict equality with
"api.githubcopilot.com".

Tests:
- New: enterprise Copilot endpoint preserves Copilot-Integration-Id
- New: enterprise endpoint returns max_completion_tokens (not max_tokens)
- Existing 333 base_url / copilot / aux-client / credential-pool tests pass

Part 5 of #7731.
This commit is contained in:
NiuNiu Xia 2026-05-22 15:30:22 +00:00 committed by Teknium
parent fbd15e285c
commit fb07215844
7 changed files with 60 additions and 14 deletions

View file

@ -828,7 +828,7 @@ def init_agent(
client_kwargs["default_headers"] = build_nvidia_nim_headers(effective_base)
elif base_url_host_matches(effective_base, "api.routermint.com"):
client_kwargs["default_headers"] = _ra()._routermint_headers()
elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
elif base_url_host_matches(effective_base, "githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
client_kwargs["default_headers"] = copilot_default_headers()

View file

@ -1624,7 +1624,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif base_url_host_matches(base_url, "githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@ -1664,7 +1664,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif base_url_host_matches(base_url, "githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@ -2954,7 +2954,7 @@ def _recoverable_pool_provider(
return "nous"
if base_url_host_matches(base, "api.anthropic.com"):
return "anthropic"
if base_url_host_matches(base, "api.githubcopilot.com"):
if base_url_host_matches(base, "githubcopilot.com"):
return "copilot"
if base_url_host_matches(base, "api.kimi.com"):
return "kimi-coding"
@ -3823,7 +3823,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
sync_base_url = str(sync_client.base_url)
if base_url_host_matches(sync_base_url, "openrouter.ai"):
async_kwargs["default_headers"] = build_or_headers()
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
elif base_url_host_matches(sync_base_url, "githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
async_kwargs["default_headers"] = copilot_request_headers(
@ -4128,7 +4128,7 @@ def resolve_provider_client(
extra["default_query"] = _dq
if base_url_host_matches(custom_base, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
elif base_url_host_matches(custom_base, "githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
extra["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
@ -4381,7 +4381,7 @@ def resolve_provider_client(
headers = {}
if base_url_host_matches(base_url, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif base_url_host_matches(base_url, "githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
headers.update(copilot_request_headers(
@ -4854,9 +4854,14 @@ def auxiliary_max_tokens_param(value: int, *, model: Optional[str] = None) -> di
or_key = os.getenv("OPENROUTER_API_KEY")
# Use max_completion_tokens for direct OpenAI-compatible providers that reject
# max_tokens on newer GPT-4o/o-series/GPT-5-style models.
_custom_host = base_url_hostname(custom_base) or ""
if (not or_key
and _read_nous_auth() is None
and base_url_hostname(custom_base) in {"api.openai.com", "api.githubcopilot.com"}):
and (
_custom_host == "api.openai.com"
or _custom_host == "api.githubcopilot.com"
or _custom_host.endswith(".githubcopilot.com")
)):
return {"max_completion_tokens": value}
# ...and for any caller serving a newer OpenAI-family model by name.
if model_forces_max_completion_tokens(model):

View file

@ -632,7 +632,7 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
_ct = agent._get_transport()
is_github_responses = (
base_url_host_matches(agent.base_url, "models.github.ai")
or base_url_host_matches(agent.base_url, "api.githubcopilot.com")
or base_url_host_matches(agent.base_url, "githubcopilot.com")
)
is_codex_backend = (
agent.provider == "openai-codex"
@ -702,7 +702,7 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
_is_or = agent._is_openrouter_url()
_is_gh = (
base_url_host_matches(agent._base_url_lower, "models.github.ai")
or base_url_host_matches(agent._base_url_lower, "api.githubcopilot.com")
or base_url_host_matches(agent._base_url_lower, "githubcopilot.com")
)
_is_nous = "nousresearch" in agent._base_url_lower
_is_nvidia = "integrate.api.nvidia.com" in agent._base_url_lower

View file

@ -429,6 +429,10 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"inference-api.nousresearch.com": "nous",
"api.deepseek.com": "deepseek",
"api.githubcopilot.com": "copilot",
# Enterprise Copilot endpoints look like api.enterprise.githubcopilot.com,
# api.business.githubcopilot.com, etc. Match the suffix so context-window
# resolution works for enterprise accounts too.
".githubcopilot.com": "copilot",
"models.github.ai": "copilot",
# GitHub Models free tier (Azure-hosted prototyping endpoint) — same
# canonical provider as the Copilot API. Hard per-request token cap

View file

@ -1133,7 +1133,9 @@ class AIAgent:
hostname = getattr(self, "_base_url_hostname", "") or base_url_hostname(
getattr(self, "_base_url_lower", "")
)
return hostname == "api.githubcopilot.com"
if not hostname:
return False
return hostname == "api.githubcopilot.com" or hostname.endswith(".githubcopilot.com")
def _resolved_api_call_timeout(self) -> float:
"""Resolve the effective per-call request timeout in seconds.
@ -3837,7 +3839,7 @@ class AIAgent:
# unaffected (they don't go through here).
request_kwargs["max_retries"] = 0
if (
base_url_host_matches(str(request_kwargs.get("base_url", "")), "api.githubcopilot.com")
base_url_host_matches(str(request_kwargs.get("base_url", "")), "githubcopilot.com")
and self._api_kwargs_have_image_parts(api_kwargs or {})
):
request_kwargs["default_headers"] = self._copilot_headers_for_request(is_vision=True)
@ -4099,7 +4101,7 @@ class AIAgent:
self._client_kwargs["default_headers"] = build_nvidia_nim_headers(base_url)
elif base_url_host_matches(base_url, "api.routermint.com"):
self._client_kwargs["default_headers"] = _routermint_headers()
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif base_url_host_matches(base_url, "githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
self._client_kwargs["default_headers"] = copilot_default_headers()
@ -4995,7 +4997,7 @@ class AIAgent:
return True
if (
base_url_host_matches(self._base_url_lower, "models.github.ai")
or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com")
or base_url_host_matches(self._base_url_lower, "githubcopilot.com")
):
try:
from hermes_cli.models import github_model_reasoning_efforts

View file

@ -320,3 +320,31 @@ def test_openrouter_headers_no_cache_when_disabled(mock_openai):
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
assert "X-OpenRouter-Cache" not in headers
assert "X-OpenRouter-Cache-TTL" not in headers
@patch("run_agent.OpenAI")
def test_copilot_enterprise_base_url_applies_copilot_default_headers(mock_openai):
"""Enterprise Copilot endpoints (api.<tenant>.githubcopilot.com) must apply
the same default_headers including Copilot-Integration-Id: vscode-chat
as the default api.githubcopilot.com endpoint. Without this, the upstream
sees the request as integrator 'zed' or 'copilot-language-server' and
rejects it with a 400 error for many models (regression seen May 2026)."""
mock_openai.return_value = MagicMock()
agent = AIAgent(
api_key="test-key",
base_url="https://api.enterprise.githubcopilot.com",
model="claude-opus-4.6-1m",
provider="copilot",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
agent._apply_client_headers_for_base_url("https://api.enterprise.githubcopilot.com")
headers = agent._client_kwargs.get("default_headers", {})
# Lookup is case-insensitive — normalize for the assertion.
lc = {k.lower(): v for k, v in headers.items()}
assert lc.get("copilot-integration-id") == "vscode-chat", (
f"enterprise Copilot endpoint must carry Copilot-Integration-Id=vscode-chat; got {headers}"
)

View file

@ -5699,6 +5699,13 @@ class TestMaxTokensParam:
result = agent._max_tokens_param(4096)
assert result == {"max_tokens": 4096}
def test_returns_max_completion_tokens_for_enterprise_copilot(self, agent):
"""Enterprise Copilot endpoints (api.<tenant>.githubcopilot.com) must
share the same max_tokens behavior as the default endpoint."""
agent.base_url = "https://api.enterprise.githubcopilot.com"
result = agent._max_tokens_param(4096)
assert result == {"max_completion_tokens": 4096}
class TestGpt5ApiModeRouting:
"""Verify provider-specific GPT-5 API-mode routing."""