fix(xai): scope native web_search to swap-only + reconcile composer ctx to 200k

Salvage corrections on top of @XVVH's #44341:
- Make native web_search injection a 1:1 swap for an already-present client
  web_search function, NOT an additive grant. The original unconditionally
  appended {"type":"web_search"} on every is_xai_responses turn with any
  tools, force-enabling Grok server-side search even when the user never
  enabled the web toolset (bypassing Hermes web-provider config + tool-trace
  plumbing). Now gated on a client web_search actually being present.
- Reconcile grok-composer context to 200000 (merged in #47908) rather than
  262144: 200k is xAI's published usable context window for Composer 2.5,
  whereas 262144 is the /v1/responses input+output budget.
- Update tests to match scoped behavior + add a no-web-toolset guard test.
- AUTHOR_MAP entry for #44341 salvage.

The incomplete-guard (server-side *_call items at in_progress no longer flip
has_incomplete_items) and the preflight built-in-tool allowlist are kept as-is.
This commit is contained in:
teknium1 2026-06-17 16:44:12 -07:00 committed by Teknium
parent 6f89e17a33
commit c5eb64b9f7
6 changed files with 89 additions and 38 deletions

View file

@ -275,11 +275,12 @@ DEFAULT_CONTEXT_LENGTHS = {
# via a custom provider. Values sourced from models.dev (2026-04).
# Keys use substring matching (longest-first), so e.g. "grok-4.20"
# matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
# OAuth-only slug; absent from GET /v1/models. xAI publishes a 200k
# usable context window for Composer 2.5 on Grok Build (SuperGrok /
# Premium+); /v1/responses additionally enforces a ~262144 input+output
# budget, but the usable context (what we track here) is 200k.
"grok-composer": 200000, # grok-composer-2.5-fast (Grok Build CLI)
"grok-build": 256000, # grok-build-0.1
# OAuth-only slug; absent from GET /v1/models. Live /v1/responses probe
# (2026-03) enforces ~262144 tokens total (input+output), not 131k.
"grok-composer": 262144, # grok-composer-2.5-fast
"grok-code-fast": 256000, # grok-code-fast-1
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning, also matches -reasoning

View file

@ -147,29 +147,45 @@ class ResponsesApiTransport(ProviderTransport):
# response remained incomplete after 3 continuation attempts".
# Verified live against grok-composer-2.5-fast (2026-06).
#
# Fix: declare xAI's native ``web_search`` built-in so the search
# actually runs to completion server-side and the model streams a
# real answer. The Responses API rejects two tools sharing the
# name ``web_search`` (HTTP 400 "Duplicate tool names"), so we
# drop the client-side ``web_search`` function for the xAI path
# and let the native tool satisfy it. All other client-side
# tools (read_file, terminal, web_extract, MCP tools, …) are
# untouched and continue to dispatch through Hermes's agent loop.
# Fix: when the agent HAS a client-side ``web_search`` function (i.e.
# the user enabled the web toolset), declare xAI's native
# ``web_search`` built-in instead so the search actually runs to
# completion server-side and the model streams a real answer. The
# Responses API rejects two tools sharing the name ``web_search``
# (HTTP 400 "Duplicate tool names"), so we drop the client-side
# ``web_search`` function for the xAI path and let the native tool
# satisfy it. All other client-side tools (read_file, terminal,
# web_extract, MCP tools, …) are untouched and continue to dispatch
# through Hermes's agent loop.
#
# NOTE: this routes ``web_search`` to Grok's native search engine
# for xAI sessions instead of Hermes's configured web provider
# (Tavily/etc.), and those results bypass Hermes's tool-trace /
# citation plumbing (they arrive baked into the model's answer
# rather than as a tool result the loop observes). Scoped to
# ``is_xai_responses`` deliberately; narrow to specific models if
# a future grok variant should keep the client-side function.
if is_xai_responses:
filtered = [
t for t in (response_tools or [])
if not (isinstance(t, dict) and t.get("name") == "web_search")
]
filtered.append({"type": "web_search"})
response_tools = filtered
# Scope: we ONLY swap in the native built-in when the client
# ``web_search`` was actually present. We do NOT force-enable Grok
# server-side search on turns where the user never had web enabled —
# that would silently route around Hermes's web-provider config and
# tool-trace/citation plumbing for every xai-oauth turn. The swap is
# a 1:1 replacement of an already-requested capability, not an
# additive grant.
#
# NOTE: for the swapped case this routes ``web_search`` to Grok's
# native search engine for xAI sessions instead of Hermes's
# configured web provider (Tavily/etc.), and those results bypass
# Hermes's tool-trace / citation plumbing (they arrive baked into the
# model's answer rather than as a tool result the loop observes).
# Scoped to ``is_xai_responses`` deliberately; narrow to specific
# models if a future grok variant should keep the client-side
# function.
if is_xai_responses and response_tools:
has_client_web_search = any(
isinstance(t, dict) and t.get("name") == "web_search"
for t in response_tools
)
if has_client_web_search:
filtered = [
t for t in response_tools
if not (isinstance(t, dict) and t.get("name") == "web_search")
]
filtered.append({"type": "web_search"})
response_tools = filtered
# ``tools`` MUST be omitted entirely when there are no functions to
# expose: the openai SDK's ``responses.stream()`` / ``responses.parse()``

View file

@ -49,6 +49,7 @@ AUTHOR_MAP = {
"zheng@omegasys.eu": "omegazheng",
"220877172+james47kjv@users.noreply.github.com": "james47kjv",
"yuhanglin@YuhangdeMac-mini.local": "1960697431",
"admin@fent.quest": "XVVH",
"despitemeguru@gmail.com": "definitelynotguru",
"chaslui@outlook.com": "ChasLui",
"rio.jeong@thebytesize.ai": "rio-jeong",

View file

@ -142,7 +142,7 @@ class TestDefaultContextLengths:
("grok-4", 256000),
("grok-4-0709", 256000),
("grok-build-0.1", 256000),
("grok-composer-2.5-fast", 262144),
("grok-composer-2.5-fast", 200000),
("grok-code-fast-1", 256000),
("grok-3", 131072),
("grok-3-mini", 131072),

View file

@ -263,12 +263,43 @@ class TestCodexBuildKwargs:
# full history.
assert "reasoning.encrypted_content" in kw.get("include", [])
def test_xai_injects_native_web_search_tool(self, transport):
"""xAI path declares xAI's native server-side web_search built-in so
grok server-side search runs to completion (otherwise the turn stalls
as reasoning-with-no-answer -> false 'incomplete' -> 3 retries -> fail).
def test_xai_injects_native_web_search_when_client_web_search_present(self, transport):
"""xAI path swaps a client-side ``web_search`` function for xAI's
native server-side ``web_search`` built-in so grok server-side search
runs to completion (otherwise the turn stalls as
reasoning-with-no-answer -> false 'incomplete' -> 3 retries -> fail).
Non-conflicting client tools are preserved.
"""
messages = [{"role": "user", "content": "Find current prices."}]
kw = transport.build_kwargs(
model="grok-composer-2.5-fast", messages=messages,
tools=[
{"type": "function", "function": {
"name": "read_file", "description": "Read a file.",
"parameters": {"type": "object",
"properties": {"path": {"type": "string"}}}}},
{"type": "function", "function": {
"name": "web_search", "description": "Search the web.",
"parameters": {"type": "object",
"properties": {"query": {"type": "string"}}}}},
],
is_xai_responses=True,
)
tool_types = [t.get("type") for t in kw.get("tools", [])]
assert "web_search" in tool_types, kw.get("tools")
# Non-conflicting client-side tools are preserved.
names = [t.get("name") for t in kw.get("tools", []) if t.get("type") == "function"]
assert "read_file" in names
def test_xai_does_not_inject_native_web_search_without_client_web_search(self, transport):
"""The native ``web_search`` built-in is a 1:1 swap for an
already-requested client ``web_search``, NOT an additive grant. A
turn whose toolset has no ``web_search`` (user never enabled the web
toolset) must not get Grok server-side search force-injected, which
would silently bypass Hermes's web-provider config and tool-trace
plumbing for every xai-oauth turn.
"""
messages = [{"role": "user", "content": "Read this file."}]
kw = transport.build_kwargs(
model="grok-composer-2.5-fast", messages=messages,
tools=[{"type": "function", "function": {
@ -277,10 +308,9 @@ class TestCodexBuildKwargs:
"properties": {"path": {"type": "string"}}}}}],
is_xai_responses=True,
)
tool_types = [t.get("type") for t in kw.get("tools", [])]
assert "web_search" in tool_types, kw.get("tools")
# Non-conflicting client-side tools are preserved.
names = [t.get("name") for t in kw.get("tools", []) if t.get("type") == "function"]
tools = kw.get("tools", [])
assert not any(t.get("type") == "web_search" for t in tools), tools
names = [t.get("name") for t in tools if t.get("type") == "function"]
assert "read_file" in names
def test_xai_drops_clientside_web_search_to_avoid_duplicate(self, transport):

View file

@ -949,15 +949,18 @@ def test_grok_4_still_resolves_to_256k():
assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000
def test_grok_composer_context_length_is_262k():
def test_grok_composer_context_length_is_200k():
"""grok-composer-2.5-fast is OAuth-only and missing from /v1/models.
Without a specific entry it fell through to the generic ``grok`` 131k
catch-all, under-reporting ~262k enforced on /v1/responses.
catch-all. xAI publishes a 200k usable context window for Composer 2.5
on Grok Build (SuperGrok / Premium+); /v1/responses additionally caps
the input+output budget at ~262144, but the usable context (what we
track) is 200k.
"""
from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
assert DEFAULT_CONTEXT_LENGTHS["grok-composer"] == 262_144
assert DEFAULT_CONTEXT_LENGTHS["grok-composer"] == 200_000
slug = "grok-composer-2.5-fast"
matched_key = max(
(k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()),
@ -966,7 +969,7 @@ def test_grok_composer_context_length_is_262k():
assert matched_key == "grok-composer", (
f"Expected longest-first match on grok-composer for {slug}, got {matched_key}"
)
assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 262_144
assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 200_000
# ---------------------------------------------------------------------------