mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-18 09:51:59 +00:00
fix(xai): scope native web_search to swap-only + reconcile composer ctx to 200k
Salvage corrections on top of @XVVH's #44341:
- Make native web_search injection a 1:1 swap for an already-present client web_search function, NOT an additive grant. The original unconditionally appended {"type":"web_search"} on every is_xai_responses turn with any tools, force-enabling Grok server-side search even when the user never enabled the web toolset (bypassing Hermes web-provider config + tool-trace plumbing). Now gated on a client web_search actually being present.
- Reconcile grok-composer context to 200000 (merged in #47908) rather than 262144; 200k is xAI's published usable context window for Composer 2.5, 262144 is the /v1/responses input+output budget.
- Update tests to match scoped behavior + add a no-web-toolset guard test.
- AUTHOR_MAP entry for #44341 salvage.

Incomplete-guard (server-side *_call items at in_progress no longer flip has_incomplete_items) and preflight built-in-tool allowlist kept as-is.
This commit is contained in:
parent
6f89e17a33
commit
c5eb64b9f7
6 changed files with 89 additions and 38 deletions
|
|
@ -275,11 +275,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
# via a custom provider. Values sourced from models.dev (2026-04).
|
||||
# Keys use substring matching (longest-first), so e.g. "grok-4.20"
|
||||
# matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
|
||||
# OAuth-only slug; absent from GET /v1/models. xAI publishes a 200k
|
||||
# usable context window for Composer 2.5 on Grok Build (SuperGrok /
|
||||
# Premium+); /v1/responses additionally enforces a ~262144 input+output
|
||||
# budget, but the usable context (what we track here) is 200k.
|
||||
"grok-composer": 200000, # grok-composer-2.5-fast (Grok Build CLI)
|
||||
"grok-build": 256000, # grok-build-0.1
|
||||
# OAuth-only slug; absent from GET /v1/models. Live /v1/responses probe
|
||||
# (2026-03) enforces ~262144 tokens total (input+output), not 131k.
|
||||
"grok-composer": 262144, # grok-composer-2.5-fast
|
||||
"grok-code-fast": 256000, # grok-code-fast-1
|
||||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning, also matches -reasoning
|
||||
|
|
|
|||
|
|
@ -147,29 +147,45 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
# response remained incomplete after 3 continuation attempts".
|
||||
# Verified live against grok-composer-2.5-fast (2026-06).
|
||||
#
|
||||
# Fix: declare xAI's native ``web_search`` built-in so the search
|
||||
# actually runs to completion server-side and the model streams a
|
||||
# real answer. The Responses API rejects two tools sharing the
|
||||
# name ``web_search`` (HTTP 400 "Duplicate tool names"), so we
|
||||
# drop the client-side ``web_search`` function for the xAI path
|
||||
# and let the native tool satisfy it. All other client-side
|
||||
# tools (read_file, terminal, web_extract, MCP tools, …) are
|
||||
# untouched and continue to dispatch through Hermes's agent loop.
|
||||
# Fix: when the agent HAS a client-side ``web_search`` function (i.e.
|
||||
# the user enabled the web toolset), declare xAI's native
|
||||
# ``web_search`` built-in instead so the search actually runs to
|
||||
# completion server-side and the model streams a real answer. The
|
||||
# Responses API rejects two tools sharing the name ``web_search``
|
||||
# (HTTP 400 "Duplicate tool names"), so we drop the client-side
|
||||
# ``web_search`` function for the xAI path and let the native tool
|
||||
# satisfy it. All other client-side tools (read_file, terminal,
|
||||
# web_extract, MCP tools, …) are untouched and continue to dispatch
|
||||
# through Hermes's agent loop.
|
||||
#
|
||||
# NOTE: this routes ``web_search`` to Grok's native search engine
|
||||
# for xAI sessions instead of Hermes's configured web provider
|
||||
# (Tavily/etc.), and those results bypass Hermes's tool-trace /
|
||||
# citation plumbing (they arrive baked into the model's answer
|
||||
# rather than as a tool result the loop observes). Scoped to
|
||||
# ``is_xai_responses`` deliberately; narrow to specific models if
|
||||
# a future grok variant should keep the client-side function.
|
||||
if is_xai_responses:
|
||||
filtered = [
|
||||
t for t in (response_tools or [])
|
||||
if not (isinstance(t, dict) and t.get("name") == "web_search")
|
||||
]
|
||||
filtered.append({"type": "web_search"})
|
||||
response_tools = filtered
|
||||
# Scope: we ONLY swap in the native built-in when the client
|
||||
# ``web_search`` was actually present. We do NOT force-enable Grok
|
||||
# server-side search on turns where the user never had web enabled —
|
||||
# that would silently route around Hermes's web-provider config and
|
||||
# tool-trace/citation plumbing for every xai-oauth turn. The swap is
|
||||
# a 1:1 replacement of an already-requested capability, not an
|
||||
# additive grant.
|
||||
#
|
||||
# NOTE: for the swapped case this routes ``web_search`` to Grok's
|
||||
# native search engine for xAI sessions instead of Hermes's
|
||||
# configured web provider (Tavily/etc.), and those results bypass
|
||||
# Hermes's tool-trace / citation plumbing (they arrive baked into the
|
||||
# model's answer rather than as a tool result the loop observes).
|
||||
# Scoped to ``is_xai_responses`` deliberately; narrow to specific
|
||||
# models if a future grok variant should keep the client-side
|
||||
# function.
|
||||
if is_xai_responses and response_tools:
|
||||
has_client_web_search = any(
|
||||
isinstance(t, dict) and t.get("name") == "web_search"
|
||||
for t in response_tools
|
||||
)
|
||||
if has_client_web_search:
|
||||
filtered = [
|
||||
t for t in response_tools
|
||||
if not (isinstance(t, dict) and t.get("name") == "web_search")
|
||||
]
|
||||
filtered.append({"type": "web_search"})
|
||||
response_tools = filtered
|
||||
|
||||
# ``tools`` MUST be omitted entirely when there are no functions to
|
||||
# expose: the openai SDK's ``responses.stream()`` / ``responses.parse()``
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ AUTHOR_MAP = {
|
|||
"zheng@omegasys.eu": "omegazheng",
|
||||
"220877172+james47kjv@users.noreply.github.com": "james47kjv",
|
||||
"yuhanglin@YuhangdeMac-mini.local": "1960697431",
|
||||
"admin@fent.quest": "XVVH",
|
||||
"despitemeguru@gmail.com": "definitelynotguru",
|
||||
"chaslui@outlook.com": "ChasLui",
|
||||
"rio.jeong@thebytesize.ai": "rio-jeong",
|
||||
|
|
|
|||
|
|
@ -142,7 +142,7 @@ class TestDefaultContextLengths:
|
|||
("grok-4", 256000),
|
||||
("grok-4-0709", 256000),
|
||||
("grok-build-0.1", 256000),
|
||||
("grok-composer-2.5-fast", 262144),
|
||||
("grok-composer-2.5-fast", 200000),
|
||||
("grok-code-fast-1", 256000),
|
||||
("grok-3", 131072),
|
||||
("grok-3-mini", 131072),
|
||||
|
|
|
|||
|
|
@ -263,12 +263,43 @@ class TestCodexBuildKwargs:
|
|||
# full history.
|
||||
assert "reasoning.encrypted_content" in kw.get("include", [])
|
||||
|
||||
def test_xai_injects_native_web_search_tool(self, transport):
|
||||
"""xAI path declares xAI's native server-side web_search built-in so
|
||||
grok server-side search runs to completion (otherwise the turn stalls
|
||||
as reasoning-with-no-answer -> false 'incomplete' -> 3 retries -> fail).
|
||||
def test_xai_injects_native_web_search_when_client_web_search_present(self, transport):
|
||||
"""xAI path swaps a client-side ``web_search`` function for xAI's
|
||||
native server-side ``web_search`` built-in so grok server-side search
|
||||
runs to completion (otherwise the turn stalls as
|
||||
reasoning-with-no-answer -> false 'incomplete' -> 3 retries -> fail).
|
||||
Non-conflicting client tools are preserved.
|
||||
"""
|
||||
messages = [{"role": "user", "content": "Find current prices."}]
|
||||
kw = transport.build_kwargs(
|
||||
model="grok-composer-2.5-fast", messages=messages,
|
||||
tools=[
|
||||
{"type": "function", "function": {
|
||||
"name": "read_file", "description": "Read a file.",
|
||||
"parameters": {"type": "object",
|
||||
"properties": {"path": {"type": "string"}}}}},
|
||||
{"type": "function", "function": {
|
||||
"name": "web_search", "description": "Search the web.",
|
||||
"parameters": {"type": "object",
|
||||
"properties": {"query": {"type": "string"}}}}},
|
||||
],
|
||||
is_xai_responses=True,
|
||||
)
|
||||
tool_types = [t.get("type") for t in kw.get("tools", [])]
|
||||
assert "web_search" in tool_types, kw.get("tools")
|
||||
# Non-conflicting client-side tools are preserved.
|
||||
names = [t.get("name") for t in kw.get("tools", []) if t.get("type") == "function"]
|
||||
assert "read_file" in names
|
||||
|
||||
def test_xai_does_not_inject_native_web_search_without_client_web_search(self, transport):
|
||||
"""The native ``web_search`` built-in is a 1:1 swap for an
|
||||
already-requested client ``web_search`` — NOT an additive grant. A
|
||||
turn whose toolset has no ``web_search`` (user never enabled the web
|
||||
toolset) must not get Grok server-side search force-injected, which
|
||||
would silently bypass Hermes's web-provider config and tool-trace
|
||||
plumbing for every xai-oauth turn.
|
||||
"""
|
||||
messages = [{"role": "user", "content": "Read this file."}]
|
||||
kw = transport.build_kwargs(
|
||||
model="grok-composer-2.5-fast", messages=messages,
|
||||
tools=[{"type": "function", "function": {
|
||||
|
|
@ -277,10 +308,9 @@ class TestCodexBuildKwargs:
|
|||
"properties": {"path": {"type": "string"}}}}}],
|
||||
is_xai_responses=True,
|
||||
)
|
||||
tool_types = [t.get("type") for t in kw.get("tools", [])]
|
||||
assert "web_search" in tool_types, kw.get("tools")
|
||||
# Non-conflicting client-side tools are preserved.
|
||||
names = [t.get("name") for t in kw.get("tools", []) if t.get("type") == "function"]
|
||||
tools = kw.get("tools", [])
|
||||
assert not any(t.get("type") == "web_search" for t in tools), tools
|
||||
names = [t.get("name") for t in tools if t.get("type") == "function"]
|
||||
assert "read_file" in names
|
||||
|
||||
def test_xai_drops_clientside_web_search_to_avoid_duplicate(self, transport):
|
||||
|
|
|
|||
|
|
@ -949,15 +949,18 @@ def test_grok_4_still_resolves_to_256k():
|
|||
assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 256_000
|
||||
|
||||
|
||||
def test_grok_composer_context_length_is_262k():
|
||||
def test_grok_composer_context_length_is_200k():
|
||||
"""grok-composer-2.5-fast is OAuth-only and missing from /v1/models.
|
||||
|
||||
Without a specific entry it fell through to the generic ``grok`` 131k
|
||||
catch-all, under-reporting ~262k enforced on /v1/responses.
|
||||
catch-all. xAI publishes a 200k usable context window for Composer 2.5
|
||||
on Grok Build (SuperGrok / Premium+); /v1/responses additionally caps
|
||||
the input+output budget at ~262144, but the usable context (what we
|
||||
track) is 200k.
|
||||
"""
|
||||
from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
|
||||
|
||||
assert DEFAULT_CONTEXT_LENGTHS["grok-composer"] == 262_144
|
||||
assert DEFAULT_CONTEXT_LENGTHS["grok-composer"] == 200_000
|
||||
slug = "grok-composer-2.5-fast"
|
||||
matched_key = max(
|
||||
(k for k in DEFAULT_CONTEXT_LENGTHS if k in slug.lower()),
|
||||
|
|
@ -966,7 +969,7 @@ def test_grok_composer_context_length_is_262k():
|
|||
assert matched_key == "grok-composer", (
|
||||
f"Expected longest-first match on grok-composer for {slug}, got {matched_key}"
|
||||
)
|
||||
assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 262_144
|
||||
assert DEFAULT_CONTEXT_LENGTHS[matched_key] == 200_000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue