fix(agent): validate context/memory tool schemas before wrapping

Closes #47707

Context engines and memory providers expose tool schemas via
get_tool_schemas(). agent_init.py wrapped each as
{"type":"function","function":_schema} without validating that
_schema carries a top-level name. A provider returning an entry already
in OpenAI tool form ({"type":"function","function":{...}}) was then
double-wrapped into a tool whose function has no name. Strict providers
(e.g. DeepSeek) reject the entire request with HTTP 400
'tools[N].function: missing field name', so one malformed schema
silently disables the whole toolset and breaks every turn. The schema
was also never added to valid_tool_names, so even lenient providers
could not call it.

Add a shared normalize_tool_schema() helper that unwraps an
already-wrapped entry and returns None for anything lacking a resolvable
string name. Wire it into the agent_init context-engine loop and all
three memory_manager surfaces (inject_memory_provider_tools,
add_provider routing index, get_all_tool_schemas), so a single bad
plugin schema is skipped with a warning instead of poisoning the
request.

Verification: 209 targeted agent/memory tests pass (incl. 9 new).
New tests assert the unwrap + skip-nameless behavior and fail without
the fix.
This commit is contained in:
Bartok9 2026-06-17 03:38:07 -04:00 committed by kshitijk4poor
parent 77d2b50751
commit 710cd48fb1
3 changed files with 177 additions and 16 deletions

View file

@ -1621,16 +1621,27 @@ def init_agent(
for t in agent.tools
if isinstance(t, dict)
}
for _schema in agent.context_compressor.get_tool_schemas():
_tname = _schema.get("name", "")
if _tname and _tname in _existing_tool_names:
from agent.memory_manager import normalize_tool_schema as _normalize_tool_schema
for _raw_schema in agent.context_compressor.get_tool_schemas():
_schema = _normalize_tool_schema(_raw_schema)
if _schema is None:
# A schema with no resolvable name (e.g. an already-wrapped
# entry) would append a nameless tool that strict providers
# 400 on, disabling the whole toolset (#47707). Skip it.
_ra().logger.warning(
"Context engine returned a tool schema with no resolvable "
"name; skipping to avoid poisoning the request (%r)",
_raw_schema,
)
continue
_tname = _schema["name"]
if _tname in _existing_tool_names:
continue # already registered via plugin/cache path
_wrapped = {"type": "function", "function": _schema}
agent.tools.append(_wrapped)
if _tname:
agent.valid_tool_names.add(_tname)
agent._context_engine_tool_names.add(_tname)
_existing_tool_names.add(_tname)
agent.valid_tool_names.add(_tname)
agent._context_engine_tool_names.add(_tname)
_existing_tool_names.add(_tname)
# Notify context engine of session start
if hasattr(agent, "context_compressor") and agent.context_compressor:

View file

@ -46,6 +46,39 @@ logger = logging.getLogger(__name__)
_SYNC_DRAIN_TIMEOUT_S = 5.0
def normalize_tool_schema(schema: Any) -> Optional[Dict[str, Any]]:
"""Return a function-tool dict with a resolvable top-level ``name``.
Context engines and memory providers expose tool schemas via
``get_tool_schemas()``. The expected shape is a bare function schema
(``{"name": ..., "description": ..., "parameters": ...}``) which callers
wrap as ``{"type": "function", "function": schema}``.
Some providers instead return an entry that is *already* in OpenAI tool
form (``{"type": "function", "function": {"name": ...}}``). Wrapping that
a second time produces ``{"type": "function", "function": {"type":
"function", "function": {...}}}`` whose ``function`` has no top-level
``name``. Strict providers (e.g. DeepSeek) reject the *entire* request
with ``tools[N].function: missing field name`` (HTTP 400), so one bad
schema disables the whole toolset and breaks every turn (#47707).
This helper normalizes both shapes to the bare function schema and
returns ``None`` for anything without a resolvable name, so callers can
skip-with-warning rather than appending a nameless tool.
"""
if not isinstance(schema, dict):
return None
# Unwrap an already-wrapped OpenAI tool entry.
if schema.get("type") == "function" and isinstance(schema.get("function"), dict):
schema = schema["function"]
if not isinstance(schema, dict):
return None
name = schema.get("name", "")
if not name or not isinstance(name, str):
return None
return schema
def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
"""Return whether external memory-provider tools should be exposed."""
if enabled_toolsets is None:
@ -92,11 +125,17 @@ def inject_memory_provider_tools(agent: Any) -> int:
agent.valid_tool_names = valid_tool_names
added = 0
for schema in get_schemas():
if not isinstance(schema, dict):
for raw_schema in get_schemas():
schema = normalize_tool_schema(raw_schema)
if schema is None:
logger.warning(
"Memory provider returned a tool schema with no resolvable "
"name; skipping to avoid poisoning the request (%r)",
raw_schema,
)
continue
tool_name = schema.get("name", "")
if not tool_name or tool_name in existing_tool_names:
tool_name = schema["name"]
if tool_name in existing_tool_names:
continue
tools.append({"type": "function", "function": schema})
valid_tool_names.add(tool_name)
@ -370,8 +409,11 @@ class MemoryManager:
_core_tool_names = set(_HERMES_CORE_TOOLS)
# Index tool names → provider for routing
for schema in provider.get_tool_schemas():
tool_name = schema.get("name", "")
for raw_schema in provider.get_tool_schemas():
schema = normalize_tool_schema(raw_schema)
if schema is None:
continue
tool_name = schema["name"]
if tool_name in _core_tool_names:
logger.warning(
"Memory provider '%s' tool '%s' shadows a reserved core "
@ -658,11 +700,19 @@ class MemoryManager:
seen = set()
for provider in self._providers:
try:
for schema in provider.get_tool_schemas():
name = schema.get("name", "")
for raw_schema in provider.get_tool_schemas():
schema = normalize_tool_schema(raw_schema)
if schema is None:
logger.warning(
"Memory provider '%s' returned a tool schema with "
"no resolvable name; skipping (%r)",
provider.name, raw_schema,
)
continue
name = schema["name"]
if name in _core_tool_names:
continue
if name and name not in seen:
if name not in seen:
schemas.append(schema)
seen.add(name)
except Exception as e:

View file

@ -1487,3 +1487,103 @@ class TestContextEngineToolsetGate:
"""Gate is moot without a context_compressor."""
tools, names, engine_names = self._run_context_engine_injection(None, None)
assert tools == []
class TestNormalizeToolSchema:
"""Issue #47707: one malformed tool schema must not poison the request.
Context engines / memory providers expose schemas via get_tool_schemas().
The expected shape is a bare function schema; some providers return an
entry already in OpenAI tool form ({"type":"function","function":{...}}).
Wrapping that a second time yields a tool whose `function` has no
top-level `name`, which strict providers (DeepSeek) reject with HTTP 400
`tools[N].function: missing field name` disabling the entire toolset.
"""
def test_bare_schema_passthrough(self):
from agent.memory_manager import normalize_tool_schema
s = {"name": "x_grep", "description": "d", "parameters": {}}
assert normalize_tool_schema(s) == s
def test_already_wrapped_schema_is_unwrapped(self):
from agent.memory_manager import normalize_tool_schema
wrapped = {
"type": "function",
"function": {"name": "x_grep", "description": "d", "parameters": {}},
}
out = normalize_tool_schema(wrapped)
assert out is not None
assert out["name"] == "x_grep"
# Must be the inner function schema, not the wrapper.
assert "type" not in out or out.get("type") != "function"
def test_nameless_schema_rejected(self):
from agent.memory_manager import normalize_tool_schema
assert normalize_tool_schema({"description": "no name"}) is None
def test_double_wrapped_without_name_rejected(self):
from agent.memory_manager import normalize_tool_schema
# The exact poisoning shape from #47707.
assert normalize_tool_schema(
{"type": "function", "function": {"type": "function",
"function": {"name": "x"}}}
) is None
def test_non_dict_rejected(self):
from agent.memory_manager import normalize_tool_schema
assert normalize_tool_schema("nope") is None
assert normalize_tool_schema(None) is None
def test_non_string_name_rejected(self):
from agent.memory_manager import normalize_tool_schema
assert normalize_tool_schema({"name": 123}) is None
class TestMemoryInjectionRejectsMalformedSchema:
"""The real inject_memory_provider_tools must skip nameless schemas.
Without the #47707 fix, an already-wrapped schema is appended as a
nameless tool ({"type":"function","function":{"type":"function",...}}),
poisoning the whole tool surface. With the fix it is skipped (or, for a
well-formed-but-wrapped schema, unwrapped to a valid tool).
"""
def _agent_with(self, *schemas):
mgr = MemoryManager()
mgr.add_provider(FakeMemoryProvider("ext", tools=list(schemas)))
return SimpleNamespace(
_memory_manager=mgr,
enabled_toolsets=None,
tools=[],
valid_tool_names=set(),
)
def test_already_wrapped_schema_is_unwrapped_not_poisoned(self):
agent = self._agent_with(
{"type": "function",
"function": {"name": "x_grep", "description": "d", "parameters": {}}}
)
inject_memory_provider_tools(agent)
# Exactly one well-formed tool, with a top-level function name.
assert len(agent.tools) == 1
fn = agent.tools[0]["function"]
assert fn["name"] == "x_grep"
# No nested double-wrap leaked through.
assert fn.get("type") != "function"
assert "x_grep" in agent.valid_tool_names
def test_nameless_schema_is_skipped(self):
agent = self._agent_with({"description": "no name at all"})
inject_memory_provider_tools(agent)
assert agent.tools == []
assert agent.valid_tool_names == set()
def test_good_schema_still_injected_alongside_bad(self):
agent = self._agent_with(
{"name": "good_tool", "description": "d", "parameters": {}},
{"description": "bad, no name"},
)
inject_memory_provider_tools(agent)
names = {t["function"]["name"] for t in agent.tools}
assert names == {"good_tool"}
assert agent.valid_tool_names == {"good_tool"}