fix(agent): validate context/memory tool schemas before wrapping

Closes #47707 Context engines and memory providers expose tool schemas via get_tool_schemas(). agent_init.py wrapped each as {"type":"function","function":_schema} without validating that _schema carries a top-level name. A provider returning an entry already in OpenAI tool form ({"type":"function","function":{...}}) was then double-wrapped into a tool whose function has no name. Strict providers (e.g. DeepSeek) reject the entire request with HTTP 400 'tools[N].function: missing field name', so one malformed schema silently disables the whole toolset and breaks every turn. The schema was also never added to valid_tool_names, so even lenient providers could not call it. Add a shared normalize_tool_schema() helper that unwraps an already-wrapped entry and returns None for anything lacking a resolvable string name. Wire it into the agent_init context-engine loop and all three memory_manager surfaces (inject_memory_provider_tools, add_provider routing index, get_all_tool_schemas), so a single bad plugin schema is skipped with a warning instead of poisoning the request. Verification: 209 targeted agent/memory tests pass (incl. 9 new). New tests assert the unwrap + skip-nameless behavior and fail without the fix.
2026-06-27 11:22:03 +00:00 · 2026-06-17 03:38:07 -04:00 · 2026-06-17 03:38:07 -04:00 · 710cd48fb1
commit 710cd48fb1
parent 77d2b50751
3 changed files with 177 additions and 16 deletions
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@ -1621,16 +1621,27 @@ def init_agent(
            for t in agent.tools
            if isinstance(t, dict)
        }
-        for _schema in agent.context_compressor.get_tool_schemas():
-            _tname = _schema.get("name", "")
-            if _tname and _tname in _existing_tool_names:
+        from agent.memory_manager import normalize_tool_schema as _normalize_tool_schema
+        for _raw_schema in agent.context_compressor.get_tool_schemas():
+            _schema = _normalize_tool_schema(_raw_schema)
+            if _schema is None:
+                # A schema with no resolvable name (e.g. an already-wrapped
+                # entry) would append a nameless tool that strict providers
+                # 400 on, disabling the whole toolset (#47707). Skip it.
+                _ra().logger.warning(
+                    "Context engine returned a tool schema with no resolvable "
+                    "name; skipping to avoid poisoning the request (%r)",
+                    _raw_schema,
+                )
+                continue
+            _tname = _schema["name"]
+            if _tname in _existing_tool_names:
                continue  # already registered via plugin/cache path
            _wrapped = {"type": "function", "function": _schema}
            agent.tools.append(_wrapped)
-            if _tname:
-                agent.valid_tool_names.add(_tname)
-                agent._context_engine_tool_names.add(_tname)
-                _existing_tool_names.add(_tname)
+            agent.valid_tool_names.add(_tname)
+            agent._context_engine_tool_names.add(_tname)
+            _existing_tool_names.add(_tname)

    # Notify context engine of session start
    if hasattr(agent, "context_compressor") and agent.context_compressor:
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@ -46,6 +46,39 @@ logger = logging.getLogger(__name__)
 _SYNC_DRAIN_TIMEOUT_S = 5.0


+def normalize_tool_schema(schema: Any) -> Optional[Dict[str, Any]]:
+    """Return a function-tool dict with a resolvable top-level ``name``.
+
+    Context engines and memory providers expose tool schemas via
+    ``get_tool_schemas()``. The expected shape is a bare function schema
+    (``{"name": ..., "description": ..., "parameters": ...}``) which callers
+    wrap as ``{"type": "function", "function": schema}``.
+
+    Some providers instead return an entry that is *already* in OpenAI tool
+    form (``{"type": "function", "function": {"name": ...}}``). Wrapping that
+    a second time produces ``{"type": "function", "function": {"type":
+    "function", "function": {...}}}`` whose ``function`` has no top-level
+    ``name``. Strict providers (e.g. DeepSeek) reject the *entire* request
+    with ``tools[N].function: missing field name`` (HTTP 400), so one bad
+    schema disables the whole toolset and breaks every turn (#47707).
+
+    This helper normalizes both shapes to the bare function schema and
+    returns ``None`` for anything without a resolvable name, so callers can
+    skip-with-warning rather than appending a nameless tool.
+    """
+    if not isinstance(schema, dict):
+        return None
+    # Unwrap an already-wrapped OpenAI tool entry.
+    if schema.get("type") == "function" and isinstance(schema.get("function"), dict):
+        schema = schema["function"]
+        if not isinstance(schema, dict):
+            return None
+    name = schema.get("name", "")
+    if not name or not isinstance(name, str):
+        return None
+    return schema
+
+
 def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
    """Return whether external memory-provider tools should be exposed."""
    if enabled_toolsets is None:
@ -92,11 +125,17 @@ def inject_memory_provider_tools(agent: Any) -> int:
        agent.valid_tool_names = valid_tool_names

    added = 0
-    for schema in get_schemas():
-        if not isinstance(schema, dict):
+    for raw_schema in get_schemas():
+        schema = normalize_tool_schema(raw_schema)
+        if schema is None:
+            logger.warning(
+                "Memory provider returned a tool schema with no resolvable "
+                "name; skipping to avoid poisoning the request (%r)",
+                raw_schema,
+            )
            continue
-        tool_name = schema.get("name", "")
-        if not tool_name or tool_name in existing_tool_names:
+        tool_name = schema["name"]
+        if tool_name in existing_tool_names:
            continue
        tools.append({"type": "function", "function": schema})
        valid_tool_names.add(tool_name)
@ -370,8 +409,11 @@ class MemoryManager:
        _core_tool_names = set(_HERMES_CORE_TOOLS)

        # Index tool names → provider for routing
-        for schema in provider.get_tool_schemas():
-            tool_name = schema.get("name", "")
+        for raw_schema in provider.get_tool_schemas():
+            schema = normalize_tool_schema(raw_schema)
+            if schema is None:
+                continue
+            tool_name = schema["name"]
            if tool_name in _core_tool_names:
                logger.warning(
                    "Memory provider '%s' tool '%s' shadows a reserved core "
@ -658,11 +700,19 @@ class MemoryManager:
        seen = set()
        for provider in self._providers:
            try:
-                for schema in provider.get_tool_schemas():
-                    name = schema.get("name", "")
+                for raw_schema in provider.get_tool_schemas():
+                    schema = normalize_tool_schema(raw_schema)
+                    if schema is None:
+                        logger.warning(
+                            "Memory provider '%s' returned a tool schema with "
+                            "no resolvable name; skipping (%r)",
+                            provider.name, raw_schema,
+                        )
+                        continue
+                    name = schema["name"]
                    if name in _core_tool_names:
                        continue
-                    if name and name not in seen:
+                    if name not in seen:
                        schemas.append(schema)
                        seen.add(name)
            except Exception as e:
--- a/tests/agent/test_memory_provider.py
+++ b/tests/agent/test_memory_provider.py
@ -1487,3 +1487,103 @@ class TestContextEngineToolsetGate:
        """Gate is moot without a context_compressor."""
        tools, names, engine_names = self._run_context_engine_injection(None, None)
        assert tools == []
+
+
+class TestNormalizeToolSchema:
+    """Issue #47707: one malformed tool schema must not poison the request.
+
+    Context engines / memory providers expose schemas via get_tool_schemas().
+    The expected shape is a bare function schema; some providers return an
+    entry already in OpenAI tool form ({"type":"function","function":{...}}).
+    Wrapping that a second time yields a tool whose `function` has no
+    top-level `name`, which strict providers (DeepSeek) reject with HTTP 400
+    `tools[N].function: missing field name` — disabling the entire toolset.
+    """
+
+    def test_bare_schema_passthrough(self):
+        from agent.memory_manager import normalize_tool_schema
+        s = {"name": "x_grep", "description": "d", "parameters": {}}
+        assert normalize_tool_schema(s) == s
+
+    def test_already_wrapped_schema_is_unwrapped(self):
+        from agent.memory_manager import normalize_tool_schema
+        wrapped = {
+            "type": "function",
+            "function": {"name": "x_grep", "description": "d", "parameters": {}},
+        }
+        out = normalize_tool_schema(wrapped)
+        assert out is not None
+        assert out["name"] == "x_grep"
+        # Must be the inner function schema, not the wrapper.
+        assert "type" not in out or out.get("type") != "function"
+
+    def test_nameless_schema_rejected(self):
+        from agent.memory_manager import normalize_tool_schema
+        assert normalize_tool_schema({"description": "no name"}) is None
+
+    def test_double_wrapped_without_name_rejected(self):
+        from agent.memory_manager import normalize_tool_schema
+        # The exact poisoning shape from #47707.
+        assert normalize_tool_schema(
+            {"type": "function", "function": {"type": "function",
+             "function": {"name": "x"}}}
+        ) is None
+
+    def test_non_dict_rejected(self):
+        from agent.memory_manager import normalize_tool_schema
+        assert normalize_tool_schema("nope") is None
+        assert normalize_tool_schema(None) is None
+
+    def test_non_string_name_rejected(self):
+        from agent.memory_manager import normalize_tool_schema
+        assert normalize_tool_schema({"name": 123}) is None
+
+
+class TestMemoryInjectionRejectsMalformedSchema:
+    """The real inject_memory_provider_tools must skip nameless schemas.
+
+    Without the #47707 fix, an already-wrapped schema is appended as a
+    nameless tool ({"type":"function","function":{"type":"function",...}}),
+    poisoning the whole tool surface. With the fix it is skipped (or, for a
+    well-formed-but-wrapped schema, unwrapped to a valid tool).
+    """
+
+    def _agent_with(self, *schemas):
+        mgr = MemoryManager()
+        mgr.add_provider(FakeMemoryProvider("ext", tools=list(schemas)))
+        return SimpleNamespace(
+            _memory_manager=mgr,
+            enabled_toolsets=None,
+            tools=[],
+            valid_tool_names=set(),
+        )
+
+    def test_already_wrapped_schema_is_unwrapped_not_poisoned(self):
+        agent = self._agent_with(
+            {"type": "function",
+             "function": {"name": "x_grep", "description": "d", "parameters": {}}}
+        )
+        inject_memory_provider_tools(agent)
+        # Exactly one well-formed tool, with a top-level function name.
+        assert len(agent.tools) == 1
+        fn = agent.tools[0]["function"]
+        assert fn["name"] == "x_grep"
+        # No nested double-wrap leaked through.
+        assert fn.get("type") != "function"
+        assert "x_grep" in agent.valid_tool_names
+
+    def test_nameless_schema_is_skipped(self):
+        agent = self._agent_with({"description": "no name at all"})
+        inject_memory_provider_tools(agent)
+        assert agent.tools == []
+        assert agent.valid_tool_names == set()
+
+    def test_good_schema_still_injected_alongside_bad(self):
+        agent = self._agent_with(
+            {"name": "good_tool", "description": "d", "parameters": {}},
+            {"description": "bad, no name"},
+        )
+        inject_memory_provider_tools(agent)
+        names = {t["function"]["name"] for t in agent.tools}
+        assert names == {"good_tool"}
+        assert agent.valid_tool_names == {"good_tool"}