From 710cd48fb1d649925e159aa08d29993f828c40ee Mon Sep 17 00:00:00 2001 From: Bartok9 Date: Wed, 17 Jun 2026 03:38:07 -0400 Subject: [PATCH] fix(agent): validate context/memory tool schemas before wrapping Closes #47707 Context engines and memory providers expose tool schemas via get_tool_schemas(). agent_init.py wrapped each as {"type":"function","function":_schema} without validating that _schema carries a top-level name. A provider returning an entry already in OpenAI tool form ({"type":"function","function":{...}}) was then double-wrapped into a tool whose function has no name. Strict providers (e.g. DeepSeek) reject the entire request with HTTP 400 'tools[N].function: missing field name', so one malformed schema silently disables the whole toolset and breaks every turn. The schema was also never added to valid_tool_names, so even lenient providers could not call it. Add a shared normalize_tool_schema() helper that unwraps an already-wrapped entry and returns None for anything lacking a resolvable string name. Wire it into the agent_init context-engine loop and all three memory_manager surfaces (inject_memory_provider_tools, add_provider routing index, get_all_tool_schemas), so a single bad plugin schema is skipped with a warning instead of poisoning the request. Verification: 209 targeted agent/memory tests pass (incl. 9 new). New tests assert the unwrap + skip-nameless behavior and fail without the fix. --- agent/agent_init.py | 25 +++++-- agent/memory_manager.py | 68 ++++++++++++++++--- tests/agent/test_memory_provider.py | 100 ++++++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 16 deletions(-) diff --git a/agent/agent_init.py b/agent/agent_init.py index e7f2ed9eac3..ae41af6bb10 100644 --- a/agent/agent_init.py +++ b/agent/agent_init.py @@ -1621,16 +1621,27 @@ def init_agent( for t in agent.tools if isinstance(t, dict) } - for _schema in agent.context_compressor.get_tool_schemas(): - _tname = _schema.get("name", "") - if _tname and _tname in _existing_tool_names: + from agent.memory_manager import normalize_tool_schema as _normalize_tool_schema + for _raw_schema in agent.context_compressor.get_tool_schemas(): + _schema = _normalize_tool_schema(_raw_schema) + if _schema is None: + # A schema with no resolvable name (e.g. an already-wrapped + # entry) would append a nameless tool that strict providers + # 400 on, disabling the whole toolset (#47707). Skip it. + _ra().logger.warning( + "Context engine returned a tool schema with no resolvable " + "name; skipping to avoid poisoning the request (%r)", + _raw_schema, + ) + continue + _tname = _schema["name"] + if _tname in _existing_tool_names: continue # already registered via plugin/cache path _wrapped = {"type": "function", "function": _schema} agent.tools.append(_wrapped) - if _tname: - agent.valid_tool_names.add(_tname) - agent._context_engine_tool_names.add(_tname) - _existing_tool_names.add(_tname) + agent.valid_tool_names.add(_tname) + agent._context_engine_tool_names.add(_tname) + _existing_tool_names.add(_tname) # Notify context engine of session start if hasattr(agent, "context_compressor") and agent.context_compressor: diff --git a/agent/memory_manager.py b/agent/memory_manager.py index b24c76b3107..984499228fe 100644 --- a/agent/memory_manager.py +++ b/agent/memory_manager.py @@ -46,6 +46,39 @@ logger = logging.getLogger(__name__) _SYNC_DRAIN_TIMEOUT_S = 5.0 +def normalize_tool_schema(schema: Any) -> Optional[Dict[str, Any]]: + """Return a function-tool dict with a resolvable top-level ``name``. + + Context engines and memory providers expose tool schemas via + ``get_tool_schemas()``. The expected shape is a bare function schema + (``{"name": ..., "description": ..., "parameters": ...}``) which callers + wrap as ``{"type": "function", "function": schema}``. + + Some providers instead return an entry that is *already* in OpenAI tool + form (``{"type": "function", "function": {"name": ...}}``). Wrapping that + a second time produces ``{"type": "function", "function": {"type": + "function", "function": {...}}}`` whose ``function`` has no top-level + ``name``. Strict providers (e.g. DeepSeek) reject the *entire* request + with ``tools[N].function: missing field name`` (HTTP 400), so one bad + schema disables the whole toolset and breaks every turn (#47707). + + This helper normalizes both shapes to the bare function schema and + returns ``None`` for anything without a resolvable name, so callers can + skip-with-warning rather than appending a nameless tool. + """ + if not isinstance(schema, dict): + return None + # Unwrap an already-wrapped OpenAI tool entry. + if schema.get("type") == "function" and isinstance(schema.get("function"), dict): + schema = schema["function"] + if not isinstance(schema, dict): + return None + name = schema.get("name", "") + if not name or not isinstance(name, str): + return None + return schema + + def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool: """Return whether external memory-provider tools should be exposed.""" if enabled_toolsets is None: @@ -92,11 +125,17 @@ def inject_memory_provider_tools(agent: Any) -> int: agent.valid_tool_names = valid_tool_names added = 0 - for schema in get_schemas(): - if not isinstance(schema, dict): + for raw_schema in get_schemas(): + schema = normalize_tool_schema(raw_schema) + if schema is None: + logger.warning( + "Memory provider returned a tool schema with no resolvable " + "name; skipping to avoid poisoning the request (%r)", + raw_schema, + ) continue - tool_name = schema.get("name", "") - if not tool_name or tool_name in existing_tool_names: + tool_name = schema["name"] + if tool_name in existing_tool_names: continue tools.append({"type": "function", "function": schema}) valid_tool_names.add(tool_name) @@ -370,8 +409,11 @@ class MemoryManager: _core_tool_names = set(_HERMES_CORE_TOOLS) # Index tool names → provider for routing - for schema in provider.get_tool_schemas(): - tool_name = schema.get("name", "") + for raw_schema in provider.get_tool_schemas(): + schema = normalize_tool_schema(raw_schema) + if schema is None: + continue + tool_name = schema["name"] if tool_name in _core_tool_names: logger.warning( "Memory provider '%s' tool '%s' shadows a reserved core " @@ -658,11 +700,19 @@ class MemoryManager: seen = set() for provider in self._providers: try: - for schema in provider.get_tool_schemas(): - name = schema.get("name", "") + for raw_schema in provider.get_tool_schemas(): + schema = normalize_tool_schema(raw_schema) + if schema is None: + logger.warning( + "Memory provider '%s' returned a tool schema with " + "no resolvable name; skipping (%r)", + provider.name, raw_schema, + ) + continue + name = schema["name"] if name in _core_tool_names: continue - if name and name not in seen: + if name not in seen: schemas.append(schema) seen.add(name) except Exception as e: diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py index bacb8911600..1a99977deb4 100644 --- a/tests/agent/test_memory_provider.py +++ b/tests/agent/test_memory_provider.py @@ -1487,3 +1487,103 @@ class TestContextEngineToolsetGate: """Gate is moot without a context_compressor.""" tools, names, engine_names = self._run_context_engine_injection(None, None) assert tools == [] + + +class TestNormalizeToolSchema: + """Issue #47707: one malformed tool schema must not poison the request. + + Context engines / memory providers expose schemas via get_tool_schemas(). + The expected shape is a bare function schema; some providers return an + entry already in OpenAI tool form ({"type":"function","function":{...}}). + Wrapping that a second time yields a tool whose `function` has no + top-level `name`, which strict providers (DeepSeek) reject with HTTP 400 + `tools[N].function: missing field name` — disabling the entire toolset. + """ + + def test_bare_schema_passthrough(self): + from agent.memory_manager import normalize_tool_schema + s = {"name": "x_grep", "description": "d", "parameters": {}} + assert normalize_tool_schema(s) == s + + def test_already_wrapped_schema_is_unwrapped(self): + from agent.memory_manager import normalize_tool_schema + wrapped = { + "type": "function", + "function": {"name": "x_grep", "description": "d", "parameters": {}}, + } + out = normalize_tool_schema(wrapped) + assert out is not None + assert out["name"] == "x_grep" + # Must be the inner function schema, not the wrapper. + assert "type" not in out or out.get("type") != "function" + + def test_nameless_schema_rejected(self): + from agent.memory_manager import normalize_tool_schema + assert normalize_tool_schema({"description": "no name"}) is None + + def test_double_wrapped_without_name_rejected(self): + from agent.memory_manager import normalize_tool_schema + # The exact poisoning shape from #47707. + assert normalize_tool_schema( + {"type": "function", "function": {"type": "function", + "function": {"name": "x"}}} + ) is None + + def test_non_dict_rejected(self): + from agent.memory_manager import normalize_tool_schema + assert normalize_tool_schema("nope") is None + assert normalize_tool_schema(None) is None + + def test_non_string_name_rejected(self): + from agent.memory_manager import normalize_tool_schema + assert normalize_tool_schema({"name": 123}) is None + + +class TestMemoryInjectionRejectsMalformedSchema: + """The real inject_memory_provider_tools must skip nameless schemas. + + Without the #47707 fix, an already-wrapped schema is appended as a + nameless tool ({"type":"function","function":{"type":"function",...}}), + poisoning the whole tool surface. With the fix it is skipped (or, for a + well-formed-but-wrapped schema, unwrapped to a valid tool). + """ + + def _agent_with(self, *schemas): + mgr = MemoryManager() + mgr.add_provider(FakeMemoryProvider("ext", tools=list(schemas))) + return SimpleNamespace( + _memory_manager=mgr, + enabled_toolsets=None, + tools=[], + valid_tool_names=set(), + ) + + def test_already_wrapped_schema_is_unwrapped_not_poisoned(self): + agent = self._agent_with( + {"type": "function", + "function": {"name": "x_grep", "description": "d", "parameters": {}}} + ) + inject_memory_provider_tools(agent) + # Exactly one well-formed tool, with a top-level function name. + assert len(agent.tools) == 1 + fn = agent.tools[0]["function"] + assert fn["name"] == "x_grep" + # No nested double-wrap leaked through. + assert fn.get("type") != "function" + assert "x_grep" in agent.valid_tool_names + + def test_nameless_schema_is_skipped(self): + agent = self._agent_with({"description": "no name at all"}) + inject_memory_provider_tools(agent) + assert agent.tools == [] + assert agent.valid_tool_names == set() + + def test_good_schema_still_injected_alongside_bad(self): + agent = self._agent_with( + {"name": "good_tool", "description": "d", "parameters": {}}, + {"description": "bad, no name"}, + ) + inject_memory_provider_tools(agent) + names = {t["function"]["name"] for t in agent.tools} + assert names == {"good_tool"} + assert agent.valid_tool_names == {"good_tool"}