mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(agent): validate context/memory tool schemas before wrapping
Closes #47707 Context engines and memory providers expose tool schemas via get_tool_schemas(). agent_init.py wrapped each as {"type":"function","function":_schema} without validating that _schema carries a top-level name. A provider returning an entry already in OpenAI tool form ({"type":"function","function":{...}}) was then double-wrapped into a tool whose function has no name. Strict providers (e.g. DeepSeek) reject the entire request with HTTP 400 'tools[N].function: missing field name', so one malformed schema silently disables the whole toolset and breaks every turn. The schema was also never added to valid_tool_names, so even lenient providers could not call it. Add a shared normalize_tool_schema() helper that unwraps an already-wrapped entry and returns None for anything lacking a resolvable string name. Wire it into the agent_init context-engine loop and all three memory_manager surfaces (inject_memory_provider_tools, add_provider routing index, get_all_tool_schemas), so a single bad plugin schema is skipped with a warning instead of poisoning the request. Verification: 209 targeted agent/memory tests pass (incl. 9 new). New tests assert the unwrap + skip-nameless behavior and fail without the fix.
This commit is contained in:
parent
77d2b50751
commit
710cd48fb1
3 changed files with 177 additions and 16 deletions
|
|
@ -1621,16 +1621,27 @@ def init_agent(
|
|||
for t in agent.tools
|
||||
if isinstance(t, dict)
|
||||
}
|
||||
for _schema in agent.context_compressor.get_tool_schemas():
|
||||
_tname = _schema.get("name", "")
|
||||
if _tname and _tname in _existing_tool_names:
|
||||
from agent.memory_manager import normalize_tool_schema as _normalize_tool_schema
|
||||
for _raw_schema in agent.context_compressor.get_tool_schemas():
|
||||
_schema = _normalize_tool_schema(_raw_schema)
|
||||
if _schema is None:
|
||||
# A schema with no resolvable name (e.g. an already-wrapped
|
||||
# entry) would append a nameless tool that strict providers
|
||||
# 400 on, disabling the whole toolset (#47707). Skip it.
|
||||
_ra().logger.warning(
|
||||
"Context engine returned a tool schema with no resolvable "
|
||||
"name; skipping to avoid poisoning the request (%r)",
|
||||
_raw_schema,
|
||||
)
|
||||
continue
|
||||
_tname = _schema["name"]
|
||||
if _tname in _existing_tool_names:
|
||||
continue # already registered via plugin/cache path
|
||||
_wrapped = {"type": "function", "function": _schema}
|
||||
agent.tools.append(_wrapped)
|
||||
if _tname:
|
||||
agent.valid_tool_names.add(_tname)
|
||||
agent._context_engine_tool_names.add(_tname)
|
||||
_existing_tool_names.add(_tname)
|
||||
agent.valid_tool_names.add(_tname)
|
||||
agent._context_engine_tool_names.add(_tname)
|
||||
_existing_tool_names.add(_tname)
|
||||
|
||||
# Notify context engine of session start
|
||||
if hasattr(agent, "context_compressor") and agent.context_compressor:
|
||||
|
|
|
|||
|
|
@ -46,6 +46,39 @@ logger = logging.getLogger(__name__)
|
|||
_SYNC_DRAIN_TIMEOUT_S = 5.0
|
||||
|
||||
|
||||
def normalize_tool_schema(schema: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Return a function-tool dict with a resolvable top-level ``name``.
|
||||
|
||||
Context engines and memory providers expose tool schemas via
|
||||
``get_tool_schemas()``. The expected shape is a bare function schema
|
||||
(``{"name": ..., "description": ..., "parameters": ...}``) which callers
|
||||
wrap as ``{"type": "function", "function": schema}``.
|
||||
|
||||
Some providers instead return an entry that is *already* in OpenAI tool
|
||||
form (``{"type": "function", "function": {"name": ...}}``). Wrapping that
|
||||
a second time produces ``{"type": "function", "function": {"type":
|
||||
"function", "function": {...}}}`` whose ``function`` has no top-level
|
||||
``name``. Strict providers (e.g. DeepSeek) reject the *entire* request
|
||||
with ``tools[N].function: missing field name`` (HTTP 400), so one bad
|
||||
schema disables the whole toolset and breaks every turn (#47707).
|
||||
|
||||
This helper normalizes both shapes to the bare function schema and
|
||||
returns ``None`` for anything without a resolvable name, so callers can
|
||||
skip-with-warning rather than appending a nameless tool.
|
||||
"""
|
||||
if not isinstance(schema, dict):
|
||||
return None
|
||||
# Unwrap an already-wrapped OpenAI tool entry.
|
||||
if schema.get("type") == "function" and isinstance(schema.get("function"), dict):
|
||||
schema = schema["function"]
|
||||
if not isinstance(schema, dict):
|
||||
return None
|
||||
name = schema.get("name", "")
|
||||
if not name or not isinstance(name, str):
|
||||
return None
|
||||
return schema
|
||||
|
||||
|
||||
def memory_provider_tools_enabled(enabled_toolsets: Optional[List[str]]) -> bool:
|
||||
"""Return whether external memory-provider tools should be exposed."""
|
||||
if enabled_toolsets is None:
|
||||
|
|
@ -92,11 +125,17 @@ def inject_memory_provider_tools(agent: Any) -> int:
|
|||
agent.valid_tool_names = valid_tool_names
|
||||
|
||||
added = 0
|
||||
for schema in get_schemas():
|
||||
if not isinstance(schema, dict):
|
||||
for raw_schema in get_schemas():
|
||||
schema = normalize_tool_schema(raw_schema)
|
||||
if schema is None:
|
||||
logger.warning(
|
||||
"Memory provider returned a tool schema with no resolvable "
|
||||
"name; skipping to avoid poisoning the request (%r)",
|
||||
raw_schema,
|
||||
)
|
||||
continue
|
||||
tool_name = schema.get("name", "")
|
||||
if not tool_name or tool_name in existing_tool_names:
|
||||
tool_name = schema["name"]
|
||||
if tool_name in existing_tool_names:
|
||||
continue
|
||||
tools.append({"type": "function", "function": schema})
|
||||
valid_tool_names.add(tool_name)
|
||||
|
|
@ -370,8 +409,11 @@ class MemoryManager:
|
|||
_core_tool_names = set(_HERMES_CORE_TOOLS)
|
||||
|
||||
# Index tool names → provider for routing
|
||||
for schema in provider.get_tool_schemas():
|
||||
tool_name = schema.get("name", "")
|
||||
for raw_schema in provider.get_tool_schemas():
|
||||
schema = normalize_tool_schema(raw_schema)
|
||||
if schema is None:
|
||||
continue
|
||||
tool_name = schema["name"]
|
||||
if tool_name in _core_tool_names:
|
||||
logger.warning(
|
||||
"Memory provider '%s' tool '%s' shadows a reserved core "
|
||||
|
|
@ -658,11 +700,19 @@ class MemoryManager:
|
|||
seen = set()
|
||||
for provider in self._providers:
|
||||
try:
|
||||
for schema in provider.get_tool_schemas():
|
||||
name = schema.get("name", "")
|
||||
for raw_schema in provider.get_tool_schemas():
|
||||
schema = normalize_tool_schema(raw_schema)
|
||||
if schema is None:
|
||||
logger.warning(
|
||||
"Memory provider '%s' returned a tool schema with "
|
||||
"no resolvable name; skipping (%r)",
|
||||
provider.name, raw_schema,
|
||||
)
|
||||
continue
|
||||
name = schema["name"]
|
||||
if name in _core_tool_names:
|
||||
continue
|
||||
if name and name not in seen:
|
||||
if name not in seen:
|
||||
schemas.append(schema)
|
||||
seen.add(name)
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -1487,3 +1487,103 @@ class TestContextEngineToolsetGate:
|
|||
"""Gate is moot without a context_compressor."""
|
||||
tools, names, engine_names = self._run_context_engine_injection(None, None)
|
||||
assert tools == []
|
||||
|
||||
|
||||
class TestNormalizeToolSchema:
|
||||
"""Issue #47707: one malformed tool schema must not poison the request.
|
||||
|
||||
Context engines / memory providers expose schemas via get_tool_schemas().
|
||||
The expected shape is a bare function schema; some providers return an
|
||||
entry already in OpenAI tool form ({"type":"function","function":{...}}).
|
||||
Wrapping that a second time yields a tool whose `function` has no
|
||||
top-level `name`, which strict providers (DeepSeek) reject with HTTP 400
|
||||
`tools[N].function: missing field name` — disabling the entire toolset.
|
||||
"""
|
||||
|
||||
def test_bare_schema_passthrough(self):
|
||||
from agent.memory_manager import normalize_tool_schema
|
||||
s = {"name": "x_grep", "description": "d", "parameters": {}}
|
||||
assert normalize_tool_schema(s) == s
|
||||
|
||||
def test_already_wrapped_schema_is_unwrapped(self):
|
||||
from agent.memory_manager import normalize_tool_schema
|
||||
wrapped = {
|
||||
"type": "function",
|
||||
"function": {"name": "x_grep", "description": "d", "parameters": {}},
|
||||
}
|
||||
out = normalize_tool_schema(wrapped)
|
||||
assert out is not None
|
||||
assert out["name"] == "x_grep"
|
||||
# Must be the inner function schema, not the wrapper.
|
||||
assert "type" not in out or out.get("type") != "function"
|
||||
|
||||
def test_nameless_schema_rejected(self):
|
||||
from agent.memory_manager import normalize_tool_schema
|
||||
assert normalize_tool_schema({"description": "no name"}) is None
|
||||
|
||||
def test_double_wrapped_without_name_rejected(self):
|
||||
from agent.memory_manager import normalize_tool_schema
|
||||
# The exact poisoning shape from #47707.
|
||||
assert normalize_tool_schema(
|
||||
{"type": "function", "function": {"type": "function",
|
||||
"function": {"name": "x"}}}
|
||||
) is None
|
||||
|
||||
def test_non_dict_rejected(self):
|
||||
from agent.memory_manager import normalize_tool_schema
|
||||
assert normalize_tool_schema("nope") is None
|
||||
assert normalize_tool_schema(None) is None
|
||||
|
||||
def test_non_string_name_rejected(self):
|
||||
from agent.memory_manager import normalize_tool_schema
|
||||
assert normalize_tool_schema({"name": 123}) is None
|
||||
|
||||
|
||||
class TestMemoryInjectionRejectsMalformedSchema:
|
||||
"""The real inject_memory_provider_tools must skip nameless schemas.
|
||||
|
||||
Without the #47707 fix, an already-wrapped schema is appended as a
|
||||
nameless tool ({"type":"function","function":{"type":"function",...}}),
|
||||
poisoning the whole tool surface. With the fix it is skipped (or, for a
|
||||
well-formed-but-wrapped schema, unwrapped to a valid tool).
|
||||
"""
|
||||
|
||||
def _agent_with(self, *schemas):
|
||||
mgr = MemoryManager()
|
||||
mgr.add_provider(FakeMemoryProvider("ext", tools=list(schemas)))
|
||||
return SimpleNamespace(
|
||||
_memory_manager=mgr,
|
||||
enabled_toolsets=None,
|
||||
tools=[],
|
||||
valid_tool_names=set(),
|
||||
)
|
||||
|
||||
def test_already_wrapped_schema_is_unwrapped_not_poisoned(self):
|
||||
agent = self._agent_with(
|
||||
{"type": "function",
|
||||
"function": {"name": "x_grep", "description": "d", "parameters": {}}}
|
||||
)
|
||||
inject_memory_provider_tools(agent)
|
||||
# Exactly one well-formed tool, with a top-level function name.
|
||||
assert len(agent.tools) == 1
|
||||
fn = agent.tools[0]["function"]
|
||||
assert fn["name"] == "x_grep"
|
||||
# No nested double-wrap leaked through.
|
||||
assert fn.get("type") != "function"
|
||||
assert "x_grep" in agent.valid_tool_names
|
||||
|
||||
def test_nameless_schema_is_skipped(self):
|
||||
agent = self._agent_with({"description": "no name at all"})
|
||||
inject_memory_provider_tools(agent)
|
||||
assert agent.tools == []
|
||||
assert agent.valid_tool_names == set()
|
||||
|
||||
def test_good_schema_still_injected_alongside_bad(self):
|
||||
agent = self._agent_with(
|
||||
{"name": "good_tool", "description": "d", "parameters": {}},
|
||||
{"description": "bad, no name"},
|
||||
)
|
||||
inject_memory_provider_tools(agent)
|
||||
names = {t["function"]["name"] for t in agent.tools}
|
||||
assert names == {"good_tool"}
|
||||
assert agent.valid_tool_names == {"good_tool"}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue