feat(delegate): show user's actual concurrency / spawn-depth limits in tool description (#22694)

The delegate_task tool description hardcoded 'default 3' / 'default 2' for
max_concurrent_children / max_spawn_depth, which misled the model on any
install that raised these limits — the schema text said 'default 3' even
when the user had set max_concurrent_children=15 / max_spawn_depth=3, so
the model would self-cap at 3 and never use the headroom.

Make the description dynamic. ToolEntry gains an optional
dynamic_schema_overrides callable; registry.get_definitions() merges its
output on top of the static schema before returning it. delegate_tool
registers a builder that reads the current delegation.* config and emits:

- 'up to N items concurrently for this user' (N = max_concurrent_children)
- 'Nested delegation IS enabled / OFF for this user (max_spawn_depth=N)'
- 'orchestrator children can themselves delegate up to M more level(s)'
- 'orchestrator_enabled=false' when the kill switch is set

The model_tools cache key already includes config.yaml mtime+size, so
edits to delegation.* in config invalidate the cached tool definitions
without an explicit hook. CLI_CONFIG staleness within a process is a
pre-existing limitation of _load_config and out of scope here.

The static description, tasks.description, and role.description in
DELEGATE_TASK_SCHEMA are now placeholders that get_definitions() overrides
with the dynamic text, so that importing the module doesn't trigger a
cli.CLI_CONFIG load before the test conftest can redirect HERMES_HOME.
This commit is contained in:
Teknium 2026-05-09 11:07:53 -07:00 committed by GitHub
parent 000ddb8a93
commit 1f4200debf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 222 additions and 23 deletions

View file

@ -75,6 +75,55 @@ class TestDelegateRequirements(unittest.TestCase):
self.assertNotIn("max_iterations", props)
self.assertNotIn("maxItems", props["tasks"]) # removed — limit is now runtime-configurable
def test_schema_description_advertises_runtime_limits(self):
    """Dynamic schema text must quote the configured delegation limits.

    A model that reads a hardcoded 'default 3' will self-cap below the
    user's real limit, so every model-facing description has to carry the
    values reported by the config getters instead of framework defaults.
    """
    from tools.delegate_tool import (
        _build_dynamic_schema_overrides,
        _get_max_concurrent_children,
        _get_max_spawn_depth,
    )

    schema = _build_dynamic_schema_overrides()
    concurrency_phrase = f"up to {_get_max_concurrent_children()}"
    depth_phrase = f"max_spawn_depth={_get_max_spawn_depth()}"

    # The three model-facing text surfaces produced by the builder.
    top_level = schema["description"]
    properties = schema["parameters"]["properties"]
    tasks_text = properties["tasks"]["description"]
    role_text = properties["role"]["description"]

    # (expected phrase, surface that must contain it): the top-level
    # description carries both limits, `tasks` repeats the concurrency cap,
    # and `role` names the spawn-depth limit.
    expectations = (
        (concurrency_phrase, top_level),
        (depth_phrase, top_level),
        (concurrency_phrase, tasks_text),
        (depth_phrase, role_text),
    )
    for phrase, text in expectations:
        self.assertIn(phrase, text)

    # The misleading hardcoded-default wording must not survive on any
    # dynamic surface.
    for text in (top_level, tasks_text, role_text):
        for stale in ("default 3", "default 2"):
            self.assertNotIn(stale, text)
def test_schema_overrides_applied_via_get_definitions(self):
    """The registry must merge dynamic overrides into returned definitions.

    The description returned by ``registry.get_definitions()`` is what the
    model API call sees, so it has to carry the current limits rather than
    the static import-time text.
    """
    from tools.registry import registry

    definitions = registry.get_definitions({"delegate_task"})
    self.assertEqual(len(definitions), 1)
    function_schema = definitions[0]["function"]

    # Imported only after the registry lookup, mirroring the order in which
    # the lookup and the getter imports are expected to happen.
    from tools.delegate_tool import (
        _get_max_concurrent_children,
        _get_max_spawn_depth,
    )

    # The description should mention the user's actual limits.
    concurrency_phrase = f"up to {_get_max_concurrent_children()}"
    self.assertIn(concurrency_phrase, function_schema["description"])
    depth_phrase = f"max_spawn_depth={_get_max_spawn_depth()}"
    self.assertIn(depth_phrase, function_schema["description"])
class TestChildSystemPrompt(unittest.TestCase):
def test_goal_only(self):