feat(delegate): show user's actual concurrency / spawn-depth limits in tool description (#22694)

The delegate_task tool description hardcoded 'default 3' / 'default 2' for max_concurrent_children / max_spawn_depth, which misled the model on any install that raised these limits — the schema text said 'default 3' even when the user had set max_concurrent_children=15 / max_spawn_depth=3, so the model would self-cap at 3 and never use the headroom.

Make the description dynamic. ToolEntry gains an optional dynamic_schema_overrides callable; registry.get_definitions() merges its output on top of the static schema before returning it. delegate_tool registers a builder that reads the current delegation.* config and emits:

- 'up to N items concurrently for this user' (N = max_concurrent_children)
- 'Nested delegation IS enabled / OFF for this user (max_spawn_depth=N)'
- 'orchestrator children can themselves delegate up to M more level(s)'
- 'orchestrator_enabled=false' when the kill switch is set

The model_tools cache key already includes the config.yaml mtime and size, so edits to delegation.* in the config invalidate the cached tool definitions without an explicit hook. CLI_CONFIG staleness within a process is a pre-existing limitation of _load_config and is out of scope here.

The static description / tasks.description / role.description in DELEGATE_TASK_SCHEMA are placeholders, so that module import doesn't trigger the cli.CLI_CONFIG load before the test conftest can redirect HERMES_HOME.
2026-05-23 05:31:23 +00:00 · 2026-05-09 11:07:53 -07:00 · 2026-05-09 11:07:53 -07:00 · 1f4200debf
commit 1f4200debf
parent 000ddb8a93
3 changed files with 222 additions and 23 deletions
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -75,6 +75,55 @@ class TestDelegateRequirements(unittest.TestCase):
        self.assertNotIn("max_iterations", props)
        self.assertNotIn("maxItems", props["tasks"])  # removed — limit is now runtime-configurable

+    def test_schema_description_advertises_runtime_limits(self):
+        """The dynamic overrides must quote the user's actual concurrency /
+        spawn-depth caps, not the framework defaults. Without this, models that
+        read 'default 3' will self-cap below the user's real limit.
+        """
+        from tools.delegate_tool import (
+            _build_dynamic_schema_overrides,
+            _get_max_concurrent_children,
+            _get_max_spawn_depth,
+        )
+
+        overrides = _build_dynamic_schema_overrides()
+        max_children = _get_max_concurrent_children()
+        max_depth = _get_max_spawn_depth()
+
+        desc = overrides["description"]
+        tasks_desc = overrides["parameters"]["properties"]["tasks"]["description"]
+        role_desc = overrides["parameters"]["properties"]["role"]["description"]
+
+        # Top-level description must contain "up to <max_children>".
+        self.assertIn(f"up to {max_children}", desc)
+        # Top-level description must contain "max_spawn_depth=<max_depth>".
+        self.assertIn(f"max_spawn_depth={max_depth}", desc)
+        # The tasks parameter description repeats the concurrency cap.
+        self.assertIn(f"up to {max_children}", tasks_desc)
+        # The role parameter description repeats the spawn-depth limit.
+        self.assertIn(f"max_spawn_depth={max_depth}", role_desc)
+        # The misleading "default 3" / "default 2" wording must be absent
+        # from all three model-facing texts checked above.
+        for surface in (desc, tasks_desc, role_desc):
+            self.assertNotIn("default 3", surface)
+            self.assertNotIn("default 2", surface)
+
+    def test_schema_overrides_applied_via_get_definitions(self):
+        """registry.get_definitions() must apply dynamic_schema_overrides so
+        the model API call sees current values, not the static import-time text.
+        """
+        from tools.registry import registry
+        defs = registry.get_definitions({"delegate_task"})
+        self.assertEqual(len(defs), 1)
+        fn = defs[0]["function"]
+        # These helpers supply the limits the returned description must quote.
+        from tools.delegate_tool import (
+            _get_max_concurrent_children,
+            _get_max_spawn_depth,
+        )
+        self.assertIn(f"up to {_get_max_concurrent_children()}", fn["description"])
+        self.assertIn(f"max_spawn_depth={_get_max_spawn_depth()}", fn["description"])
+

 class TestChildSystemPrompt(unittest.TestCase):
    def test_goal_only(self):