diff --git a/agent/background_review.py b/agent/background_review.py index d425d6c08e4..ba65b2b1bc8 100644 --- a/agent/background_review.py +++ b/agent/background_review.py @@ -390,24 +390,9 @@ def _run_review_in_thread( # parent below so memory(action="add") writes from # the review still land on disk; the review just # has zero side effects on external providers. - # Inherit the parent's toolset configuration so the review - # fork's outbound request body has byte-identical ``tools[]`` - # with the parent's last main-turn request. Without this, - # ``enabled_toolsets=None`` defaults to "all registered tools" - # and the fork transmits every tool descriptor (including any - # the user has disabled via ``hermes tools disable``), while - # the parent transmits only its narrower configured set — - # making the two requests diverge in ``tools[]`` even though - # they share ``messages[0..N]`` and ``system`` byte-for-byte. - # Anthropic's prompt-cache key includes ``tools[]``, so any - # divergence forks the cache lineage and forces a full - # prefix rewrite (~100-200K tokens per turn for long convs). - # The post-construction whitelist (``set_thread_tool_whitelist`` - # below) still restricts which tools the model is allowed - # to dispatch — this change only aligns what the request - # body transmits, not what the review is allowed to do. - # This extends the byte-stability invariant established by - # PR #17276 (which fixed ``system``) to ``tools[]``. + # Match parent's toolset config so ``tools[]`` is byte-identical + # in the request body — Anthropic's cache key includes it. + # (The runtime whitelist below still restricts dispatch.) review_agent = AIAgent( model=agent.model, max_iterations=16, diff --git a/scripts/release.py b/scripts/release.py index 99d5e6bade3..6c5d9275b33 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -718,6 +718,7 @@ AUTHOR_MAP = { "9219265+cresslank@users.noreply.github.com": "cresslank", "trevmanthony@gmail.com": "trevthefoolish", "ziliangpeng@users.noreply.github.com": "ziliangpeng", + "ziliangdotme@gmail.com": "ziliangpeng", "centripetal-star@users.noreply.github.com": "centripetal-star", "LeonSGP43@users.noreply.github.com": "LeonSGP43", "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", diff --git a/tests/run_agent/test_background_review_cache_parity.py b/tests/run_agent/test_background_review_cache_parity.py index 74b29028627..58a2dfa4812 100644 --- a/tests/run_agent/test_background_review_cache_parity.py +++ b/tests/run_agent/test_background_review_cache_parity.py @@ -38,11 +38,7 @@ def _make_agent_stub(agent_cls): agent._MEMORY_REVIEW_PROMPT = "review memory" agent._SKILL_REVIEW_PROMPT = "review skills" agent._COMBINED_REVIEW_PROMPT = "review both" - # Parent's toolset configuration — must be propagated to the review - # fork so ``tools[]`` matches byte-for-byte. Without these set on the - # stub, ``getattr(agent, ..., None)`` would return None on both sides - # and the test wouldn't catch a regression where the fork is built - # without the kwargs at all. + # Non-None so the test catches a missing-kwarg regression. agent.enabled_toolsets = ["memory", "skills", "terminal"] agent.disabled_toolsets = ["spotify", "feishu_doc"] return agent @@ -193,21 +189,7 @@ def test_review_fork_pins_session_start_and_session_id(): def test_review_fork_inherits_parent_toolset_config(): - """The review fork must receive ``enabled_toolsets`` / ``disabled_toolsets`` - from the parent so the outbound request body's ``tools[]`` field matches - byte-for-byte. - - Without this, ``enabled_toolsets=None`` defaults to "all registered tools" - and the fork sends every tool descriptor (e.g. Spotify, Feishu, video) - even when the parent disabled them via ``hermes tools disable``. Anthropic's - prompt cache keys on the byte-exact ``tools[]`` array, so divergence here - forks the cache lineage and forces a full prefix rewrite per nudge - (~100-200 K cache-write tokens for long conversations). - - This is the same byte-stability invariant as - ``test_review_fork_inherits_parent_cached_system_prompt`` but for the - ``tools[]`` slot of the request body, not the ``system`` slot. - """ + """``tools[]`` byte-stability: fork must inherit parent's toolset config.""" import run_agent agent = _make_agent_stub(run_agent.AIAgent) @@ -218,7 +200,6 @@ def test_review_fork_inherits_parent_toolset_config(): def __init__(self, *args, **kwargs): captured["enabled_toolsets"] = kwargs.get("enabled_toolsets") captured["disabled_toolsets"] = kwargs.get("disabled_toolsets") - # Minimal post-init attrs the surrounding code touches. self._cached_system_prompt = None self._memory_write_origin = None self._memory_write_context = None @@ -249,14 +230,10 @@ def test_review_fork_inherits_parent_toolset_config(): ) assert captured.get("enabled_toolsets") == agent.enabled_toolsets, ( - f"Review fork did not receive parent's enabled_toolsets. " - f"Got {captured.get('enabled_toolsets')!r}, expected {agent.enabled_toolsets!r}. " - "This causes ``tools[]`` to diverge between main turns and review nudges, " - "breaking Anthropic prompt-cache parity." + f"enabled_toolsets mismatch: {captured.get('enabled_toolsets')!r} " + f"vs expected {agent.enabled_toolsets!r}" ) assert captured.get("disabled_toolsets") == agent.disabled_toolsets, ( - f"Review fork did not receive parent's disabled_toolsets. " - f"Got {captured.get('disabled_toolsets')!r}, expected {agent.disabled_toolsets!r}. " - "This causes ``tools[]`` to diverge between main turns and review nudges, " - "breaking Anthropic prompt-cache parity." + f"disabled_toolsets mismatch: {captured.get('disabled_toolsets')!r} " + f"vs expected {agent.disabled_toolsets!r}" ) diff --git a/tests/run_agent/test_background_review_toolset_restriction.py b/tests/run_agent/test_background_review_toolset_restriction.py index 5282c92e1ba..9682014ee44 100644 --- a/tests/run_agent/test_background_review_toolset_restriction.py +++ b/tests/run_agent/test_background_review_toolset_restriction.py @@ -38,11 +38,7 @@ def _make_agent_stub(agent_cls): agent._MEMORY_REVIEW_PROMPT = "review memory" agent._SKILL_REVIEW_PROMPT = "review skills" agent._COMBINED_REVIEW_PROMPT = "review both" - # Parent's toolset configuration must propagate to the review fork - # so the request body's ``tools[]`` array is byte-identical. Without - # propagation, ``enabled_toolsets=None`` expands to "all registered - # tools" — which diverges from a parent that narrowed its set via - # ``hermes tools disable`` or config. + # Non-None so the test catches a missing-kwarg regression. agent.enabled_toolsets = ["memory", "skills", "terminal"] agent.disabled_toolsets = ["spotify", "feishu_doc"] return agent @@ -60,25 +56,7 @@ class _SyncThread: def test_background_review_matches_parent_toolset_config(): - """The review fork must propagate the parent's ``enabled_toolsets`` to AIAgent. - - Earlier guidance (the old "do NOT pass enabled_toolsets" rule) assumed the - parent always ran with the registry default. In practice the parent is - often narrowed via ``hermes tools disable`` or ``config.yaml``, and - leaving ``enabled_toolsets=None`` on the fork makes it expand to ALL - registered tools — which is what *diverges* from the parent and breaks - Anthropic's prefix cache key on ``tools[]``. - - The correct rule is symmetric: whatever the parent has, the fork - must have the same. When the parent's value is ``None``, the fork's - must also be ``None`` (and they'll both expand identically). When - the parent narrows, the fork inherits the narrowed set verbatim. - - (Schema-level alignment with the parent + post-construction runtime - whitelist remain the safety contract for #15204 — the whitelist - blocks dispatch of non-memory/skill tools regardless of what schemas - are sent over the wire.) - """ + """Fork must receive parent's toolset config so ``tools[]`` cache key matches.""" import run_agent agent = _make_agent_stub(run_agent.AIAgent) @@ -98,18 +76,13 @@ def test_background_review_matches_parent_toolset_config(): ) assert "enabled_toolsets" in captured, "AIAgent.__init__ was not called" - # The kwargs must equal the parent's so the ``tools[]`` request-body - # bytes match the parent's last main-turn request. assert captured["enabled_toolsets"] == agent.enabled_toolsets, ( - f"Review fork did not propagate parent's enabled_toolsets " - f"(got {captured['enabled_toolsets']!r}, expected {agent.enabled_toolsets!r}). " - "This causes ``tools[]`` to diverge from the parent — Anthropic's " - "prompt-cache key includes ``tools[]``, so divergence forks the " - "cache lineage and forces a full prefix rewrite per nudge." + f"enabled_toolsets mismatch: {captured['enabled_toolsets']!r} " + f"vs expected {agent.enabled_toolsets!r}" ) assert captured["disabled_toolsets"] == agent.disabled_toolsets, ( - f"Review fork did not propagate parent's disabled_toolsets " - f"(got {captured['disabled_toolsets']!r}, expected {agent.disabled_toolsets!r})." + f"disabled_toolsets mismatch: {captured['disabled_toolsets']!r} " + f"vs expected {agent.disabled_toolsets!r}" )