mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-02 12:13:05 +00:00
Batch delegation returned each subagent's full final_response verbatim into the parent's context. A fan-out of N children could dump 60k+ tokens at once, blowing the parent's context window and — on rate-limited providers — triggering a compression/429 death spiral (429 misread as context-too-large -> window step-down -> retry loop -> conversation dies). Cap each summary against the parent's *remaining* context headroom split across the batch (not a magic char count). When trimming, mirror the web_extract convention: spill the full text to cache/delegation (mounted into remote backends via credential_files._CACHE_DIRS) and return a head+tail window (75/25, line-snapped) plus a footer with the exact read_file offset to page the omitted middle. Both the subagent's opening AND its closing (outcomes / files-changed / issues, which live at the end) survive in-context, and nothing is lost — the parent can read_file the full version on any backend. delegation.max_summary_chars (default 24000) is a static ceiling layered on top as belt-and-suspenders for models that ignore 'be concise'; 0 disables it. Child prompt tightened to lead with outcomes / bullets. Co-authored-by: rc-int <rcint@klaith.com>
126 lines
5 KiB
Python
126 lines
5 KiB
Python
"""Tests for subagent summary budgeting (PR #9126).

delegate_task caps subagent summaries against the parent's remaining context
headroom (split across the batch) before they enter the parent's context, and
spills the full text to disk so nothing is lost. This guards against the
compression/429 death spiral that batch fan-out could trigger by returning N
full summaries verbatim into the parent.
"""
|
|
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
import tools.delegate_tool as dt
|
|
|
|
|
|
class _FakeCompressor:
|
|
def __init__(self, context_length, max_tokens):
|
|
self.context_length = context_length
|
|
self.max_tokens = max_tokens
|
|
|
|
|
|
class _FakeParent:
    """Minimal stand-in for the parent agent passed to the budgeting helpers.

    Exposes a ``context_compressor`` (a ``_FakeCompressor``) and a
    ``session_prompt_tokens`` counter.
    """

    def __init__(self, context_length, used_tokens, max_tokens):
        """Build the fake compressor and record the tokens already used."""
        self.context_compressor = _FakeCompressor(context_length, max_tokens)
        self.session_prompt_tokens = used_tokens
|
|
|
|
|
|
def test_small_summaries_pass_through_untouched():
    """Short summaries with ample parent headroom must not be trimmed or flagged."""
    parent = _FakeParent(context_length=200_000, used_tokens=10_000, max_tokens=8_000)
    results = [
        {"task_index": 0, "summary": "short result A", "status": "completed"},
        {"task_index": 1, "summary": "short result B", "status": "completed"},
    ]
    dt._apply_summary_budget(results, parent)
    # Both summaries must come back verbatim ...
    assert results[0]["summary"] == "short result A"
    assert results[1]["summary"] == "short result B"
    # ... and neither may carry the truncation marker.
    assert "summary_truncated" not in results[0]
    assert "summary_truncated" not in results[1]
|
|
|
|
|
|
def test_batch_overflow_trimmed_and_spilled_losslessly(monkeypatch):
    """Oversized batch summaries are trimmed in-context and spilled in full to disk.

    Every assertion runs inside the temporary-directory context so the spill
    files still exist when they are read back.
    """
    # Isolate spill directory to a temp HERMES_HOME.
    with tempfile.TemporaryDirectory() as td:
        monkeypatch.setenv("HERMES_HOME", os.path.join(td, ".hermes"))
        # Distinct head + tail markers so we can prove the tail survives.
        big = "HEAD_MARKER\n" + ("X" * 50_000) + "\nTAIL_MARKER"
        # Parent nearly full (120k/131k) → tiny headroom → aggressive trim.
        parent = _FakeParent(context_length=131_000, used_tokens=120_000, max_tokens=8_000)
        results = [
            {"task_index": i, "summary": big, "status": "completed"} for i in range(5)
        ]
        dt._apply_summary_budget(results, parent)
        for r in results:
            assert r["summary_truncated"] is True
            assert len(r["summary"]) < len(big)
            # Head+tail window: both ends survive in-context.
            assert "HEAD_MARKER" in r["summary"]
            assert "TAIL_MARKER" in r["summary"]
            path = r.get("summary_full_path")
            assert path and os.path.exists(path)
            # The spill file holds the FULL original text — nothing is lost.
            with open(path, encoding="utf-8") as fh:
                assert fh.read() == big
            # The footer points the parent at the full version with an offset.
            assert "read_file" in r["summary"]
            assert "offset=" in r["summary"]
            # Spilled into the delegation cache (mounted into remote backends).
            assert os.path.join("cache", "delegation") in path
|
|
|
|
|
|
def test_dynamic_budget_shrinks_as_batch_grows():
    """The per-summary budget must strictly decrease as the batch size grows."""

    def cap_for(n):
        # Same parent state every time; only the batch size varies.
        return dt._parent_summary_char_budget(
            _FakeParent(131_000, 30_000, 8_000), n
        )

    c1, c5, c20 = cap_for(1), cap_for(5), cap_for(20)
    assert c1 is not None and c5 is not None and c20 is not None
    # More children → smaller per-summary slice of the same headroom.
    assert c1 > c5 > c20
|
|
|
|
|
|
def test_floor_enforced_when_parent_over_budget():
    """A parent whose used tokens exceed its context length yields the minimum budget."""
    # Parent already over its context budget → each summary gets only the floor.
    budget = dt._parent_summary_char_budget(
        _FakeParent(131_000, 200_000, 8_000), 3
    )
    assert budget == dt._MIN_SUMMARY_CHARS
|
|
|
|
|
|
def test_unknown_context_falls_back_to_static_ceiling(monkeypatch):
    """Without a compressor the dynamic budget is None, but a 40k summary is still trimmed."""

    class _Bare:
        # Parent with no context_compressor attribute at all.
        pass

    # No compressor → dynamic budget is unknowable.
    assert dt._parent_summary_char_budget(_Bare(), 3) is None

    # But the static delegation.max_summary_chars ceiling still trims.
    with tempfile.TemporaryDirectory() as td:
        # Redirect HERMES_HOME so any spill file lands in the temp dir.
        monkeypatch.setenv("HERMES_HOME", os.path.join(td, ".hermes"))
        results = [{"task_index": 0, "summary": "Y" * 40_000, "status": "completed"}]
        dt._apply_summary_budget(results, _Bare())
        assert results[0]["summary_truncated"] is True
        assert len(results[0]["summary"]) < 40_000
|
|
|
|
|
|
def test_disabled_static_ceiling_and_unknown_context_leaves_summary_intact(monkeypatch):
    """With both caps unavailable, a 40k summary must pass through unmodified."""

    class _Bare:
        # Parent with no context_compressor attribute at all.
        pass

    # Both caps off: static ceiling 0 (disabled) AND no compressor (no dynamic).
    monkeypatch.setattr(dt, "_load_config", lambda: {"max_summary_chars": 0})
    results = [{"task_index": 0, "summary": "Z" * 40_000, "status": "completed"}]
    dt._apply_summary_budget(results, _Bare())
    assert "summary_truncated" not in results[0]
    assert len(results[0]["summary"]) == 40_000
|
|
|
|
|
|
def test_empty_results_is_noop():
    """An empty result list, or a result whose summary is None, must not raise."""
    # No summaries → nothing to do, must not raise.
    dt._apply_summary_budget([], _FakeParent(131_000, 1_000, 8_000))
    dt._apply_summary_budget(
        [{"task_index": 0, "status": "failed", "summary": None}],
        _FakeParent(131_000, 1_000, 8_000),
    )
|