mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
* docs: fix ascii-guard border alignment errors
Three docs pages had ASCII diagram boxes with off-by-one column
alignment issues that failed docs-site-checks CI:
- architecture.md: outer box is 71 cols but inner-box content lines
and border corners were offset by 1 col, making content-line right
border at col 70/72 while top/bottom border was at col 71. Inner
boxes also had border corners at cols 19/36/53 but content pipes
at cols 20/37/54. Rewrote the diagram with consistent 71-col width
throughout, aligned inner boxes at cols 4-19, 22-37, 40-55 with
2-space gaps and 15-space trailing padding.
- gateway-internals.md: same class of issue — outer box at 51 cols,
inner content lines varied 52-54 cols. Rewrote with consistent
51-col width, inner boxes at cols 4-15, 18-29, 32-43. Also
restructured the bottom-half message flow so it's bare text
(not half-open box cells) matching the intent of the original.
- agent-loop.md line 112-114: box 2 (API thread) content lines had
one extra space pushing the right border to col 46 while the top
and bottom borders of that box sat at col 45. Trimmed one trailing
space from each of the three content lines.
All 123 docs files now pass `npm run lint:diagrams`:
✓ Errors: 0 (warnings: 6, non-fatal)
(These lint failures were pre-existing on main — unrelated to any open PR.)
* test(setup): accept description kwarg in prompt_choice mock lambdas
setup.py's `_curses_prompt_choice` gained an optional `description`
parameter (used for rendering context hints alongside the prompt).
`prompt_choice` forwards it via keyword arg. The two existing tests
mocked `_curses_prompt_choice` with lambdas that didn't accept the
new kwarg, so the forwarded call raised TypeError.
Fix: add `description=None` to both mock lambda signatures so they
absorb the new kwarg without changing behavior.
* test(matrix): update stale audio-caching assertion
test_regular_audio_has_http_url asserted that non-voice audio
messages keep their HTTP URL and are NOT downloaded/cached. That
was true when the caching code only triggered on
`is_voice_message`. Since bec02f37 (encrypted-media caching
refactor), matrix.py caches all media locally — photos, audio,
video, documents — so downstream tools can read them as real
files via media_urls. This applies to regular audio too.
Renamed the test to `test_regular_audio_is_cached_locally`,
flipped the assertions accordingly, and documented the
intentional behavior change in the docstring. Other tests in
the file (voice-specific caching, message-type detection,
reply-to threading) continue to pass.
* test(413): allow multi-pass preflight compression
run_agent.py's preflight compression runs up to 3 passes in a loop
for very large sessions (each pass summarizes the middle N turns,
then re-checks tokens). The loop breaks when a pass returns a
message list no shorter than its input (can't compress further).
test_preflight_compresses_oversized_history used a static mock
return value that returned the same 2 messages regardless of input,
so the loop ran pass 1 (41 -> 2) and pass 2 (2 -> 2 -> break),
making call_count == 2. The assert_called_once() assertion was
strictly wrong under the multi-pass design.
The invariant the test actually cares about is: preflight ran, and
its first invocation received the full oversized history. Replaced
the count assertion with those two invariants.
* docs: drop '...' from gateway diagram, merge side-by-side boxes
ascii-guard 2.3.0 flagged two remaining issues after the initial fix
pass:
1. gateway-internals.md L33: the '...' suffix after inner box 3's
right border got parsed as 'extra characters after inner-box right
border'. Dropped the '...' — the surrounding prose already conveys
'and more platforms' without needing the visual hint.
2. agent-loop.md: ascii-guard can't cleanly parse two side-by-side
boxes of different heights (main thread 7 rows, API thread 5 rows).
Even equalizing heights didn't help — the linter treats the left
box's right border as the end of the diagram. Merged into a single
54-char-wide outer box with both threads labeled as regions inside,
keeping the ▶ arrow to preserve the main→API flow direction.
563 lines
23 KiB
Python
563 lines
23 KiB
Python
"""Tests for payload/context-length → compression retry logic in AIAgent.
|
|
|
|
Verifies that:
|
|
- HTTP 413 errors trigger history compression and retry
|
|
- HTTP 400 context-length errors trigger compression (not generic 4xx abort)
|
|
- Preflight compression proactively compresses oversized sessions before API calls
|
|
"""
|
|
|
|
import pytest
|
|
#pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
|
|
|
|
|
|
|
|
import uuid
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from agent.context_compressor import SUMMARY_PREFIX
|
|
from run_agent import AIAgent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_tool_defs(*names: str) -> list:
|
|
return [
|
|
{
|
|
"type": "function",
|
|
"function": {
|
|
"name": n,
|
|
"description": f"{n} tool",
|
|
"parameters": {"type": "object", "properties": {}},
|
|
},
|
|
}
|
|
for n in names
|
|
]
|
|
|
|
|
|
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None, usage=None):
|
|
msg = SimpleNamespace(
|
|
content=content,
|
|
tool_calls=tool_calls,
|
|
reasoning_content=None,
|
|
reasoning=None,
|
|
)
|
|
choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
|
|
resp = SimpleNamespace(choices=[choice], model="test/model")
|
|
resp.usage = SimpleNamespace(**usage) if usage else None
|
|
return resp
|
|
|
|
|
|
def _make_413_error(*, use_status_code=True, message="Request entity too large"):
|
|
"""Create an exception that mimics a 413 HTTP error."""
|
|
err = Exception(message)
|
|
if use_status_code:
|
|
err.status_code = 413
|
|
return err
|
|
|
|
|
|
@pytest.fixture()
def agent():
    """Yield an AIAgent wired for offline tests: mocked client, no tools/IO.

    Tool discovery, toolset checks, and the OpenAI client are patched during
    construction; afterwards the instance is reconfigured with a MagicMock
    client and side-effect-free settings.
    """
    with (
        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        instance = AIAgent(
            api_key="test-key-1234567890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        for attr, value in {
            "client": MagicMock(),
            "_cached_system_prompt": "You are helpful.",
            "_use_prompt_caching": False,
            "tool_delay": 0,
            "compression_enabled": False,
            "save_trajectories": False,
        }.items():
            setattr(instance, attr, value)
        return instance
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestHTTP413Compression:
    """413 errors should trigger compression, not abort as generic 4xx."""

    @staticmethod
    def _prior_turns():
        # Fresh two-message prefill per call so run_conversation cannot leak
        # mutations between tests.
        return [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

    def test_413_triggers_compression(self, agent):
        """A 413 error should call _compress_context and retry, not abort."""
        # First call raises 413; second call succeeds after compression.
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(),
            _mock_response(content="Success after compression", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Compression reduces 3 messages down to 1.
            compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed prompt",
            )
            outcome = agent.run_conversation(
                "hello", conversation_history=self._prior_turns()
            )

        compress.assert_called_once()
        assert outcome["completed"] is True
        assert outcome["final_response"] == "Success after compression"

    def test_413_not_treated_as_generic_4xx(self, agent):
        """413 must NOT hit the generic 4xx abort path; it should attempt compression."""
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(),
            _mock_response(content="Recovered", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            outcome = agent.run_conversation(
                "hello", conversation_history=self._prior_turns()
            )

        # If 413 were treated as generic 4xx, result would have "failed": True.
        assert outcome.get("failed") is not True
        assert outcome["completed"] is True

    def test_413_error_message_detection(self, agent):
        """413 detected via error message string (no status_code attr)."""
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(use_status_code=False, message="error code: 413"),
            _mock_response(content="OK", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            outcome = agent.run_conversation(
                "hello", conversation_history=self._prior_turns()
            )

        compress.assert_called_once()
        assert outcome["completed"] is True

    def test_413_clears_conversation_history_on_persist(self, agent):
        """After 413-triggered compression, _persist_session must receive None history.

        Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0,
        but if conversation_history still holds the original (pre-compression) list,
        _flush_messages_to_session_db computes flush_from = max(len(history), 0) which
        exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing
        is written to the new session → "Session found but has no messages" on resume.
        """
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(),
            _mock_response(content="OK", finish_reason="stop"),
        ]

        oversized = [
            {"role": "assistant" if i % 2 else "user", "content": f"msg {i}"}
            for i in range(200)
        ]
        observed_histories = []

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(
                agent, "_persist_session",
                side_effect=lambda msgs, hist: observed_histories.append(hist),
            ),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "summary"}],
                "compressed prompt",
            )
            agent.run_conversation("hello", conversation_history=oversized)

        assert len(observed_histories) >= 1, "Expected at least one _persist_session call"
        for hist in observed_histories:
            assert hist is None, (
                f"conversation_history should be None after mid-loop compression, "
                f"got list with {len(hist)} items"
            )

    def test_context_overflow_clears_conversation_history_on_persist(self, agent):
        """After context-overflow compression, _persist_session must receive None history."""
        overflow = Exception(
            "Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
            "However, you requested about 270460 tokens."
        )
        overflow.status_code = 400
        agent.client.chat.completions.create.side_effect = [
            overflow,
            _mock_response(content="OK", finish_reason="stop"),
        ]

        oversized = [
            {"role": "assistant" if i % 2 else "user", "content": f"msg {i}"}
            for i in range(200)
        ]
        observed_histories = []

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(
                agent, "_persist_session",
                side_effect=lambda msgs, hist: observed_histories.append(hist),
            ),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "summary"}],
                "compressed prompt",
            )
            agent.run_conversation("hello", conversation_history=oversized)

        assert len(observed_histories) >= 1
        for hist in observed_histories:
            assert hist is None, (
                f"conversation_history should be None after context-overflow compression, "
                f"got list with {len(hist)} items"
            )

    def test_400_context_length_triggers_compression(self, agent):
        """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.

        OpenRouter returns HTTP 400 (not 413) for context-length errors. Before
        the fix, this was caught by the generic 4xx handler which aborted
        immediately — now it correctly triggers compression+retry.
        """
        overflow = Exception(
            "Error code: 400 - {'error': {'message': "
            "\"This endpoint's maximum context length is 204800 tokens. "
            "However, you requested about 270460 tokens.\", 'code': 400}}"
        )
        overflow.status_code = 400
        agent.client.chat.completions.create.side_effect = [
            overflow,
            _mock_response(content="Recovered after compression", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed prompt",
            )
            outcome = agent.run_conversation(
                "hello", conversation_history=self._prior_turns()
            )

        compress.assert_called_once()
        # Must NOT have "failed": True (which would mean the generic 4xx handler caught it).
        assert outcome.get("failed") is not True
        assert outcome["completed"] is True
        assert outcome["final_response"] == "Recovered after compression"

    def test_400_reduce_length_triggers_compression(self, agent):
        """A 400 with 'reduce the length' should trigger compression."""
        overflow = Exception(
            "Error code: 400 - Please reduce the length of the messages"
        )
        overflow.status_code = 400
        agent.client.chat.completions.create.side_effect = [
            overflow,
            _mock_response(content="OK", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            outcome = agent.run_conversation(
                "hello", conversation_history=self._prior_turns()
            )

        compress.assert_called_once()
        assert outcome["completed"] is True

    def test_context_length_retry_rebuilds_request_after_compression(self, agent):
        """Retry must send the compressed transcript, not the stale oversized payload."""
        overflow = Exception(
            "Error code: 400 - {'error': {'message': "
            "\"This endpoint's maximum context length is 128000 tokens. "
            "Please reduce the length of the messages.\"}}"
        )
        overflow.status_code = 400
        success = _mock_response(
            content="Recovered after real compression", finish_reason="stop"
        )

        recorded_payloads = []

        def fake_create(**kwargs):
            # Record every request; fail only the first with a context-length 400.
            recorded_payloads.append(kwargs)
            if len(recorded_payloads) == 1:
                raise overflow
            return success

        agent.client.chat.completions.create.side_effect = fake_create

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "compressed summary"}],
                "compressed prompt",
            )
            outcome = agent.run_conversation(
                "hello", conversation_history=self._prior_turns()
            )

        assert outcome["completed"] is True
        assert len(recorded_payloads) == 2
        assert len(recorded_payloads[1]["messages"]) < len(recorded_payloads[0]["messages"])
        assert recorded_payloads[1]["messages"][0] == {
            "role": "system",
            "content": "compressed prompt",
        }
        assert recorded_payloads[1]["messages"][1] == {
            "role": "user",
            "content": "compressed summary",
        }

    def test_413_cannot_compress_further(self, agent):
        """When compression can't reduce messages, return partial result."""
        agent.client.chat.completions.create.side_effect = [_make_413_error()]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Compression returns same number of messages → can't compress further.
            compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "same prompt",
            )
            outcome = agent.run_conversation("hello")

        assert outcome["completed"] is False
        assert outcome.get("partial") is True
        assert "413" in outcome["error"]
|
|
|
class TestPreflightCompression:
    """Preflight compression should compress history before the first API call."""

    def test_preflight_compresses_oversized_history(self, agent):
        """When loaded history exceeds the model's context threshold, compress before API call."""
        agent.compression_enabled = True
        # Context small enough that the history counts as "oversized", yet large
        # enough that the compressed result (2 short messages) fits in one pass.
        agent.context_compressor.context_length = 2000
        agent.context_compressor.threshold_tokens = 200

        # History big enough to trigger preflight:
        # each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold.
        oversized = []
        for i in range(20):
            oversized.append(
                {"role": "user", "content": f"Message number {i} with some extra text padding"}
            )
            oversized.append(
                {"role": "assistant", "content": f"Response number {i} with extra padding here"}
            )

        agent.client.chat.completions.create.side_effect = [
            _mock_response(content="After preflight", finish_reason="stop")
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Simulate compression reducing messages to a small set that fits.
            compress.return_value = (
                [
                    {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
                    {"role": "user", "content": "hello"},
                ],
                "new system prompt",
            )
            outcome = agent.run_conversation("hello", conversation_history=oversized)

        # Preflight compression is a multi-pass loop (up to 3 passes for very
        # large sessions, breaking when no further reduction is possible).
        # First pass must have received the full oversized history.
        assert compress.call_count >= 1, "Preflight compression never ran"
        first_pass_messages = compress.call_args_list[0].args[0]
        assert len(first_pass_messages) >= 40, (
            f"First preflight pass should see the full history, got "
            f"{len(first_pass_messages)} messages"
        )
        assert outcome["completed"] is True
        assert outcome["final_response"] == "After preflight"

    def test_no_preflight_when_under_threshold(self, agent):
        """When history fits within context, no preflight compression needed."""
        agent.compression_enabled = True
        # Huge context — the tiny history easily fits.
        agent.context_compressor.context_length = 1000000
        agent.context_compressor.threshold_tokens = 850000

        agent.client.chat.completions.create.side_effect = [
            _mock_response(content="No compression needed", finish_reason="stop")
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            outcome = agent.run_conversation(
                "hello",
                conversation_history=[
                    {"role": "user", "content": "hi"},
                    {"role": "assistant", "content": "hello"},
                ],
            )

        compress.assert_not_called()
        assert outcome["completed"] is True

    def test_no_preflight_when_compression_disabled(self, agent):
        """Preflight should not run when compression is disabled."""
        agent.compression_enabled = False
        agent.context_compressor.context_length = 100
        agent.context_compressor.threshold_tokens = 85

        oversized = [
            {"role": "user", "content": "x" * 1000},
            {"role": "assistant", "content": "y" * 1000},
        ] * 10

        agent.client.chat.completions.create.side_effect = [
            _mock_response(content="OK", finish_reason="stop")
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            agent.run_conversation("hello", conversation_history=oversized)

        compress.assert_not_called()
|
|
class TestToolResultPreflightCompression:
    """Compression should trigger when tool results push context past the threshold."""

    def test_large_tool_results_trigger_compression(self, agent):
        """When tool results push estimated tokens past threshold, compress before next call."""
        agent.compression_enabled = True
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 130_000  # below the 135k reported usage
        agent.context_compressor.last_prompt_tokens = 130_000
        agent.context_compressor.last_completion_tokens = 5_000

        tool_call = SimpleNamespace(
            id="tc1", type="function",
            function=SimpleNamespace(name="web_search", arguments='{"query":"test"}'),
        )
        # First response asks for a tool; second completes after compression.
        first = _mock_response(
            content=None, finish_reason="stop", tool_calls=[tool_call],
            usage={"prompt_tokens": 130_000, "completion_tokens": 5_000, "total_tokens": 135_000},
        )
        second = _mock_response(
            content="Done after compression", finish_reason="stop",
            usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
        )
        agent.client.chat.completions.create.side_effect = [first, second]

        with (
            patch("run_agent.handle_function_call", return_value="x" * 100_000),
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "hello"}], "compressed prompt",
            )
            outcome = agent.run_conversation("hello")

        compress.assert_called_once()
        assert outcome["completed"] is True

    def test_anthropic_prompt_too_long_safety_net(self, agent):
        """Anthropic 'prompt is too long' error triggers compression as safety net."""
        overflow = Exception(
            "Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', "
            "'message': 'prompt is too long: 233153 tokens > 200000 maximum'}}"
        )
        overflow.status_code = 400
        agent.client.chat.completions.create.side_effect = [
            overflow,
            _mock_response(content="Recovered", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            compress.return_value = (
                [{"role": "user", "content": "hello"}], "compressed",
            )
            outcome = agent.run_conversation(
                "hello",
                conversation_history=[
                    {"role": "user", "content": "previous"},
                    {"role": "assistant", "content": "answer"},
                ],
            )

        compress.assert_called_once()
        assert outcome["completed"] is True