mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Cuts shard-3 local runtime in half by neutralizing real wall-clock waits across three classes of slow test: ## 1. Retry backoff mocks - tests/run_agent/conftest.py (NEW): autouse fixture mocks jittered_backoff to 0.0 so the `while time.time() < sleep_end` busy-loop exits immediately. No global time.sleep mock (would break threading tests). - test_anthropic_error_handling, test_413_compression, test_run_agent_codex_responses, test_fallback_model: per-file fixtures mock time.sleep / asyncio.sleep for retry / compression paths. - test_retaindb_plugin: cap the retaindb module's bound time.sleep to 0.05s via a per-test shim (background writer-thread retries sleep 2s after errors; tests don't care about exact duration). Plus replace arbitrary time.sleep(N) waits with short polling loops bounded by deadline. ## 2. Subprocess sleeps in production code - test_update_gateway_restart: mock time.sleep. Production code does time.sleep(3) after `systemctl restart` to verify the service survived. Tests mock subprocess.run — nothing actually restarts — so the wait is dead time. ## 3. Network / IMDS timeouts (biggest single win) - tests/conftest.py: add AWS_EC2_METADATA_DISABLED=true plus AWS_METADATA_SERVICE_TIMEOUT=1 and ATTEMPTS=1. boto3 falls back to IMDS (169.254.169.254) when no AWS creds are set. Any test hitting has_aws_credentials() / resolve_aws_auth_env_var() (e.g. test_status, test_setup_copilot_acp, anything that touches provider auto-detect) burned ~2-4s waiting for that to time out. - test_exit_cleanup_interrupt: explicitly mock resolve_runtime_provider which was doing real network auto-detect (~4s). Tests don't care about provider resolution — the agent is already mocked. - test_timezone: collapse the 3-test "TZ env in subprocess" suite into 2 tests by checking both injection AND no-leak in the same subprocess spawn (was 3 × 3.2s, now 2 × 4s).
## Validation | Test | Before | After | |---|---|---| | test_anthropic_error_handling (8 tests) | ~80s | ~15s | | test_413_compression (14 tests) | ~18s | 2.3s | | test_retaindb_plugin (67 tests) | ~13s | 1.3s | | test_status_includes_tavily_key | 4.0s | 0.05s | | test_setup_copilot_acp_skips_same_provider_pool_step | 8.0s | 0.26s | | test_update_gateway_restart (5 tests) | ~18s total | ~0.35s total | | test_exit_cleanup_interrupt (2 tests) | 8s | 1.5s | | **Matrix shard 3 local** | **108s** | **50s** | No behavioral contract changed — tests still verify retry happens, service restart logic runs, etc.; they just don't burn real seconds waiting for it. Supersedes PR #11779 (those changes are included here).
582 lines
24 KiB
Python
582 lines
24 KiB
Python
"""Tests for payload/context-length → compression retry logic in AIAgent.
|
|
|
|
Verifies that:
|
|
- HTTP 413 errors trigger history compression and retry
|
|
- HTTP 400 context-length errors trigger compression (not generic 4xx abort)
|
|
- Preflight compression proactively compresses oversized sessions before API calls
|
|
"""
|
|
|
|
import pytest
|
|
#pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
|
|
|
|
|
|
|
|
import uuid
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from agent.context_compressor import SUMMARY_PREFIX
|
|
from run_agent import AIAgent
|
|
import run_agent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fast backoff for compression retry tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _no_compression_sleep(monkeypatch):
    """Neutralize real waits between compression retries.

    Production code calls ``time.sleep(2)`` in several places after a
    413/context-length compression, purely for rate-limit smoothing.
    These tests assert behavior, not timing, so both the sleep and the
    jittered retry backoff are stubbed out for every test in this file.
    """
    import time as _time

    def _noop(*args, **kwargs):
        return None

    monkeypatch.setattr(_time, "sleep", _noop)
    # Retry backoff becomes instantaneous as well.
    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *args, **kwargs: 0.0)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_tool_defs(*names: str) -> list:
|
|
return [
|
|
{
|
|
"type": "function",
|
|
"function": {
|
|
"name": n,
|
|
"description": f"{n} tool",
|
|
"parameters": {"type": "object", "properties": {}},
|
|
},
|
|
}
|
|
for n in names
|
|
]
|
|
|
|
|
|
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None, usage=None):
|
|
msg = SimpleNamespace(
|
|
content=content,
|
|
tool_calls=tool_calls,
|
|
reasoning_content=None,
|
|
reasoning=None,
|
|
)
|
|
choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
|
|
resp = SimpleNamespace(choices=[choice], model="test/model")
|
|
resp.usage = SimpleNamespace(**usage) if usage else None
|
|
return resp
|
|
|
|
|
|
def _make_413_error(*, use_status_code=True, message="Request entity too large"):
|
|
"""Create an exception that mimics a 413 HTTP error."""
|
|
err = Exception(message)
|
|
if use_status_code:
|
|
err.status_code = 413
|
|
return err
|
|
|
|
|
|
@pytest.fixture()
def agent():
    """Construct an AIAgent with every external dependency stubbed out.

    Tool discovery, toolset checks, and the OpenAI client are patched during
    construction; afterwards the client is replaced with a MagicMock and all
    slow/side-effecting features (caching, compression, trajectories) are
    disabled so tests drive the conversation loop directly.
    """
    with (
        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        instance = AIAgent(
            api_key="test-key-1234567890",
            base_url="https://openrouter.ai/api/v1",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        instance.client = MagicMock()
        instance._cached_system_prompt = "You are helpful."
        instance._use_prompt_caching = False
        instance.tool_delay = 0
        instance.compression_enabled = False
        instance.save_trajectories = False
        return instance
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestHTTP413Compression:
    """413 errors should trigger compression, not abort as generic 4xx."""

    def test_413_triggers_compression(self, agent):
        """A 413 error should call _compress_context and retry, not abort."""
        # First call raises 413; second call succeeds after compression.
        err_413 = _make_413_error()
        ok_resp = _mock_response(content="Success after compression", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]

        # Prefill so there are multiple messages for compression to reduce
        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Compression reduces 3 messages down to 1
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["completed"] is True
        assert result["final_response"] == "Success after compression"

    def test_413_not_treated_as_generic_4xx(self, agent):
        """413 must NOT hit the generic 4xx abort path; it should attempt compression."""
        err_413 = _make_413_error()
        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        # If 413 were treated as generic 4xx, result would have "failed": True
        assert result.get("failed") is not True
        assert result["completed"] is True

    def test_413_error_message_detection(self, agent):
        """413 detected via error message string (no status_code attr)."""
        # No status_code attribute — detection must fall back to the message text.
        err = _make_413_error(use_status_code=False, message="error code: 413")
        ok_resp = _mock_response(content="OK", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err, ok_resp]

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["completed"] is True

    def test_413_clears_conversation_history_on_persist(self, agent):
        """After 413-triggered compression, _persist_session must receive None history.

        Bug: _compress_context() creates a new session and resets _last_flushed_db_idx=0,
        but if conversation_history still holds the original (pre-compression) list,
        _flush_messages_to_session_db computes flush_from = max(len(history), 0) which
        exceeds len(compressed_messages), so messages[flush_from:] is empty and nothing
        is written to the new session → "Session found but has no messages" on resume.
        """
        err_413 = _make_413_error()
        ok_resp = _mock_response(content="OK", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]

        # 200 alternating user/assistant messages — large enough to make any
        # stale-history index arithmetic visibly wrong.
        big_history = [
            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
            for i in range(200)
        ]

        # Capture the `hist` argument of every _persist_session call.
        persist_calls = []

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(
                agent, "_persist_session",
                side_effect=lambda msgs, hist: persist_calls.append(hist),
            ),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "summary"}],
                "compressed prompt",
            )
            agent.run_conversation("hello", conversation_history=big_history)

        assert len(persist_calls) >= 1, "Expected at least one _persist_session call"
        for hist in persist_calls:
            assert hist is None, (
                f"conversation_history should be None after mid-loop compression, "
                f"got list with {len(hist)} items"
            )

    def test_context_overflow_clears_conversation_history_on_persist(self, agent):
        """After context-overflow compression, _persist_session must receive None history."""
        # Same bug as above, but triggered via a 400 context-length error.
        err_400 = Exception(
            "Error code: 400 - This endpoint's maximum context length is 128000 tokens. "
            "However, you requested about 270460 tokens."
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="OK", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]

        big_history = [
            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
            for i in range(200)
        ]

        persist_calls = []

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(
                agent, "_persist_session",
                side_effect=lambda msgs, hist: persist_calls.append(hist),
            ),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "summary"}],
                "compressed prompt",
            )
            agent.run_conversation("hello", conversation_history=big_history)

        assert len(persist_calls) >= 1
        for hist in persist_calls:
            assert hist is None, (
                f"conversation_history should be None after context-overflow compression, "
                f"got list with {len(hist)} items"
            )

    def test_400_context_length_triggers_compression(self, agent):
        """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.

        OpenRouter returns HTTP 400 (not 413) for context-length errors. Before
        the fix, this was caught by the generic 4xx handler which aborted
        immediately — now it correctly triggers compression+retry.
        """
        err_400 = Exception(
            "Error code: 400 - {'error': {'message': "
            "\"This endpoint's maximum context length is 204800 tokens. "
            "However, you requested about 270460 tokens.\", 'code': 400}}"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        # Must NOT have "failed": True (which would mean the generic 4xx handler caught it)
        assert result.get("failed") is not True
        assert result["completed"] is True
        assert result["final_response"] == "Recovered after compression"

    def test_400_reduce_length_triggers_compression(self, agent):
        """A 400 with 'reduce the length' should trigger compression."""
        # Alternate provider phrasing for the same context-overflow condition.
        err_400 = Exception(
            "Error code: 400 - Please reduce the length of the messages"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="OK", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["completed"] is True

    def test_context_length_retry_rebuilds_request_after_compression(self, agent):
        """Retry must send the compressed transcript, not the stale oversized payload."""
        err_400 = Exception(
            "Error code: 400 - {'error': {'message': "
            "\"This endpoint's maximum context length is 128000 tokens. "
            "Please reduce the length of the messages.\"}}"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered after real compression", finish_reason="stop")

        # Record every request payload so we can compare the retry against the
        # first (oversized) attempt.
        request_payloads = []

        def _side_effect(**kwargs):
            request_payloads.append(kwargs)
            if len(request_payloads) == 1:
                raise err_400
            return ok_resp

        agent.client.chat.completions.create.side_effect = _side_effect

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "compressed summary"}],
                "compressed prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        assert result["completed"] is True
        assert len(request_payloads) == 2
        # Retry payload must be strictly smaller and rebuilt from the
        # compressed system prompt + compressed transcript.
        assert len(request_payloads[1]["messages"]) < len(request_payloads[0]["messages"])
        assert request_payloads[1]["messages"][0] == {
            "role": "system",
            "content": "compressed prompt",
        }
        assert request_payloads[1]["messages"][1] == {
            "role": "user",
            "content": "compressed summary",
        }

    def test_413_cannot_compress_further(self, agent):
        """When compression can't reduce messages, return partial result."""
        err_413 = _make_413_error()
        agent.client.chat.completions.create.side_effect = [err_413]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Compression returns same number of messages → can't compress further
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "same prompt",
            )
            result = agent.run_conversation("hello")

        assert result["completed"] is False
        assert result.get("partial") is True
        assert "413" in result["error"]
|
|
|
|
|
|
class TestPreflightCompression:
    """Preflight compression should compress history before the first API call."""

    def test_preflight_compresses_oversized_history(self, agent):
        """When loaded history exceeds the model's context threshold, compress before API call."""
        agent.compression_enabled = True
        # Set a small context so the history is "oversized", but large enough
        # that the compressed result (2 short messages) fits in a single pass.
        agent.context_compressor.context_length = 2000
        agent.context_compressor.threshold_tokens = 200

        # Build a history that will be large enough to trigger preflight
        # (each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold)
        big_history = []
        for i in range(20):
            big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"})
            big_history.append({"role": "assistant", "content": f"Response number {i} with extra padding here"})

        # Only one API call expected — compression must happen BEFORE it.
        ok_resp = _mock_response(content="After preflight", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [ok_resp]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Simulate compression reducing messages to a small set that fits
            mock_compress.return_value = (
                [
                    {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
                    {"role": "user", "content": "hello"},
                ],
                "new system prompt",
            )
            result = agent.run_conversation("hello", conversation_history=big_history)

        # Preflight compression is a multi-pass loop (up to 3 passes for very
        # large sessions, breaking when no further reduction is possible).
        # First pass must have received the full oversized history.
        assert mock_compress.call_count >= 1, "Preflight compression never ran"
        first_call_messages = mock_compress.call_args_list[0].args[0]
        assert len(first_call_messages) >= 40, (
            f"First preflight pass should see the full history, got "
            f"{len(first_call_messages)} messages"
        )
        assert result["completed"] is True
        assert result["final_response"] == "After preflight"

    def test_no_preflight_when_under_threshold(self, agent):
        """When history fits within context, no preflight compression needed."""
        agent.compression_enabled = True
        # Large context — history easily fits
        agent.context_compressor.context_length = 1000000
        agent.context_compressor.threshold_tokens = 850000

        small_history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]

        ok_resp = _mock_response(content="No compression needed", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [ok_resp]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello", conversation_history=small_history)

        mock_compress.assert_not_called()
        assert result["completed"] is True

    def test_no_preflight_when_compression_disabled(self, agent):
        """Preflight should not run when compression is disabled."""
        agent.compression_enabled = False
        # Tiny context that the history clearly exceeds — would trigger
        # preflight if compression were enabled.
        agent.context_compressor.context_length = 100
        agent.context_compressor.threshold_tokens = 85

        big_history = [
            {"role": "user", "content": "x" * 1000},
            {"role": "assistant", "content": "y" * 1000},
        ] * 10

        ok_resp = _mock_response(content="OK", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [ok_resp]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # result intentionally unused — only the "no compression" call
            # count matters here.
            result = agent.run_conversation("hello", conversation_history=big_history)

        mock_compress.assert_not_called()
|
|
|
|
|
|
class TestToolResultPreflightCompression:
    """Compression should trigger when tool results push context past the threshold."""

    def test_large_tool_results_trigger_compression(self, agent):
        """When tool results push estimated tokens past threshold, compress before next call."""
        agent.compression_enabled = True
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 130_000  # below the 135k reported usage
        # Seed the compressor's last-known usage so the post-tool estimate
        # starts above threshold.
        agent.context_compressor.last_prompt_tokens = 130_000
        agent.context_compressor.last_completion_tokens = 5_000

        # First response requests a tool call; its reported usage already sits
        # at the threshold before the 100k-char tool result is appended.
        tc = SimpleNamespace(
            id="tc1", type="function",
            function=SimpleNamespace(name="web_search", arguments='{"query":"test"}'),
        )
        tool_resp = _mock_response(
            content=None, finish_reason="stop", tool_calls=[tc],
            usage={"prompt_tokens": 130_000, "completion_tokens": 5_000, "total_tokens": 135_000},
        )
        ok_resp = _mock_response(
            content="Done after compression", finish_reason="stop",
            usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
        )
        agent.client.chat.completions.create.side_effect = [tool_resp, ok_resp]
        large_result = "x" * 100_000

        with (
            patch("run_agent.handle_function_call", return_value=large_result),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}], "compressed prompt",
            )
            result = agent.run_conversation("hello")

        mock_compress.assert_called_once()
        assert result["completed"] is True

    def test_anthropic_prompt_too_long_safety_net(self, agent):
        """Anthropic 'prompt is too long' error triggers compression as safety net."""
        # Anthropic phrases context overflow differently from OpenRouter;
        # the handler must still recognize it.
        err_400 = Exception(
            "Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', "
            "'message': 'prompt is too long: 233153 tokens > 200000 maximum'}}"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
        prefill = [
            {"role": "user", "content": "previous"},
            {"role": "assistant", "content": "answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}], "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["completed"] is True
|