mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Remove unused imports (F401) and duplicate/shadowed import redefinitions (F811) across the codebase using ruff's safe autofixes. No behavioral changes -- imports only. - ~1400 safe autofixes applied across 644 files (net -1072 lines) - __init__.py re-exports preserved (excluded from F401 removal so public re-export surfaces stay intact) - Re-exports that are imported or monkeypatched by tests but look unused in their defining module are kept with explicit # noqa: F401 (gateway/run.py load_dotenv; run_agent re-exports from agent.message_sanitization, agent.context_compressor, agent.retry_utils, agent.prompt_builder, agent.process_bootstrap, agent.codex_responses_adapter) - Unsafe F841 (unused-variable) fixes deliberately skipped -- those can change behavior when the RHS has side effects - ruff lints remain disabled in pyproject.toml (only PLW1514 is selected); this is a one-time cleanup, not a config change Verification: - python -m compileall: clean - pytest --collect-only: all 27161 tests collect (zero import errors) - core entry points import clean (run_agent, model_tools, cli, toolsets, hermes_state, batch_runner, gateway) - static scan: every name any test imports directly from an edited module still resolves
140 lines
5.8 KiB
Python
140 lines
5.8 KiB
Python
"""Tests for image-token accounting in the context compressor.
|
||
|
||
Covers the native-image-routing PR's companion change: the compressor's
|
||
multimodal message length counter now charges ~1600 tokens per attached
|
||
image part instead of 0, so tail-cut / prune decisions are accurate for
|
||
creative workflows that iterate on images across many turns.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
|
||
from agent.context_compressor import (
|
||
_CHARS_PER_TOKEN,
|
||
_IMAGE_CHAR_EQUIVALENT,
|
||
_IMAGE_TOKEN_ESTIMATE,
|
||
_content_length_for_budget,
|
||
)
|
||
|
||
|
||
class TestContentLengthForBudget:
    """Unit tests for ``_content_length_for_budget`` across content shapes.

    Each test feeds one message-content shape (string, ``None``, list of
    text parts, list with image parts in several provider formats) and
    checks the returned character-equivalent length.
    """

    def test_plain_string(self):
        """A plain string is measured by its character count."""
        text = "hello world"
        assert _content_length_for_budget(text) == 11

    def test_empty_string(self):
        """An empty string contributes nothing."""
        assert _content_length_for_budget("") == 0

    def test_none_coerces_to_zero(self):
        """``None`` content is treated as zero length."""
        assert _content_length_for_budget(None) == 0

    def test_text_only_list(self):
        """Text parts in a list are summed by their text lengths."""
        first_part = {"type": "text", "text": "first"}
        second_part = {"type": "text", "text": "second"}
        measured = _content_length_for_budget([first_part, second_part])
        assert measured == 5 + 6

    def test_single_image_part_charges_fixed_budget(self):
        """One image part adds exactly the fixed image char-equivalent."""
        text_part = {"type": "text", "text": "look"}
        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,XXXX"},
        }
        # Four characters of text plus one image at the flat rate.
        expected = 4 + _IMAGE_CHAR_EQUIVALENT
        assert _content_length_for_budget([text_part, image_part]) == expected

    def test_image_url_raw_base64_is_not_counted_as_chars(self):
        """A 1MB base64 blob inside an image_url must NOT inflate the count.

        The flat image estimate is what the provider actually bills; the raw
        base64 is transport payload, not context tokens.
        """
        huge_url = "data:image/png;base64," + ("A" * 1_000_000)
        only_image = [{"type": "image_url", "image_url": {"url": huge_url}}]
        # The result must be one image's worth, independent of the URL size.
        assert _content_length_for_budget(only_image) == _IMAGE_CHAR_EQUIVALENT

    def test_multiple_image_parts(self):
        """Every image part is charged separately at the flat rate."""
        images = [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,BBB"}},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,CCC"}},
        ]
        content = [{"type": "text", "text": "compare"}, *images]
        expected = 7 + 3 * _IMAGE_CHAR_EQUIVALENT
        assert _content_length_for_budget(content) == expected

    def test_openai_responses_input_image_shape(self):
        """Responses API uses type=input_image with top-level image_url string."""
        text_part = {"type": "input_text", "text": "hey"}
        image_part = {"type": "input_image", "image_url": "data:image/png;base64,XX"}
        # The input_text part carries "hey" (3 chars); the image adds the flat rate.
        expected = 3 + _IMAGE_CHAR_EQUIVALENT
        assert _content_length_for_budget([text_part, image_part]) == expected

    def test_anthropic_native_image_shape(self):
        """Anthropic native shape: {type: image, source: {...}}."""
        text_part = {"type": "text", "text": "hi"}
        image_part = {
            "type": "image",
            "source": {"type": "base64", "media_type": "image/png", "data": "XX"},
        }
        expected = 2 + _IMAGE_CHAR_EQUIVALENT
        assert _content_length_for_budget([text_part, image_part]) == expected

    def test_bare_string_part_in_list(self):
        """Older code paths sometimes produce mixed list-of-strings content."""
        mixed = ["hello", {"type": "text", "text": "world"}]
        assert _content_length_for_budget(mixed) == 5 + 5

    def test_image_estimate_constant_is_reasonable(self):
        """Sanity-check that the estimate aligns with real provider billing.

        Anthropic ≈ width*height/750 → ~1600 for 1000×1200.
        OpenAI GPT-4o high-detail 2048×2048 ≈ 1445.
        Gemini 258/tile × 6 tiles for a 2048×2048 ≈ 1548.

        Anything in the 800-2000 range is defensible; the bounds enforced
        below are slightly wider (800-2500). They exist so an accidental
        edit doesn't drop the constant to e.g. 16.
        """
        lower_bound, upper_bound = 800, 2500
        assert lower_bound <= _IMAGE_TOKEN_ESTIMATE <= upper_bound
        # The char-equivalent must stay derived from the token estimate.
        assert _IMAGE_CHAR_EQUIVALENT == _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
|
||
|
||
|
||
class TestTokenBudgetWithImages:
    """Integration: the compressor's tail-cut decision now respects image cost."""

    def test_image_heavy_turns_count_toward_budget(self):
        """A tail with 5 image-bearing turns should blow past a 5K token budget."""
        from agent.context_compressor import ContextCompressor

        # Bare instance that skips __init__ — only the attribute set below is
        # prepared before calling _find_tail_cut_by_tokens.
        cc = object.__new__(ContextCompressor)
        cc.tail_token_budget = 5000

        # One system message followed by five user/assistant pairs; every
        # user turn carries a short text part and one image part. If images
        # were counted as zero, all of these would appear to fit in 5K and be
        # protected. At ~1600 tokens each, the five images alone cost ~8K, so
        # the tail should be trimmed.
        messages = [{"role": "system", "content": "sys"}]
        for turn in range(5):
            user_msg = {
                "role": "user",
                "content": [
                    {"type": "text", "text": f"turn {turn}"},
                    {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
                ],
            }
            assistant_msg = {"role": "assistant", "content": f"response {turn}"}
            messages += [user_msg, assistant_msg]

        cut = cc._find_tail_cut_by_tokens(messages, head_end=0, token_budget=5000)

        # Budget is 5K (original author's note: soft ceiling 7.5K — not visible
        # in this file). Walking backward, the compressor should stop before
        # including all 5 images. The exact cut depends on text lengths and
        # min_tail, but it MUST be > 1 so that at least some head-side
        # messages remain compressible.
        failure_message = (
            f"Expected image-heavy tail to be trimmed; compressor placed cut at "
            f"{cut} out of {len(messages)} (image tokens were likely ignored)."
        )
        assert cut > 1, failure_message
|