hermes-agent/tests/agent/test_compressor_image_tokens.py
kshitijk4poor 66827f8947 chore: prune unused imports and duplicate import redefinitions
Remove unused imports (F401) and duplicate/shadowed import
redefinitions (F811) across the codebase using ruff's safe
autofixes. No behavioral changes -- imports only.

- ~1400 safe autofixes applied across 644 files (net -1072 lines)
- __init__.py re-exports preserved (excluded from F401 removal so
  public re-export surfaces stay intact)
- Re-exports that are imported or monkeypatched by tests but look
  unused in their defining module are kept with explicit # noqa:
  F401 (gateway/run.py load_dotenv; run_agent re-exports from
  agent.message_sanitization, agent.context_compressor,
  agent.retry_utils, agent.prompt_builder, agent.process_bootstrap,
  agent.codex_responses_adapter)
- Unsafe F841 (unused-variable) fixes deliberately skipped -- those
  can change behavior when the RHS has side effects
- ruff lints remain disabled in pyproject.toml (only PLW1514 is
  selected); this is a one-time cleanup, not a config change

Verification:
- python -m compileall: clean
- pytest --collect-only: all 27161 tests collect (zero import errors)
- core entry points import clean (run_agent, model_tools, cli,
  toolsets, hermes_state, batch_runner, gateway)
- static scan: every name any test imports directly from an edited
  module still resolves
2026-05-28 22:26:25 -07:00

140 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for image-token accounting in the context compressor.
Covers the native-image-routing PR's companion change: the compressor's
multimodal message length counter now charges ~1600 tokens per attached
image part instead of 0, so tail-cut / prune decisions are accurate for
creative workflows that iterate on images across many turns.
"""
from __future__ import annotations
from agent.context_compressor import (
_CHARS_PER_TOKEN,
_IMAGE_CHAR_EQUIVALENT,
_IMAGE_TOKEN_ESTIMATE,
_content_length_for_budget,
)
class TestContentLengthForBudget:
def test_plain_string(self):
assert _content_length_for_budget("hello world") == 11
def test_empty_string(self):
assert _content_length_for_budget("") == 0
def test_none_coerces_to_zero(self):
assert _content_length_for_budget(None) == 0
def test_text_only_list(self):
content = [
{"type": "text", "text": "first"},
{"type": "text", "text": "second"},
]
assert _content_length_for_budget(content) == 5 + 6
def test_single_image_part_charges_fixed_budget(self):
content = [
{"type": "text", "text": "look"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,XXXX"}},
]
# 4 chars of text + 1 image at fixed char-equivalent
assert _content_length_for_budget(content) == 4 + _IMAGE_CHAR_EQUIVALENT
def test_image_url_raw_base64_is_not_counted_as_chars(self):
"""A 1MB base64 blob inside an image_url must NOT inflate token count.
The flat image estimate is what the provider actually bills; the raw
base64 is transport payload, not context tokens.
"""
huge_url = "data:image/png;base64," + ("A" * 1_000_000)
content = [
{"type": "image_url", "image_url": {"url": huge_url}},
]
# Exactly one image's worth, not 1M + something.
assert _content_length_for_budget(content) == _IMAGE_CHAR_EQUIVALENT
def test_multiple_image_parts(self):
content = [
{"type": "text", "text": "compare"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,BBB"}},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,CCC"}},
]
assert _content_length_for_budget(content) == 7 + 3 * _IMAGE_CHAR_EQUIVALENT
def test_openai_responses_input_image_shape(self):
"""Responses API uses type=input_image with top-level image_url string."""
content = [
{"type": "input_text", "text": "hey"},
{"type": "input_image", "image_url": "data:image/png;base64,XX"},
]
# input_text has .text "hey" (3 chars) + 1 image
assert _content_length_for_budget(content) == 3 + _IMAGE_CHAR_EQUIVALENT
def test_anthropic_native_image_shape(self):
"""Anthropic native shape: {type: image, source: {...}}."""
content = [
{"type": "text", "text": "hi"},
{"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "XX"}},
]
assert _content_length_for_budget(content) == 2 + _IMAGE_CHAR_EQUIVALENT
def test_bare_string_part_in_list(self):
"""Older code paths sometimes produce mixed list-of-strings content."""
content = ["hello", {"type": "text", "text": "world"}]
assert _content_length_for_budget(content) == 5 + 5
def test_image_estimate_constant_is_reasonable(self):
"""Sanity-check the estimate aligns with real provider billing.
Anthropic ≈ width*height/750 → ~1600 for 1000×1200.
OpenAI GPT-4o high-detail 2048×2048 ≈ 1445.
Gemini 258/tile × 6 tiles for a 2048×2048 ≈ 1548.
Anything in the 800-2000 range is defensible. Enforce bounds so an
accidental edit doesn't drop it to e.g. 16.
"""
assert 800 <= _IMAGE_TOKEN_ESTIMATE <= 2500
assert _IMAGE_CHAR_EQUIVALENT == _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
class TestTokenBudgetWithImages:
"""Integration: the compressor's tail-cut decision now respects image cost."""
def test_image_heavy_turns_count_toward_budget(self):
"""A tail with 5 image-bearing turns should blow past a 5K token budget."""
from agent.context_compressor import ContextCompressor
# Minimal compressor fixture — just enough to call _find_tail_cut_by_tokens
cc = object.__new__(ContextCompressor)
cc.tail_token_budget = 5000
# Build 10 messages: 5 with images, 5 with short text. Without the
# image-tokens fix, the compressor would think all 10 fit in 5K and
# protect them all. With the fix, images alone cost 5 × 1600 = 8K,
# so the tail should be trimmed.
messages = [{"role": "system", "content": "sys"}]
for i in range(5):
messages.append({
"role": "user",
"content": [
{"type": "text", "text": f"turn {i}"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,AAA"}},
],
})
messages.append({
"role": "assistant",
"content": f"response {i}",
})
cut = cc._find_tail_cut_by_tokens(messages, head_end=0, token_budget=5000)
# Budget is 5K, soft ceiling 7.5K. 5 images alone = 8000 image-tokens.
# Walking backward, the compressor should stop before including all 5.
# Exact cut depends on text lengths and min_tail, but it MUST be > 1
# (at least some head-side messages should be compressible).
assert cut > 1, (
f"Expected image-heavy tail to be trimmed; compressor placed cut at "
f"{cut} out of {len(messages)} (image tokens were likely ignored)."
)