mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
Remove unused imports (F401) and duplicate/shadowed import redefinitions (F811) across the codebase using ruff's safe autofixes. No behavioral changes -- imports only.

- ~1400 safe autofixes applied across 644 files (net -1072 lines)
- __init__.py re-exports preserved (excluded from F401 removal so public re-export surfaces stay intact)
- Re-exports that are imported or monkeypatched by tests but look unused in their defining module are kept with explicit # noqa: F401 (gateway/run.py load_dotenv; run_agent re-exports from agent.message_sanitization, agent.context_compressor, agent.retry_utils, agent.prompt_builder, agent.process_bootstrap, agent.codex_responses_adapter)
- Unsafe F841 (unused-variable) fixes deliberately skipped -- those can change behavior when the RHS has side effects
- ruff lints remain disabled in pyproject.toml (only PLW1514 is selected); this is a one-time cleanup, not a config change

Verification:
- python -m compileall: clean
- pytest --collect-only: all 27161 tests collect (zero import errors)
- core entry points import clean (run_agent, model_tools, cli, toolsets, hermes_state, batch_runner, gateway)
- static scan: every name any test imports directly from an edited module still resolves
59 lines
2.3 KiB
Python
59 lines
2.3 KiB
Python
"""Verify compression trigger excludes reasoning/completion tokens (#12026).

Thinking models (GLM-5.1, QwQ, DeepSeek R1) inflate completion_tokens with
reasoning tokens that don't consume context window space. The compression
trigger must use only prompt_tokens so sessions aren't prematurely split.
"""
|
|
|
|
import types
|
|
|
|
|
|
def _make_agent_stub(prompt_tokens, completion_tokens, threshold_tokens):
    """Build a stand-in compressor and compute the token count the trigger uses.

    Args:
        prompt_tokens: Value stored as ``last_prompt_tokens`` on the stub.
        completion_tokens: Value stored as ``last_completion_tokens``; it is
            recorded on the stub but never added to the returned count.
        threshold_tokens: Value stored as ``threshold_tokens`` on the stub.

    Returns:
        A ``(real_tokens, compressor)`` tuple. ``real_tokens`` equals the
        prompt-token count when that count is positive and ``0`` otherwise;
        ``compressor`` is the ``types.SimpleNamespace`` holding all three
        values.
    """
    stub_fields = {
        "last_prompt_tokens": prompt_tokens,
        "last_completion_tokens": completion_tokens,
        "threshold_tokens": threshold_tokens,
    }
    stub = types.SimpleNamespace(**stub_fields)

    # Mirrors the fixed logic from run_agent.py (~line 11273 per the original
    # note): only the prompt count is considered, completion is left out.
    prompt_count = stub.last_prompt_tokens
    counted = prompt_count if prompt_count > 0 else 0
    return counted, stub
|
|
|
|
|
|
class TestCompressionTriggerExcludesReasoning:
    """Checks that the threshold comparison sees prompt tokens only."""

    def test_high_reasoning_tokens_should_not_trigger_compression(self):
        """A reasoning-heavy turn must stay under the threshold.

        Adding 40k prompt and 80k completion tokens would give 120k, which
        exceeds the 100k threshold; counting the prompt alone gives 40k, so
        no compression is expected.
        """
        counted, stub = _make_agent_stub(
            prompt_tokens=40_000,
            completion_tokens=80_000,  # reasoning-heavy model
            threshold_tokens=100_000,
        )

        assert counted == 40_000
        assert counted < stub.threshold_tokens, (
            "Should NOT trigger compression — only prompt tokens matter"
        )

    def test_high_prompt_tokens_should_trigger_compression(self):
        """A prompt that really is over the threshold must still compress."""
        counted, stub = _make_agent_stub(
            prompt_tokens=110_000,
            completion_tokens=5_000,
            threshold_tokens=100_000,
        )

        assert counted == 110_000
        assert counted >= stub.threshold_tokens, (
            "Should trigger compression — prompt tokens exceed threshold"
        )

    def test_zero_prompt_tokens_falls_back(self):
        """A reported prompt count of 0 yields 0, whatever the completion count."""
        outcome = _make_agent_stub(
            prompt_tokens=0,
            completion_tokens=50_000,
            threshold_tokens=100_000,
        )
        counted = outcome[0]

        assert counted == 0
|