diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 3af2e7bafb..03f70b3fe4 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1045,16 +1045,21 @@ def get_model_context_length( def estimate_tokens_rough(text: str) -> int: - """Rough token estimate (~4 chars/token) for pre-flight checks.""" + """Rough token estimate (~4 chars/token) for pre-flight checks. + + Uses ceiling division so short texts (1-3 chars) never estimate as + 0 tokens, which would cause the compressor and pre-flight checks to + systematically undercount when many short tool results are present. + """ if not text: return 0 - return len(text) // 4 + return (len(text) + 3) // 4 def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int: """Rough token estimate for a message list (pre-flight only).""" total_chars = sum(len(str(msg)) for msg in messages) - return total_chars // 4 + return (total_chars + 3) // 4 def estimate_request_tokens_rough( @@ -1077,4 +1082,4 @@ def estimate_request_tokens_rough( total_chars += sum(len(str(msg)) for msg in messages) if tools: total_chars += len(str(tools)) - return total_chars // 4 + return (total_chars + 3) // 4 diff --git a/run_agent.py b/run_agent.py index bc4ad2754b..7b97c0ded2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8049,7 +8049,7 @@ class AIAgent: # Calculate approximate request size for logging total_chars = sum(len(str(msg)) for msg in api_messages) - approx_tokens = total_chars // 4 # Rough estimate: 4 chars per token + approx_tokens = estimate_messages_tokens_rough(api_messages) # Thinking spinner for quiet mode (animated during API call) thinking_spinner = None diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 1eac37e20f..df680fb241 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -50,7 +50,8 @@ class TestEstimateTokensRough: assert estimate_tokens_rough("a" * 400) == 100 def test_short_text(self): - assert estimate_tokens_rough("hello") == 1 + # "hello" = 5 chars → ceil(5/4) = 2 + assert estimate_tokens_rough("hello") == 2 def test_proportional(self): short = estimate_tokens_rough("hello world") @@ -68,10 +69,11 @@ class TestEstimateMessagesTokensRough: assert estimate_messages_tokens_rough([]) == 0 def test_single_message_concrete_value(self): - """Verify against known str(msg) length.""" + """Verify against known str(msg) length (ceiling division).""" msg = {"role": "user", "content": "a" * 400} result = estimate_messages_tokens_rough([msg]) - expected = len(str(msg)) // 4 + n = len(str(msg)) + expected = (n + 3) // 4 assert result == expected def test_multiple_messages_additive(self): @@ -80,7 +82,8 @@ class TestEstimateMessagesTokensRough: {"role": "assistant", "content": "Hi there, how can I help?"}, ] result = estimate_messages_tokens_rough(msgs) - expected = sum(len(str(m)) for m in msgs) // 4 + n = sum(len(str(m)) for m in msgs) + expected = (n + 3) // 4 assert result == expected def test_tool_call_message(self): @@ -89,7 +92,7 @@ class TestEstimateMessagesTokensRough: "tool_calls": [{"id": "1", "function": {"name": "terminal", "arguments": "{}"}}]} result = estimate_messages_tokens_rough([msg]) assert result > 0 - assert result == len(str(msg)) // 4 + assert result == (len(str(msg)) + 3) // 4 def test_message_with_list_content(self): """Vision messages with multimodal content arrays.""" @@ -98,7 +101,7 @@ class TestEstimateMessagesTokensRough: {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}} ]} result = estimate_messages_tokens_rough([msg]) - assert result == len(str(msg)) // 4 + assert result == (len(str(msg)) + 3) // 4 # =========================================================================