mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): include tool tokens in preflight estimate, guard context probe persistence (#3164)
Two improvements salvaged from PR #2600 (paraddox): 1. Preflight compression now counts tool schema tokens alongside system prompt and messages. With 50+ tools enabled, schemas can add 20-30K tokens that were previously invisible to the estimator, delaying compression until the API rejected the request. 2. Context probe persistence guard: when the agent steps down context tiers after a context-length error, only provider-confirmed numeric limits (parsed from the error message) are cached to disk. Guessed fallback tiers from get_next_probe_tier() stay in-memory only, preventing wrong values from polluting the persistent cache. Co-authored-by: paraddox <paraddox@users.noreply.github.com>
This commit is contained in:
parent
9989e579da
commit
43af094ae3
2 changed files with 52 additions and 10 deletions
|
|
@ -895,3 +895,26 @@ def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int:
|
|||
"""Rough token estimate for a message list (pre-flight only)."""
|
||||
total_chars = sum(len(str(msg)) for msg in messages)
|
||||
return total_chars // 4
|
||||
|
||||
|
||||
def estimate_request_tokens_rough(
|
||||
messages: List[Dict[str, Any]],
|
||||
*,
|
||||
system_prompt: str = "",
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> int:
|
||||
"""Rough token estimate for a full chat-completions request.
|
||||
|
||||
Includes the major payload buckets Hermes sends to providers:
|
||||
system prompt, conversation messages, and tool schemas. With 50+
|
||||
tools enabled, schemas alone can add 20-30K tokens — a significant
|
||||
blind spot when only counting messages.
|
||||
"""
|
||||
total_chars = 0
|
||||
if system_prompt:
|
||||
total_chars += len(system_prompt)
|
||||
if messages:
|
||||
total_chars += sum(len(str(msg)) for msg in messages)
|
||||
if tools:
|
||||
total_chars += len(str(tools))
|
||||
return total_chars // 4
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue