mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fixup some compression stuff
This commit is contained in:
parent
43dee2e1cf
commit
d988343570
1 changed files with 9 additions and 25 deletions
34
run_agent.py
34
run_agent.py
|
|
@ -7458,7 +7458,7 @@ class AIAgent:
|
|||
or 'error code: 413' in error_msg
|
||||
)
|
||||
|
||||
if is_payload_too_large:
|
||||
if is_payload_too_large and self.compression_enabled:
|
||||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
|
||||
|
|
@ -7473,30 +7473,14 @@ class AIAgent:
|
|||
"partial": True
|
||||
}
|
||||
self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
|
||||
|
||||
original_len = len(messages)
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
messages, system_message, approx_tokens=approx_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
|
||||
if len(messages) < original_len:
|
||||
self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
||||
time.sleep(2) # Brief pause between compression retries
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": "Request payload too large (413). Cannot compress further.",
|
||||
"partial": True
|
||||
}
|
||||
elif is_payload_too_large and not self.compression_enabled:
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": "Request payload too large (413). Cannot compress further.",
|
||||
"partial": True
|
||||
}
|
||||
|
||||
# Check for context-length errors BEFORE generic 4xx handler.
|
||||
# Local backends (LM Studio, Ollama, llama.cpp) often return
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue