mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
fix(gateway): sync compression session splits before failures
Salvages PR #25747 by preserving gateway session rotation even when a post-compression model call fails before returning final content. Co-authored-by: Hermes <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
parent
2d474e39c7
commit
202e318cb1
3 changed files with 218 additions and 58 deletions
115
gateway/run.py
115
gateway/run.py
|
|
@ -14565,6 +14565,61 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
_context_length = getattr(_agent.context_compressor, "context_length", 0) or 0
|
||||
_resolved_model = getattr(_agent, "model", None) if _agent else None
|
||||
|
||||
# Sync session_id immediately after run_conversation(). Compression
|
||||
# can rotate before a follow-up model call fails; the failure return
|
||||
# below must still point the gateway at the compressed child.
|
||||
agent = agent_holder[0]
|
||||
_session_was_split = False
|
||||
agent_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
|
||||
if agent and session_key and agent_session_id != session_id:
|
||||
_session_was_split = True
|
||||
logger.info(
|
||||
"Session split detected: %s → %s (compression)",
|
||||
session_id, agent_session_id,
|
||||
)
|
||||
entry = self.session_store._entries.get(session_key)
|
||||
if entry:
|
||||
entry.session_id = agent_session_id
|
||||
self.session_store._save()
|
||||
|
||||
# If this is a Telegram DM and source.thread_id was lost during
|
||||
# the session split (synthetic / recovered event), restore it
|
||||
# from the binding so _thread_metadata_for_source produces the
|
||||
# correct message_thread_id instead of routing to the General
|
||||
# thread. Failure here is non-fatal — we log and continue;
|
||||
# worst case the message lands in General, which is the
|
||||
# pre-fix behaviour.
|
||||
if (
|
||||
getattr(source, "platform", None) == Platform.TELEGRAM
|
||||
and getattr(source, "chat_type", None) == "dm"
|
||||
and getattr(source, "thread_id", None) is None
|
||||
and self._session_db is not None
|
||||
):
|
||||
try:
|
||||
_binding = self._session_db.get_telegram_topic_binding_by_session(
|
||||
session_id=agent_session_id,
|
||||
)
|
||||
if _binding and _binding.get("thread_id"):
|
||||
source.thread_id = str(_binding["thread_id"])
|
||||
logger.debug(
|
||||
"Restored source.thread_id=%s from binding after session split %s → %s",
|
||||
source.thread_id,
|
||||
session_id,
|
||||
agent_session_id,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"Failed to restore thread_id from binding after session split",
|
||||
exc_info=True,
|
||||
)
|
||||
if entry:
|
||||
self._sync_telegram_topic_binding(
|
||||
source, entry, reason="agent-run-compression",
|
||||
)
|
||||
|
||||
effective_session_id = agent_session_id
|
||||
_effective_history_offset = 0 if _session_was_split else len(agent_history)
|
||||
|
||||
if not final_response:
|
||||
error_msg = f"⚠️ {result['error']}" if result.get("error") else ""
|
||||
return {
|
||||
|
|
@ -14579,7 +14634,8 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
"error": result.get("error"),
|
||||
"compression_exhausted": result.get("compression_exhausted", False),
|
||||
"tools": tools_holder[0] or [],
|
||||
"history_offset": len(agent_history),
|
||||
"history_offset": _effective_history_offset,
|
||||
"session_id": effective_session_id,
|
||||
"last_prompt_tokens": _last_prompt_toks,
|
||||
"input_tokens": _input_toks,
|
||||
"output_tokens": _output_toks,
|
||||
|
|
@ -14625,63 +14681,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
unique_tags.insert(0, "[[audio_as_voice]]")
|
||||
final_response = final_response + "\n" + "\n".join(unique_tags)
|
||||
|
||||
# Sync session_id: the agent may have created a new session during
|
||||
# mid-run context compression (_compress_context splits sessions).
|
||||
# If so, update the session store entry so the NEXT message loads
|
||||
# the compressed transcript, not the stale pre-compression one.
|
||||
agent = agent_holder[0]
|
||||
_session_was_split = False
|
||||
if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
|
||||
_session_was_split = True
|
||||
logger.info(
|
||||
"Session split detected: %s → %s (compression)",
|
||||
session_id, agent.session_id,
|
||||
)
|
||||
entry = self.session_store._entries.get(session_key)
|
||||
if entry:
|
||||
entry.session_id = agent.session_id
|
||||
self.session_store._save()
|
||||
|
||||
# If this is a Telegram DM and source.thread_id was lost during
|
||||
# the session split (synthetic / recovered event), restore it
|
||||
# from the binding so _thread_metadata_for_source produces the
|
||||
# correct message_thread_id instead of routing to the General
|
||||
# thread. Failure here is non-fatal — we log and continue;
|
||||
# worst case the message lands in General, which is the
|
||||
# pre-fix behaviour.
|
||||
if (
|
||||
getattr(source, "platform", None) == Platform.TELEGRAM
|
||||
and getattr(source, "chat_type", None) == "dm"
|
||||
and getattr(source, "thread_id", None) is None
|
||||
and self._session_db is not None
|
||||
):
|
||||
try:
|
||||
_binding = self._session_db.get_telegram_topic_binding_by_session(
|
||||
session_id=agent.session_id,
|
||||
)
|
||||
if _binding and _binding.get("thread_id"):
|
||||
source.thread_id = str(_binding["thread_id"])
|
||||
logger.debug(
|
||||
"Restored source.thread_id=%s from binding after session split %s → %s",
|
||||
source.thread_id,
|
||||
session_id,
|
||||
agent.session_id,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"Failed to restore thread_id from binding after session split",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
|
||||
|
||||
# When compression created a new session, the messages list was
|
||||
# shortened. Using the original history offset would produce an
|
||||
# empty new_messages slice, causing the gateway to write only a
|
||||
# user/assistant pair — losing the compressed summary and tail.
|
||||
# Reset to 0 so the gateway writes ALL compressed messages.
|
||||
_effective_history_offset = 0 if _session_was_split else len(agent_history)
|
||||
|
||||
# Auto-generate session title after first exchange (non-blocking)
|
||||
if final_response and self._session_db:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ AUTHOR_MAP = {
|
|||
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
|
||||
"157689911+itsflownium@users.noreply.github.com": "itsflownium",
|
||||
"dirtyren@users.noreply.github.com": "dirtyren",
|
||||
"JustinBao@outlook.com": "justinbao19",
|
||||
"kdunn926@gmail.com": "kdunn926",
|
||||
"mvanhorn@MacBook-Pro.local": "mvanhorn",
|
||||
"470766206@qq.com": "youjunxiaji",
|
||||
|
|
|
|||
160
tests/gateway/test_compression_failure_session_sync.py
Normal file
160
tests/gateway/test_compression_failure_session_sync.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
import asyncio
|
||||
import sys
|
||||
import threading
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import gateway.run as gateway_run
|
||||
from gateway.config import Platform
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
SESSION_KEY = "agent:main:telegram:dm:12345"
|
||||
|
||||
|
||||
class _SessionStore:
|
||||
def __init__(self):
|
||||
self.entry = SimpleNamespace(
|
||||
session_key=SESSION_KEY,
|
||||
session_id="session-before-compression",
|
||||
)
|
||||
self._entries = {SESSION_KEY: self.entry}
|
||||
self.save_calls = 0
|
||||
|
||||
def _save(self):
|
||||
self.save_calls += 1
|
||||
|
||||
|
||||
class _CompressionThenFailureAgent:
|
||||
def __init__(self, **kwargs):
|
||||
self.session_id = kwargs["session_id"]
|
||||
self.model = kwargs["model"]
|
||||
self.tools = []
|
||||
self.context_compressor = SimpleNamespace(
|
||||
last_prompt_tokens=4321,
|
||||
context_length=200000,
|
||||
)
|
||||
self.session_prompt_tokens = 4321
|
||||
self.session_completion_tokens = 0
|
||||
|
||||
def run_conversation(self, user_message, conversation_history=None, task_id=None, **_kwargs):
|
||||
self.session_id = "session-after-compression"
|
||||
return {
|
||||
"failed": True,
|
||||
"error": "APIConnectionError: Codex auxiliary Responses stream exceeded 120.0s total timeout",
|
||||
"messages": [
|
||||
{"role": "user", "content": "[compressed summary]"},
|
||||
{"role": "user", "content": user_message},
|
||||
],
|
||||
"api_calls": 1,
|
||||
}
|
||||
|
||||
def interrupt(self, *_args, **_kwargs):
|
||||
pass
|
||||
|
||||
|
||||
class _StreamConsumer:
|
||||
final_response_sent = False
|
||||
|
||||
def __init__(self, *_args, **_kwargs):
|
||||
pass
|
||||
|
||||
async def run(self):
|
||||
return None
|
||||
|
||||
def finish(self):
|
||||
pass
|
||||
|
||||
|
||||
class _Adapter:
|
||||
SUPPORTS_MESSAGE_EDITING = True
|
||||
_pending_messages = {}
|
||||
|
||||
def get_pending_message(self, _session_key):
|
||||
return None
|
||||
|
||||
async def send_typing(self, *_args, **_kwargs):
|
||||
return None
|
||||
|
||||
async def stop_typing(self, *_args, **_kwargs):
|
||||
return None
|
||||
|
||||
|
||||
def _runner(session_store):
|
||||
runner = object.__new__(gateway_run.GatewayRunner)
|
||||
runner.adapters = {Platform.TELEGRAM: _Adapter()}
|
||||
runner.config = SimpleNamespace(streaming=None, group_sessions_per_user=True, thread_sessions_per_user=False)
|
||||
runner.hooks = SimpleNamespace(loaded_hooks=False, emit=AsyncMock())
|
||||
runner.session_store = session_store
|
||||
runner._session_db = MagicMock()
|
||||
runner._session_db.get_telegram_topic_binding_by_session.return_value = None
|
||||
runner._agent_cache = {}
|
||||
runner._agent_cache_lock = threading.Lock()
|
||||
runner._running_agents = {}
|
||||
runner._running_agents_ts = {}
|
||||
runner._session_run_generation = {}
|
||||
runner._session_model_overrides = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._pending_skills_reload_notes = {}
|
||||
runner._prefill_messages = []
|
||||
runner._ephemeral_system_prompt = ""
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._draining = False
|
||||
runner._get_proxy_url = lambda: None
|
||||
runner._resolve_session_agent_runtime = lambda **_kwargs: (
|
||||
"gpt-5.4",
|
||||
{"provider": "openai-codex", "api_mode": "codex_responses", "base_url": "https://chatgpt.com/backend-api/codex", "api_key": "token"},
|
||||
)
|
||||
runner._resolve_session_reasoning_config = lambda **_kwargs: None
|
||||
runner._resolve_turn_agent_config = lambda message, model, runtime: {"model": model, "runtime": runtime}
|
||||
runner._load_service_tier = lambda: None
|
||||
runner._agent_config_signature = lambda *_args, **_kwargs: ("sig",)
|
||||
runner._extract_cache_busting_config = lambda _config: ()
|
||||
runner._thread_metadata_for_source = lambda *_args, **_kwargs: None
|
||||
runner._sync_telegram_topic_binding = MagicMock()
|
||||
runner._release_running_agent_state = MagicMock()
|
||||
return runner
|
||||
|
||||
|
||||
def test_failed_turn_still_syncs_compression_session_split(monkeypatch):
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = _CompressionThenFailureAgent
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "off")
|
||||
monkeypatch.setenv("HERMES_AGENT_TIMEOUT", "0")
|
||||
monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
|
||||
monkeypatch.setattr("gateway.stream_consumer.GatewayStreamConsumer", _StreamConsumer)
|
||||
|
||||
import hermes_cli.tools_config as tools_config
|
||||
|
||||
monkeypatch.setattr(tools_config, "_get_platform_tools", lambda *_args, **_kwargs: {"core"})
|
||||
|
||||
session_store = _SessionStore()
|
||||
runner = _runner(session_store)
|
||||
source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="user-1")
|
||||
|
||||
result = asyncio.run(
|
||||
asyncio.wait_for(
|
||||
runner._run_agent(
|
||||
message="continue",
|
||||
context_prompt="",
|
||||
history=[{"role": "user", "content": "old question"}],
|
||||
source=source,
|
||||
session_id="session-before-compression",
|
||||
session_key=SESSION_KEY,
|
||||
),
|
||||
timeout=2,
|
||||
)
|
||||
)
|
||||
|
||||
assert result["failed"] is True
|
||||
assert result["session_id"] == "session-after-compression"
|
||||
assert result["history_offset"] == 0
|
||||
assert session_store.entry.session_id == "session-after-compression"
|
||||
assert session_store.save_calls == 1
|
||||
runner._sync_telegram_topic_binding.assert_called_once_with(
|
||||
source, session_store.entry, reason="agent-run-compression"
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue