"""Integration tests for gateway AIAgent caching. Verifies that the agent cache correctly: - Reuses agents across messages (same config → same instance) - Rebuilds agents when config changes (model, provider, toolsets) - Updates reasoning_config in-place without rebuilding - Evicts on session reset - Evicts on fallback activation - Preserves frozen system prompt across turns """ import hashlib import json import threading from unittest.mock import MagicMock, patch import pytest def _make_runner(): """Create a minimal GatewayRunner with just the cache infrastructure.""" from gateway.run import GatewayRunner runner = GatewayRunner.__new__(GatewayRunner) runner._agent_cache = {} runner._agent_cache_lock = threading.Lock() return runner class TestAgentConfigSignature: """Config signature produces stable, distinct keys.""" def test_same_config_same_signature(self): from gateway.run import GatewayRunner runtime = {"api_key": "sk-test12345678", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter", "api_mode": "chat_completions"} sig1 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "") sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "") assert sig1 == sig2 def test_model_change_different_signature(self): from gateway.run import GatewayRunner runtime = {"api_key": "sk-test12345678", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter"} sig1 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "") sig2 = GatewayRunner._agent_config_signature("claude-opus-4.6", runtime, ["hermes-telegram"], "") assert sig1 != sig2 def test_same_token_prefix_different_full_token_changes_signature(self): """Tokens sharing a JWT-style prefix must not collide.""" from gateway.run import GatewayRunner rt1 = { "api_key": "eyJhbGci.token-for-account-a", "base_url": "https://chatgpt.com/backend-api/codex", "provider": "openai-codex", "api_mode": "codex_responses", } rt2 = { "api_key": "eyJhbGci.token-for-account-b", "base_url": "https://chatgpt.com/backend-api/codex", "provider": "openai-codex", "api_mode": "codex_responses", } assert rt1["api_key"][:8] == rt2["api_key"][:8] sig1 = GatewayRunner._agent_config_signature("gpt-5.3-codex", rt1, ["hermes-telegram"], "") sig2 = GatewayRunner._agent_config_signature("gpt-5.3-codex", rt2, ["hermes-telegram"], "") assert sig1 != sig2 def test_provider_change_different_signature(self): from gateway.run import GatewayRunner rt1 = {"api_key": "sk-test12345678", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter"} rt2 = {"api_key": "sk-test12345678", "base_url": "https://api.anthropic.com", "provider": "anthropic"} sig1 = GatewayRunner._agent_config_signature("claude-sonnet-4", rt1, ["hermes-telegram"], "") sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", rt2, ["hermes-telegram"], "") assert sig1 != sig2 def test_toolset_change_different_signature(self): from gateway.run import GatewayRunner runtime = {"api_key": "sk-test12345678", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter"} sig1 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "") sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-discord"], "") assert sig1 != sig2 def test_reasoning_not_in_signature(self): """Reasoning config is set per-message, not part of the signature.""" from gateway.run import GatewayRunner runtime = {"api_key": "sk-test12345678", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter"} # Same config — signature should be identical regardless of what # reasoning_config the caller might have (it's not passed in) sig1 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "") sig2 = GatewayRunner._agent_config_signature("claude-sonnet-4", runtime, ["hermes-telegram"], "") assert sig1 == sig2 class TestAgentCacheLifecycle: """End-to-end cache behavior with real AIAgent construction.""" def test_cache_hit_returns_same_agent(self): """Second message with same config reuses the cached agent instance.""" from run_agent import AIAgent runner = _make_runner() session_key = "telegram:12345" runtime = {"api_key": "test", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter", "api_mode": "chat_completions"} sig = runner._agent_config_signature("anthropic/claude-sonnet-4", runtime, ["hermes-telegram"], "") # First message — create and cache agent1 = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, platform="telegram", ) with runner._agent_cache_lock: runner._agent_cache[session_key] = (agent1, sig) # Second message — cache hit with runner._agent_cache_lock: cached = runner._agent_cache.get(session_key) assert cached is not None assert cached[1] == sig assert cached[0] is agent1 # same instance def test_cache_miss_on_model_change(self): """Model change produces different signature → cache miss.""" from run_agent import AIAgent runner = _make_runner() session_key = "telegram:12345" runtime = {"api_key": "test", "base_url": "https://openrouter.ai/api/v1", "provider": "openrouter", "api_mode": "chat_completions"} old_sig = runner._agent_config_signature("anthropic/claude-sonnet-4", runtime, ["hermes-telegram"], "") agent1 = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, platform="telegram", ) with runner._agent_cache_lock: runner._agent_cache[session_key] = (agent1, old_sig) # New model → different signature new_sig = runner._agent_config_signature("anthropic/claude-opus-4.6", runtime, ["hermes-telegram"], "") assert new_sig != old_sig with runner._agent_cache_lock: cached = runner._agent_cache.get(session_key) assert cached[1] != new_sig # signature mismatch → would create new agent def test_evict_on_session_reset(self): """_evict_cached_agent removes the entry.""" from run_agent import AIAgent runner = _make_runner() session_key = "telegram:12345" agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, ) with runner._agent_cache_lock: runner._agent_cache[session_key] = (agent, "sig123") runner._evict_cached_agent(session_key) with runner._agent_cache_lock: assert session_key not in runner._agent_cache def test_evict_does_not_affect_other_sessions(self): """Evicting one session leaves other sessions cached.""" runner = _make_runner() with runner._agent_cache_lock: runner._agent_cache["session-A"] = ("agent-A", "sig-A") runner._agent_cache["session-B"] = ("agent-B", "sig-B") runner._evict_cached_agent("session-A") with runner._agent_cache_lock: assert "session-A" not in runner._agent_cache assert "session-B" in runner._agent_cache def test_reasoning_config_updates_in_place(self): """Reasoning config can be set on a cached agent without eviction.""" from run_agent import AIAgent agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, reasoning_config={"enabled": True, "effort": "medium"}, ) # Simulate per-message reasoning update agent.reasoning_config = {"enabled": True, "effort": "high"} assert agent.reasoning_config["effort"] == "high" # System prompt should not be affected by reasoning change prompt1 = agent._build_system_prompt() agent._cached_system_prompt = prompt1 # simulate run_conversation caching agent.reasoning_config = {"enabled": True, "effort": "low"} prompt2 = agent._cached_system_prompt assert prompt1 is prompt2 # same object — not invalidated by reasoning change def test_system_prompt_frozen_across_cache_reuse(self): """The cached agent's system prompt stays identical across turns.""" from run_agent import AIAgent agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, platform="telegram", ) # Build system prompt (simulates first run_conversation) prompt1 = agent._build_system_prompt() agent._cached_system_prompt = prompt1 # Simulate second turn — prompt should be frozen prompt2 = agent._cached_system_prompt assert prompt1 is prompt2 # same object, not rebuilt def test_callbacks_update_without_cache_eviction(self): """Per-message callbacks can be set on cached agent.""" from run_agent import AIAgent agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, ) # Set callbacks like the gateway does per-message cb1 = lambda *a: None cb2 = lambda *a: None agent.tool_progress_callback = cb1 agent.step_callback = cb2 agent.stream_delta_callback = None agent.status_callback = None assert agent.tool_progress_callback is cb1 assert agent.step_callback is cb2 # Update for next message cb3 = lambda *a: None agent.tool_progress_callback = cb3 assert agent.tool_progress_callback is cb3 class TestAgentCacheBoundedGrowth: """LRU cap and idle-TTL eviction prevent unbounded cache growth.""" def _bounded_runner(self): """Runner with an OrderedDict cache (matches real gateway init).""" from collections import OrderedDict from gateway.run import GatewayRunner runner = GatewayRunner.__new__(GatewayRunner) runner._agent_cache = OrderedDict() runner._agent_cache_lock = threading.Lock() return runner def _fake_agent(self, last_activity: float | None = None): """Lightweight stand-in; real AIAgent is heavy to construct.""" m = MagicMock() if last_activity is not None: m._last_activity_ts = last_activity else: import time as _t m._last_activity_ts = _t.time() return m def test_cap_evicts_lru_when_exceeded(self, monkeypatch): """Inserting past _AGENT_CACHE_MAX_SIZE pops the oldest entry.""" from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 3) runner = self._bounded_runner() runner._cleanup_agent_resources = MagicMock() for i in range(3): runner._agent_cache[f"s{i}"] = (self._fake_agent(), f"sig{i}") # Insert a 4th — oldest (s0) must be evicted. with runner._agent_cache_lock: runner._agent_cache["s3"] = (self._fake_agent(), "sig3") runner._enforce_agent_cache_cap() assert "s0" not in runner._agent_cache assert "s3" in runner._agent_cache assert len(runner._agent_cache) == 3 def test_cap_respects_move_to_end(self, monkeypatch): """Entries refreshed via move_to_end are NOT evicted as 'oldest'.""" from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 3) runner = self._bounded_runner() runner._cleanup_agent_resources = MagicMock() for i in range(3): runner._agent_cache[f"s{i}"] = (self._fake_agent(), f"sig{i}") # Touch s0 — it is now MRU, so s1 becomes LRU. runner._agent_cache.move_to_end("s0") with runner._agent_cache_lock: runner._agent_cache["s3"] = (self._fake_agent(), "sig3") runner._enforce_agent_cache_cap() assert "s0" in runner._agent_cache # rescued by move_to_end assert "s1" not in runner._agent_cache # now oldest → evicted assert "s3" in runner._agent_cache def test_cap_triggers_cleanup_thread(self, monkeypatch): """Evicted agent has release_clients() called for it (soft cleanup). Uses the soft path (_release_evicted_agent_soft), NOT the hard _cleanup_agent_resources — cache eviction must not tear down per-task state (terminal/browser/bg procs). """ from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1) runner = self._bounded_runner() release_calls: list = [] cleanup_calls: list = [] # Intercept both paths; only release_clients path should fire. def _soft(agent): release_calls.append(agent) runner._release_evicted_agent_soft = _soft runner._cleanup_agent_resources = lambda a: cleanup_calls.append(a) old_agent = self._fake_agent() new_agent = self._fake_agent() with runner._agent_cache_lock: runner._agent_cache["old"] = (old_agent, "sig_old") runner._agent_cache["new"] = (new_agent, "sig_new") runner._enforce_agent_cache_cap() # Cleanup is dispatched to a daemon thread; join briefly to observe. import time as _t deadline = _t.time() + 2.0 while _t.time() < deadline and not release_calls: _t.sleep(0.02) assert old_agent in release_calls assert new_agent not in release_calls # Hard-cleanup path must NOT have fired — that's for session expiry only. assert cleanup_calls == [] def test_idle_ttl_sweep_evicts_stale_agents(self, monkeypatch): """_sweep_idle_cached_agents removes agents idle past the TTL.""" from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.05) runner = self._bounded_runner() runner._cleanup_agent_resources = MagicMock() import time as _t fresh = self._fake_agent(last_activity=_t.time()) stale = self._fake_agent(last_activity=_t.time() - 10.0) runner._agent_cache["fresh"] = (fresh, "s1") runner._agent_cache["stale"] = (stale, "s2") evicted = runner._sweep_idle_cached_agents() assert evicted == 1 assert "stale" not in runner._agent_cache assert "fresh" in runner._agent_cache def test_idle_sweep_skips_agents_without_activity_ts(self, monkeypatch): """Agents missing _last_activity_ts are left alone (defensive).""" from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.01) runner = self._bounded_runner() runner._cleanup_agent_resources = MagicMock() no_ts = MagicMock(spec=[]) # no _last_activity_ts attribute runner._agent_cache["s"] = (no_ts, "sig") assert runner._sweep_idle_cached_agents() == 0 assert "s" in runner._agent_cache def test_plain_dict_cache_is_tolerated(self): """Test fixtures using plain {} don't crash _enforce_agent_cache_cap.""" from gateway.run import GatewayRunner runner = GatewayRunner.__new__(GatewayRunner) runner._agent_cache = {} # plain dict, not OrderedDict runner._agent_cache_lock = threading.Lock() runner._cleanup_agent_resources = MagicMock() # Should be a no-op rather than raising. with runner._agent_cache_lock: for i in range(200): runner._agent_cache[f"s{i}"] = (MagicMock(), f"sig{i}") runner._enforce_agent_cache_cap() # no crash, no eviction assert len(runner._agent_cache) == 200 def test_main_lookup_updates_lru_order(self, monkeypatch): """Cache hit via the main-lookup path refreshes LRU position.""" runner = self._bounded_runner() a0 = self._fake_agent() a1 = self._fake_agent() a2 = self._fake_agent() runner._agent_cache["s0"] = (a0, "sig0") runner._agent_cache["s1"] = (a1, "sig1") runner._agent_cache["s2"] = (a2, "sig2") # Simulate what _process_message_background does on a cache hit # (minus the agent-state reset which isn't relevant here). with runner._agent_cache_lock: cached = runner._agent_cache.get("s0") if cached and hasattr(runner._agent_cache, "move_to_end"): runner._agent_cache.move_to_end("s0") # After the hit, insertion order should be s1, s2, s0. assert list(runner._agent_cache.keys()) == ["s1", "s2", "s0"] class TestAgentCacheActiveSafety: """Safety: eviction must not tear down agents currently mid-turn. AIAgent.close() kills process_registry entries for the task, cleans the terminal sandbox, closes the OpenAI client, and cascades .close() into active child subagents. Calling it while the agent is still processing would crash the in-flight request. These tests pin that eviction skips any agent present in _running_agents. """ def _runner(self): from collections import OrderedDict from gateway.run import GatewayRunner runner = GatewayRunner.__new__(GatewayRunner) runner._agent_cache = OrderedDict() runner._agent_cache_lock = threading.Lock() runner._running_agents = {} return runner def _fake_agent(self, idle_seconds: float = 0.0): import time as _t m = MagicMock() m._last_activity_ts = _t.time() - idle_seconds return m def test_cap_skips_active_lru_entry(self, monkeypatch): """Active LRU entry is skipped; cache stays over cap rather than compensating by evicting a newer entry. Rationale: evicting a more-recent entry just because the oldest slot is temporarily locked would punish the most recently- inserted session (which has no cache to preserve) to protect one that happens to be mid-turn. Better to let the cache stay transiently over cap and re-check on the next insert. """ from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 2) runner = self._runner() runner._cleanup_agent_resources = MagicMock() active = self._fake_agent() idle_a = self._fake_agent() idle_b = self._fake_agent() # Insertion order: active (oldest), idle_a, idle_b. runner._agent_cache["session-active"] = (active, "sig") runner._agent_cache["session-idle-a"] = (idle_a, "sig") runner._agent_cache["session-idle-b"] = (idle_b, "sig") # Mark `active` as mid-turn — it's LRU, but protected. runner._running_agents["session-active"] = active with runner._agent_cache_lock: runner._enforce_agent_cache_cap() # All three remain; no eviction ran, no cleanup dispatched. assert "session-active" in runner._agent_cache assert "session-idle-a" in runner._agent_cache assert "session-idle-b" in runner._agent_cache assert runner._cleanup_agent_resources.call_count == 0 def test_cap_evicts_when_multiple_excess_and_some_inactive(self, monkeypatch): """Mixed active/idle in the LRU excess window: only the idle ones go. With CAP=2 and 4 entries, excess=2 (the two oldest). If the oldest is active and the next is idle, we evict exactly one. Cache ends at CAP+1, which is still better than unbounded. """ from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 2) runner = self._runner() runner._cleanup_agent_resources = MagicMock() oldest_active = self._fake_agent() idle_second = self._fake_agent() idle_third = self._fake_agent() idle_fourth = self._fake_agent() runner._agent_cache["s1"] = (oldest_active, "sig") runner._agent_cache["s2"] = (idle_second, "sig") # in excess window, idle runner._agent_cache["s3"] = (idle_third, "sig") runner._agent_cache["s4"] = (idle_fourth, "sig") runner._running_agents["s1"] = oldest_active # oldest is mid-turn with runner._agent_cache_lock: runner._enforce_agent_cache_cap() # s1 protected (active), s2 evicted (idle + in excess window), # s3 and s4 untouched (outside excess window). assert "s1" in runner._agent_cache assert "s2" not in runner._agent_cache assert "s3" in runner._agent_cache assert "s4" in runner._agent_cache def test_cap_leaves_cache_over_limit_if_all_active(self, monkeypatch, caplog): """If every over-cap entry is mid-turn, the cache stays over cap. Better to temporarily exceed the cap than to crash an in-flight turn by tearing down its clients. """ from gateway import run as gw_run import logging as _logging monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1) runner = self._runner() runner._cleanup_agent_resources = MagicMock() a1 = self._fake_agent() a2 = self._fake_agent() a3 = self._fake_agent() runner._agent_cache["s1"] = (a1, "sig") runner._agent_cache["s2"] = (a2, "sig") runner._agent_cache["s3"] = (a3, "sig") # All three are mid-turn. runner._running_agents["s1"] = a1 runner._running_agents["s2"] = a2 runner._running_agents["s3"] = a3 with caplog.at_level(_logging.WARNING, logger="gateway.run"): with runner._agent_cache_lock: runner._enforce_agent_cache_cap() # Cache unchanged because eviction had to skip every candidate. assert len(runner._agent_cache) == 3 # _cleanup_agent_resources must NOT have been scheduled. assert runner._cleanup_agent_resources.call_count == 0 # And we logged a warning so operators can see the condition. assert any("mid-turn" in r.message for r in caplog.records) def test_cap_pending_sentinel_does_not_block_eviction(self, monkeypatch): """_AGENT_PENDING_SENTINEL in _running_agents is treated as 'not active'. The sentinel is set while an agent is being CONSTRUCTED, before the real AIAgent instance exists. Cached agents from other sessions can still be evicted safely. """ from gateway import run as gw_run from gateway.run import _AGENT_PENDING_SENTINEL monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1) runner = self._runner() runner._cleanup_agent_resources = MagicMock() a1 = self._fake_agent() a2 = self._fake_agent() runner._agent_cache["s1"] = (a1, "sig") runner._agent_cache["s2"] = (a2, "sig") # Another session is mid-creation — sentinel, no real agent yet. runner._running_agents["s3-being-created"] = _AGENT_PENDING_SENTINEL with runner._agent_cache_lock: runner._enforce_agent_cache_cap() assert "s1" not in runner._agent_cache # evicted normally assert "s2" in runner._agent_cache def test_idle_sweep_skips_active_agent(self, monkeypatch): """Idle-TTL sweep must not tear down an active agent even if 'stale'.""" from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.01) runner = self._runner() runner._cleanup_agent_resources = MagicMock() old_but_active = self._fake_agent(idle_seconds=10.0) runner._agent_cache["s1"] = (old_but_active, "sig") runner._running_agents["s1"] = old_but_active evicted = runner._sweep_idle_cached_agents() assert evicted == 0 assert "s1" in runner._agent_cache assert runner._cleanup_agent_resources.call_count == 0 def test_eviction_does_not_close_active_agent_client(self, monkeypatch): """Live test: evicting an active agent does NOT null its .client. This reproduces the original concern — if eviction fired while an agent was mid-turn, `agent.close()` would set `self.client = None` and the next API call inside the loop would crash. With the active-agent skip, the client stays intact. """ from gateway import run as gw_run monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", 1) runner = self._runner() # Build a proper fake agent whose close() matches AIAgent's contract. active = MagicMock() active._last_activity_ts = __import__("time").time() active.client = MagicMock() # simulate an OpenAI client def _real_close(): active.client = None # mirrors run_agent.py:3299 active.close = _real_close active.shutdown_memory_provider = MagicMock() idle = self._fake_agent() runner._agent_cache["active-session"] = (active, "sig") runner._agent_cache["idle-session"] = (idle, "sig") runner._running_agents["active-session"] = active # Real cleanup function, not mocked — we want to see whether close() # runs on the active agent. (It shouldn't.) with runner._agent_cache_lock: runner._enforce_agent_cache_cap() # Let any eviction cleanup threads drain. import time as _t _t.sleep(0.2) # The ACTIVE agent's client must still be usable. assert active.client is not None, ( "Active agent's client was closed by eviction — " "running turn would crash on its next API call." ) class TestAgentCacheSpilloverLive: """Live E2E: fill cache with real AIAgent instances and stress it.""" def _runner(self): from collections import OrderedDict from gateway.run import GatewayRunner runner = GatewayRunner.__new__(GatewayRunner) runner._agent_cache = OrderedDict() runner._agent_cache_lock = threading.Lock() runner._running_agents = {} return runner def _real_agent(self): """A genuine AIAgent; no API calls are made during these tests.""" from run_agent import AIAgent return AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, platform="telegram", ) def test_fill_to_cap_then_spillover(self, monkeypatch): """Fill to cap with real agents, insert one more, oldest evicted.""" from gateway import run as gw_run CAP = 8 monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP) runner = self._runner() agents = [self._real_agent() for _ in range(CAP)] for i, a in enumerate(agents): with runner._agent_cache_lock: runner._agent_cache[f"s{i}"] = (a, "sig") runner._enforce_agent_cache_cap() assert len(runner._agent_cache) == CAP # Spillover insertion. newcomer = self._real_agent() with runner._agent_cache_lock: runner._agent_cache["new"] = (newcomer, "sig") runner._enforce_agent_cache_cap() # Oldest (s0) evicted, cap still CAP. assert "s0" not in runner._agent_cache assert "new" in runner._agent_cache assert len(runner._agent_cache) == CAP # Clean up so pytest doesn't leak resources. for a in agents + [newcomer]: try: a.close() except Exception: pass def test_spillover_all_active_keeps_cache_over_cap(self, monkeypatch, caplog): """Every slot active: cache goes over cap, no one gets torn down.""" from gateway import run as gw_run import logging as _logging CAP = 4 monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP) runner = self._runner() agents = [self._real_agent() for _ in range(CAP)] for i, a in enumerate(agents): runner._agent_cache[f"s{i}"] = (a, "sig") runner._running_agents[f"s{i}"] = a # every session mid-turn newcomer = self._real_agent() with caplog.at_level(_logging.WARNING, logger="gateway.run"): with runner._agent_cache_lock: runner._agent_cache["new"] = (newcomer, "sig") runner._enforce_agent_cache_cap() assert len(runner._agent_cache) == CAP + 1 # temporarily over cap # All existing agents still usable. for i, a in enumerate(agents): assert a.client is not None, f"s{i} got closed while active!" # And we warned operators. assert any("mid-turn" in r.message for r in caplog.records) for a in agents + [newcomer]: try: a.close() except Exception: pass def test_concurrent_inserts_settle_at_cap(self, monkeypatch): """Many threads inserting in parallel end with len(cache) == CAP.""" from gateway import run as gw_run CAP = 16 monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP) runner = self._runner() N_THREADS = 8 PER_THREAD = 20 # 8 * 20 = 160 inserts into a 16-slot cache def worker(tid: int): for j in range(PER_THREAD): a = self._real_agent() key = f"t{tid}-s{j}" with runner._agent_cache_lock: runner._agent_cache[key] = (a, "sig") runner._enforce_agent_cache_cap() threads = [ threading.Thread(target=worker, args=(t,), daemon=True) for t in range(N_THREADS) ] for t in threads: t.start() for t in threads: t.join(timeout=30) assert not t.is_alive(), "Worker thread hung — possible deadlock?" # Let daemon cleanup threads settle. import time as _t _t.sleep(0.5) assert len(runner._agent_cache) == CAP, ( f"Expected exactly {CAP} entries after concurrent inserts, " f"got {len(runner._agent_cache)}." ) def test_evicted_session_next_turn_gets_fresh_agent(self, monkeypatch): """After eviction, the same session_key can insert a fresh agent. Simulates the real spillover flow: evicted session sends another message, which builds a new AIAgent and re-enters the cache. """ from gateway import run as gw_run CAP = 2 monkeypatch.setattr(gw_run, "_AGENT_CACHE_MAX_SIZE", CAP) runner = self._runner() a0 = self._real_agent() a1 = self._real_agent() runner._agent_cache["sA"] = (a0, "sig") runner._agent_cache["sB"] = (a1, "sig") # 3rd session forces sA (oldest) out. a2 = self._real_agent() with runner._agent_cache_lock: runner._agent_cache["sC"] = (a2, "sig") runner._enforce_agent_cache_cap() assert "sA" not in runner._agent_cache # Let the eviction cleanup thread run. import time as _t _t.sleep(0.3) # Now sA's user sends another message → a fresh agent goes in. a0_new = self._real_agent() with runner._agent_cache_lock: runner._agent_cache["sA"] = (a0_new, "sig") runner._enforce_agent_cache_cap() assert "sA" in runner._agent_cache assert runner._agent_cache["sA"][0] is a0_new # the new one, not stale # Fresh agent is usable. assert a0_new.client is not None for a in (a0, a1, a2, a0_new): try: a.close() except Exception: pass class TestAgentCacheIdleResume: """End-to-end: idle-TTL-evicted session resumes cleanly with task state. Real-world scenario: user leaves a Telegram session open for 2+ hours. Idle-TTL evicts their cached agent. They come back and send a message. The new agent built for the same session_id must inherit: - Conversation history (from SessionStore — outside cache concern) - Terminal sandbox (same task_id → same _active_environments entry) - Browser daemon (same task_id → same browser session) - Background processes (same task_id → same process_registry entries) The ONLY thing that should reset is the LLM client pool (rebuilt fresh). """ def _runner(self): from collections import OrderedDict from gateway.run import GatewayRunner runner = GatewayRunner.__new__(GatewayRunner) runner._agent_cache = OrderedDict() runner._agent_cache_lock = threading.Lock() runner._running_agents = {} return runner def test_release_clients_does_not_touch_process_registry(self, monkeypatch): """release_clients must not call process_registry.kill_all for task_id.""" from run_agent import AIAgent agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, session_id="idle-resume-test-session", ) # Spy on process_registry.kill_all — it MUST NOT be called. from tools import process_registry as _pr kill_all_calls: list = [] original_kill_all = _pr.process_registry.kill_all _pr.process_registry.kill_all = lambda **kw: kill_all_calls.append(kw) try: agent.release_clients() finally: _pr.process_registry.kill_all = original_kill_all try: agent.close() except Exception: pass assert kill_all_calls == [], ( f"release_clients() called process_registry.kill_all — would " f"kill user's bg processes on cache eviction. Calls: {kill_all_calls}" ) def test_release_clients_does_not_touch_terminal_or_browser(self, monkeypatch): """release_clients must not call cleanup_vm or cleanup_browser.""" from run_agent import AIAgent from tools import terminal_tool as _tt from tools import browser_tool as _bt agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, session_id="idle-resume-test-2", ) vm_calls: list = [] browser_calls: list = [] original_vm = _tt.cleanup_vm original_browser = _bt.cleanup_browser _tt.cleanup_vm = lambda tid: vm_calls.append(tid) _bt.cleanup_browser = lambda tid: browser_calls.append(tid) try: agent.release_clients() finally: _tt.cleanup_vm = original_vm _bt.cleanup_browser = original_browser try: agent.close() except Exception: pass assert vm_calls == [], ( f"release_clients() tore down terminal sandbox — user's cwd, " f"env, and bg shells would be gone on resume. Calls: {vm_calls}" ) assert browser_calls == [], ( f"release_clients() tore down browser session — user's open " f"tabs and cookies gone on resume. Calls: {browser_calls}" ) def test_release_clients_closes_llm_client(self): """release_clients IS expected to close the OpenAI/httpx client.""" from run_agent import AIAgent agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, ) # Clients are lazy-built; force one to exist so we can verify close. assert agent.client is not None # __init__ builds it agent.release_clients() # Post-release: client reference is dropped (memory freed). assert agent.client is None def test_close_vs_release_full_teardown_difference(self, monkeypatch): """close() tears down task state; release_clients() does not. This pins the semantic contract: session-expiry path uses close() (full teardown — session is done), cache-eviction path uses release_clients() (soft — session may resume). """ from run_agent import AIAgent from tools import terminal_tool as _tt # Agent A: evicted from cache (soft) — terminal survives. # Agent B: session expired (hard) — terminal torn down. agent_a = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, session_id="soft-session", ) agent_b = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, session_id="hard-session", ) vm_calls: list = [] original_vm = _tt.cleanup_vm _tt.cleanup_vm = lambda tid: vm_calls.append(tid) try: agent_a.release_clients() # cache eviction agent_b.close() # session expiry finally: _tt.cleanup_vm = original_vm try: agent_a.close() except Exception: pass # Only agent_b's task_id should appear in cleanup calls. assert "hard-session" in vm_calls assert "soft-session" not in vm_calls def test_idle_evicted_session_rebuild_inherits_task_id(self, monkeypatch): """After idle-TTL eviction, a fresh agent with the same session_id gets the same task_id — so tool state (terminal/browser/bg procs) that persisted across eviction is reachable via the new agent. """ from gateway import run as gw_run from run_agent import AIAgent monkeypatch.setattr(gw_run, "_AGENT_CACHE_IDLE_TTL_SECS", 0.01) runner = self._runner() # Build an agent representing a stale (idle) session. SESSION_ID = "long-lived-user-session" old = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, session_id=SESSION_ID, ) old._last_activity_ts = 0.0 # force idle runner._agent_cache["sKey"] = (old, "sig") # Simulate the idle-TTL sweep firing. runner._sweep_idle_cached_agents() assert "sKey" not in runner._agent_cache # Wait for the daemon thread doing release_clients() to finish. import time as _t _t.sleep(0.3) # Old agent's client is gone (soft cleanup fired). assert old.client is None # User comes back — new agent built for the SAME session_id. new_agent = AIAgent( model="anthropic/claude-sonnet-4", api_key="test", base_url="https://openrouter.ai/api/v1", provider="openrouter", max_iterations=5, quiet_mode=True, skip_context_files=True, skip_memory=True, session_id=SESSION_ID, ) # Same session_id means same task_id routed to tools. The new # agent inherits any per-task state (terminal sandbox etc.) that # was preserved across eviction. assert new_agent.session_id == old.session_id == SESSION_ID # And it has a fresh working client. assert new_agent.client is not None try: new_agent.close() except Exception: pass