From 5797728ca6d1ce32bdba64970405426775745eec Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 16 Apr 2026 16:36:33 -0700 Subject: [PATCH] test: regression guards for the keepalive/transport bug class (#10933) (#11266) Two new tests in tests/run_agent/ that pin the user-visible invariant behind AlexKucera's Discord report (2026-04-16): no matter how a future keepalive / transport fix for #10324 plumbs sockets in, sequential chats on the same AIAgent instance must all succeed. test_create_openai_client_reuse.py (no network, runs in CI): - test_second_create_does_not_wrap_closed_transport_from_first back-to-back _create_openai_client calls must not hand the same http_client (after an SDK close) to the second construction - test_replace_primary_openai_client_survives_repeated_rebuilds three sequential rebuilds via the real _replace_primary_openai_client entrypoint must each install a live client test_sequential_chats_live.py (opt-in, HERMES_LIVE_TESTS=1): - test_three_sequential_chats_across_client_rebuild real OpenRouter round trips, with an explicit _replace_primary_openai_client call between turns 2 and 3. Error-sentinel detector treats 'API call failed after 3 retries' replies as failures instead of letting them pass the naive truthy check (which is how a first draft of this test missed the bug it was meant to catch). Validation: clean main (post-revert, defensive copy present) -> all 4 tests PASS broken #10933 state (keepalive injection, no defensive copy) -> all 4 tests FAIL with precise messages pointing at #10933 Companion to taeuk178's test_create_openai_client_kwargs_isolation.py, which pins the syntactic 'don't mutate input dict' half of the same contract. Together they catch both the specific mechanism of #10933 and any other reimplementation that breaks the sequential-call invariant. --- .../test_create_openai_client_reuse.py | 186 ++++++++++++++++++ tests/run_agent/test_sequential_chats_live.py | 137 +++++++++++++ 2 files changed, 323 insertions(+) create mode 100644 tests/run_agent/test_create_openai_client_reuse.py create mode 100644 tests/run_agent/test_sequential_chats_live.py diff --git a/tests/run_agent/test_create_openai_client_reuse.py b/tests/run_agent/test_create_openai_client_reuse.py new file mode 100644 index 000000000..8183e7eea --- /dev/null +++ b/tests/run_agent/test_create_openai_client_reuse.py @@ -0,0 +1,186 @@ +"""Regression guardrail: sequential _create_openai_client calls must not +share a closed transport across invocations. + +This is the behavioral twin of test_create_openai_client_kwargs_isolation.py. +That test pins "don't mutate input kwargs" at the syntactic level — it catches +#10933 specifically because the bug mutated ``client_kwargs`` in place. This +test pins the user-visible invariant at the behavioral level: no matter HOW a +future keepalive / transport reimplementation plumbs sockets in, the Nth call +to ``_create_openai_client`` must not hand back a client wrapping a +now-closed httpx transport from an earlier call. + +AlexKucera's Discord report (2026-04-16): after ``hermes update`` pulled +#10933, the first chat on a session worked, every subsequent chat failed +with ``APIConnectionError('Connection error.')`` whose cause was +``RuntimeError: Cannot send a request, as the client has been closed``. +That is the exact scenario this test reproduces at object level without a +network, so it runs in CI on every PR. +""" +from unittest.mock import MagicMock, patch + +from run_agent import AIAgent + + +def _make_agent(): + return AIAgent( + model="test/model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + +def _make_fake_openai_factory(constructed): + """Return a fake ``OpenAI`` class that records every constructed instance + along with whatever ``http_client`` it was handed (or ``None`` if the + caller did not inject one). + + The fake also forwards ``.close()`` calls down to the http_client if one + is present, mirroring what the real OpenAI SDK does during teardown and + what would expose the #10933 bug. + """ + + class _FakeOpenAI: + def __init__(self, **kwargs): + self._kwargs = kwargs + self._http_client = kwargs.get("http_client") + self._closed = False + constructed.append(self) + + def close(self): + self._closed = True + hc = self._http_client + if hc is not None and hasattr(hc, "close"): + try: + hc.close() + except Exception: + pass + + return _FakeOpenAI + + +def test_second_create_does_not_wrap_closed_transport_from_first(): + """Back-to-back _create_openai_client calls on the same _client_kwargs + must not hand call N a closed http_client from call N-1. + + The bug class: call 1 injects an httpx.Client into self._client_kwargs, + client 1 closes (SDK teardown), its http_client closes with it, call 2 + reads the SAME now-closed http_client from self._client_kwargs and wraps + it. Every request through client 2 then fails. + """ + agent = _make_agent() + constructed: list = [] + fake_openai = _make_fake_openai_factory(constructed) + + # Seed a baseline kwargs dict resembling real runtime state. + agent._client_kwargs = { + "api_key": "test-key-value", + "base_url": "https://api.example.com/v1", + } + + with patch("run_agent.OpenAI", fake_openai): + # Call 1 — what _replace_primary_openai_client does at init/rebuild. + client_a = agent._create_openai_client( + agent._client_kwargs, reason="initial", shared=True + ) + # Simulate the SDK teardown that follows a rebuild: the old client's + # close() is invoked, which closes its underlying http_client if one + # was injected. This is exactly what _replace_primary_openai_client + # does via _close_openai_client after a successful rebuild. + client_a.close() + + # Call 2 — the rebuild path. This is where #10933 crashed on the + # next real request. + client_b = agent._create_openai_client( + agent._client_kwargs, reason="rebuild", shared=True + ) + + assert len(constructed) == 2, f"expected 2 OpenAI constructions, got {len(constructed)}" + assert constructed[0] is client_a + assert constructed[1] is client_b + + hc_a = constructed[0]._http_client + hc_b = constructed[1]._http_client + + # If the implementation does not inject http_client at all, we're safely + # past the bug class — nothing to share, nothing to close. That's fine. + if hc_a is None and hc_b is None: + return + + # If ANY http_client is injected, the two calls MUST NOT share the same + # object, because call 1's object was closed between calls. + if hc_a is not None and hc_b is not None: + assert hc_a is not hc_b, ( + "Regression of #10933: _create_openai_client handed the same " + "http_client to two sequential constructions. After the first " + "client is closed (normal SDK teardown on rebuild), the second " + "wraps a closed transport and every subsequent chat raises " + "'Cannot send a request, as the client has been closed'." + ) + + # And whatever http_client the LATEST call handed out must not be closed + # already. This catches implementations that cache the injected client on + # ``self`` (under any attribute name) and rebuild the SDK client around + # it even after the previous SDK close closed the cached transport. + if hc_b is not None: + is_closed_attr = getattr(hc_b, "is_closed", None) + if is_closed_attr is not None: + assert not is_closed_attr, ( + "Regression of #10933: second _create_openai_client returned " + "a client whose http_client is already closed. New chats on " + "this session will fail with 'Cannot send a request, as the " + "client has been closed'." + ) + + +def test_replace_primary_openai_client_survives_repeated_rebuilds(): + """Full rebuild path: exercise _replace_primary_openai_client three times + back-to-back and confirm every resulting ``self.client`` is a fresh, + usable construction rather than a wrapper around a previously-closed + transport. + + _replace_primary_openai_client is the real rebuild entrypoint — it is + what runs on 401 credential refresh, pool rotation, and model switch. + If a future keepalive tweak stores state on ``self`` between calls, + this test is what notices. + """ + agent = _make_agent() + constructed: list = [] + fake_openai = _make_fake_openai_factory(constructed) + + agent._client_kwargs = { + "api_key": "test-key-value", + "base_url": "https://api.example.com/v1", + } + + with patch("run_agent.OpenAI", fake_openai): + # Seed the initial client so _replace has something to tear down. + agent.client = agent._create_openai_client( + agent._client_kwargs, reason="seed", shared=True + ) + # Three rebuilds in a row. Each one must install a fresh live client. + for label in ("rebuild_1", "rebuild_2", "rebuild_3"): + ok = agent._replace_primary_openai_client(reason=label) + assert ok, f"rebuild {label} returned False" + cur = agent.client + assert not cur._closed, ( + f"after rebuild {label}, self.client is already closed — " + "this breaks the very next chat turn" + ) + hc = cur._http_client + if hc is not None: + is_closed_attr = getattr(hc, "is_closed", None) + if is_closed_attr is not None: + assert not is_closed_attr, ( + f"after rebuild {label}, self.client.http_client is " + "closed — reproduces #10933 (AlexKucera report, " + "Discord 2026-04-16)" + ) + + # All four constructions (seed + 3 rebuilds) should be distinct objects. + # If two are the same, the rebuild is cacheing the SDK client across + # teardown, which also reproduces the bug class. + assert len({id(c) for c in constructed}) == len(constructed), ( + "Some _create_openai_client calls returned the same object across " + "a teardown — rebuild is not producing fresh clients" + ) diff --git a/tests/run_agent/test_sequential_chats_live.py b/tests/run_agent/test_sequential_chats_live.py new file mode 100644 index 000000000..f6b9937bd --- /dev/null +++ b/tests/run_agent/test_sequential_chats_live.py @@ -0,0 +1,137 @@ +"""Live regression guardrail for the keepalive/transport bug class (#10933). + +AlexKucera reported on Discord (2026-04-16) that after ``hermes update`` pulled +#10933, the FIRST chat in a session worked and EVERY subsequent chat failed +with ``APIConnectionError('Connection error.')`` whose cause was +``RuntimeError: Cannot send a request, as the client has been closed``. + +The companion ``test_create_openai_client_reuse.py`` pins this contract at +object level with mocked ``OpenAI``. This file runs the same shape of +reproduction against a real provider so we have a true end-to-end smoke test +for any future keepalive / transport plumbing. + +Opt-in — not part of default CI: + HERMES_LIVE_TESTS=1 pytest tests/run_agent/test_sequential_chats_live.py -v + +Requires ``OPENROUTER_API_KEY`` to be set (or sourced via ~/.hermes/.env). +""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + + +# Load ~/.hermes/.env so live runs pick up OPENROUTER_API_KEY without +# needing the runner to shell-source it first. Silent if the file is absent. +def _load_user_env() -> None: + env_file = Path.home() / ".hermes" / ".env" + if not env_file.exists(): + return + for raw in env_file.read_text().splitlines(): + line = raw.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + k = k.strip() + v = v.strip().strip('"').strip("'") + # Don't clobber an already-set env var — lets the caller override. + os.environ.setdefault(k, v) + + +_load_user_env() + + +LIVE = os.environ.get("HERMES_LIVE_TESTS") == "1" +OR_KEY = os.environ.get("OPENROUTER_API_KEY", "") + +pytestmark = [ + pytest.mark.skipif(not LIVE, reason="live-only — set HERMES_LIVE_TESTS=1"), + pytest.mark.skipif(not OR_KEY, reason="OPENROUTER_API_KEY not configured"), +] + +# Cheap, fast, tool-capable. Swap if it ever goes dark. +LIVE_MODEL = "google/gemini-2.5-flash" + + +def _make_live_agent(): + from run_agent import AIAgent + + return AIAgent( + model=LIVE_MODEL, + provider="openrouter", + api_key=OR_KEY, + base_url="https://openrouter.ai/api/v1", + max_iterations=3, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + # All toolsets off so the agent just produces a single text reply + # per turn — we want to test the HTTP client lifecycle, not tools. + disabled_toolsets=["*"], + ) + + +def _looks_like_error_reply(reply: str) -> tuple[bool, str]: + """AIAgent returns an error-sentinel string (not an exception) when the + underlying API call fails past retries. A naive ``assert reply and + reply.strip()`` misses this because the sentinel is truthy. This + checker enumerates the known-bad shapes so the live test actually + catches #10933 instead of rubber-stamping the error response. + """ + lowered = reply.lower().strip() + bad_substrings = ( + "api call failed", + "connection error", + "client has been closed", + "cannot send a request", + "max retries", + ) + for marker in bad_substrings: + if marker in lowered: + return True, marker + return False, "" + + +def _assert_healthy_reply(reply, turn_label: str) -> None: + assert reply and reply.strip(), f"{turn_label} returned empty: {reply!r}" + is_err, marker = _looks_like_error_reply(reply) + assert not is_err, ( + f"{turn_label} returned an error-sentinel string instead of a real " + f"model reply — matched marker {marker!r}. This is the exact shape " + f"of #10933 (AlexKucera Discord report, 2026-04-16): the agent's " + f"retry loop burned three attempts against a closed httpx transport " + f"and surfaced 'API call failed after 3 retries: Connection error.' " + f"to the user. Reply was: {reply!r}" + ) + + +def test_three_sequential_chats_across_client_rebuild(): + """Reproduces AlexKucera's exact failure shape end-to-end. + + Turn 1 always worked under #10933. Turn 2 was the one that failed + because the shared httpx transport had been torn down between turns. + Turn 3 is here as extra insurance against any lazy-init shape where + the failure only shows up on call N>=3. + + We also deliberately trigger ``_replace_primary_openai_client`` between + turn 2 and turn 3 — that is the real rebuild entrypoint (401 refresh, + credential rotation, model switch) and is the path that actually + stored the closed transport into ``self._client_kwargs`` in #10933. + """ + agent = _make_live_agent() + + r1 = agent.chat("Respond with only the word: ONE") + _assert_healthy_reply(r1, "turn 1") + + r2 = agent.chat("Respond with only the word: TWO") + _assert_healthy_reply(r2, "turn 2") + + # Force a client rebuild through the real path — mimics 401 refresh / + # credential rotation / model switch lifecycle. + rebuilt = agent._replace_primary_openai_client(reason="regression_test_rebuild") + assert rebuilt, "rebuild via _replace_primary_openai_client returned False" + + r3 = agent.chat("Respond with only the word: THREE") + _assert_healthy_reply(r3, "turn 3 (post-rebuild)")