diff --git a/tests/conftest.py b/tests/conftest.py index 27950118e..c5b367266 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -229,6 +229,15 @@ def _hermetic_environment(tmp_path, monkeypatch): monkeypatch.setenv("LC_ALL", "C.UTF-8") monkeypatch.setenv("PYTHONHASHSEED", "0") + # 4b. Disable AWS IMDS lookups. Without this, any test that ends up + # calling has_aws_credentials() / resolve_aws_auth_env_var() + # (e.g. provider auto-detect, status command, cron run_job) burns + # ~2s waiting for the metadata service at 169.254.169.254 to time + # out. Tests don't run on EC2 — IMDS is always unreachable here. + monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true") + monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1") + monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1") + # 5. Reset plugin singleton so tests don't leak plugins from # ~/.hermes/plugins/ (which, per step 3, is now empty — but the # singleton might still be cached from a previous test). diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index f3f2a0444..6e10d5622 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -13,9 +13,29 @@ from unittest.mock import patch, MagicMock import pytest import hermes_cli.gateway as gateway_cli +import hermes_cli.main as cli_main from hermes_cli.main import cmd_update +# --------------------------------------------------------------------------- +# Skip the real-time sleeps inside cmd_update's restart-verification path +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_restart_verify_sleep(monkeypatch): + """hermes_cli/main.py uses time.sleep(3) after systemctl restart to + verify the service survived. Tests mock subprocess.run — nothing + actually restarts — so the 3s wait is dead time. 
+ + main.py does ``import time as _time`` at module level (line 167) and in + functions (lines 3281, 4384, 4401); all are the same module object, so + patching global ``time.sleep`` covers them, for this test's duration only. + """ + import time as _real_time + monkeypatch.setattr(_real_time, "sleep", lambda *_a, **_k: None) + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/tests/plugins/test_retaindb_plugin.py b/tests/plugins/test_retaindb_plugin.py index 9ad801769..5d517bce7 100644 --- a/tests/plugins/test_retaindb_plugin.py +++ b/tests/plugins/test_retaindb_plugin.py @@ -31,6 +31,31 @@ def _isolate_env(tmp_path, monkeypatch): monkeypatch.delenv("RETAINDB_PROJECT", raising=False) +@pytest.fixture(autouse=True) +def _cap_retaindb_sleeps(monkeypatch): + """Cap production-code sleeps so background-thread tests run fast. + + The retaindb ``_WriteQueue._flush_row`` does ``time.sleep(2)`` after + errors. Across multiple tests that trigger the retry path, that adds + up. Cap the module's bound ``time.sleep`` to 0.05s — tests don't care + about the exact retry delay, only that it happens. The test file's + own ``time.sleep`` stays real since it uses a different reference.
+ """ + try: + from plugins.memory import retaindb as _retaindb + except ImportError: + return + + real_sleep = _retaindb.time.sleep + + def _capped_sleep(seconds): + return real_sleep(min(float(seconds), 0.05)) + + import types as _types + fake_time = _types.SimpleNamespace(sleep=_capped_sleep, time=_retaindb.time.time) + monkeypatch.setattr(_retaindb, "time", fake_time) + + # We need the repo root on sys.path so the plugin can import agent.memory_provider import sys _repo_root = str(Path(__file__).resolve().parents[2]) @@ -130,16 +155,18 @@ class TestWriteQueue: def test_enqueue_creates_row(self, tmp_path): q, client, db_path = self._make_queue(tmp_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}]) - # Give the writer thread a moment to process - time.sleep(1) + # shutdown() blocks until the writer thread drains the queue — no need + # to pre-sleep (the old 1s sleep was a just-in-case wait, but shutdown + # does the right thing). q.shutdown() # If ingest succeeded, the row should be deleted client.ingest_session.assert_called_once() def test_enqueue_persists_to_sqlite(self, tmp_path): client = MagicMock() - # Make ingest hang so the row stays in SQLite - client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(5)) + # Make ingest slow so the row is still in SQLite when we peek. + # 0.5s is plenty — the test just needs the flush to still be in-flight. 
+ client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(0.5)) db_path = tmp_path / "test_queue.db" q = _WriteQueue(client, db_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}]) @@ -154,8 +181,7 @@ class TestWriteQueue: def test_flush_deletes_row_on_success(self, tmp_path): q, client, db_path = self._make_queue(tmp_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}]) - time.sleep(1) - q.shutdown() + q.shutdown() # blocks until drain # Row should be gone conn = sqlite3.connect(str(db_path)) rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0] @@ -168,14 +194,20 @@ class TestWriteQueue: db_path = tmp_path / "test_queue.db" q = _WriteQueue(client, db_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}]) - time.sleep(3) # Allow retry + sleep(2) in _flush_row + # Poll for the error to be recorded (max 2s), instead of a fixed 3s wait. + deadline = time.time() + 2.0 + last_error = None + while time.time() < deadline: + conn = sqlite3.connect(str(db_path)) + row = conn.execute("SELECT last_error FROM pending").fetchone() + conn.close() + if row and row[0]: + last_error = row[0] + break + time.sleep(0.05) q.shutdown() - # Row should still exist with error recorded - conn = sqlite3.connect(str(db_path)) - row = conn.execute("SELECT last_error FROM pending").fetchone() - conn.close() - assert row is not None - assert "API down" in row[0] + assert last_error is not None + assert "API down" in last_error def test_thread_local_connection_reuse(self, tmp_path): q, _, _ = self._make_queue(tmp_path) @@ -193,14 +225,27 @@ class TestWriteQueue: client1.ingest_session = MagicMock(side_effect=RuntimeError("fail")) q1 = _WriteQueue(client1, db_path) q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}]) - time.sleep(3) + # Wait until the error is recorded (poll with short interval). 
+ deadline = time.time() + 2.0 + while time.time() < deadline: + conn = sqlite3.connect(str(db_path)) + row = conn.execute("SELECT last_error FROM pending").fetchone() + conn.close() + if row and row[0]: + break + time.sleep(0.05) q1.shutdown() # Now create a new queue — it should replay the pending rows client2 = MagicMock() client2.ingest_session = MagicMock(return_value={"status": "ok"}) q2 = _WriteQueue(client2, db_path) - time.sleep(2) + # Poll for the replay to happen. + deadline = time.time() + 2.0 + while time.time() < deadline: + if client2.ingest_session.called: + break + time.sleep(0.05) q2.shutdown() # The replayed row should have been ingested via client2 diff --git a/tests/run_agent/conftest.py b/tests/run_agent/conftest.py new file mode 100644 index 000000000..9b431869b --- /dev/null +++ b/tests/run_agent/conftest.py @@ -0,0 +1,34 @@ +"""Fast-path fixtures shared across tests/run_agent/. + +Many tests in this directory exercise the retry/backoff paths in the +agent loop. Production code uses ``jittered_backoff(base_delay=5.0)`` +with a ``while time.time() < sleep_end`` loop — a single retry test +spends 5+ seconds of real wall-clock time on backoff waits. + +Mocking ``jittered_backoff`` to return 0.0 collapses the while-loop +to a no-op (``time.time() < time.time() + 0`` is false immediately), +which handles the most common case without touching ``time.sleep``. + +We deliberately DO NOT mock ``time.sleep`` here — some tests +(test_interrupt_propagation, test_primary_runtime_restore, etc.) use +the real ``time.sleep`` for threading coordination or assert that it +was called with specific values. Tests that want to additionally +fast-path direct ``time.sleep(N)`` calls in production code should +monkeypatch ``run_agent.time.sleep`` locally (see +``test_anthropic_error_handling.py`` for the pattern). 
+""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture(autouse=True) +def _fast_retry_backoff(monkeypatch): + """Short-circuit retry backoff for all tests in this directory.""" + try: + import run_agent + except ImportError: + return + + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py index e8835c641..8bd357d3d 100644 --- a/tests/run_agent/test_413_compression.py +++ b/tests/run_agent/test_413_compression.py @@ -19,6 +19,24 @@ import pytest from agent.context_compressor import SUMMARY_PREFIX from run_agent import AIAgent +import run_agent + + +# --------------------------------------------------------------------------- +# Fast backoff for compression retry tests +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_compression_sleep(monkeypatch): + """Short-circuit the 2s time.sleep between compression retries. + + Production code has ``time.sleep(2)`` in multiple places after a 413/context + compression, for rate-limit smoothing. Tests assert behavior, not timing. 
+ """ + import time as _time + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py index 00055928e..cdf337254 100644 --- a/tests/run_agent/test_anthropic_error_handling.py +++ b/tests/run_agent/test_anthropic_error_handling.py @@ -27,6 +27,39 @@ from gateway.config import Platform from gateway.session import SessionSource +# --------------------------------------------------------------------------- +# Fast backoff for tests that exercise the retry loop +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_backoff_wait(monkeypatch): + """Short-circuit retry backoff so tests don't block on real wall-clock waits. + + The production code uses jittered_backoff() with a 5s base delay plus a + tight time.sleep(0.2) loop. Without this patch, each 429/500/529 retry + test burns ~10s of real time on CI — across six tests that's ~60s for + behavior we're not asserting against timing. + + Tests assert retry counts and final results, never wait durations. + """ + import asyncio as _asyncio + import time as _time + + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + + # Also fast-path asyncio.sleep — the gateway's _run_agent path has + # several await asyncio.sleep(...) calls that add real wall-clock time. + _real_asyncio_sleep = _asyncio.sleep + + async def _fast_sleep(delay=0, *args, **kwargs): + # Yield to the event loop but skip the actual delay. 
+ await _real_asyncio_sleep(0) + + monkeypatch.setattr(_asyncio, "sleep", _fast_sleep) + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py index 6a5d7b363..1e5d8431c 100644 --- a/tests/run_agent/test_exit_cleanup_interrupt.py +++ b/tests/run_agent/test_exit_cleanup_interrupt.py @@ -13,6 +13,24 @@ from unittest.mock import MagicMock, patch, call import pytest +@pytest.fixture(autouse=True) +def _mock_runtime_provider(monkeypatch): + """run_job calls resolve_runtime_provider which can try real network + auto-detection (~4s of socket timeouts in hermetic CI). Mock it out + since these tests don't care about provider resolution — the agent + is mocked too.""" + import hermes_cli.runtime_provider as rp + def _fake_resolve(*args, **kwargs): + return { + "provider": "openrouter", + "api_key": "test-key", + "base_url": "https://openrouter.ai/api/v1", + "model": "test/model", + "api_mode": "chat_completions", + } + monkeypatch.setattr(rp, "resolve_runtime_provider", _fake_resolve) + + class TestCronJobCleanup: """cron/scheduler.py — end_session + close in the finally block.""" diff --git a/tests/run_agent/test_fallback_model.py b/tests/run_agent/test_fallback_model.py index 6491bd686..d2aec022e 100644 --- a/tests/run_agent/test_fallback_model.py +++ b/tests/run_agent/test_fallback_model.py @@ -11,6 +11,16 @@ from unittest.mock import MagicMock, patch import pytest from run_agent import AIAgent +import run_agent + + +@pytest.fixture(autouse=True) +def _no_fallback_wait(monkeypatch): + """Short-circuit time.sleep in fallback/recovery paths so tests don't + block on the ``min(3 + retry_count, 8)`` wait before a primary retry.""" + import time as _time + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + monkeypatch.setattr(run_agent,
"jittered_backoff", lambda *a, **k: 0.0) def _make_tool_defs(*names: str) -> list: diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 4ff00018d..81213aaf6 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -12,6 +12,15 @@ sys.modules.setdefault("fal_client", types.SimpleNamespace()) import run_agent +@pytest.fixture(autouse=True) +def _no_codex_backoff(monkeypatch): + """Short-circuit retry backoff so Codex retry tests don't block on real + wall-clock waits (5s jittered_backoff base delay + tight time.sleep loop).""" + import time as _time + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + + def _patch_agent_bootstrap(monkeypatch): monkeypatch.setattr( run_agent, diff --git a/tests/test_timezone.py b/tests/test_timezone.py index 1af60cbfa..ffb831617 100644 --- a/tests/test_timezone.py +++ b/tests/test_timezone.py @@ -159,18 +159,34 @@ class TestCodeExecutionTZ: return _json.dumps({"error": f"unexpected tool call: {function_name}"}) def test_tz_injected_when_configured(self): - """When HERMES_TIMEZONE is set, child process sees TZ env var.""" + """When HERMES_TIMEZONE is set, child process sees TZ env var. + + Verified together with leak-prevention in one subprocess call so we + don't pay the subprocess startup cost twice; empty-TZ handling keeps + its own test (each execute_code spawns a real Python subprocess ~3s).
+ """ import json as _json os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata" + # One subprocess, two things checked: + # 1) TZ is injected as "Asia/Kolkata" + # 2) HERMES_TIMEZONE itself does NOT leak into the child env + probe = ( + 'import os; ' + 'print("TZ=" + os.environ.get("TZ", "NOT_SET")); ' + 'print("HERMES_TIMEZONE=" + os.environ.get("HERMES_TIMEZONE", "NOT_SET"))' + ) with patch("model_tools.handle_function_call", side_effect=self._mock_handle): result = _json.loads(self._execute_code( - code='import os; print(os.environ.get("TZ", "NOT_SET"))', - task_id="tz-test", + code=probe, + task_id="tz-combined-test", enabled_tools=[], )) assert result["status"] == "success" - assert "Asia/Kolkata" in result["output"] + assert "TZ=Asia/Kolkata" in result["output"] + assert "HERMES_TIMEZONE=NOT_SET" in result["output"], ( + "HERMES_TIMEZONE should not leak into child env (only TZ)" + ) def test_tz_not_injected_when_empty(self): """When HERMES_TIMEZONE is not set, child process has no TZ.""" @@ -186,20 +202,6 @@ class TestCodeExecutionTZ: assert result["status"] == "success" assert "NOT_SET" in result["output"] - def test_hermes_timezone_not_leaked_to_child(self): - """HERMES_TIMEZONE itself must NOT appear in child env (only TZ).""" - import json as _json - os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata" - - with patch("model_tools.handle_function_call", side_effect=self._mock_handle): - result = _json.loads(self._execute_code( - code='import os; print(os.environ.get("HERMES_TIMEZONE", "NOT_SET"))', - task_id="tz-leak-test", - enabled_tools=[], - )) - assert result["status"] == "success" - assert "NOT_SET" in result["output"] - # ========================================================================= # Cron timezone-aware scheduling