mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
test: speed up slow tests (backoff + subprocess + IMDS network) (#11797)
Cuts shard-3 local runtime in half by neutralizing real wall-clock waits across three classes of slow test: ## 1. Retry backoff mocks - tests/run_agent/conftest.py (NEW): autouse fixture mocks jittered_backoff to 0.0 so the `while time.time() < sleep_end` busy-loop exits immediately. No global time.sleep mock (would break threading tests). - test_anthropic_error_handling, test_413_compression, test_run_agent_codex_responses, test_fallback_model: per-file fixtures mock time.sleep / asyncio.sleep for retry / compression paths. - test_retaindb_plugin: cap the retaindb module's bound time.sleep to 0.05s via a per-test shim (background writer-thread retries sleep 2s after errors; tests don't care about exact duration). Plus replace arbitrary time.sleep(N) waits with short polling loops bounded by deadline. ## 2. Subprocess sleeps in production code - test_update_gateway_restart: mock time.sleep. Production code does time.sleep(3) after `systemctl restart` to verify the service survived. Tests mock subprocess.run — nothing actually restarts — so the wait is dead time. ## 3. Network / IMDS timeouts (biggest single win) - tests/conftest.py: add AWS_EC2_METADATA_DISABLED=true plus AWS_METADATA_SERVICE_TIMEOUT=1 and AWS_METADATA_SERVICE_NUM_ATTEMPTS=1. boto3 falls back to IMDS (169.254.169.254) when no AWS creds are set. Any test hitting has_aws_credentials() / resolve_aws_auth_env_var() (e.g. test_status, test_setup_copilot_acp, anything that touches provider auto-detect) burned ~2-4s waiting for that to time out. - test_exit_cleanup_interrupt: explicitly mock resolve_runtime_provider which was doing real network auto-detect (~4s). Tests don't care about provider resolution — the agent is already mocked. - test_timezone: collapse the 3-test "TZ env in subprocess" suite into 2 tests by checking both injection AND no-leak in the same subprocess spawn (was 3 × 3.2s, now 2 × 4s). 
## Validation | Test | Before | After | |---|---|---| | test_anthropic_error_handling (8 tests) | ~80s | ~15s | | test_413_compression (14 tests) | ~18s | 2.3s | | test_retaindb_plugin (67 tests) | ~13s | 1.3s | | test_status_includes_tavily_key | 4.0s | 0.05s | | test_setup_copilot_acp_skips_same_provider_pool_step | 8.0s | 0.26s | | test_update_gateway_restart (5 tests) | ~18s total | ~0.35s total | | test_exit_cleanup_interrupt (2 tests) | 8s | 1.5s | | **Matrix shard 3 local** | **108s** | **50s** | No behavioral contract changed — tests still verify retry happens, service restart logic runs, etc.; they just don't burn real seconds waiting for it. Supersedes PR #11779 (those changes are included here).
This commit is contained in:
parent
eb07c05646
commit
3207b9bda0
10 changed files with 231 additions and 33 deletions
|
|
@ -229,6 +229,15 @@ def _hermetic_environment(tmp_path, monkeypatch):
|
||||||
monkeypatch.setenv("LC_ALL", "C.UTF-8")
|
monkeypatch.setenv("LC_ALL", "C.UTF-8")
|
||||||
monkeypatch.setenv("PYTHONHASHSEED", "0")
|
monkeypatch.setenv("PYTHONHASHSEED", "0")
|
||||||
|
|
||||||
|
# 4b. Disable AWS IMDS lookups. Without this, any test that ends up
|
||||||
|
# calling has_aws_credentials() / resolve_aws_auth_env_var()
|
||||||
|
# (e.g. provider auto-detect, status command, cron run_job) burns
|
||||||
|
# ~2s waiting for the metadata service at 169.254.169.254 to time
|
||||||
|
# out. Tests don't run on EC2 — IMDS is always unreachable here.
|
||||||
|
monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
|
||||||
|
monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1")
|
||||||
|
monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1")
|
||||||
|
|
||||||
# 5. Reset plugin singleton so tests don't leak plugins from
|
# 5. Reset plugin singleton so tests don't leak plugins from
|
||||||
# ~/.hermes/plugins/ (which, per step 3, is now empty — but the
|
# ~/.hermes/plugins/ (which, per step 3, is now empty — but the
|
||||||
# singleton might still be cached from a previous test).
|
# singleton might still be cached from a previous test).
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,29 @@ from unittest.mock import patch, MagicMock
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import hermes_cli.gateway as gateway_cli
|
import hermes_cli.gateway as gateway_cli
|
||||||
|
import hermes_cli.main as cli_main
|
||||||
from hermes_cli.main import cmd_update
|
from hermes_cli.main import cmd_update
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Skip the real-time sleeps inside cmd_update's restart-verification path
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _no_restart_verify_sleep(monkeypatch):
|
||||||
|
"""hermes_cli/main.py uses time.sleep(3) after systemctl restart to
|
||||||
|
verify the service survived. Tests mock subprocess.run — nothing
|
||||||
|
actually restarts — so the 3s wait is dead time.
|
||||||
|
|
||||||
|
main.py does ``import time as _time`` at both module level (line 167)
|
||||||
|
and inside functions (lines 3281, 4384, 4401). Patching the global
|
||||||
|
``time.sleep`` affects only the duration of this test.
|
||||||
|
"""
|
||||||
|
import time as _real_time
|
||||||
|
monkeypatch.setattr(_real_time, "sleep", lambda *_a, **_k: None)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Helpers
|
# Helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,31 @@ def _isolate_env(tmp_path, monkeypatch):
|
||||||
monkeypatch.delenv("RETAINDB_PROJECT", raising=False)
|
monkeypatch.delenv("RETAINDB_PROJECT", raising=False)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _cap_retaindb_sleeps(monkeypatch):
|
||||||
|
"""Cap production-code sleeps so background-thread tests run fast.
|
||||||
|
|
||||||
|
The retaindb ``_WriteQueue._flush_row`` does ``time.sleep(2)`` after
|
||||||
|
errors. Across multiple tests that trigger the retry path, that adds
|
||||||
|
up. Cap the module's bound ``time.sleep`` to 0.05s — tests don't care
|
||||||
|
about the exact retry delay, only that it happens. The test file's
|
||||||
|
own ``time.sleep`` stays real since it uses a different reference.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from plugins.memory import retaindb as _retaindb
|
||||||
|
except ImportError:
|
||||||
|
return
|
||||||
|
|
||||||
|
real_sleep = _retaindb.time.sleep
|
||||||
|
|
||||||
|
def _capped_sleep(seconds):
|
||||||
|
return real_sleep(min(float(seconds), 0.05))
|
||||||
|
|
||||||
|
import types as _types
|
||||||
|
fake_time = _types.SimpleNamespace(sleep=_capped_sleep, time=_retaindb.time.time)
|
||||||
|
monkeypatch.setattr(_retaindb, "time", fake_time)
|
||||||
|
|
||||||
|
|
||||||
# We need the repo root on sys.path so the plugin can import agent.memory_provider
|
# We need the repo root on sys.path so the plugin can import agent.memory_provider
|
||||||
import sys
|
import sys
|
||||||
_repo_root = str(Path(__file__).resolve().parents[2])
|
_repo_root = str(Path(__file__).resolve().parents[2])
|
||||||
|
|
@ -130,16 +155,18 @@ class TestWriteQueue:
|
||||||
def test_enqueue_creates_row(self, tmp_path):
|
def test_enqueue_creates_row(self, tmp_path):
|
||||||
q, client, db_path = self._make_queue(tmp_path)
|
q, client, db_path = self._make_queue(tmp_path)
|
||||||
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
|
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
|
||||||
# Give the writer thread a moment to process
|
# shutdown() blocks until the writer thread drains the queue — no need
|
||||||
time.sleep(1)
|
# to pre-sleep (the old 1s sleep was a just-in-case wait, but shutdown
|
||||||
|
# does the right thing).
|
||||||
q.shutdown()
|
q.shutdown()
|
||||||
# If ingest succeeded, the row should be deleted
|
# If ingest succeeded, the row should be deleted
|
||||||
client.ingest_session.assert_called_once()
|
client.ingest_session.assert_called_once()
|
||||||
|
|
||||||
def test_enqueue_persists_to_sqlite(self, tmp_path):
|
def test_enqueue_persists_to_sqlite(self, tmp_path):
|
||||||
client = MagicMock()
|
client = MagicMock()
|
||||||
# Make ingest hang so the row stays in SQLite
|
# Make ingest slow so the row is still in SQLite when we peek.
|
||||||
client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(5))
|
# 0.5s is plenty — the test just needs the flush to still be in-flight.
|
||||||
|
client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(0.5))
|
||||||
db_path = tmp_path / "test_queue.db"
|
db_path = tmp_path / "test_queue.db"
|
||||||
q = _WriteQueue(client, db_path)
|
q = _WriteQueue(client, db_path)
|
||||||
q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}])
|
q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}])
|
||||||
|
|
@ -154,8 +181,7 @@ class TestWriteQueue:
|
||||||
def test_flush_deletes_row_on_success(self, tmp_path):
|
def test_flush_deletes_row_on_success(self, tmp_path):
|
||||||
q, client, db_path = self._make_queue(tmp_path)
|
q, client, db_path = self._make_queue(tmp_path)
|
||||||
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
|
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
|
||||||
time.sleep(1)
|
q.shutdown() # blocks until drain
|
||||||
q.shutdown()
|
|
||||||
# Row should be gone
|
# Row should be gone
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0]
|
rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0]
|
||||||
|
|
@ -168,14 +194,20 @@ class TestWriteQueue:
|
||||||
db_path = tmp_path / "test_queue.db"
|
db_path = tmp_path / "test_queue.db"
|
||||||
q = _WriteQueue(client, db_path)
|
q = _WriteQueue(client, db_path)
|
||||||
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
|
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
|
||||||
time.sleep(3) # Allow retry + sleep(2) in _flush_row
|
# Poll for the error to be recorded (max 2s), instead of a fixed 3s wait.
|
||||||
q.shutdown()
|
deadline = time.time() + 2.0
|
||||||
# Row should still exist with error recorded
|
last_error = None
|
||||||
|
while time.time() < deadline:
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
row = conn.execute("SELECT last_error FROM pending").fetchone()
|
row = conn.execute("SELECT last_error FROM pending").fetchone()
|
||||||
conn.close()
|
conn.close()
|
||||||
assert row is not None
|
if row and row[0]:
|
||||||
assert "API down" in row[0]
|
last_error = row[0]
|
||||||
|
break
|
||||||
|
time.sleep(0.05)
|
||||||
|
q.shutdown()
|
||||||
|
assert last_error is not None
|
||||||
|
assert "API down" in last_error
|
||||||
|
|
||||||
def test_thread_local_connection_reuse(self, tmp_path):
|
def test_thread_local_connection_reuse(self, tmp_path):
|
||||||
q, _, _ = self._make_queue(tmp_path)
|
q, _, _ = self._make_queue(tmp_path)
|
||||||
|
|
@ -193,14 +225,27 @@ class TestWriteQueue:
|
||||||
client1.ingest_session = MagicMock(side_effect=RuntimeError("fail"))
|
client1.ingest_session = MagicMock(side_effect=RuntimeError("fail"))
|
||||||
q1 = _WriteQueue(client1, db_path)
|
q1 = _WriteQueue(client1, db_path)
|
||||||
q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}])
|
q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}])
|
||||||
time.sleep(3)
|
# Wait until the error is recorded (poll with short interval).
|
||||||
|
deadline = time.time() + 2.0
|
||||||
|
while time.time() < deadline:
|
||||||
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
row = conn.execute("SELECT last_error FROM pending").fetchone()
|
||||||
|
conn.close()
|
||||||
|
if row and row[0]:
|
||||||
|
break
|
||||||
|
time.sleep(0.05)
|
||||||
q1.shutdown()
|
q1.shutdown()
|
||||||
|
|
||||||
# Now create a new queue — it should replay the pending rows
|
# Now create a new queue — it should replay the pending rows
|
||||||
client2 = MagicMock()
|
client2 = MagicMock()
|
||||||
client2.ingest_session = MagicMock(return_value={"status": "ok"})
|
client2.ingest_session = MagicMock(return_value={"status": "ok"})
|
||||||
q2 = _WriteQueue(client2, db_path)
|
q2 = _WriteQueue(client2, db_path)
|
||||||
time.sleep(2)
|
# Poll for the replay to happen.
|
||||||
|
deadline = time.time() + 2.0
|
||||||
|
while time.time() < deadline:
|
||||||
|
if client2.ingest_session.called:
|
||||||
|
break
|
||||||
|
time.sleep(0.05)
|
||||||
q2.shutdown()
|
q2.shutdown()
|
||||||
|
|
||||||
# The replayed row should have been ingested via client2
|
# The replayed row should have been ingested via client2
|
||||||
|
|
|
||||||
34
tests/run_agent/conftest.py
Normal file
34
tests/run_agent/conftest.py
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
"""Fast-path fixtures shared across tests/run_agent/.
|
||||||
|
|
||||||
|
Many tests in this directory exercise the retry/backoff paths in the
|
||||||
|
agent loop. Production code uses ``jittered_backoff(base_delay=5.0)``
|
||||||
|
with a ``while time.time() < sleep_end`` loop — a single retry test
|
||||||
|
spends 5+ seconds of real wall-clock time on backoff waits.
|
||||||
|
|
||||||
|
Mocking ``jittered_backoff`` to return 0.0 collapses the while-loop
|
||||||
|
to a no-op (``time.time() < time.time() + 0`` is false immediately),
|
||||||
|
which handles the most common case without touching ``time.sleep``.
|
||||||
|
|
||||||
|
We deliberately DO NOT mock ``time.sleep`` here — some tests
|
||||||
|
(test_interrupt_propagation, test_primary_runtime_restore, etc.) use
|
||||||
|
the real ``time.sleep`` for threading coordination or assert that it
|
||||||
|
was called with specific values. Tests that want to additionally
|
||||||
|
fast-path direct ``time.sleep(N)`` calls in production code should
|
||||||
|
monkeypatch ``run_agent.time.sleep`` locally (see
|
||||||
|
``test_anthropic_error_handling.py`` for the pattern).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _fast_retry_backoff(monkeypatch):
|
||||||
|
"""Short-circuit retry backoff for all tests in this directory."""
|
||||||
|
try:
|
||||||
|
import run_agent
|
||||||
|
except ImportError:
|
||||||
|
return
|
||||||
|
|
||||||
|
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
|
||||||
|
|
@ -19,6 +19,24 @@ import pytest
|
||||||
|
|
||||||
from agent.context_compressor import SUMMARY_PREFIX
|
from agent.context_compressor import SUMMARY_PREFIX
|
||||||
from run_agent import AIAgent
|
from run_agent import AIAgent
|
||||||
|
import run_agent
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fast backoff for compression retry tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _no_compression_sleep(monkeypatch):
|
||||||
|
"""Short-circuit the 2s time.sleep between compression retries.
|
||||||
|
|
||||||
|
Production code has ``time.sleep(2)`` in multiple places after a 413/context
|
||||||
|
compression, for rate-limit smoothing. Tests assert behavior, not timing.
|
||||||
|
"""
|
||||||
|
import time as _time
|
||||||
|
monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
|
||||||
|
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,39 @@ from gateway.config import Platform
|
||||||
from gateway.session import SessionSource
|
from gateway.session import SessionSource
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fast backoff for tests that exercise the retry loop
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _no_backoff_wait(monkeypatch):
|
||||||
|
"""Short-circuit retry backoff so tests don't block on real wall-clock waits.
|
||||||
|
|
||||||
|
The production code uses jittered_backoff() with a 5s base delay plus a
|
||||||
|
tight time.sleep(0.2) loop. Without this patch, each 429/500/529 retry
|
||||||
|
test burns ~10s of real time on CI — across six tests that's ~60s for
|
||||||
|
behavior we're not asserting against timing.
|
||||||
|
|
||||||
|
Tests assert retry counts and final results, never wait durations.
|
||||||
|
"""
|
||||||
|
import asyncio as _asyncio
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
|
||||||
|
monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
|
||||||
|
|
||||||
|
# Also fast-path asyncio.sleep — the gateway's _run_agent path has
|
||||||
|
# several await asyncio.sleep(...) calls that add real wall-clock time.
|
||||||
|
_real_asyncio_sleep = _asyncio.sleep
|
||||||
|
|
||||||
|
async def _fast_sleep(delay=0, *args, **kwargs):
|
||||||
|
# Yield to the event loop but skip the actual delay.
|
||||||
|
await _real_asyncio_sleep(0)
|
||||||
|
|
||||||
|
monkeypatch.setattr(_asyncio, "sleep", _fast_sleep)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Helpers
|
# Helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,24 @@ from unittest.mock import MagicMock, patch, call
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _mock_runtime_provider(monkeypatch):
|
||||||
|
"""run_job calls resolve_runtime_provider which can try real network
|
||||||
|
auto-detection (~4s of socket timeouts in hermetic CI). Mock it out
|
||||||
|
since these tests don't care about provider resolution — the agent
|
||||||
|
is mocked too."""
|
||||||
|
import hermes_cli.runtime_provider as rp
|
||||||
|
def _fake_resolve(*args, **kwargs):
|
||||||
|
return {
|
||||||
|
"provider": "openrouter",
|
||||||
|
"api_key": "test-key",
|
||||||
|
"base_url": "https://openrouter.ai/api/v1",
|
||||||
|
"model": "test/model",
|
||||||
|
"api_mode": "chat_completions",
|
||||||
|
}
|
||||||
|
monkeypatch.setattr(rp, "resolve_runtime_provider", _fake_resolve)
|
||||||
|
|
||||||
|
|
||||||
class TestCronJobCleanup:
|
class TestCronJobCleanup:
|
||||||
"""cron/scheduler.py — end_session + close in the finally block."""
|
"""cron/scheduler.py — end_session + close in the finally block."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,16 @@ from unittest.mock import MagicMock, patch
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from run_agent import AIAgent
|
from run_agent import AIAgent
|
||||||
|
import run_agent
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _no_fallback_wait(monkeypatch):
|
||||||
|
"""Short-circuit time.sleep in fallback/recovery paths so tests don't
|
||||||
|
block on the ``min(3 + retry_count, 8)`` wait before a primary retry."""
|
||||||
|
import time as _time
|
||||||
|
monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
|
||||||
|
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
|
||||||
|
|
||||||
|
|
||||||
def _make_tool_defs(*names: str) -> list:
|
def _make_tool_defs(*names: str) -> list:
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,15 @@ sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
||||||
import run_agent
|
import run_agent
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _no_codex_backoff(monkeypatch):
|
||||||
|
"""Short-circuit retry backoff so Codex retry tests don't block on real
|
||||||
|
wall-clock waits (5s jittered_backoff base delay + tight time.sleep loop)."""
|
||||||
|
import time as _time
|
||||||
|
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
|
||||||
|
monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
|
||||||
|
|
||||||
|
|
||||||
def _patch_agent_bootstrap(monkeypatch):
|
def _patch_agent_bootstrap(monkeypatch):
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
run_agent,
|
run_agent,
|
||||||
|
|
|
||||||
|
|
@ -159,18 +159,34 @@ class TestCodeExecutionTZ:
|
||||||
return _json.dumps({"error": f"unexpected tool call: {function_name}"})
|
return _json.dumps({"error": f"unexpected tool call: {function_name}"})
|
||||||
|
|
||||||
def test_tz_injected_when_configured(self):
|
def test_tz_injected_when_configured(self):
|
||||||
"""When HERMES_TIMEZONE is set, child process sees TZ env var."""
|
"""When HERMES_TIMEZONE is set, child process sees TZ env var.
|
||||||
|
|
||||||
|
Verified alongside leak-prevention + empty-TZ handling in one
|
||||||
|
subprocess call so we don't pay 3x the subprocess startup cost
|
||||||
|
(each execute_code spawns a real Python subprocess ~3s).
|
||||||
|
"""
|
||||||
import json as _json
|
import json as _json
|
||||||
os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
|
os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
|
||||||
|
|
||||||
|
# One subprocess, two things checked:
|
||||||
|
# 1) TZ is injected as "Asia/Kolkata"
|
||||||
|
# 2) HERMES_TIMEZONE itself does NOT leak into the child env
|
||||||
|
probe = (
|
||||||
|
'import os; '
|
||||||
|
'print("TZ=" + os.environ.get("TZ", "NOT_SET")); '
|
||||||
|
'print("HERMES_TIMEZONE=" + os.environ.get("HERMES_TIMEZONE", "NOT_SET"))'
|
||||||
|
)
|
||||||
with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
|
with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
|
||||||
result = _json.loads(self._execute_code(
|
result = _json.loads(self._execute_code(
|
||||||
code='import os; print(os.environ.get("TZ", "NOT_SET"))',
|
code=probe,
|
||||||
task_id="tz-test",
|
task_id="tz-combined-test",
|
||||||
enabled_tools=[],
|
enabled_tools=[],
|
||||||
))
|
))
|
||||||
assert result["status"] == "success"
|
assert result["status"] == "success"
|
||||||
assert "Asia/Kolkata" in result["output"]
|
assert "TZ=Asia/Kolkata" in result["output"]
|
||||||
|
assert "HERMES_TIMEZONE=NOT_SET" in result["output"], (
|
||||||
|
"HERMES_TIMEZONE should not leak into child env (only TZ)"
|
||||||
|
)
|
||||||
|
|
||||||
def test_tz_not_injected_when_empty(self):
|
def test_tz_not_injected_when_empty(self):
|
||||||
"""When HERMES_TIMEZONE is not set, child process has no TZ."""
|
"""When HERMES_TIMEZONE is not set, child process has no TZ."""
|
||||||
|
|
@ -186,20 +202,6 @@ class TestCodeExecutionTZ:
|
||||||
assert result["status"] == "success"
|
assert result["status"] == "success"
|
||||||
assert "NOT_SET" in result["output"]
|
assert "NOT_SET" in result["output"]
|
||||||
|
|
||||||
def test_hermes_timezone_not_leaked_to_child(self):
|
|
||||||
"""HERMES_TIMEZONE itself must NOT appear in child env (only TZ)."""
|
|
||||||
import json as _json
|
|
||||||
os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
|
|
||||||
|
|
||||||
with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
|
|
||||||
result = _json.loads(self._execute_code(
|
|
||||||
code='import os; print(os.environ.get("HERMES_TIMEZONE", "NOT_SET"))',
|
|
||||||
task_id="tz-leak-test",
|
|
||||||
enabled_tools=[],
|
|
||||||
))
|
|
||||||
assert result["status"] == "success"
|
|
||||||
assert "NOT_SET" in result["output"]
|
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Cron timezone-aware scheduling
|
# Cron timezone-aware scheduling
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue