mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
test: speed up slow tests (backoff + subprocess + IMDS network) (#11797)
Cuts shard-3 local runtime in half by neutralizing real wall-clock waits across three classes of slow test: ## 1. Retry backoff mocks - tests/run_agent/conftest.py (NEW): autouse fixture mocks jittered_backoff to 0.0 so the `while time.time() < sleep_end` busy-loop exits immediately. No global time.sleep mock (would break threading tests). - test_anthropic_error_handling, test_413_compression, test_run_agent_codex_responses, test_fallback_model: per-file fixtures mock time.sleep / asyncio.sleep for retry / compression paths. - test_retaindb_plugin: cap the retaindb module's bound time.sleep to 0.05s via a per-test shim (background writer-thread retries sleep 2s after errors; tests don't care about exact duration). Plus replace arbitrary time.sleep(N) waits with short polling loops bounded by deadline. ## 2. Subprocess sleeps in production code - test_update_gateway_restart: mock time.sleep. Production code does time.sleep(3) after `systemctl restart` to verify the service survived. Tests mock subprocess.run \u2014 nothing actually restarts \u2014 so the wait is dead time. ## 3. Network / IMDS timeouts (biggest single win) - tests/conftest.py: add AWS_EC2_METADATA_DISABLED=true plus AWS_METADATA_SERVICE_TIMEOUT=1 and ATTEMPTS=1. boto3 falls back to IMDS (169.254.169.254) when no AWS creds are set. Any test hitting has_aws_credentials() / resolve_aws_auth_env_var() (e.g. test_status, test_setup_copilot_acp, anything that touches provider auto-detect) burned ~2-4s waiting for that to time out. - test_exit_cleanup_interrupt: explicitly mock resolve_runtime_provider which was doing real network auto-detect (~4s). Tests don't care about provider resolution \u2014 the agent is already mocked. - test_timezone: collapse the 3-test "TZ env in subprocess" suite into 2 tests by checking both injection AND no-leak in the same subprocess spawn (was 3 \u00d7 3.2s, now 2 \u00d7 4s). ## Validation | Test | Before | After | |---|---|---| | test_anthropic_error_handling (8 tests) | ~80s | ~15s | | test_413_compression (14 tests) | ~18s | 2.3s | | test_retaindb_plugin (67 tests) | ~13s | 1.3s | | test_status_includes_tavily_key | 4.0s | 0.05s | | test_setup_copilot_acp_skips_same_provider_pool_step | 8.0s | 0.26s | | test_update_gateway_restart (5 tests) | ~18s total | ~0.35s total | | test_exit_cleanup_interrupt (2 tests) | 8s | 1.5s | | **Matrix shard 3 local** | **108s** | **50s** | No behavioral contract changed \u2014 tests still verify retry happens, service restart logic runs, etc.; they just don't burn real seconds waiting for it. Supersedes PR #11779 (those changes are included here).
This commit is contained in:
parent
eb07c05646
commit
3207b9bda0
10 changed files with 231 additions and 33 deletions
|
|
@ -159,18 +159,34 @@ class TestCodeExecutionTZ:
|
|||
return _json.dumps({"error": f"unexpected tool call: {function_name}"})
|
||||
|
||||
def test_tz_injected_when_configured(self):
|
||||
"""When HERMES_TIMEZONE is set, child process sees TZ env var."""
|
||||
"""When HERMES_TIMEZONE is set, child process sees TZ env var.
|
||||
|
||||
Verified alongside leak-prevention + empty-TZ handling in one
|
||||
subprocess call so we don't pay 3x the subprocess startup cost
|
||||
(each execute_code spawns a real Python subprocess ~3s).
|
||||
"""
|
||||
import json as _json
|
||||
os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
|
||||
|
||||
# One subprocess, three things checked:
|
||||
# 1) TZ is injected as "Asia/Kolkata"
|
||||
# 2) HERMES_TIMEZONE itself does NOT leak into the child env
|
||||
probe = (
|
||||
'import os; '
|
||||
'print("TZ=" + os.environ.get("TZ", "NOT_SET")); '
|
||||
'print("HERMES_TIMEZONE=" + os.environ.get("HERMES_TIMEZONE", "NOT_SET"))'
|
||||
)
|
||||
with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
|
||||
result = _json.loads(self._execute_code(
|
||||
code='import os; print(os.environ.get("TZ", "NOT_SET"))',
|
||||
task_id="tz-test",
|
||||
code=probe,
|
||||
task_id="tz-combined-test",
|
||||
enabled_tools=[],
|
||||
))
|
||||
assert result["status"] == "success"
|
||||
assert "Asia/Kolkata" in result["output"]
|
||||
assert "TZ=Asia/Kolkata" in result["output"]
|
||||
assert "HERMES_TIMEZONE=NOT_SET" in result["output"], (
|
||||
"HERMES_TIMEZONE should not leak into child env (only TZ)"
|
||||
)
|
||||
|
||||
def test_tz_not_injected_when_empty(self):
|
||||
"""When HERMES_TIMEZONE is not set, child process has no TZ."""
|
||||
|
|
@ -186,20 +202,6 @@ class TestCodeExecutionTZ:
|
|||
assert result["status"] == "success"
|
||||
assert "NOT_SET" in result["output"]
|
||||
|
||||
def test_hermes_timezone_not_leaked_to_child(self):
|
||||
"""HERMES_TIMEZONE itself must NOT appear in child env (only TZ)."""
|
||||
import json as _json
|
||||
os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
|
||||
|
||||
with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
|
||||
result = _json.loads(self._execute_code(
|
||||
code='import os; print(os.environ.get("HERMES_TIMEZONE", "NOT_SET"))',
|
||||
task_id="tz-leak-test",
|
||||
enabled_tools=[],
|
||||
))
|
||||
assert result["status"] == "success"
|
||||
assert "NOT_SET" in result["output"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Cron timezone-aware scheduling
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue