diff --git a/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py b/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py index 8d82a2af679..76481154630 100644 --- a/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py +++ b/tests/run_agent/test_24996_fallback_exhaustion_cooldown.py @@ -70,7 +70,12 @@ class TestExhaustionArmsCooldown: cooldown = getattr(agent, "_rate_limited_until", 0) assert cooldown > before # Cooldown is the short exhaustion window, not the 60s rate-limit one. - assert cooldown <= before + _FALLBACK_EXHAUSTED_COOLDOWN_S + 1.0 + # Use a generous upper slack: the only thing this must distinguish is + # the ~5s short window from the 60s rate-limit window, so any margin + # well below 60s proves it. A tight +1.0s false-fails on a loaded CI + # runner when wall-clock jitter between `before` and the cooldown + # computation exceeds a second (GC pause, swap, scheduler contention). + assert cooldown <= before + _FALLBACK_EXHAUSTED_COOLDOWN_S + 30.0 def test_no_chain_does_not_arm_cooldown(self): """An empty chain (no fallback configured) must not arm a cooldown — diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 97f584d0732..92a01b3f41d 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -982,10 +982,16 @@ def test_session_resume_follows_compression_tip(monkeypatch, tmp_path): db = SessionDB(db_path=tmp_path / "state.db") base = int(time.time()) - 10_000 db.create_session("parent_root", source="tui") - db.append_message("parent_root", role="user", content="pre-compression turn") + db.append_message( + "parent_root", role="user", content="pre-compression turn", + timestamp=base + 10, + ) db.end_session("parent_root", "compression") db.create_session("cont_tip", source="tui", parent_session_id="parent_root") - db.append_message("cont_tip", role="assistant", content="post-compression reply") + db.append_message( + "cont_tip", role="assistant", content="post-compression reply", + timestamp=base + 110, + ) conn = db._conn assert conn is not None conn.execute( diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index 020dcd11ae8..659ef2e21e8 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -1568,14 +1568,31 @@ class TestSigkillEscalation: try: ProcessRegistry._terminate_host_pid(parent.pid) - def _all_dead(): - return not any( - psutil.pid_exists(p) - and ProcessRegistry._proc_alive(psutil.Process(p)) - for p in all_pids - ) + def _pid_dead(p: int) -> bool: + # A pid is "dead" for our purposes if it no longer exists OR + # exists only as an unreaped zombie (already terminated, just + # not reaped by its reparented parent yet). psutil can also + # raise mid-probe if the pid vanishes between the existence + # check and the status read — treat any such race as dead. + try: + if not psutil.pid_exists(p): + return True + return not ProcessRegistry._proc_alive(psutil.Process(p)) + except Exception: + return True - assert _wait_until(_all_dead, timeout=4.0), ( + def _all_dead(): + return all(_pid_dead(p) for p in all_pids) + + # _terminate_host_pid SIGKILLs synchronously before returning, so + # the kill signals are already delivered here. The only remaining + # wait is the kernel tearing down 3 processes and the reparented + # children transitioning to zombie — which can lag on a loaded CI + # runner. Give a generous budget (matches the wait() test's 10s) + # so this asserts the escalation BEHAVIOR, not the runner's + # scheduling latency. The assertion itself never weakens: every + # tree member must end up dead/zombie. + assert _wait_until(_all_dead, timeout=15.0, interval=0.02), ( "entire SIGTERM-ignoring tree (parent + children) must be SIGKILLed" ) finally: