"""Reproducible perf benchmark for file sync overhead.

Measures actual env.execute() wall-clock time, no LLM in the loop.
Run with: uv run pytest tests/tools/test_file_sync_perf.py -v -o "addopts=" -s

Requires backends to be configured (SSH host, Modal creds, etc.).
Skip markers gate each backend.
"""

import statistics
import time

import pytest

# ---------------------------------------------------------------------------
# Backend fixtures
# ---------------------------------------------------------------------------

@pytest.fixture
def local_env():
    """Provide a LocalEnvironment rooted at /tmp with a 30s timeout.

    The environment's ``cleanup()`` is called after the requesting test.
    """
    from tools.environments.local import LocalEnvironment

    environment = LocalEnvironment(timeout=30, cwd="/tmp")
    yield environment
    # Teardown half of the fixture: executed after the test has used it.
    environment.cleanup()


@pytest.fixture
def ssh_env():
    """Provide an SSHEnvironment built from environment-variable settings.

    Reads TERMINAL_SSH_HOST and TERMINAL_SSH_USER; the requesting test is
    skipped when either is unset or empty. The environment's ``cleanup()``
    is called after the requesting test.
    """
    import os

    ssh_host = os.environ.get("TERMINAL_SSH_HOST")
    ssh_user = os.environ.get("TERMINAL_SSH_USER")
    if not (ssh_host and ssh_user):
        pytest.skip("TERMINAL_SSH_HOST and TERMINAL_SSH_USER required")

    # Deferred import: only reached once both settings are present.
    from tools.environments.ssh import SSHEnvironment

    environment = SSHEnvironment(
        host=ssh_host,
        user=ssh_user,
        cwd="/tmp",
        timeout=30,
    )
    yield environment
    # Teardown half of the fixture: executed after the test has used it.
    environment.cleanup()


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _time_executions(env, command: str, n: int = 10) -> list[float]:
    """Run *command* n times and return per-call wall-clock durations."""
    durations = []
    for _ in range(n):
        t0 = time.monotonic()
        result = env.execute(command, timeout=10)
        elapsed = time.monotonic() - t0
        durations.append(elapsed)
        assert result.get("returncode", result.get("exit_code", -1)) == 0, \
            f"command failed: {result}"
    return durations


def _report(label: str, durations: list[float]):
    """Print timing stats."""
    med = statistics.median(durations)
    mean = statistics.mean(durations)
    p95 = sorted(durations)[int(len(durations) * 0.95)]
    print(f"\n  {label}:")
    print(f"    n={len(durations)}  median={med*1000:.0f}ms  mean={mean*1000:.0f}ms  p95={p95*1000:.0f}ms")
    print(f"    raw: {[f'{d*1000:.0f}ms' for d in durations]}")
    return med


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------

class TestLocalPerf:
    """Baseline timings on the local backend (no file sync, no network)."""

    def test_echo_latency(self, local_env):
        """Median of 20 sequential ``echo hello`` calls must stay under 500ms."""
        samples = _time_executions(local_env, "echo hello", n=20)
        median_s = _report("local echo", samples)
        # Budget for the per-call process spawn overhead.
        assert median_s < 0.5, f"local echo median {median_s*1000:.0f}ms exceeds 500ms"


@pytest.mark.ssh
class TestSSHPerf:
    """SSH backend timings; the FileSyncManager mtime skip should make sync ~0ms."""

    def test_echo_latency(self, ssh_env):
        """Time 20 sequential echo calls, each including the sync check."""
        samples = _time_executions(ssh_env, "echo hello", n=20)
        median_s = _report("ssh echo (with sync check)", samples)
        # The budget covers the SSH round-trip and per-call spawn; the sync
        # check is expected to be rate limited and add roughly nothing.
        assert median_s < 2.0, f"ssh echo median {median_s*1000:.0f}ms exceeds 2000ms"

    def test_sync_overhead_after_interval(self, ssh_env):
        """Time one command issued after the sync rate-limit window has lapsed.

        A warm-up command runs first, then the test sleeps 6s to get past the
        5s sync interval, so the timed command goes through a real sync cycle.
        With the mtime skip that cycle is expected to stay cheap.
        """
        # Warm-up call, not timed.
        ssh_env.execute("echo warmup", timeout=10)

        # Let the sync interval run out.
        time.sleep(6)

        # Timed call: expected to go through a full sync cycle.
        started = time.monotonic()
        outcome = ssh_env.execute("echo after-interval", timeout=10)
        took = time.monotonic() - started

        print(f"\n  ssh echo after 6s wait (sync triggered): {took*1000:.0f}ms")
        assert outcome.get("returncode", outcome.get("exit_code", -1)) == 0

        # Threshold rationale: the older rsync approach took ~2-3s here,
        # while the mtime-skip path should finish in under 1.5s.
        assert took < 1.5, f"sync-triggered command took {took*1000:.0f}ms (expected < 1500ms)"

    def test_no_sync_within_interval(self, ssh_env):
        """Time 10 rapid commands issued inside the 5s window (no sync expected)."""
        # Priming call: this is the one expected to trigger the sync.
        ssh_env.execute("echo prime", timeout=10)

        # Follow-up calls run straight away, inside the rate-limit window.
        samples = _time_executions(ssh_env, "echo rapid", n=10)
        median_s = _report("ssh echo (within interval, no sync)", samples)

        # Only SSH overhead should remain once sync is skipped.
        assert median_s < 1.5, f"within-interval median {median_s*1000:.0f}ms exceeds 1500ms"