From 1cefc2a24e8364b9edcbb3866c161119d56a89d6 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 21 Jun 2026 16:54:13 -0700 Subject: [PATCH] test(whatsapp): fix port-spares-client test race (listen before announce + retry connect) The salvaged test spawned a listener subprocess that printed its port immediately after bind() but BEFORE listen(), so under CI's loaded 8-worker box the parent connected before the socket was listening -> ConnectionRefused (flaked on test slice 2/6). Reorder the child to listen() then print the port, and make the client connect with a short bounded retry to absorb scheduler jitter. 15/15 green locally including direct hammering. --- tests/gateway/test_whatsapp_bridge_pidfile.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/gateway/test_whatsapp_bridge_pidfile.py b/tests/gateway/test_whatsapp_bridge_pidfile.py index 3da6fe998a1..4d96a616567 100644 --- a/tests/gateway/test_whatsapp_bridge_pidfile.py +++ b/tests/gateway/test_whatsapp_bridge_pidfile.py @@ -168,14 +168,29 @@ class TestKillPortProcess: sys.executable, "-c", "import socket,time;" "s=socket.socket();s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1);" - "s.bind(('127.0.0.1',0));print(s.getsockname()[1],flush=True);" - "s.listen(5);time.sleep(30)", + "s.bind(('127.0.0.1',0));port=s.getsockname()[1];" + "s.listen(5);" # listen BEFORE announcing the port + "print(port,flush=True);" # so the parent never connects too early + "time.sleep(30)", ], stdout=subprocess.PIPE, text=True, ) try: port = int(listener.stdout.readline().strip()) - cli = socket.create_connection(("127.0.0.1", port)) # we are the client + # Connect with a short bounded retry as a safety net: the child now + # listens before printing its port, so a refusal should be rare, but + # retrying absorbs any transient connect error on a loaded CI box. 
+ cli = None + deadline = time.monotonic() + 5.0 + last_err = None + while time.monotonic() < deadline: + try: + cli = socket.create_connection(("127.0.0.1", port), timeout=1.0) + break + except (ConnectionRefusedError, OSError) as e: + last_err = e + time.sleep(0.05) + assert cli is not None, f"could not connect to listener: {last_err}" _kill_port_process(port) assert _pid_exists(os.getpid()), "client (test process) must survive" assert _wait_dead(listener, timeout=5.0), "stale listener should be killed"