mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
test(whatsapp): fix port-spares-client test race (listen before announce + retry connect)
The salvaged test spawned a listener subprocess that printed its port immediately after bind() but BEFORE listen(), so under CI's loaded 8-worker box the parent connected before the socket was listening -> ConnectionRefused (flaked on test slice 2/6). Reorder the child to listen() then print the port, and make the client connect with a short bounded retry to absorb scheduler jitter. 15/15 green locally including direct hammering.
This commit is contained in:
parent
0fb3b13b00
commit
1cefc2a24e
1 changed files with 18 additions and 3 deletions
|
|
@ -168,14 +168,29 @@ class TestKillPortProcess:
|
|||
sys.executable, "-c",
|
||||
"import socket,time;"
|
||||
"s=socket.socket();s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1);"
|
||||
"s.bind(('127.0.0.1',0));print(s.getsockname()[1],flush=True);"
|
||||
"s.listen(5);time.sleep(30)",
|
||||
"s.bind(('127.0.0.1',0));port=s.getsockname()[1];"
|
||||
"s.listen(5);" # listen BEFORE announcing the port
|
||||
"print(port,flush=True);" # so the parent never connects too early
|
||||
"time.sleep(30)",
|
||||
],
|
||||
stdout=subprocess.PIPE, text=True,
|
||||
)
|
||||
try:
|
||||
port = int(listener.stdout.readline().strip())
|
||||
cli = socket.create_connection(("127.0.0.1", port)) # we are the client
|
||||
# Connect with a short retry: under a loaded CI box the child can
|
||||
# print the port a hair before the listen backlog is fully ready,
|
||||
# so a single immediate connect occasionally hits ECONNREFUSED.
|
||||
cli = None
|
||||
deadline = time.monotonic() + 5.0
|
||||
last_err = None
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
cli = socket.create_connection(("127.0.0.1", port), timeout=1.0)
|
||||
break
|
||||
except (ConnectionRefusedError, OSError) as e:
|
||||
last_err = e
|
||||
time.sleep(0.05)
|
||||
assert cli is not None, f"could not connect to listener: {last_err}"
|
||||
_kill_port_process(port)
|
||||
assert _pid_exists(os.getpid()), "client (test process) must survive"
|
||||
assert _wait_dead(listener, timeout=5.0), "stale listener should be killed"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue