diff --git a/docs/news.rst b/docs/news.rst index 8cbfd00a0..15a405229 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -1,6 +1,14 @@ Changelog ========= +next (unreleased) +----------------- + +Lab +^^^ +* Fix process group termination: after ``SIGTERM``, escalate to ``SIGKILL`` whenever any process in the group is still alive. The previous ``poll()``-based check only looked at the group leader and missed cases where a wrapper script exited cleanly while its children kept running (Travis Rivera Petit). + + v8.9 (2026-02-25) ----------------- diff --git a/lab/calls/call.py b/lab/calls/call.py index 7c6e5cb2e..4491c240a 100644 --- a/lab/calls/call.py +++ b/lab/calls/call.py @@ -242,13 +242,33 @@ def _update_cpu_time(self): def _terminate_process_group(self): """Terminate the entire process group (parent and all children).""" + # Resolve the pgid once: after SIGTERM the leader may exit and be + # reaped, making a later os.getpgid(self.process.pid) fail even + # though children in the group are still running. + try: + pgid = os.getpgid(self.process.pid) + except (OSError, ProcessLookupError): + return + with contextlib.suppress(OSError, ProcessLookupError): - os.killpg(os.getpgid(self.process.pid), signal.SIGTERM) + os.killpg(pgid, signal.SIGTERM) + # Give it a moment to terminate gracefully. time.sleep(1) - if self.process.poll() is None: - with contextlib.suppress(OSError, ProcessLookupError): - os.killpg(os.getpgid(self.process.pid), signal.SIGKILL) + + # We can't use self.process.poll() to decide whether to escalate: + # poll() only tracks the leader, but a wrapper leader (e.g. + # fast-downward.py) can exit cleanly after SIGTERM while its + # children (translate, search) keep running in the group. Probing + # the group with signal 0 tells us whether any member is still + # alive. + try: + os.killpg(pgid, 0) + except (OSError, ProcessLookupError): + return + + with contextlib.suppress(OSError, ProcessLookupError): + os.killpg(pgid, signal.SIGKILL) def _monitor_time_limits(self): """