diff --git a/flake.nix b/flake.nix index 87362a393..e3b5a970b 100644 --- a/flake.nix +++ b/flake.nix @@ -21,7 +21,7 @@ { devShells = rec { base = pkgs.mkShell { - nativeBuildInputs = with pkgs; [ bash coreutils curl git gnugrep gnumake gnutar jq procps xz ]; + nativeBuildInputs = with pkgs; [ bash coreutils curl git gnugrep gnumake gnutar jq procps util-linux xz ]; }; postgres = pkgs.mkShell { nativeBuildInputs = with pkgs; [ glibcLocales postgresql lsof procps ]; diff --git a/runner/node_upgrade.sh b/runner/node_upgrade.sh index e38b78ec2..721fd03df 100755 --- a/runner/node_upgrade.sh +++ b/runner/node_upgrade.sh @@ -37,6 +37,9 @@ if is_venv_active; then exit 1 fi +# Refuse to start if another testrun is already using this workdir. +acquire_workdir_lock "$WORKDIR" || exit 1 + # shellcheck disable=SC1091 . runner/stop_cluster_instances.sh diff --git a/runner/node_upgrade_pytest.sh b/runner/node_upgrade_pytest.sh index 1f550cca0..a97eb675a 100755 --- a/runner/node_upgrade_pytest.sh +++ b/runner/node_upgrade_pytest.sh @@ -11,6 +11,8 @@ STATE_CLUSTER="${CARDANO_NODE_SOCKET_PATH_CI%/*}" # default era to use, can be overridden in each step if needed export CLUSTER_ERA="${CLUSTER_ERA:-"conway"}" export COMMAND_ERA="${COMMAND_ERA:-"$CLUSTER_ERA"}" + +: "${WORKDIR:?WORKDIR environment variable must be set}" CLUSTER_SCRIPTS_DIR="$WORKDIR/cluster0_${CLUSTER_ERA}" # init dir for step1 binaries diff --git a/runner/regression.sh b/runner/regression.sh index 72740d460..1e73706cc 100755 --- a/runner/regression.sh +++ b/runner/regression.sh @@ -27,6 +27,9 @@ if is_venv_active; then exit 1 fi +# Refuse to start if another testrun is already using this workdir. +acquire_workdir_lock "$WORKDIR" || exit 1 + # shellcheck disable=SC1091 . 
# Acquire an exclusive, non-blocking lock tied to the given workdir to prevent
# concurrent testruns from clobbering each other's workdir.
#
# The lock file is "WORKDIR.lock", i.e. a sibling of the workdir rather than a
# file inside it, so that wiping the workdir does not drop the lock. Trailing
# slashes on the argument are stripped first; otherwise "dir/" would yield
# "dir/.lock", which lives inside the workdir.
#
# The lock file is opened on a file descriptor of the calling shell, so the
# lock outlives this function and is released when that shell exits.
# NOTE(review): the descriptor is presumably inherited by child processes, which
# would keep the lock held for as long as they run - confirm this is intended.
#
# Usage: acquire_workdir_lock WORKDIR
# Arguments: $1 - path of the workdir to protect (required, non-empty)
# Returns: 0 when the lock was acquired; 1 when 'flock' is missing, the lock
#          file cannot be opened, or the lock is already held elsewhere.
acquire_workdir_lock() {
  local workdir="${1:?acquire_workdir_lock requires a workdir argument}"
  local lockfile
  local lockfd

  # Normalize "dir/", "dir//", ... to "dir" so the lock file stays next to the
  # workdir. A bare "/" is left untouched.
  while [[ "$workdir" == */ && "$workdir" != "/" ]]; do
    workdir="${workdir%/}"
  done
  lockfile="${workdir}.lock"

  if ! command -v flock >/dev/null 2>&1; then
    echo "Error: 'flock' is required for testrun locking but was not found." >&2
    return 1
  fi

  # Open lock file in the current shell so the lock outlives this function.
  if ! exec {lockfd}>"$lockfile"; then
    echo "Error: failed to open lock file '$lockfile' for writing." >&2
    return 1
  fi

  if ! flock -n "$lockfd"; then
    echo "Error: another testrun appears to be in progress." >&2
    echo "Lock '$lockfile' is held by another process; refusing to start a new testrun." >&2
    echo "If no testrun is running, simply retry (the lock is released automatically when the holding process exits)." >&2
    # Do not leak the descriptor when the lock could not be taken.
    exec {lockfd}>&-
    return 1
  fi
}