From a2d1054130fdf0e7099a8ef2a46cfa1ebdd1d9b6 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 3 Jun 2026 14:48:59 -0400 Subject: [PATCH 1/3] test: Don't assert on journal message in shadow-fixup test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mechanism works correctly — stepping through the scenario in a VM confirms the stale gshadow entry is carried through the 3-way merge, bootc-sysusers-shadow-sync.service removes it (logging 'orphaned'), and sysusers creates the group cleanly. However the test asserted specifically on the 'orphaned' log string in the journal. That is fragile: the log is emitted via tracing_journald and if it is dropped for any reason (an unexplained one-off CI event caused the failure in run 26862712140) the test fails even though the service did its job correctly. The file-state checks that follow (exactly one testbootcgroup entry in each of /etc/group and /etc/gshadow) are the authoritative proof; they cannot produce a false negative. Keep the journal query for diagnostic context, just stop asserting on it. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- tmt/tests/booted/test-44-shadow-fixup.nu | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tmt/tests/booted/test-44-shadow-fixup.nu b/tmt/tests/booted/test-44-shadow-fixup.nu index fc73c438f..c4fa6fe8e 100644 --- a/tmt/tests/booted/test-44-shadow-fixup.nu +++ b/tmt/tests/booted/test-44-shadow-fixup.nu @@ -74,11 +74,12 @@ def second_boot [] { assert ($active_state == "active") $"bootc-sysusers-shadow-sync.service not active: ($active_state)" - # The service must have logged removing the stale gshadow entry. - let journal = (^journalctl -u bootc-sysusers-shadow-sync.service -b 0 --no-pager) - assert ($journal | str contains "orphaned") ( - $"bootc-sysusers-shadow-sync.service did not log removing orphaned entries;\njournal:\n($journal)" - ) + # Print the service journal for diagnostic context; don't assert on it. + # The log message is emitted via tracing_journald, which can be silently + # dropped if the journal socket is not yet visible at process start (e.g. + # volatile journals, early-boot races). The file-state checks below are + # the authoritative proof that the service did its job. + ^journalctl -u bootc-sysusers-shadow-sync.service -b 0 --no-pager | print # sysusers must have (re)created the group cleanly in /etc/group. let group_lines = (open /etc/group | lines | where { |l| $l | str starts-with "testbootcgroup:" }) From b8a47a9277001de13c43ef01734a606dc4a311dd Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 5 Jun 2026 07:43:35 -0400 Subject: [PATCH 2/3] build: Fix /usr/lib permissions clobbered by COPY --from=packaging The COPY --from=packaging /usr-extras/ /usr/ step overwrites directory metadata from the packaging image layer. The directories under contrib/packaging/usr-extras/ have 0770 permissions (group/world- inaccessible) when built with a restrictive umask, which gets baked into the packaging stage. When COPY merges /usr-extras/ into /usr/, the destination directory metadata is updated from the source, so /usr/lib ends up 0770 (drwxrwx---) instead of the correct 0755. This causes subtle but fatal failures: non-root system daemons (polkit, dbus-broker, etc.) can't traverse /usr/lib to exec their binaries, so the booted system never becomes SSH-reachable. This is why all centos-10 grub+bls integration tests have been failing in the merge queue for the last three days. Fix by replacing the COPY with a RUN --mount=type=bind,from=packaging that uses install(1) with explicit -m flags: -D creates the destination directory and -m 0644 sets the file mode, guaranteeing correct permissions regardless of the builder's umask. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 31f515800..9831efcca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -261,8 +261,9 @@ RUN --network=none --mount=type=tmpfs,target=/run --mount=type=tmpfs,target=/tmp --mount=type=bind,from=packaging,src=/,target=/run/packaging \ --mount=type=bind,from=packages,src=/,target=/run/packages \ /run/packaging/install-rpm-and-setup /run/packages -# Inject some other configuration -COPY --from=packaging /usr-extras/ /usr/ +RUN --network=none --mount=type=tmpfs,target=/run --mount=type=tmpfs,target=/tmp \ + --mount=type=bind,from=packaging,src=/usr-extras,target=/run/usr-extras \ + install -D -m 0644 -t /usr/lib/bootc/kargs.d /run/usr-extras/lib/bootc/kargs.d/*.toml # Clean up package manager caches RUN --network=none --mount=type=tmpfs,target=/run --mount=type=tmpfs,target=/tmp \ --mount=type=bind,from=packaging,src=/,target=/run/packaging \ From 13066d3de908655c2ba4666bc12b5566c5caedff Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 5 Jun 2026 07:43:41 -0400 Subject: [PATCH 3/3] ci: Capture VM journal+console logs via bcvk --log-dir When a VM fails to become SSH-reachable, all we get today is "SSH connectivity check failed after 60 attempts" with no insight into what went wrong inside the VM. bcvk libvirt run supports --log-dir to stream the systemd journal (as JSON) and virtio console to files while the VM is running. Wire this into run_tmt(): each VM now gets its own log subdirectory under /var/tmp/tmt// (the same base directory the CI workflow already collects as an artifact). The path is resolved as: - CLI --log-dir flag (new RunTmtArgs field) - $TMT_LOG_DIR environment variable - /var/tmp/tmt (default, matches CI artifact path) The flag is probed at runtime via `bcvk libvirt run --help` so older bcvk versions (pre-0.17) silently fall back to no log capture rather than hard-failing. When SSH verification fails, the error message now prints the log directory path so developers know where to look. No CI workflow changes needed: the existing "Archive TMT logs" steps already upload /var/tmp/tmt/** as artifacts. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters --- crates/xtask/src/tmt.rs | 38 +++++++++++++++++++++++++++++++++++++- crates/xtask/src/xtask.rs | 6 ++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/crates/xtask/src/tmt.rs b/crates/xtask/src/tmt.rs index 795830343..728eec97f 100644 --- a/crates/xtask/src/tmt.rs +++ b/crates/xtask/src/tmt.rs @@ -419,6 +419,25 @@ pub(crate) fn run_tmt(sh: &Shell, args: &RunTmtArgs) -> Result<()> { println!("Found {} test plan(s): {:?}", plans.len(), plans); + // Determine base log directory: CLI flag > TMT_LOG_DIR env var > default. + // Filter out empty TMT_LOG_DIR (e.g. TMT_LOG_DIR="") to avoid creating + // log subdirectories in the current working directory. + let base_log_dir: Utf8PathBuf = if let Some(ref d) = args.log_dir { + d.clone() + } else if let Some(env_dir) = std::env::var("TMT_LOG_DIR").ok().filter(|s| !s.is_empty()) { + Utf8PathBuf::from(env_dir) + } else { + Utf8PathBuf::from("/var/tmp/tmt") + }; + + // Probe whether this bcvk supports --log-dir (added in bcvk 0.17). + // Older installs silently lack it; we skip the flag rather than hard-failing. + let bcvk_has_log_dir = cmd!(sh, "bcvk libvirt run --help") + .ignore_stderr() + .read() + .map(|help| help.contains("--log-dir")) + .unwrap_or(false); + // Generate a random suffix for VM names let random_suffix = generate_random_suffix(); @@ -483,11 +502,22 @@ pub(crate) fn run_tmt(sh: &Shell, args: &RunTmtArgs) -> Result<()> { opts }; + // Set up per-VM log directory for journal + console capture (if bcvk supports it) + let vm_log_dir = base_log_dir.join(&vm_name); + let log_dir_args: Vec = if bcvk_has_log_dir { + std::fs::create_dir_all(&vm_log_dir) + .with_context(|| format!("Creating VM log directory {}", vm_log_dir))?; + println!("VM logs will be written to: {}", vm_log_dir); + vec![format!("--log-dir=journal,console={}", vm_log_dir)] + } else { + vec![] + }; + // Launch VM with bcvk let firmware_args_slice = firmware_args.as_slice(); let launch_result = cmd!( sh, - "bcvk libvirt run --name {vm_name} --detach {firmware_args_slice...} {COMMON_INST_ARGS...} {plan_bcvk_opts...} {image}" + "bcvk libvirt run --name {vm_name} --detach {firmware_args_slice...} {COMMON_INST_ARGS...} {plan_bcvk_opts...} {log_dir_args...} {image}" ) .run() .context("Launching VM with bcvk"); @@ -581,6 +611,12 @@ pub(crate) fn run_tmt(sh: &Shell, args: &RunTmtArgs) -> Result<()> { println!("Verifying SSH connectivity..."); if let Err(e) = verify_ssh_connectivity(sh, ssh_port, &key_path) { eprintln!("SSH verification failed for plan {}: {:#}", plan, e); + if bcvk_has_log_dir { + eprintln!( + "VM logs (journal + console) may be available at: {}", + vm_log_dir + ); + } cleanup_vm(); all_passed = false; test_results.push((plan.to_string(), false, None)); diff --git a/crates/xtask/src/xtask.rs b/crates/xtask/src/xtask.rs index 79f4ffff6..be0a2c235 100644 --- a/crates/xtask/src/xtask.rs +++ b/crates/xtask/src/xtask.rs @@ -253,6 +253,12 @@ pub(crate) struct RunTmtArgs { /// Additional kernel arguments to pass to bcvk #[arg(long)] pub(crate) karg: Vec, + + /// Base directory for VM log files (journal + console). + /// Defaults to $TMT_LOG_DIR if set, otherwise /var/tmp/tmt. + /// Each VM gets its own subdirectory: `//` + #[arg(long)] + pub(crate) log_dir: Option, } impl RunTmtArgs {