diff --git a/crates/ostree-ext/src/container/composefs_import.rs b/crates/ostree-ext/src/container/composefs_import.rs new file mode 100644 index 000000000..70ac58350 --- /dev/null +++ b/crates/ostree-ext/src/container/composefs_import.rs @@ -0,0 +1,970 @@ +//! Synthesize an ostree commit directly from a composefs OCI repository. +//! +//! This is the inverse of the existing ostree→composefs flow in libostree. +//! Rather than importing OCI layers through the tar pipeline, we walk the +//! composefs `FileSystem` tree and write each entry as an ostree object. +//! +//! For external regular files we bypass the ostree C library's `write_content` +//! API and instead write content objects directly to disk. This lets us +//! attempt `FICLONE` (reflink) from the composefs object fd, falling back to +//! `copy_file_range` / read-write. The ostree SHA256 content checksum is +//! still computed via the C library's `checksum_file_from_input` (a pure, +//! repo-independent function) so the serialisation format is never +//! reimplemented. +//! +//! Metadata objects (dirmeta, dirtree, commit) and small inline files continue +//! to use the ostree C API — they are tiny and benefit from the existing +//! validation. +//! +//! # Why reflink, not hardlink +//! +//! A composefs object is content-addressed by its SHA-512 fsverity digest. +//! Multiple filesystem paths with identical content share a single composefs +//! object (inode). In ostree bare mode, however, every content object carries +//! its own per-inode metadata: uid, gid, mode, and *all* xattrs including +//! `security.selinux`. The same composefs object can be referenced by two +//! ostree objects that need different SELinux labels — for example +//! `/etc/sudo.conf` (label `etc_t`) and +//! `/usr/share/doc/sudo/examples/sudo.conf` (label `usr_t`). Because a +//! single inode can hold only one `security.selinux` value, hardlinking both +//! ostree objects to the shared composefs inode is impossible: the second +//! 
`setxattr` would overwrite the first, causing `ostree fsck` to report a +//! checksum mismatch. +//! +//! Reflinks (FICLONE) solve this: each ostree object gets its own inode and +//! therefore its own metadata, while the underlying disk extents are shared +//! with the composefs object — zero copy, no additional disk space. +//! +//! # Filesystem requirements +//! +//! FICLONE requires a reflink-capable filesystem such as XFS or btrfs. On +//! these filesystems the ostree and composefs repositories share disk blocks +//! so installing the OS adds no extra space for the ostree content objects. +//! On ext4 (and other filesystems without reflink support) FICLONE fails with +//! `EOPNOTSUPP` and the code falls back to a byte copy; installation succeeds +//! but no block sharing occurs between the two repositories. +//! +//! # SELinux labels and the NUL-terminator fix +//! +//! `selabel()` from composefs-rs applies SELinux labels in-memory to the +//! filesystem tree before ostree synthesis begins. On SELinux-disabled hosts +//! (such as a container used for `bootc install`) the `fsetxattr` call for +//! `security.selinux` is a no-op; labels are applied by the kernel at first +//! boot during auto-relabeling. The ostree checksums are computed with the +//! correct label values (because `checksum_file_from_input` sees them from the +//! in-memory tree), so `ostree fsck` passes after boot. +//! +//! One subtlety: composefs-rs `selabel()` stores SELinux values *without* a +//! trailing NUL, but the kernel stores them *with* one. `xattrs_to_variant` +//! appends the NUL when computing the ostree checksum so that it matches what +//! `ostree fsck` reads back from the live filesystem. 
+ +use std::collections::BTreeMap; +use std::ffi::OsStr; +use std::os::fd::{AsFd as _, AsRawFd as _, BorrowedFd}; +use std::os::unix::ffi::OsStrExt as _; + +use anyhow::{Context as _, Result}; + +use composefs_ctl::composefs::fsverity::Sha512HashValue; +use composefs_ctl::composefs::generic_tree::{Inode, Stat}; +use composefs_ctl::composefs::repository::Repository; +use composefs_ctl::composefs::tree::{Directory, Leaf, LeafContent, RegularFile}; +use composefs_ctl::composefs_boot::selabel::selabel; +use composefs_ctl::composefs_oci; + +use crate::container::store::{ + META_COMPOSEFS_SYNTHESIZED, META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST, +}; +use crate::prelude::{Cast as _, ToVariant as _}; +use crate::{gio, glib}; + +type ComposefsRepo = Repository; + +/// Tracks per-import statistics and FICLONE capability for the composefs→ostree +/// object copy loop. +/// +/// # Why reflink, not hardlink +/// +/// A composefs object is a content-addressed file identified by its SHA-512 +/// fsverity digest. Multiple filesystem paths with identical content share the +/// same composefs object (inode). In ostree's bare repo mode, however, each +/// content object carries its own metadata — uid, gid, mode, and **all xattrs +/// including `security.selinux`** — directly on the inode. +/// +/// The same composefs object can be referenced by ostree objects that need +/// different metadata. For example `/etc/sudo.conf` and +/// `/usr/share/doc/sudo/examples/sudo.conf` have identical bytes but different +/// SELinux labels (`etc_t` vs `usr_t`), producing different ostree content +/// checksums. A single inode can carry only one `security.selinux` value, so +/// hardlinking both ostree objects to the shared composefs inode is impossible: +/// whichever metadata is written second silently overwrites the first, causing +/// `ostree fsck` to report a checksum mismatch. 
+/// +/// Reflinks (FICLONE) solve this cleanly: each ostree object gets its own +/// inode (its own metadata) while the underlying disk extents are shared with +/// the composefs object — zero copy, same space efficiency. If the filesystem +/// does not support reflinks (e.g. ext4) we fall back to a byte copy; unified +/// storage is not possible in that configuration. +struct DirectImportContext { + /// `true` after FICLONE has failed on this device pair. + ficlone_failed: bool, + /// Count of files reflinked (FICLONE succeeded — blocks shared with composefs object). + reflinked: u64, + /// Count of files written via byte copy fallback (filesystem lacks reflink support). + copied: u64, + /// Count of objects that already existed in the repo (skipped). + existing: u64, + /// When `true`, any FICLONE failure is returned as a hard error instead of + /// silently falling back to byte copy. Set by the caller when it has already + /// confirmed that reflinks are supported on the storage (e.g. via a probe). + reflinks_required: bool, +} + +/// Convert composefs xattrs (`BTreeMap`) to the ostree GVariant format (`a(ayay)`). +/// +/// The ostree on-disk format stores xattr names as NUL-terminated C strings inside +/// `ay` (byte array) GVariant values — matching the C library's use of +/// `g_variant_new_bytestring(name)`. The validation function in libostree reads +/// names back with the `"(^&ay@ay)"` format specifier, which returns a raw pointer +/// into the GVariant buffer, and then calls `strcmp` on it — so the trailing NUL +/// *must* be present. Without it the first byte of the *value* would be treated as +/// part of the name, and if that byte happens to be `0x00` the validator throws +/// "Invalid xattr name (empty or missing NUL)". +fn xattrs_to_variant(xattrs: &BTreeMap, Box<[u8]>>) -> Option { + if xattrs.is_empty() { + return None; + } + let children = xattrs.iter().map(|(k, v)| { + // Append NUL so that ostree's C validator can read the name as a C string. 
+ let mut name_bytes = k.as_bytes().to_vec(); + name_bytes.push(b'\0'); + // SELinux stores label values as NUL-terminated strings on the filesystem. + // The composefs selabel() function strips the NUL, but ostree fsck reads + // xattrs back from disk (where the kernel always stores them with NUL) when + // verifying checksums. Ensure our checksum matches by adding the NUL here + // for security.selinux values that are missing it. + let value: &[u8] = v.as_ref(); + let value_owned; + let value = if k.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_owned = [value, &[0u8]].concat(); + value_owned.as_slice() + } else { + value + }; + glib::Variant::tuple_from_iter([name_bytes.as_slice().to_variant(), value.to_variant()]) + }); + Some(glib::Variant::array_from_iter::<(&[u8], &[u8])>(children)) +} + +/// Build an ostree `DirMeta` GVariant from a composefs `Stat`. +fn create_dirmeta(stat: &Stat) -> glib::Variant { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + // ostree's create_directory_metadata requires the full mode word including + // the S_IFDIR type bits. The composefs Stat only stores the permission + // bits, so we must OR in S_IFDIR ourselves. + let mode = libc::S_IFDIR | stat.st_mode; + finfo.set_attribute_uint32("unix::mode", mode); + let xattrs = xattrs_to_variant(&stat.xattrs); + crate::ostree::create_directory_metadata(&finfo, xattrs.as_ref()) +} + +/// Top-level paths from the OCI image that are API/virtual filesystems at runtime. +/// +/// These paths (`/proc`, `/sys`, etc.) must NOT have their children written into +/// the ostree commit — they are mounted as kernel-provided virtual filesystems at +/// boot time and any content baked into the image is irrelevant (and in the case +/// of device nodes, actively harmful). 
The directory entries themselves are +/// written as empty directories so that the mount points exist. +/// +/// This mirrors the `EXCLUDED_TOPLEVEL_PATHS` list and the filtering logic in +/// `crate::tar::write::normalize_validate_path()`. +const EXCLUDED_TOPLEVEL_PATHS: &[&str] = &["run", "tmp", "proc", "sys", "dev"]; + +/// Walk the composefs root directory, applying ostree path normalization: +/// +/// - `/etc` → `usr/etc` (ostree's deployment mechanism expects config here) +/// - Children of API/virtual-filesystem mounts (`/proc`, `/sys`, `/dev`, `/run`, +/// `/tmp`) are **omitted** — the directory entries are written as empty mount +/// points but their content is never part of the commit. +/// +/// This matches the filtering that the tar import pipeline applies via +/// `normalize_validate_path()` in `tar/write.rs`. +fn write_dir_to_mtree_remap_etc( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + // Write the root dirmeta first + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing root dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + // Collect the `etc` directory entry (if any) for deferred processing + let mut etc_dir: Option<&Directory> = None; + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + + // Intercept `etc` at the root and defer it — we'll write it + // as `usr/etc` after processing everything else. + if name == "etc" { + match inode { + Inode::Directory(child) => { + etc_dir = Some(child); + continue; + } + _ => { + // Unusual: /etc is not a directory (e.g. symlink). 
+ // Write it as-is — the deployment code will handle it + // or fail later with a more specific error. + } + } + } + + // API/virtual-filesystem mount points: include the empty directory itself + // (so the mount point exists at runtime) but skip all children. This + // mirrors the tar importer's EXCLUDED_TOPLEVEL_PATHS handling. + if EXCLUDED_TOPLEVEL_PATHS.contains(&name) { + if let Inode::Directory(child) = inode { + let child_mtree = mtree.ensure_dir(name)?; + // Write only the dirmeta (permissions/xattrs) — no children. + let child_dirmeta = create_dirmeta(&child.stat); + let child_meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &child_dirmeta, + gio::Cancellable::NONE, + ) + .with_context(|| format!("Writing dirmeta for excluded path {name}"))?; + child_mtree.set_metadata_checksum(&child_meta_csum.to_hex()); + tracing::debug!("Composefs import: skipping children of excluded path /{name}"); + continue; + } + // Non-directory at an excluded path (e.g. a symlink for /tmp)? + // Skip it entirely. + tracing::debug!("Composefs import: skipping non-directory excluded path /{name}"); + continue; + } + + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + + // Now write the deferred `/etc` directory as `usr/etc`. + if let Some(etc) = etc_dir { + let usr_mtree = mtree.ensure_dir("usr")?; + let usr_etc_mtree = usr_mtree.ensure_dir("etc")?; + write_dir_to_mtree(orepo, crepo, etc, leaves, &usr_etc_mtree, ctx) + .context("Writing /etc as usr/etc")?; + } + + Ok(()) +} + +/// Recursively walk a composefs directory, writing each entry into `mtree`. 
+fn write_dir_to_mtree( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + dir: &Directory, + leaves: &[Leaf], + mtree: &crate::ostree::MutableTree, + ctx: &mut DirectImportContext, +) -> Result<()> { + let dirmeta = create_dirmeta(&dir.stat); + let meta_csum = orepo + .write_metadata( + crate::ostree::ObjectType::DirMeta, + None, + &dirmeta, + gio::Cancellable::NONE, + ) + .context("Writing dirmeta")?; + mtree.set_metadata_checksum(&meta_csum.to_hex()); + + for (name, inode) in dir.sorted_entries() { + let name = name + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 entry name: {:?}", name))?; + match inode { + Inode::Directory(child) => { + let child_mtree = mtree.ensure_dir(name)?; + write_dir_to_mtree(orepo, crepo, child, leaves, &child_mtree, ctx)?; + } + Inode::Leaf(leaf_id, _) => { + let leaf = &leaves[leaf_id.0]; + let csum = write_leaf(orepo, crepo, &leaf.stat, &leaf.content, ctx)?; + mtree + .replace_file(name, &csum) + .with_context(|| format!("Inserting {name} into mtree"))?; + } + } + } + Ok(()) +} + +/// Write a single composefs leaf (regular file or symlink) to the ostree repo. +/// Returns the hex content checksum. +fn write_leaf( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + content: &LeafContent, + ctx: &mut DirectImportContext, +) -> Result { + let xattrs = xattrs_to_variant(&stat.xattrs); + match content { + LeafContent::Symlink(target) => { + let target = target + .to_str() + .ok_or_else(|| anyhow::anyhow!("Non-UTF8 symlink target"))?; + let csum = orepo + .write_symlink( + None, + stat.st_uid, + stat.st_gid, + xattrs.as_ref(), + target, + gio::Cancellable::NONE, + ) + .context("Writing symlink")?; + Ok(csum.to_string()) + } + LeafContent::Regular(file) => { + write_regular_file(orepo, crepo, stat, file, xattrs.as_ref(), ctx) + } + other => anyhow::bail!( + "Unsupported composefs leaf (device/fifo/socket): {:?}", + std::mem::discriminant(other) + ), + } +} + +/// Write a regular file into the ostree repo. 
+/// +/// Inline files use the ostree C API directly. External files bypass the C +/// `write_content` path and instead write content objects directly to disk, +/// attempting FICLONE (reflink) first and falling back to a byte copy. +/// +/// The ostree content checksum is computed via the C library's +/// `checksum_file_from_input` to guarantee format compatibility. +fn write_regular_file( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + file: &RegularFile, + xattrs: Option<&glib::Variant>, + ctx: &mut DirectImportContext, +) -> Result { + // ostree requires the full mode word including S_IFREG + let mode = libc::S_IFREG | stat.st_mode; + + match file { + RegularFile::Inline(data) => { + let csum = orepo + .write_regfile_inline( + None, + stat.st_uid, + stat.st_gid, + mode, + xattrs, + data, + gio::Cancellable::NONE, + ) + .context("Writing inline file")?; + Ok(csum.to_string()) + } + RegularFile::External(object_id, size) => { + write_external_file_direct(orepo, crepo, stat, object_id, *size, xattrs, mode, ctx) + } + } +} + +/// Compute the ostree content checksum for a regular file using the C API. +/// +/// This calls `ostree_checksum_file_from_input()` which is a pure, +/// repo-independent function. It constructs the standard file header +/// GVariant (uid/gid/mode/xattrs) and hashes `header || content`. +fn compute_content_checksum( + stat: &Stat, + xattrs: Option<&glib::Variant>, + fd: BorrowedFd<'_>, + size: u64, + mode: u32, +) -> Result { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", stat.st_uid); + finfo.set_attribute_uint32("unix::gid", stat.st_gid); + finfo.set_attribute_uint32("unix::mode", mode); + finfo.set_size(size as i64); + finfo.set_file_type(gio::FileType::Regular); + + // Dup the fd so we can hand an owned File to ReadInputStream without + // touching the caller's fd or using unsafe. 
+ let file = std::fs::File::from(fd.try_clone_to_owned().context("Dup fd for checksum")?); + let istream = gio::ReadInputStream::new(file); + let csum = crate::ostree::checksum_file_from_input( + &finfo, + xattrs, + Some(&istream), + crate::ostree::ObjectType::File, + gio::Cancellable::NONE, + ) + .map_err(|e| anyhow::anyhow!("Computing content checksum: {e}"))?; + + Ok(csum.to_string()) +} + +/// Build the `user.ostreemeta` GVariant `(uuu@a(ayay))` for bare-user mode. +/// +/// This matches the C function `create_file_metadata()` in ostree-repo-commit.c. +fn build_bareuser_metadata( + uid: u32, + gid: u32, + mode: u32, + xattrs: Option<&glib::Variant>, +) -> glib::Variant { + let empty_xattrs; + let xattrs = match xattrs { + Some(v) => v.clone(), + None => { + empty_xattrs = glib::Variant::array_from_iter::<(&[u8], &[u8])>(std::iter::empty()); + empty_xattrs + } + }; + // ostree stores these as big-endian u32 values + glib::Variant::tuple_from_iter([ + uid.to_be().to_variant(), + gid.to_be().to_variant(), + mode.to_be().to_variant(), + xattrs, + ]) +} + +/// Apply file metadata to an fd according to the ostree repo mode. +/// +/// For `Bare`: sets real uid/gid, mode, and xattrs via syscalls. +/// For `BareUser`: stores metadata in a `user.ostreemeta` xattr and +/// applies a masked mode suitable for non-root operation. +fn apply_file_metadata( + fd: BorrowedFd<'_>, + stat: &Stat, + mode: u32, + xattrs: Option<&glib::Variant>, + repo_mode: crate::ostree::RepoMode, +) -> Result<()> { + use rustix::fs::{Mode, Timestamps, XattrFlags, fchmod, fchown, fsetxattr, futimens}; + + match repo_mode { + crate::ostree::RepoMode::Bare => { + fchown( + fd, + Some(rustix::process::Uid::from_raw(stat.st_uid)), + Some(rustix::process::Gid::from_raw(stat.st_gid)), + ) + .context("fchown")?; + fchmod(fd, Mode::from_raw_mode(mode)).context("fchmod")?; + // Set real xattrs on the file. 
For security.selinux the composefs + // selabel() function stores values without the NUL terminator that + // the kernel expects; add it here so the on-disk value matches what + // xattrs_to_variant() (and thus the ostree checksum) sees. + for (name, value) in &stat.xattrs { + let value: &[u8] = value.as_ref(); + let value_with_nul; + let value = if name.as_bytes() == b"security.selinux" + && !value.last().copied().map_or(true, |b| b == 0) + { + value_with_nul = [value, &[0u8]].concat(); + value_with_nul.as_slice() + } else { + value + }; + fsetxattr(fd, name.as_bytes(), value, XattrFlags::empty()) + .with_context(|| format!("fsetxattr({:?})", name))?; + } + } + crate::ostree::RepoMode::BareUser => { + // Write the user.ostreemeta xattr + let meta = build_bareuser_metadata(stat.st_uid, stat.st_gid, mode, xattrs); + let meta_bytes = meta.data_as_bytes(); + fsetxattr( + fd, + c"user.ostreemeta", + meta_bytes.as_ref(), + XattrFlags::empty(), + ) + .context("fsetxattr(user.ostreemeta)")?; + // Mask mode for non-root safety (matches libostree) + let content_mode = (mode & (libc::S_IFREG | 0o775)) | libc::S_IRUSR; + fchmod(fd, Mode::from_raw_mode(content_mode)).context("fchmod (bare-user)")?; + } + other => anyhow::bail!("Unsupported ostree repo mode for direct write: {other:?}"), + } + + // Set mtime to OSTREE_TIMESTAMP (0), matching libostree behavior. + // UTIME_OMIT for atime means "leave atime unchanged". + futimens( + fd, + &Timestamps { + last_access: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: rustix::fs::UTIME_OMIT, + }, + last_modification: rustix::fs::Timespec { + tv_sec: 0, + tv_nsec: 0, + }, + }, + ) + .context("futimens")?; + + Ok(()) +} + +/// Ensure the `objects/XX/` directory exists under the repo. 
+fn ensure_object_dir(repo_dfd: BorrowedFd<'_>, prefix: &str) -> Result<()> { + use rustix::fs::{Mode, mkdirat}; + + let objects_dir = format!("objects/{prefix}"); + match mkdirat(repo_dfd, objects_dir.as_str(), Mode::from_raw_mode(0o777)) { + Ok(()) => Ok(()), + Err(rustix::io::Errno::EXIST) => Ok(()), + Err(e) => Err(e).context(format!("Creating {objects_dir}"))?, + } +} + +/// Write a composefs external regular file directly into the ostree repo. +/// +/// # Sharing strategy: FICLONE (reflink) → byte copy +/// +/// Each ostree content object is written as a fresh inode (via `O_TMPFILE`) +/// and populated from the composefs object using `FICLONE` (reflink) when the +/// filesystem supports it, falling back to `copy_file_range` / `std::io::copy`. +/// +/// **Why not hardlink?** A composefs object is identified by its SHA-512 +/// fsverity digest of raw content. The same object can be referenced by +/// multiple ostree content objects that carry different metadata — in +/// particular different `security.selinux` xattrs assigned by the SELinux +/// policy relabeling step. Ostree bare mode stores metadata (uid/gid/mode/ +/// xattrs) on the inode, so a single shared inode cannot satisfy two different +/// metadata requirements. FICLONE shares the underlying disk extents while +/// keeping inodes independent, giving each ostree object its own metadata. +/// +/// On filesystems without reflink support (ext4, etc.) FICLONE fails with +/// `EOPNOTSUPP` and we fall back to a full byte copy. Unified storage +/// (space-sharing between the composefs and ostree repos) requires a +/// reflink-capable filesystem (XFS, btrfs, …). +/// +/// Returns the hex SHA256 content checksum. 
+#[allow(clippy::too_many_arguments)] +fn write_external_file_direct( + orepo: &crate::ostree::Repo, + crepo: &ComposefsRepo, + stat: &Stat, + object_id: &Sha512HashValue, + size: u64, + xattrs: Option<&glib::Variant>, + mode: u32, + ctx: &mut DirectImportContext, +) -> Result { + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::io::{Seek, SeekFrom}; + + let cfs_fd = crepo + .open_object(object_id) + .context("Opening composefs object")?; + + // Compute the ostree content checksum using the C API. + let checksum = compute_content_checksum(stat, xattrs, cfs_fd.as_fd(), size, mode) + .context("Computing content checksum")?; + + // Check if the object already exists — skip if so. + if orepo + .has_object( + crate::ostree::ObjectType::File, + &checksum, + gio::Cancellable::NONE, + ) + .context("Checking for existing object")? + { + ctx.existing += 1; + return Ok(checksum); + } + + let repo_dfd = orepo.dfd_borrow(); + let repo_mode = orepo.mode(); + + // Ensure objects/XX/ directory exists. + let (prefix, _rest) = checksum.split_at(2); + ensure_object_dir(repo_dfd, prefix)?; + + let obj_path = format!("objects/{prefix}/{}.file", &checksum[2..]); + + // Create an O_TMPFILE in the repo's tmp/ directory. This gives us an + // anonymous inode we can populate before atomically linking it into place, + // avoiding partial writes visible to concurrent readers. + let tmp_fd = openat( + repo_dfd, + c"tmp", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .context("Creating tmpfile in ostree repo/tmp")?; + + // Seek the source fd back to the start (compute_content_checksum consumed it). + let mut src_file = std::fs::File::from(cfs_fd); + src_file + .seek(SeekFrom::Start(0)) + .context("Seeking composefs fd to start")?; + + // --- Primary path: FICLONE (reflink) --- + // + // Shares the underlying disk extents with the composefs object while giving + // the ostree object its own inode (and thus its own metadata). 
On XFS this + // is a near-zero-cost operation. After the first failure we stop trying. + let mut reflinked = false; + if !ctx.ficlone_failed { + match rustix::fs::ioctl_ficlone(&tmp_fd, &src_file) { + Ok(()) => { + reflinked = true; + } + Err(e @ (rustix::io::Errno::OPNOTSUPP | rustix::io::Errno::XDEV)) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed ({e}) but reflinks were confirmed supported \ + by the storage probe — cannot safely continue import" + )); + } + tracing::debug!( + "FICLONE not supported on this filesystem; \ + falling back to copy for all remaining objects" + ); + ctx.ficlone_failed = true; + } + Err(e) => { + if ctx.reflinks_required { + return Err(anyhow::anyhow!( + "FICLONE failed unexpectedly ({e}); reflinks are required" + )); + } + tracing::debug!("FICLONE failed unexpectedly ({e}); falling back to copy"); + ctx.ficlone_failed = true; + } + } + } + + if reflinked { + ctx.reflinked += 1; + } else { + // --- Fallback: byte copy via copy_file_range --- + let mut dst_file = std::fs::File::from(tmp_fd.try_clone().context("Cloning tmpfile fd")?); + let n = std::io::copy(&mut src_file, &mut dst_file).context("Copying file content")?; + anyhow::ensure!( + n == size, + "Size mismatch: expected {size} bytes but copied {n}" + ); + ctx.copied += 1; + } + + // Apply metadata (uid/gid/mode/xattrs/mtime) to the new inode. + apply_file_metadata(tmp_fd.as_fd(), stat, mode, xattrs, repo_mode)?; + + // Atomically link the tmpfile into place at objects/XX/rest.file. + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_raw_fd()); + match linkat( + rustix::fs::CWD, + proc_path.as_str(), + repo_dfd, + obj_path.as_str(), + AtFlags::SYMLINK_FOLLOW, + ) { + Ok(()) => {} + Err(rustix::io::Errno::EXIST) => { + // A concurrent writer (or an earlier call) already wrote this object. 
+ } + Err(e) => { + return Err(e).with_context(|| format!("Linking tmpfile to {obj_path}")); + } + } + + Ok(checksum) +} + +/// Synthesize an ostree commit from a composefs repository that has already +/// pulled an OCI image. +/// +/// This is the inverse of the ostree→composefs path in libostree: instead of +/// reading an ostree commit to build an EROFS image, we walk the composefs +/// `FileSystem` tree and write each entry as an ostree object. +/// +/// The resulting commit carries the same OCI metadata as a commit produced by +/// the standard `ostree-container` import pipeline: +/// - `ostree.container.image-config` +/// - `ostree.manifest` +/// - `ostree.manifest-digest` +/// +/// External regular files are written directly to disk with FICLONE (reflink) +/// attempted first, falling back to a byte copy. On the same filesystem this +/// avoids copying data entirely. +pub fn import_from_composefs_repo( + ostree_repo: &crate::ostree::Repo, + composefs_repo: &ComposefsRepo, + config_digest: &composefs_oci::OciDigest, + manifest_digest: &str, + image_manifest: &crate::oci_spec::image::ImageManifest, + image_config: &crate::oci_spec::image::ImageConfiguration, + reflinks_required: bool, +) -> Result { + let cancellable = gio::Cancellable::NONE; + + // Reconstruct the merged filesystem tree from the composefs store + let mut fs = composefs_oci::image::create_filesystem(composefs_repo, config_digest, None) + .context("Reconstructing composefs filesystem tree")?; + + // Apply SELinux labels in-memory before writing any objects. + // selabel walks the FileSystem tree and reads policy files directly from + // the composefs object store — no file copies are made. 
+ let labeled = selabel(&mut fs, composefs_repo) + .context("Applying SELinux labels from composefs image policy")?; + tracing::debug!( + labeled, + total_leaves = fs.leaves.len(), + "SELinux labeling complete" + ); + + let txn = ostree_repo + .auto_transaction(cancellable) + .context("Beginning ostree transaction")?; + + let mut ctx = DirectImportContext { + ficlone_failed: false, + reflinked: 0, + copied: 0, + existing: 0, + reflinks_required, + }; + + // Walk and write all objects. + // + // The composefs filesystem tree uses the OCI image layout where `/etc` + // lives at the tree root. However, ostree's deployment mechanism expects + // configuration to be at `usr/etc` (it copies `usr/etc` → `etc` during + // deployment initialization and performs a 3-way merge). The old + // `ImageImporter` tar pipeline applies this `/etc` → `usr/etc` remap + // via `normalize_validate_path()` in `tar/write.rs`. + // + // We replicate that remapping here: if the composefs root has an `etc` + // directory, we write it into the ostree tree as `usr/etc` instead of + // a top-level `etc`. + let root_mtree = crate::ostree::MutableTree::new(); + write_dir_to_mtree_remap_etc( + ostree_repo, + composefs_repo, + &fs.root, + &fs.leaves, + &root_mtree, + &mut ctx, + ) + .context("Writing composefs tree into ostree")?; + + tracing::info!( + reflinked = ctx.reflinked, + copied = ctx.copied, + existing = ctx.existing, + "Composefs→ostree import complete: reflinked={} copied={} existing={}", + ctx.reflinked, + ctx.copied, + ctx.existing, + ); + + // Seal the tree + let root = ostree_repo + .write_mtree(&root_mtree, cancellable) + .context("Sealing MutableTree")?; + let root = root + .downcast::() + .map_err(|_| anyhow::anyhow!("BUG: write_mtree did not return a RepoFile"))?; + + // Attach OCI metadata so the commit looks like a normal ostree-container import, + // plus a flag indicating this commit was synthesized from a composefs repository + // (so per-layer blob refs do not exist). 
+ let meta = glib::VariantDict::new(None); + meta.insert( + META_CONFIG, + &serde_json::to_string(image_config).context("Serializing image config")?, + ); + meta.insert( + META_MANIFEST, + &serde_json::to_string(image_manifest).context("Serializing manifest")?, + ); + meta.insert(META_MANIFEST_DIGEST, &manifest_digest); + meta.insert(crate::ostree::METADATA_KEY_BOOTABLE.as_ref(), &true); + meta.insert(META_COMPOSEFS_SYNTHESIZED, &true); + let meta = meta.to_variant(); + + // Use the image creation timestamp for the ostree commit, matching the + // behaviour of the old ImageImporter pipeline. This is important because + // rpm-ostree unconditionally expects a non-zero timestamp in the deployment + // metadata; a zero (epoch) timestamp can cause `rpm-ostree status` to abort. + let timestamp = + crate::container::store::timestamp_of_manifest_or_config(image_manifest, image_config) + .unwrap_or_else(|| chrono::offset::Utc::now().timestamp() as u64); + + let commit = ostree_repo + .write_commit_with_time(None, None, None, Some(&meta), &root, timestamp, cancellable) + .context("Writing ostree commit")?; + + txn.commit(cancellable).context("Committing transaction")?; + + Ok(commit.to_string()) +} + +#[cfg(test)] +mod tests { + use rustix::fd::AsRawFd as _; + use rustix::fs::{AtFlags, Mode, OFlags, linkat, openat}; + use std::os::fd::AsFd as _; + + /// Verify that O_TMPFILE can be materialized via /proc/self/fd/N without + /// root (the linkat+SYMLINK_FOLLOW approach used by write_external_file_direct). 
+ #[test] + fn test_otmpfile_materialize_via_proc() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Create O_TMPFILE — anonymous inode in the directory + let tmp_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .expect("O_TMPFILE"); + + rustix::io::write(&tmp_fd, b"hello").unwrap(); + + // Materialize via /proc/self/fd/N with SYMLINK_FOLLOW + let proc_path = format!("/proc/self/fd/{}", tmp_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "materialized.txt", + AtFlags::SYMLINK_FOLLOW, + ) + .expect("linkat via /proc/self/fd should work without root"); + + let contents = std::fs::read(dir.path().join("materialized.txt")).unwrap(); + assert_eq!(contents, b"hello"); + } + + /// Verify FICLONE (reflink) works between two files in /var/tmp. + /// This will return EOPNOTSUPP on filesystems that don't support it + /// (e.g. ext4), which is acceptable — we just need to confirm the + /// ioctl itself is reachable and returns a meaningful error code. 
+ #[test] + fn test_ficlone_or_enotsup() { + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let dir_fd = rustix::fs::open( + dir.path(), + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .unwrap(); + + // Source file with some content + let src_fd = openat( + &dir_fd, + c"src", + OFlags::RDWR | OFlags::CREATE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + rustix::io::write(&src_fd, b"reflink-test-content").unwrap(); + + // Destination — O_TMPFILE + let dst_fd = openat( + &dir_fd, + c".", + OFlags::WRONLY | OFlags::TMPFILE | OFlags::CLOEXEC, + Mode::from_raw_mode(0o644), + ) + .unwrap(); + + match rustix::fs::ioctl_ficlone(&dst_fd, &src_fd) { + Ok(()) => { + // Reflinks supported — verify content matches + let proc_path = format!("/proc/self/fd/{}", dst_fd.as_fd().as_raw_fd()); + linkat( + rustix::fs::CWD, + proc_path.as_str(), + &dir_fd, + "dst", + AtFlags::SYMLINK_FOLLOW, + ) + .unwrap(); + let contents = std::fs::read(dir.path().join("dst")).unwrap(); + assert_eq!(contents, b"reflink-test-content"); + eprintln!("FICLONE: supported on this filesystem"); + } + Err(rustix::io::Errno::OPNOTSUPP) | Err(rustix::io::Errno::XDEV) => { + eprintln!( + "FICLONE: not supported on this filesystem (EOPNOTSUPP/EXDEV) — fallback to copy is correct" + ); + } + Err(e) => panic!("Unexpected FICLONE error: {e}"), + } + } + + /// Verify that user.ostreemeta xattr can be set on a regular file + /// without root in /var/tmp. 
+ #[test] + fn test_user_ostreemeta_xattr() { + use rustix::fs::{XattrFlags, fgetxattr, fsetxattr}; + + let dir = tempfile::tempdir_in("/var/tmp").unwrap(); + let f = tempfile::NamedTempFile::new_in(dir.path()).unwrap(); + let fd = rustix::fs::open(f.path(), OFlags::RDWR | OFlags::CLOEXEC, Mode::empty()).unwrap(); + + let meta_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xed\x00\x00\x00\x00"; + fsetxattr(&fd, c"user.ostreemeta", meta_bytes, XattrFlags::empty()) + .expect("setting user.ostreemeta should work without root"); + + let mut buf = vec![0u8; 256]; + let n = fgetxattr(&fd, c"user.ostreemeta", &mut buf).unwrap(); + assert_eq!(&buf[..n], meta_bytes); + } +} diff --git a/crates/ostree-ext/src/container/mod.rs b/crates/ostree-ext/src/container/mod.rs index 3419bdf82..4f8179443 100644 --- a/crates/ostree-ext/src/container/mod.rs +++ b/crates/ostree-ext/src/container/mod.rs @@ -496,6 +496,7 @@ pub fn version_for_config(config: &oci_spec::image::ImageConfiguration) -> Optio None } +pub mod composefs_import; pub mod deploy; mod encapsulate; pub use encapsulate::*; diff --git a/crates/ostree-ext/src/container/store.rs b/crates/ostree-ext/src/container/store.rs index 8acf5831e..5fb098803 100644 --- a/crates/ostree-ext/src/container/store.rs +++ b/crates/ostree-ext/src/container/store.rs @@ -137,6 +137,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use canon_json::CanonJsonSerialize; use cap_std_ext::cap_std; use cap_std_ext::cap_std::fs::{Dir, MetadataExt}; +use gvariant::{Marker as _, Structure as _}; use cap_std_ext::dirext::CapStdExtDirExt; use containers_image_proxy::{ImageProxy, OpenedImage}; @@ -172,12 +173,17 @@ const IMAGE_PREFIX: &str = "ostree/container/image"; /// ref with the project name, so the final ref may be of the form e.g. `ostree/container/baseimage/bootc/foo`. pub const BASE_IMAGE_PREFIX: &str = "ostree/container/baseimage"; -/// The key injected into the merge commit for the manifest digest. 
-pub(crate) const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; -/// The key injected into the merge commit with the manifest serialized as JSON. -const META_MANIFEST: &str = "ostree.manifest"; -/// The key injected into the merge commit with the image configuration serialized as JSON. -const META_CONFIG: &str = "ostree.container.image-config"; +/// The key for the manifest digest in ostree commit metadata. +pub const META_MANIFEST_DIGEST: &str = "ostree.manifest-digest"; +/// The key for the serialized OCI manifest in ostree commit metadata. +pub const META_MANIFEST: &str = "ostree.manifest"; +/// The key for the serialized OCI image configuration in ostree commit metadata. +pub const META_CONFIG: &str = "ostree.container.image-config"; +/// Commit metadata key written by the composefs import path to indicate this commit +/// was synthesized from a composefs repository rather than imported layer-by-layer. +/// When present, per-layer blob refs do not exist and `query_image_commit` should +/// use the merge commit itself as the base commit. +pub const META_COMPOSEFS_SYNTHESIZED: &str = "ostree-ext.composefs-synthesized"; /// The type used to store content filtering information. pub type MetaFilteredData = HashMap>; @@ -197,7 +203,7 @@ fn ref_for_layer(l: &oci_image::Descriptor) -> Result { } /// Convert e.g. sha256:12345... into `/ostree/container/blob/sha256_2B12345...`. -fn ref_for_image(l: &ImageReference) -> Result { +pub fn ref_for_image(l: &ImageReference) -> Result { refescape::prefix_escape_for_ref(IMAGE_PREFIX, &l.to_string()) } @@ -585,7 +591,7 @@ pub(crate) fn parse_ostree_manifest_layout<'a>( } /// Find the timestamp of the manifest (or config), ignoring errors. 
-fn timestamp_of_manifest_or_config( +pub(crate) fn timestamp_of_manifest_or_config( manifest: &ImageManifest, config: &ImageConfiguration, ) -> Option { @@ -1624,6 +1630,58 @@ pub fn list_images(repo: &ostree::Repo) -> Result> { .collect() } +/// Recursively walk a dirtree GVariant, calling `f` with each file's hex content checksum. +/// +/// Reads `(a(say)a(sayay))` — the ostree DIRTREE format — directly from GVariant bytes, +/// the same approach used by [`crate::ima`]. Subdirectories are visited depth-first. +fn walk_dirtree_checksums( + repo: &ostree::Repo, + dirtree_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_dirtree; + use gvariant::aligned_bytes::TryAsAligned as _; + + let v = repo.load_variant(ostree::ObjectType::DirTree, dirtree_checksum)?; + let bytes = v.data_as_bytes(); + let bytes = bytes.try_as_aligned()?; + let tree = gv_dirtree!().cast(bytes); + let (files, dirs) = tree.to_tuple(); + + let mut hexbuf = [0u8; 64]; + for file in files { + let (_name, csum) = file.to_tuple(); + hex::encode_to_slice(csum, &mut hexbuf)?; + f(std::str::from_utf8(&hexbuf)?)?; + } + for dir in dirs { + let (_name, contents_csum, _meta_csum) = dir.to_tuple(); + hex::encode_to_slice(contents_csum, &mut hexbuf)?; + walk_dirtree_checksums(repo, std::str::from_utf8(&hexbuf)?, f)?; + } + Ok(()) +} + +/// Call `f` once for each regular-file content checksum (hex SHA-256) reachable from +/// `commit_checksum`, reading dirtree GVariants directly without GObject overhead. +/// +/// Subtrees are visited depth-first. Shared dirtree objects that appear more than once +/// in the tree will be visited multiple times. 
+pub fn for_each_file_checksum( + repo: &ostree::Repo, + commit_checksum: &str, + f: &mut impl FnMut(&str) -> Result<()>, +) -> Result<()> { + use crate::objgv::gv_commit; + use gvariant::aligned_bytes::TryAsAligned as _; + + let (commit_v, _) = repo.load_commit(commit_checksum)?; + let commit_bytes = commit_v.data_as_bytes(); + let commit_bytes = commit_bytes.try_as_aligned()?; + let root_dirtree = hex::encode(gv_commit!().cast(commit_bytes).to_tuple().6); + walk_dirtree_checksums(repo, &root_dirtree, f) +} + /// Attempt to query metadata for a pulled image; if it is corrupted, /// the error is printed to stderr and None is returned. fn try_query_image( @@ -1745,11 +1803,21 @@ pub fn query_image_commit(repo: &ostree::Repo, commit: &str) -> Result(META_COMPOSEFS_SYNTHESIZED)? + .unwrap_or(false); + let base_commit = if is_composefs_synthesized { + merge_commit.clone() + } else { + let base_layer = query_layer(repo, base_layer)?; + let ostree_ref = base_layer.ostree_ref.as_str(); + base_layer + .commit + .ok_or_else(|| anyhow!("Missing base image ref {ostree_ref}"))? + }; let detached_commitmeta = repo.read_commit_detached_metadata(&merge_commit, gio::Cancellable::NONE)?; @@ -1877,15 +1945,21 @@ fn chunking_from_layer_committed( } /// Export an imported container image to a target OCI directory. +/// +/// The source image is identified by its already-resolved [`LayeredImageState`] +/// rather than an image reference, so callers may obtain it either via +/// [`query_image`] (by reference) or [`query_image_commit`] (by deployment +/// commit checksum). The latter is important for the booted deployment, whose +/// recorded spec transport may not match the transport of the ostree-container +/// ref that actually backs its commit. 
#[context("Copying image")] pub(crate) fn export_to_oci( repo: &ostree::Repo, - imgref: &ImageReference, + srcinfo: &LayeredImageState, dest_oci: &Dir, tag: Option<&str>, opts: ExportToOCIOpts, ) -> Result { - let srcinfo = query_image(repo, imgref)?.ok_or_else(|| anyhow!("No such image"))?; let (commit_layer, component_layers, remaining_layers) = parse_manifest_layout(&srcinfo.manifest, &srcinfo.configuration)?; @@ -1995,6 +2069,36 @@ pub async fn export( src_imgref: &ImageReference, dest_imgref: &ImageReference, opts: Option, +) -> Result { + let srcinfo = query_image(repo, src_imgref)?.ok_or_else(|| anyhow!("No such image"))?; + export_impl(repo, &srcinfo, dest_imgref, opts).await +} + +/// Like [`export`], but identifies the source image by an ostree commit +/// checksum (e.g. a deployment's `csum()`) rather than an image reference. +/// +/// This is the reliable way to export the booted image: it resolves the image +/// state directly from the commit via [`query_image_commit`], so it works even +/// when the deployment's recorded spec transport differs from the transport of +/// the ostree-container ref backing the commit. +#[context("Export")] +pub async fn export_commit( + repo: &ostree::Repo, + commit: &str, + dest_imgref: &ImageReference, + opts: Option, +) -> Result { + let srcinfo = query_image_commit(repo, commit)?; + export_impl(repo, &srcinfo, dest_imgref, opts).await +} + +/// Shared implementation for [`export`] and [`export_commit`]: serialize an +/// already-resolved image state to `dest_imgref`. 
+async fn export_impl( + repo: &ostree::Repo, + srcinfo: &LayeredImageState, + dest_imgref: &ImageReference, + opts: Option, ) -> Result { let opts = opts.unwrap_or_default(); let target_oci = dest_imgref.transport == Transport::OciDir; @@ -2007,14 +2111,14 @@ pub async fn export( progress_to_stdout: opts.progress_to_stdout, ..Default::default() }; - export_to_oci(repo, src_imgref, &td, None, opts)?; + export_to_oci(repo, srcinfo, &td, None, opts)?; td } else { let (path, tag) = parse_oci_path_and_tag(dest_imgref.name.as_str()); tracing::debug!("using OCI path={path} tag={tag:?}"); let path = Dir::open_ambient_dir(path, cap_std::ambient_authority()) .with_context(|| format!("Opening {path}"))?; - let descriptor = export_to_oci(repo, src_imgref, &path, tag, opts)?; + let descriptor = export_to_oci(repo, srcinfo, &path, tag, opts)?; return Ok(descriptor.digest().clone()); }; // Pass the temporary oci directory as the current working directory for the skopeo process diff --git a/crates/ostree-ext/src/fixture.rs b/crates/ostree-ext/src/fixture.rs index 20a349f67..4d8329003 100644 --- a/crates/ostree-ext/src/fixture.rs +++ b/crates/ostree-ext/src/fixture.rs @@ -23,10 +23,8 @@ use gvariant::aligned_bytes::TryAsAligned; use gvariant::{Marker, Structure}; use io_lifetimes::AsFd; use ocidir::cap_std::fs::{DirBuilder, DirBuilderExt as _}; -use ocidir::oci_spec::image::ImageConfigurationBuilder; use regex::Regex; use std::borrow::Cow; -use std::collections::HashMap; use std::ffi::CString; use std::fmt::Write as _; use std::io::{self, Write}; @@ -1007,37 +1005,20 @@ impl NonOstreeFixture { name: self.path.join(Self::SRCOCI).to_string(), }; - let mut config = ImageConfigurationBuilder::default().build().unwrap(); - let mut manifest = self.src_oci.new_empty_manifest()?.build().unwrap(); - - let bw = self.src_oci.create_gzip_layer(None)?; - let mut bw = tar::Builder::new(bw); - for def in FileDef::iter_from(CONTENTS_V0) { - let def = def.unwrap(); - def.append_tar(&mut bw)?; - } 
- let bw = bw.into_inner()?; - let new_layer = bw.complete()?; - - let created = config - .created() - .as_deref() - .and_then(bootc_utils::try_deserialize_timestamp) - .unwrap_or_default(); - - self.src_oci.push_layer_full( - &mut manifest, - &mut config, - new_layer, - None::>, - "root", - created, - ); - let config = self.src_oci.write_config(config)?; + crate::integrationtest::build_fresh_oci_from_tar( + &self.src_oci, + |w| { + let mut bw = tar::Builder::new(w); + for def in FileDef::iter_from(CONTENTS_V0) { + def?.append_tar(&mut bw)?; + } + bw.into_inner()?; + Ok(()) + }, + None, + None, + )?; - manifest.set_config(config); - self.src_oci - .replace_with_single_manifest(manifest, oci_image::Platform::default())?; let idx = self.src_oci.read_index()?; let descriptor = idx.manifests().first().unwrap(); diff --git a/crates/ostree-ext/src/integrationtest.rs b/crates/ostree-ext/src/integrationtest.rs index 89b605d6c..974ee97cc 100644 --- a/crates/ostree-ext/src/integrationtest.rs +++ b/crates/ostree-ext/src/integrationtest.rs @@ -1,5 +1,6 @@ //! Module used for integration tests; should not be public. +use std::collections::HashMap; use std::path::Path; use crate::container_utils::{is_ostree_container, ostree_booted}; @@ -14,7 +15,7 @@ use gio::prelude::*; use oci_spec::image as oci_image; use ocidir::{ LayerWriter, - oci_spec::image::{Arch, Platform}, + oci_spec::image::{Arch, ImageConfigurationBuilder, Platform}, }; use ostree::gio; use xshell::cmd; @@ -122,6 +123,86 @@ where Ok(()) } +/// Return a tar header for an empty directory entry (mode 0o755, uid/gid/mtime 0). +pub fn tar_dir_header() -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Directory); + h.set_mode(0o755); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(0); + h +} + +/// Return a tar header for a regular file of the given byte length (mode 0o644, uid/gid/mtime 0). 
+pub fn tar_reg_header(len: u64) -> tar::Header { + let mut h = tar::Header::new_ustar(); + h.set_entry_type(tar::EntryType::Regular); + h.set_mode(0o644); + h.set_uid(0); + h.set_gid(0); + h.set_mtime(0); + h.set_size(len); + h +} + +/// Build a brand-new single-layer OCI image into `oci_dir` using the tar content +/// produced by `f`. Mirrors [`generate_derived_oci_from_tar`] but starts from an +/// empty manifest/config rather than an existing image. +/// +/// The closure receives a `&mut LayerWriter` so it has the same ergonomics as the +/// derived helper: create a `tar::Builder::new(w)`, append entries, call +/// `tb.finish()?` or `tb.into_inner()?`, and return `Ok(())`. +/// +/// `tag` and `arch` behave identically to the derived variant. +#[context("Building fresh oci")] +pub fn build_fresh_oci_from_tar( + oci_dir: &ocidir::OciDir, + f: F, + tag: Option<&str>, + arch: Option, +) -> Result<()> +where + F: for<'a> FnOnce(&mut LayerWriter<'a, GzEncoder>>) -> Result<()>, +{ + let epoch = chrono::DateTime::UNIX_EPOCH; + let mut config = ImageConfigurationBuilder::default() + .created(epoch.to_rfc3339()) + .build()?; + if let Some(arch) = arch.as_ref() { + config.set_architecture(arch.clone()); + } + let mut manifest = oci_dir.new_empty_manifest()?.build()?; + + let mut bw = oci_dir.create_gzip_layer(None)?; + f(&mut bw)?; + let layer = bw.complete()?; + + oci_dir.push_layer_full( + &mut manifest, + &mut config, + layer, + None::>, + "test layer", + epoch, + ); + + let config_desc = oci_dir.write_config(config)?; + manifest.set_config(config_desc); + + let mut platform = Platform::default(); + if let Some(arch) = arch.as_ref() { + platform.set_architecture(arch.clone()); + } + if let Some(tag) = tag { + oci_dir.insert_manifest(manifest, Some(tag), platform)?; + } else { + oci_dir.replace_with_single_manifest(manifest, platform)?; + } + Ok(()) +} + fn test_proxy_auth() -> Result<()> { use containers_image_proxy::ImageProxyConfig; let merge = 
crate::container::merge_default_container_proxy_opts; diff --git a/crates/ostree-ext/tests/it/main.rs b/crates/ostree-ext/tests/it/main.rs index 14c6dd3b3..3a1347b57 100644 --- a/crates/ostree-ext/tests/it/main.rs +++ b/crates/ostree-ext/tests/it/main.rs @@ -2365,3 +2365,217 @@ async fn test_diff_id_reuse_across_compression() -> Result<()> { Ok(()) } + +/// Build a minimal OCI image (single gzip layer) into the given OCI directory. +/// +/// The tar layer explicitly includes parent directory entries before files, so +/// that composefs's strict layer importer does not choke on missing parents. +/// /etc is included at the image root and must be remapped to usr/etc during +/// ostree import. +fn build_test_oci(oci_dir: &ocidir::OciDir) -> Result<()> { + use ostree_ext::integrationtest::{build_fresh_oci_from_tar, tar_dir_header, tar_reg_header}; + + build_fresh_oci_from_tar( + oci_dir, + |w| { + let mut tb = tar::Builder::new(w); + + // Parent directories must come before their children. + tb.append_data(&mut tar_dir_header(), "usr/", std::io::empty())?; + tb.append_data(&mut tar_dir_header(), "usr/bin/", std::io::empty())?; + tb.append_data(&mut tar_dir_header(), "usr/lib/", std::io::empty())?; + + let bash_data = b"#!/bin/bash\n"; + tb.append_data( + &mut tar_reg_header(bash_data.len() as u64), + "usr/bin/bash", + std::io::Cursor::new(bash_data), + )?; + + let config_data = b"# test config\n"; + tb.append_data( + &mut tar_reg_header(config_data.len() as u64), + "usr/lib/test.conf", + std::io::Cursor::new(config_data), + )?; + + // /etc at the OCI image root — remapped to usr/etc during ostree import. 
+ tb.append_data(&mut tar_dir_header(), "etc/", std::io::empty())?; + + let hostname_data = b"testhost\n"; + tb.append_data( + &mut tar_reg_header(hostname_data.len() as u64), + "etc/hostname", + std::io::Cursor::new(hostname_data), + )?; + + tb.into_inner()?; + Ok(()) + }, + None, + None, + ) +} + +/// Test that we can synthesize an ostree commit from a composefs repository. +/// +/// The flow is: +/// 1. Build a minimal OCI image into a local OCI directory +/// 2. Pull it into a fresh composefs repository +/// 3. Call `composefs_import::import_from_composefs_repo` +/// 4. Verify the resulting ostree commit has the expected OCI metadata and files +#[tokio::test] +async fn test_composefs_import_to_ostree() -> Result<()> { + use composefs_ctl::composefs::fsverity::{Algorithm, DEFAULT_LG_BLOCKSIZE, Sha512HashValue}; + use composefs_ctl::composefs::repository::Repository; + use composefs_ctl::composefs_oci; + use composefs_ctl::composefs_oci::oci_image::OciImage; + use ostree_ext::container::composefs_import; + use ostree_ext::container::store::{META_CONFIG, META_MANIFEST, META_MANIFEST_DIGEST}; + use std::sync::Arc; + + if !check_skopeo() { + eprintln!("Skipping test_composefs_import_to_ostree: skopeo not available"); + return Ok(()); + } + + let tempdir = tempfile::tempdir_in("/var/tmp")?; + let path: &camino::Utf8Path = tempdir.path().try_into().unwrap(); + + // Step 1: build a minimal OCI image + let oci_path = path.join("test.oci"); + std::fs::create_dir(&oci_path)?; + let oci_cap_dir = cap_std::fs::Dir::open_ambient_dir(&oci_path, cap_std::ambient_authority())?; + let oci_dir = ocidir::OciDir::ensure(oci_cap_dir)?; + build_test_oci(&oci_dir)?; + + // Step 2: create a composefs repository and pull the OCI image into it + let cfs_path = path.join("composefs-repo"); + std::fs::create_dir(&cfs_path)?; + let cfs_dirfd = cap_std::fs::Dir::open_ambient_dir(&cfs_path, cap_std::ambient_authority())?; + let algo = Algorithm::Sha512 { + lg_blocksize: DEFAULT_LG_BLOCKSIZE, 
+ }; + let (cfs_repo, _) = Repository::::init_path(cfs_dirfd, ".", algo, false) + .context("Initializing composefs repository")?; + let cfs_repo = Arc::new(cfs_repo); + + let oci_imgref = format!("oci:{}", oci_path); + let pull_result = composefs_oci::pull(&cfs_repo, &oci_imgref, None, Default::default()) + .await + .context("Pulling OCI image into composefs repo")?; + + // Step 3: get manifest + config from the composefs repo + let oci_image = OciImage::open(&cfs_repo, &pull_result.manifest_digest, None) + .context("Opening OCI image from composefs repo")?; + let manifest = oci_image.manifest().clone(); + let config = oci_image + .config() + .cloned() + .context("OCI image has no config")?; + + // Step 4: create a destination ostree repo and run the import + let ost_repo = ostree::Repo::new(&gio::File::for_path(path.join("ostree-repo"))); + ost_repo + .create(ostree::RepoMode::BareUser, gio::Cancellable::NONE) + .context("Creating destination ostree repo")?; + + let manifest_digest_str = pull_result.manifest_digest.to_string(); + let commit = composefs_import::import_from_composefs_repo( + &ost_repo, + &cfs_repo, + &pull_result.config_digest, + &manifest_digest_str, + &manifest, + &config, + false, + ) + .context("Importing composefs repo into ostree")?; + + // Step 5: idempotency — a second import of the same image must yield the + // same commit checksum. This is only guaranteed when the OCI image carries + // a fixed creation timestamp (which build_fresh_oci_from_tar sets to the + // Unix epoch) so that timestamp_of_manifest_or_config returns a stable value + // rather than falling back to Utc::now(). 
+ let commit2 = composefs_import::import_from_composefs_repo( + &ost_repo, + &cfs_repo, + &pull_result.config_digest, + &manifest_digest_str, + &manifest, + &config, + false, + ) + .context("Second import")?; + assert_eq!(commit, commit2, "re-import should produce identical commit"); + + // Step 6: verify + assert!(!commit.is_empty(), "commit checksum should be non-empty"); + + let (commitv, state) = ost_repo.load_commit(&commit)?; + assert_eq!(state, ostree::RepoCommitState::NORMAL); + + // Commit metadata should contain the three OCI keys + let commit_meta = commitv.child_value(0); + let commitmeta = glib::VariantDict::new(Some(&commit_meta)); + assert!( + commitmeta + .lookup_value(META_MANIFEST_DIGEST, None) + .is_some(), + "commit should have manifest-digest metadata" + ); + assert!( + commitmeta.lookup_value(META_MANIFEST, None).is_some(), + "commit should have manifest metadata" + ); + assert!( + commitmeta.lookup_value(META_CONFIG, None).is_some(), + "commit should have config metadata" + ); + // The stored digest should round-trip + let stored_digest = commitmeta + .lookup::(META_MANIFEST_DIGEST)? 
+ .expect("manifest-digest key"); + assert_eq!(stored_digest, manifest_digest_str); + + // The file tree should have the files we put in the layer + let (root, _) = ost_repo.read_commit(&commit, gio::Cancellable::NONE)?; + for path in ["usr", "usr/bin", "usr/bin/bash", "usr/lib/test.conf"] { + let node = root.resolve_relative_path(path); + assert!( + node.query_exists(gio::Cancellable::NONE), + "expected {path} to exist in the ostree commit" + ); + } + + // Verify /etc was remapped to usr/etc (not left at the root as etc/) + let usr_etc_hostname = root.resolve_relative_path("usr/etc/hostname"); + assert!( + usr_etc_hostname.query_exists(gio::Cancellable::NONE), + "expected usr/etc/hostname to exist (remapped from /etc/hostname)" + ); + + // Verify there is NO top-level etc/ — it should have been remapped + let root_etc = root.resolve_relative_path("etc"); + assert!( + !root_etc.query_exists(gio::Cancellable::NONE), + "top-level etc/ should not exist; it should be remapped to usr/etc/" + ); + + // Run `ostree fsck` against the repo to verify all objects pass integrity + // checks — in particular that file content checksums match, that + // user.ostreemeta xattrs are present and well-formed, and that no + // object is corrupted. + let ostree_repo_path = path.join("ostree-repo"); + let st = std::process::Command::new("ostree") + .args(["fsck", "--repo"]) + .arg(&ostree_repo_path) + .status() + .context("running `ostree fsck`")?; + assert!( + st.success(), + "`ostree fsck` failed on the composefs-imported repo" + ); + + Ok(()) +}