From 138c9f9bf85465025bb35b44570c8692021a657d Mon Sep 17 00:00:00 2001 From: Thomas Schaper Date: Fri, 1 May 2026 10:02:39 +0200 Subject: [PATCH 1/4] harden(rng): cap per-request entropy allocation to 64 KiB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backports upstream firecracker PR #5762 (commit 755086956). Adds a MAX_ENTROPY_BYTES (64 KiB) cap on the per-request rand_bytes allocation in handle_one(). The pre-fix code did `vec![0; iovec.len()]` where `iovec.len()` is the *sum* of all descriptor lengths in a chain, not the distinct guest memory backing them. A guest can craft 255 overlapping descriptors each claiming 16 MiB but all pointing to the same guest physical memory, inflating iovec.len() to ~4 GiB and exhausting host RAM. No CVE was assigned upstream; AWS classifies this as a host DoS hardening rather than a security advisory. Operationally, SAFE microVMs do not attach an entropy device, so the unfixed code path is unreachable in our deployment. This is defence-in-depth for any future config that does attach one. Manual port — could not be cherry-picked cleanly because by v1.15.x the rng device holds an owned `self.buffer` field and process_entropy_queue has a different signature. Most importantly, IoVecBufferMut::len() returns u32 in v1.15.x but usize in v1.6.5, which forced a small change to the cap arithmetic and the function return path. The actual security-relevant change (the cap itself) is the same as upstream. --- src/vmm/src/devices/virtio/rng/device.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs index 10fee1b64b7..e11ceae424b 100644 --- a/src/vmm/src/devices/virtio/rng/device.rs +++ b/src/vmm/src/devices/virtio/rng/device.rs @@ -23,6 +23,13 @@ use crate::vstate::memory::GuestMemoryMmap; pub const ENTROPY_DEV_ID: &str = "rng"; +// Cap the per-request entropy allocation. A guest can craft a virtio +// descriptor chain whose total `iovec.len()` is the sum of many overlapping +// descriptors, reaching multi-GiB sizes from a small guest memory and +// exhausting host RAM. The guest still gets up to this many bytes of entropy +// per request. +const MAX_ENTROPY_BYTES: u32 = 64 * 1024; + #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum EntropyError { /// Error while handling an Event file descriptor: {0} @@ -112,15 +119,17 @@ impl Entropy { return Ok(0); } - let mut rand_bytes = vec![0; iovec.len()]; + let len = std::cmp::min(iovec.len(), MAX_ENTROPY_BYTES as usize); + let mut rand_bytes = vec![0; len]; rand::fill(&mut rand_bytes).map_err(|err| { METRICS.host_rng_fails.inc(); err })?; - // It is ok to unwrap here. We are writing `iovec.len()` bytes at offset 0. + // We are writing at most `iovec.len()` bytes starting at offset 0. iovec.write_all_volatile_at(&rand_bytes, 0).unwrap(); - Ok(iovec.len().try_into().unwrap()) + // `len` fits in u32: bounded above by MAX_ENTROPY_BYTES (64 KiB). + Ok(len.try_into().unwrap()) } fn process_entropy_queue(&mut self) { From 8ff4adfb1b994bb11ce0cc52c9da086bae950a48 Mon Sep 17 00:00:00 2001 From: Thomas Schaper Date: Fri, 1 May 2026 10:02:39 +0200 Subject: [PATCH 2/4] harden(virtio-mmio): preserve cumulative status bits across transitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backports the MMIO portion of upstream PR #5818 (commit 4a5219870). Replaces the match on `!self.device_status & status` with an explicit VALID_TRANSITIONS table and equality check, so writes that drop previously-set bits (e.g. FEATURES_OK alone after the device is in state ACK | DRIVER | FEATURES_OK | DRIVER_OK) are rejected. This is *not* a fix for CVE-2026-5747. Per AWS security bulletin 2026-015, that CVE is specific to the virtio PCI transport, which is opt-in via --enable-pci and was added upstream in v1.13.0. Our fork is based on v1.6.5 and has no PCI transport, so the CVE itself does not apply. Upstream's PR #5818 included a parenthetical defensive hardening of the MMIO transport ("Note: virtio MMIO transport also didn't [enforce cumulative bits]") and that is what this commit backports — for consistency with upstream's stricter behaviour and defence-in-depth, not because of an active CVE. The upstream commit could not be cherry-picked cleanly because by v1.15.x the mmio transport lives at src/vmm/src/devices/virtio/ transport/mmio.rs, the activate() signature gained an interrupt argument, and the failure path uses DEVICE_NEEDS_RESET / VirtioInterruptType, none of which exist in v1.6.5. The control flow and the VALID_TRANSITIONS contents are deliberately kept identical to upstream so future audits can compare line-for-line. --- src/vmm/src/devices/virtio/mmio.rs | 83 ++++++++++++++++-------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/src/vmm/src/devices/virtio/mmio.rs b/src/vmm/src/devices/virtio/mmio.rs index cd9c1a5e033..8338164f210 100644 --- a/src/vmm/src/devices/virtio/mmio.rs +++ b/src/vmm/src/devices/virtio/mmio.rs @@ -172,19 +172,46 @@ impl MmioTransport { #[allow(unused_assignments)] fn set_device_status(&mut self, status: u32) { use device_status::*; - // match changed bits - match !self.device_status & status { - ACKNOWLEDGE if self.device_status == INIT => { - self.device_status = status; - } - DRIVER if self.device_status == ACKNOWLEDGE => { - self.device_status = status; + + const VALID_TRANSITIONS: &[(u32, u32)] = &[ + (INIT, ACKNOWLEDGE), + (ACKNOWLEDGE, ACKNOWLEDGE | DRIVER), + (ACKNOWLEDGE | DRIVER, ACKNOWLEDGE | DRIVER | FEATURES_OK), + ( + ACKNOWLEDGE | DRIVER | FEATURES_OK, + ACKNOWLEDGE | DRIVER | FEATURES_OK | DRIVER_OK, + ), + ]; + + if (status & FAILED) != 0 { + // TODO: notify backend driver to stop the device + self.device_status |= FAILED; + } else if status == INIT { + if self.locked_device().is_activated() { + let mut device_status = self.device_status; + let reset_result = self.locked_device().reset(); + match reset_result { + Some((_interrupt_evt, mut _queue_evts)) => {} + None => { + device_status |= FAILED; + } + } + self.device_status = device_status; } - FEATURES_OK if self.device_status == (ACKNOWLEDGE | DRIVER) => { - self.device_status = status; + + // If the backend device driver doesn't support reset, + // just leave the device marked as FAILED. + if self.device_status & FAILED == 0 { + self.reset(); } - DRIVER_OK if self.device_status == (ACKNOWLEDGE | DRIVER | FEATURES_OK) => { - self.device_status = status; + } else if VALID_TRANSITIONS + .iter() + .any(|&(from, to)| self.device_status == from && status == to) + { + self.device_status = status; + + // Activate the device when transitioning to DRIVER_OK. + if status == (ACKNOWLEDGE | DRIVER | FEATURES_OK | DRIVER_OK) { let device_activated = self.locked_device().is_activated(); if !device_activated && self.are_queues_valid() { self.locked_device() @@ -192,35 +219,11 @@ impl MmioTransport { .expect("Failed to activate device"); } } - _ if (status & FAILED) != 0 => { - // TODO: notify backend driver to stop the device - self.device_status |= FAILED; - } - _ if status == 0 => { - if self.locked_device().is_activated() { - let mut device_status = self.device_status; - let reset_result = self.locked_device().reset(); - match reset_result { - Some((_interrupt_evt, mut _queue_evts)) => {} - None => { - device_status |= FAILED; - } - } - self.device_status = device_status; - } - - // If the backend device driver doesn't support reset, - // just leave the device marked as FAILED. - if self.device_status & FAILED == 0 { - self.reset(); - } - } - _ => { - warn!( - "invalid virtio driver status transition: 0x{:x} -> 0x{:x}", - self.device_status, status - ); - } + } else { + warn!( + "invalid virtio driver status transition: {:#x} -> {:#x}", + self.device_status, status + ); } } } From 1cdb57cb166c7062bce816378c2848d8c19cbc90 Mon Sep 17 00:00:00 2001 From: Thomas Schaper Date: Fri, 1 May 2026 10:02:39 +0200 Subject: [PATCH 3/4] harden(balloon): bound stats descriptor length Backports upstream firecracker PR #5794 (commit 1689689f0). Adds a MAX_STATS_DESC_LEN (256 stat tags = 2560 bytes) cap on the stats descriptor processed by process_stats_queue(). Pre-fix, a guest could submit a descriptor with arbitrarily large `head.len`, causing the inner loop `for index in (0..head.len).step_by(...)` to iterate billions of times and stall the VMM event loop. No CVE was assigned upstream; AWS classifies this as a host DoS hardening rather than a security advisory. Operationally, SAFE microVMs do not attach a balloon device, so the unfixed code path is unreachable in our deployment. This is defence-in-depth for any future config that does attach one. Cherry-picked cleanly from upstream's source-code diff with two mechanical adaptations for v1.6.5: - drop the unrelated CHANGELOG hunk; - drop the second 'interrupt' argument from balloon.activate() in the new test (added in upstream's later refactor) and explicitly import the warn! macro that this module did not yet pull in. (cherry picked from commit 1689689f0a31f9b1107c01ae9c6ed5b6f110050e) --- src/vmm/src/devices/virtio/balloon/device.rs | 94 +++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 541af3e5080..b62c06bbe9d 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use std::time::Duration; use std::{cmp, fmt}; -use log::error; +use log::{error, warn}; use serde::Serialize; use timerfd::{ClockId, SetTimeFlags, TimerFd, TimerState}; use utils::eventfd::EventFd; @@ -35,6 +35,16 @@ use crate::vstate::memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemor const SIZE_OF_U32: usize = std::mem::size_of::(); const SIZE_OF_STAT: usize = std::mem::size_of::(); +/// Upper bound on the number of stats tags a guest may report. +/// The VirtIO spec currently defines 16, but newer kernel versions can +/// add more (e.g. Linux 6.12 added several, see 74c025c5d7e4). We use a +/// generous limit that still bounds computation without breaking on future +/// kernels. +const MAX_STATS_TAGS: u32 = 256; +/// Maximum valid stats descriptor length in bytes. +/// Descriptors exceeding this are rejected to prevent unbounded iteration. +#[allow(clippy::cast_possible_truncation)] +const MAX_STATS_DESC_LEN: u32 = MAX_STATS_TAGS * std::mem::size_of::() as u32; fn mib_to_pages(amount_mib: u32) -> Result { amount_mib @@ -410,6 +420,21 @@ impl Balloon { .add_used(mem, prev_stats_desc, 0) .map_err(BalloonError::Queue)?; } + + // Reject oversized descriptors to prevent a guest from causing + // excessive iteration on the VMM event loop. + // We still hold onto the descriptor (via stats_desc_index below) + // so that the stats request/response protocol is preserved and + // trigger_stats_update can return it to the guest later. + if head.len > MAX_STATS_DESC_LEN { + warn!( + "balloon: stats descriptor too large: {} > {}, skipping", + head.len, MAX_STATS_DESC_LEN + ); + self.stats_desc_index = Some(head.index); + continue; + } + for index in (0..head.len).step_by(SIZE_OF_STAT) { // Read the address at position `index`. The only case // in which this fails is if there is overflow, @@ -1162,4 +1187,71 @@ pub(crate) mod tests { assert_eq!(balloon.num_pages(), 0x1122_3344); assert_eq!(balloon.actual_pages(), 0x1234_5678); } + + /// Test that process_stats_queue holds oversized descriptors without + /// updating stats, and updates stats for valid-length ones. + #[test] + fn test_stats_queue_oversized_descriptor_rejected() { + struct TestCase { + desc_len: u32, + stats_updated: bool, + } + + let cases = [ + TestCase { + desc_len: MAX_STATS_DESC_LEN + 1, + stats_updated: false, + }, + TestCase { + desc_len: MAX_STATS_DESC_LEN, + stats_updated: true, + }, + ]; + + let stat_addr: u64 = 0x1000; + + for tc in &cases { + let mut balloon = Balloon::new(0, true, 1, false, false).unwrap(); + let mem = default_mem(); + let statsq = VirtQueue::new(GuestAddress(0), &mem, 16); + balloon.set_queue(INFLATE_INDEX, statsq.create_queue()); + balloon.set_queue(DEFLATE_INDEX, statsq.create_queue()); + balloon.set_queue(STATS_INDEX, statsq.create_queue()); + balloon.activate(mem.clone()).unwrap(); + + // Fill the descriptor region with a recognisable stat value. + let n_stats = tc.desc_len as usize / SIZE_OF_STAT; + for i in 0..n_stats { + mem.write_obj::( + BalloonStat { + tag: VIRTIO_BALLOON_S_MEMFREE, + val: 0xBEEF, + }, + GuestAddress(stat_addr + (i * SIZE_OF_STAT) as u64), + ) + .unwrap(); + } + + set_request(&statsq, 0, stat_addr, tc.desc_len, VIRTQ_DESC_F_NEXT); + balloon.queue_events()[STATS_INDEX].write(1).unwrap(); + balloon.process_stats_queue_event().unwrap(); + + // The descriptor should always be held (stats protocol preserved) + // regardless of whether the stats were updated. + assert!( + balloon.stats_desc_index.is_some(), + "desc_len={}: descriptor should be held", + tc.desc_len, + ); + + // Verify stats were only updated for valid descriptors. + assert_eq!( + balloon.latest_stats.free_memory.is_some(), + tc.stats_updated, + "desc_len={}: expected stats_updated={}", + tc.desc_len, + tc.stats_updated, + ); + } + } } From c33a49bec070b79bf9a01a812f9566e6d4efd760 Mon Sep 17 00:00:00 2001 From: Thomas Schaper Date: Fri, 1 May 2026 10:02:39 +0200 Subject: [PATCH 4/4] Bump version to 1.6.6 --- Cargo.lock | 12 ++++++------ src/cpu-template-helper/Cargo.toml | 2 +- src/firecracker/Cargo.toml | 2 +- src/jailer/Cargo.toml | 2 +- src/rebase-snap/Cargo.toml | 2 +- src/seccompiler/Cargo.toml | 2 +- src/snapshot-editor/Cargo.toml | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e519294ce86..f107257fbb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -366,7 +366,7 @@ checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "cpu-template-helper" -version = "1.6.5" +version = "1.6.6" dependencies = [ "clap", "displaydoc", @@ -528,7 +528,7 @@ dependencies = [ [[package]] name = "firecracker" -version = "1.6.5" +version = "1.6.6" dependencies = [ "api_server", "bincode", @@ -698,7 +698,7 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jailer" -version = "1.6.5" +version = "1.6.6" dependencies = [ "libc", "log-instrument", @@ -1011,7 +1011,7 @@ dependencies = [ [[package]] name = "rebase-snap" -version = "1.6.5" +version = "1.6.6" dependencies = [ "displaydoc", "libc", @@ -1099,7 +1099,7 @@ dependencies = [ [[package]] name = "seccompiler" -version = "1.6.5" +version = "1.6.6" dependencies = [ "bincode", "displaydoc", @@ -1187,7 +1187,7 @@ dependencies = [ [[package]] name = "snapshot-editor" -version = "1.6.5" +version = "1.6.6" dependencies = [ "clap", "clap-num", diff --git a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index 33294b5ed62..92729b9757a 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cpu-template-helper" -version = "1.6.5" +version = "1.6.6" authors = ["Amazon Firecracker team "] edition = "2021" license = "Apache-2.0" diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 8722f03ad32..413d4c26095 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "firecracker" -version = "1.6.5" +version = "1.6.6" authors = ["Amazon Firecracker team "] edition = "2021" build = "build.rs" diff --git a/src/jailer/Cargo.toml b/src/jailer/Cargo.toml index 8f55a36f338..7a638742d45 100644 --- a/src/jailer/Cargo.toml +++ b/src/jailer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jailer" -version = "1.6.5" +version = "1.6.6" authors = ["Amazon Firecracker team "] edition = "2021" description = "Process for starting Firecracker in production scenarios; applies a cgroup/namespace isolation barrier and then drops privileges." diff --git a/src/rebase-snap/Cargo.toml b/src/rebase-snap/Cargo.toml index 95a9c18c445..c98f9d2733e 100644 --- a/src/rebase-snap/Cargo.toml +++ b/src/rebase-snap/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rebase-snap" -version = "1.6.5" +version = "1.6.6" authors = ["Amazon Firecracker team "] edition = "2021" license = "Apache-2.0" diff --git a/src/seccompiler/Cargo.toml b/src/seccompiler/Cargo.toml index 346c62d3af2..a7f6fad6ed0 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "seccompiler" -version = "1.6.5" +version = "1.6.6" authors = ["Amazon Firecracker team "] edition = "2021" description = "Program that compiles multi-threaded seccomp-bpf filters expressed as JSON into raw BPF programs, serializing them and outputting them to a file." diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index 6d97ca72411..2a4e6ba9f85 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snapshot-editor" -version = "1.6.5" +version = "1.6.6" authors = ["Amazon Firecracker team "] edition = "2021" license = "Apache-2.0"