From 5168d7290305c2ecebe5273dce2fa8533824e119 Mon Sep 17 00:00:00 2001 From: Julia Vassiliki Date: Wed, 6 May 2026 15:14:41 +1000 Subject: [PATCH 1/4] loader(aarch64): pass PTs to enable MMU assembler As opposed to them being accessed via global variables directly. The reason to do this is to enable us to vary the page table placement from the C side without needing to touch or modify the assembly any further, which will enable the future commits for making the number of page page table structures dynamic, which is necessary for making the loader 1:1 all of RAM. Also, remove some redundant stack pops/pushs to util64.S. Signed-off-by: Julia Vassiliki --- loader/src/aarch64/mmu.c | 8 ++++---- loader/src/aarch64/util64.S | 28 +++++++++++++++++----------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/loader/src/aarch64/mmu.c b/loader/src/aarch64/mmu.c index 8ef6427ce..15fe83564 100644 --- a/loader/src/aarch64/mmu.c +++ b/loader/src/aarch64/mmu.c @@ -12,8 +12,8 @@ #include "../cutil.h" #include "../uart.h" -void el1_mmu_enable(void); -void el2_mmu_enable(void); +void el1_mmu_enable(void *a, void *b); +void el2_mmu_enable(void *a); /* Paging structures for kernel mapping */ uint64_t boot_lvl0_upper[1 << 9] ALIGN(1 << 12); @@ -37,9 +37,9 @@ int arch_mmu_enable(int logical_cpu) LDR_PRINT("INFO", logical_cpu, "enabling MMU\n"); el = current_el(); if (el == EL1) { - el1_mmu_enable(); + el1_mmu_enable(&boot_lvl0_lower, &boot_lvl0_upper); } else if (el == EL2) { - el2_mmu_enable(); + el2_mmu_enable(&boot_lvl0_lower); } else { LDR_PRINT("ERROR", logical_cpu, "unknown EL for MMU enable\n"); } diff --git a/loader/src/aarch64/util64.S b/loader/src/aarch64/util64.S index 5137c76d9..fd94ebe36 100644 --- a/loader/src/aarch64/util64.S +++ b/loader/src/aarch64/util64.S @@ -302,14 +302,12 @@ BEGIN_FUNC(el1_mmu_disable) */ bl invalidate_icache - ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret END_FUNC(el1_mmu_disable) BEGIN_FUNC(el2_mmu_disable) stp x29, x30, [sp, #-16]! - stp x27, x28, [sp, #-16]! mov x29, sp /* Disable caches */ @@ -324,14 +322,18 @@ BEGIN_FUNC(el2_mmu_disable) */ bl invalidate_icache - ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret END_FUNC(el2_mmu_disable) +/* + * Enables the MMU for EL2. + * Takes two arguments the physical address for TTBR0_EL1 (x0) and TTBR1_EL1 (x1). + */ BEGIN_FUNC(el1_mmu_enable) stp x29, x30, [sp, #-16]! stp x27, x28, [sp, #-16]! + /* move caller-saved to callee-saved registers */ mov x29, sp mov x27, x0 mov x28, x1 @@ -359,10 +361,8 @@ BEGIN_FUNC(el1_mmu_enable) msr tcr_el1, x10 /* Setup page tables */ - adrp x8, boot_lvl0_lower - msr ttbr0_el1, x8 - adrp x8, boot_lvl0_upper - msr ttbr1_el1, x8 + msr ttbr0_el1, x27 /* argument 0 */ + msr ttbr1_el1, x28 /* argument 1 */ isb /* invalidate all TLB entries for EL1 */ @@ -372,14 +372,21 @@ BEGIN_FUNC(el1_mmu_enable) enable_mmu sctlr_el1, x8 + ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret - END_FUNC(el1_mmu_enable) +/* + * Enables the MMU for EL2. + * Takes one argument, the physical address for TTBR0_EL2 (x0). + */ BEGIN_FUNC(el2_mmu_enable) stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + /* move caller-saved to callee-saved registers */ mov x29, sp + mov x28, x0 /* Disable the MMU */ bl el2_mmu_disable @@ -403,8 +410,7 @@ BEGIN_FUNC(el2_mmu_enable) isb /* Setup page tables */ - adrp x8, boot_lvl0_lower - msr ttbr0_el2, x8 + msr ttbr0_el2, x28 /* argument 0 */ isb /* invalidate all TLB entries for EL2 */ @@ -423,9 +429,9 @@ BEGIN_FUNC(el2_mmu_enable) dsb ish isb + ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret - END_FUNC(el2_mmu_enable) .extern arm_secondary_cpu_c_entry From e7453f57fc6e0dc964a3cefd4f4c451a3fb26860 Mon Sep 17 00:00:00 2001 From: Julia Vassiliki Date: Mon, 18 May 2026 17:18:47 +1000 Subject: [PATCH 2/4] (TOFIX: breaks SMP) loader(aarch64): dynamically allocate PTs This performs cleanups to the loader code, to remove a bunch of boilerplate, and also makes the page table entries placed in the loader binary at the end. This is in preparation for moving towards mapping all of RAM in the loader. Signed-off-by: Julia Vassiliki --- loader/src/aarch64/mmu.c | 21 +- tool/microkit/src/loader.rs | 372 +++++++++++++++++++++++------------- tool/microkit/src/sel4.rs | 2 +- tool/microkit/src/util.rs | 8 + 4 files changed, 256 insertions(+), 147 deletions(-) diff --git a/loader/src/aarch64/mmu.c b/loader/src/aarch64/mmu.c index 15fe83564..39b4676c8 100644 --- a/loader/src/aarch64/mmu.c +++ b/loader/src/aarch64/mmu.c @@ -12,18 +12,13 @@ #include "../cutil.h" #include "../uart.h" -void el1_mmu_enable(void *a, void *b); -void el2_mmu_enable(void *a); +void el1_mmu_enable(uint64_t aarch64_pt_ttbr0_el1, uint64_t aarch64_pt_ttbr1_el1); +void el2_mmu_enable(uint64_t aarch64_pt_ttbr0_el2); -/* Paging structures for kernel mapping */ -uint64_t boot_lvl0_upper[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl1_upper[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_upper[1 << 9] ALIGN(1 << 12); - -/* Paging structures for identity mapping */ -uint64_t boot_lvl0_lower[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl1_lower[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_lower[1 << 9] ALIGN(1 << 12); +/* Pointers to the top-level paging structures */ +uint64_t aarch64_pt_ttbr0_el1; +uint64_t aarch64_pt_ttbr1_el1; +uint64_t aarch64_pt_ttbr0_el2; int arch_mmu_enable(int logical_cpu) { @@ -37,9 +32,9 @@ int arch_mmu_enable(int logical_cpu) LDR_PRINT("INFO", logical_cpu, "enabling MMU\n"); el = current_el(); if (el == EL1) { - el1_mmu_enable(&boot_lvl0_lower, &boot_lvl0_upper); + el1_mmu_enable(aarch64_pt_ttbr0_el1, aarch64_pt_ttbr1_el1); } else if (el == EL2) { - el2_mmu_enable(&boot_lvl0_lower); + el2_mmu_enable(aarch64_pt_ttbr0_el2); } else { LDR_PRINT("ERROR", logical_cpu, "unknown EL for MMU enable\n"); } diff --git a/tool/microkit/src/loader.rs b/tool/microkit/src/loader.rs index ed208d7d1..024d54e1d 100644 --- a/tool/microkit/src/loader.rs +++ b/tool/microkit/src/loader.rs @@ -6,19 +6,38 @@ use crate::elf::{ElfFile, ElfSegmentData}; use crate::sel4::{Arch, Config}; use crate::uimage::uimage_serialise; -use crate::util::{mb, round_up, struct_to_bytes}; +use crate::util::{align_down, align_up, mask, mb, round_up, struct_to_bytes}; +use std::cmp::min; use std::fs::File; use std::io::{BufWriter, Write}; +use std::mem; use std::ops::Range; use std::path::Path; macro_rules! grab_symbol { - ($elf: expr, $symbol_name: literal) => { + ($elf: expr, $symbol_name: expr) => { $elf.find_symbol($symbol_name) .expect(concat!("Could not find '", $symbol_name, "' symbol")) }; } +macro_rules! write_symbol { + ($loader_image: expr, $image_vaddr: expr, $elf: expr, $symbol: literal, $symbol_var: expr) => { + let (addr, size) = grab_symbol!($elf, $symbol); + let addr = usize::try_from(addr).expect("addr fits in usize"); + let size = usize::try_from(size).expect("size fits in usize"); + let image_vaddr = usize::try_from($image_vaddr).expect("vaddr fits in usize"); + + assert!(addr >= image_vaddr); + assert!(size == ::std::mem::size_of_val(&$symbol_var)); + + let offset: usize = (addr - image_vaddr); + assert!(offset <= $loader_image.len()); + + $loader_image[offset..(offset + size)].copy_from_slice(&$symbol_var.to_le_bytes()); + }; +} + const PAGE_TABLE_SIZE: usize = 4096; pub mod aarch64 { @@ -32,6 +51,7 @@ pub mod aarch64 { pub const LVL0_BITS: u64 = 9; pub const LVL1_BITS: u64 = 9; pub const LVL2_BITS: u64 = 9; + pub const LVL3_BITS: u64 = 9; pub fn lvl0_index(addr: u64) -> usize { let idx = (addr >> (BLOCK_BITS_2MB + LVL2_BITS + LVL1_BITS)) & mask(LVL0_BITS); @@ -48,6 +68,11 @@ pub mod aarch64 { idx as usize } + pub fn lvl3_index(addr: u64) -> usize { + let idx = (addr >> PAGE_BITS_4KB) & mask(LVL3_BITS); + idx as usize + } + /// Stage 1 translation table page/block descriptors have bits[4:2] containing /// AttrIndex[2:0]. The AttrIndex values depends on our configuration of /// the `MAIR_EL1` or `MAIR_EL2` registers done in util64.S; @@ -105,6 +130,11 @@ pub mod aarch64 { /// > and the level 2 descriptor n is 21. pub const BLOCK_BITS_2MB: u64 = 21; + // TODO: + + pub const BLOCK_BITS_512GB: u64 = 39; + pub const PAGE_BITS_4KB: u64 = 12; + /// Per "Table D8-52 Stage 1 VMSAv8-64 Block and Page descriptor fields" and /// "Figure D8-14 VMSAv8-64 Block descriptor formats" of ARM DDI0487L.b; /// specifically subfigure "4KB, 16KB, and 64KB granules, 48-bit OA" @@ -289,18 +319,18 @@ mod riscv64 { /// Checks that each region in the given list does not overlap with any other region. /// Panics upon finding an overlapping region -fn check_non_overlapping(regions: &Vec<(u64, &[u8])>) { +fn check_non_overlapping(regions: &Vec<(u64, u64)>) { let mut checked: Vec<(u64, u64)> = Vec::new(); - for (base, data) in regions { - let end = base + data.len() as u64; + for &(base, size) in regions.iter() { + let end = base + size; // Check that this does not overlap with any checked regions - for (b, e) in &checked { - if !(end <= *b || *base >= *e) { + for &(b, e) in checked.iter() { + if !(end <= b || base >= e) { panic!("Overlapping regions: [{base:x}..{end:x}) overlaps [{b:x}..{e:x})"); } } - checked.push((*base, end)); + checked.push((base, end)); } } @@ -330,6 +360,7 @@ pub struct Loader<'a> { header: LoaderHeader64, region_metadata: Vec, regions: Vec<(u64, &'a [u8])>, + page_table_bytes: Vec, word_size: usize, elf_machine: u16, entry: u64, @@ -427,28 +458,15 @@ impl<'a> Loader<'a> { panic!("INTERNAL: could not determine kernel_first_paddr"); }; - let pagetable_vars = match config.arch { - Arch::Aarch64 => Loader::aarch64_setup_pagetables( - config, - &loader_elf, - kernel_first_vaddr, - kernel_first_paddr, - ), - Arch::Riscv64 => Loader::riscv64_setup_pagetables( - config, - &loader_elf, - kernel_first_vaddr, - kernel_first_paddr, - ), - Arch::X86_64 => unreachable!("x86_64 does not support creating a loader image"), - }; - let image_segment = loader_elf .segments - .into_iter() + .iter() .find(|segment| segment.loadable) .expect("Did not find loadable segment"); + + // Called "vaddr" but due to 1:1 mapping vaddr == paddr. let image_vaddr = image_segment.virt_addr; + // We have to clone here as the image executable is part of this function return object, // and the loader ELF is deserialised in this scope, so its lifetime will be shorter than // the return object. @@ -458,14 +476,6 @@ impl<'a> Loader<'a> { panic!("The loader entry point must be the first byte in the image"); } - for (var_addr, var_size, var_data) in pagetable_vars { - let offset = var_addr - image_vaddr; - assert!(var_size == var_data.len() as u64); - assert!(offset > 0); - assert!(offset <= loader_image.len() as u64); - loader_image[offset as usize..(offset + var_size) as usize].copy_from_slice(&var_data); - } - let kernel_entry = kernel_elf.entry; // initial task virt + pv_offset == initial task physical, so @@ -477,11 +487,6 @@ impl<'a> Loader<'a> { ui_p_reg_start + (initial_task_vaddr_range.end - initial_task_vaddr_range.start); assert!(ui_p_reg_end > ui_p_reg_start); - // This clone isn't too bad as it is just a Vec<(u64, &[u8])> - let mut all_regions_with_loader = regions.clone(); - all_regions_with_loader.push((image_vaddr, &loader_image)); - check_non_overlapping(&all_regions_with_loader); - let mut region_metadata = Vec::new(); let mut offset: u64 = 0; for (addr, data) in ®ions { @@ -494,10 +499,63 @@ impl<'a> Loader<'a> { offset += data.len() as u64; } - let size = std::mem::size_of::() as u64 - + region_metadata.iter().fold(0_u64, |acc, x| { - acc + x.size + std::mem::size_of::() as u64 - }); + let partial_size = loader_image.len() as u64 + + mem::size_of::() as u64 + + (region_metadata.len() * mem::size_of::()) as u64 + + offset; + + let page_tables_paddr_start = image_vaddr + partial_size; + + let mut page_table_bytes = Vec::::new(); + match config.arch { + Arch::Aarch64 => { + let (ttbr0_el2, ttbr0_el1, ttbr1_el1) = Loader::aarch64_setup_pagetables( + config, + &loader_elf, + kernel_first_vaddr, + kernel_first_paddr, + page_tables_paddr_start, + &mut page_table_bytes, + ); + + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "aarch64_pt_ttbr0_el2", + ttbr0_el2 + ); + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "aarch64_pt_ttbr0_el1", + ttbr0_el1 + ); + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "aarch64_pt_ttbr1_el1", + ttbr1_el1 + ); + } + Arch::Riscv64 => { + todo!(); + } + Arch::X86_64 => unreachable!("x86_64 does not support creating a loader image"), + }; + + let size = partial_size + page_table_bytes.len() as u64; + + let mut all_regions_with_loader: Vec<_> = regions + .iter() + .map(|&(base, data)| (base, data.len() as u64)) + .collect(); + all_regions_with_loader.push((image_vaddr, size)); + check_non_overlapping(&all_regions_with_loader); + + // TODO: Check contained within real RAM. let header = LoaderHeader64 { magic, @@ -516,6 +574,7 @@ impl<'a> Loader<'a> { header, region_metadata, regions, + page_table_bytes, word_size: kernel_elf.word_size, elf_machine: kernel_elf.machine, entry: loader_elf.entry, @@ -539,6 +598,10 @@ impl<'a> Loader<'a> { bytes.extend_from_slice(data); } + bytes.extend_from_slice(&self.page_table_bytes); + + assert!(bytes.len() as u64 == self.header.size); + bytes } @@ -671,7 +734,7 @@ impl<'a> Loader<'a> { boot_lvl2_pt[start..end].copy_from_slice(&lvl3_pt_entry.to_le_bytes()); index_lvl2 += 1; } - let first_paddr_aligned = round_up(first_paddr, 1 << riscv64::BLOCK_BITS_2MB); + let first_paddr_aligned = align_up(first_paddr, riscv64::BLOCK_BITS_2MB); for (page, i) in (index_lvl2..512).enumerate() { let start = 8 * i; let end = start + 8; @@ -786,125 +849,168 @@ impl<'a> Loader<'a> { /// ``` /// fn aarch64_setup_pagetables( - _config: &Config, + config: &Config, elf: &ElfFile, - first_vaddr: u64, - first_paddr: u64, - ) -> Vec<(u64, u64, [u8; PAGE_TABLE_SIZE])> { - use aarch64::s1_mair_attr_index::{MT_DEVICE_nGnRnE, MT_NORMAL}; + kernel_first_vaddr: u64, + kernel_first_paddr: u64, + page_tables_paddr_start: u64, + page_table_bytes: &mut Vec, + ) -> (u64, u64, u64) { + use aarch64::{ + block_descriptor, lvl0_index, lvl1_index, lvl2_index, lvl3_index, page_descriptor, + s1_mair_attr_index::{MT_DEVICE_nGnRnE, MT_NORMAL}, + table_descriptor, BLOCK_BITS_1GB, BLOCK_BITS_2MB, BLOCK_BITS_512GB, PAGE_BITS_4KB, + }; + + const PAGE_TABLE_ENTRIES: usize = PAGE_TABLE_SIZE / mem::size_of::(); + + let mut serialise_page_table_to_paddr = { + let page_tables_paddr_start = { + let aligned_pt_paddr_start = + page_tables_paddr_start.next_multiple_of(PAGE_TABLE_SIZE as u64); + if aligned_pt_paddr_start != page_tables_paddr_start { + let alignment_diff = + (aligned_pt_paddr_start - page_tables_paddr_start) as usize; + page_table_bytes.resize(alignment_diff, 0); + } + + aligned_pt_paddr_start + }; - let (boot_lvl1_lower_addr, boot_lvl1_lower_size) = grab_symbol!(elf, "boot_lvl1_lower"); - let (boot_lvl1_upper_addr, boot_lvl1_upper_size) = grab_symbol!(elf, "boot_lvl1_upper"); - let (boot_lvl2_upper_addr, boot_lvl2_upper_size) = grab_symbol!(elf, "boot_lvl2_upper"); - let (boot_lvl0_lower_addr, boot_lvl0_lower_size) = grab_symbol!(elf, "boot_lvl0_lower"); - let (boot_lvl0_upper_addr, boot_lvl0_upper_size) = grab_symbol!(elf, "boot_lvl0_upper"); - let (boot_lvl2_lower_addr, boot_lvl2_lower_size) = grab_symbol!(elf, "boot_lvl2_lower"); + // This maintains the current end of the PT array. + let mut next_pt_paddr = page_tables_paddr_start; + + move |page_table: &mut [u64; PAGE_TABLE_ENTRIES]| -> u64 { + let pt_paddr = next_pt_paddr; + page_table_bytes.extend(page_table.iter().flat_map(|pte| pte.to_le_bytes())); + next_pt_paddr += PAGE_TABLE_SIZE as u64; + page_table.fill(0); + pt_paddr + } + }; let (loader_start_addr, _) = grab_symbol!(elf, "_loader_start"); let (loader_end_addr, _) = grab_symbol!(elf, "_loader_end"); - - if aarch64::lvl1_index(loader_start_addr) != aarch64::lvl1_index(loader_end_addr) { + if lvl1_index(loader_start_addr) != lvl1_index(loader_end_addr) { panic!("We only map 1GiB, but loader paddr range covers multiple GiB"); } - let mut boot_lvl0_lower: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let pt_entry = aarch64::table_descriptor(boot_lvl1_lower_addr); - boot_lvl0_lower[..8].copy_from_slice(&pt_entry.to_le_bytes()); - } - - let mut boot_lvl1_lower: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - - // map optional UART MMIO in l1 1GB page, only available if CONFIG_PRINTING - if let Ok((uart_addr, uart_addr_size)) = elf.find_symbol("uart_addr") { + let uart_base = if let Ok((uart_addr, uart_addr_size)) = elf.find_symbol("uart_addr") { let data = elf .get_data(uart_addr, uart_addr_size) .expect("uart_addr not initialized"); - let uart_base = u64::from_le_bytes(data[0..8].try_into().unwrap()); + Some(u64::from_le_bytes(data[0..8].try_into().unwrap())) + } else { + None + }; + + // Manufacture the constants as per the diagram. + let k = align_down(kernel_first_vaddr, BLOCK_BITS_512GB); + let l = align_down(kernel_first_vaddr, BLOCK_BITS_1GB); + let m = align_down(kernel_first_vaddr, BLOCK_BITS_2MB); + let p = align_down(kernel_first_paddr, BLOCK_BITS_2MB); + let i = align_down(loader_start_addr, BLOCK_BITS_1GB); + let u = uart_base.map(|addr| align_down(addr, BLOCK_BITS_1GB)); + let s = align_down(loader_start_addr, BLOCK_BITS_2MB); + let t = align_up(loader_end_addr, BLOCK_BITS_2MB); + + // Manufacture the kernel page tables, which is relatively straightforward. + let kernel_lvl1_pt_paddr = { + // First, the Level 2 Upr table. + let lvl2_pt_paddr = { + let mut lvl2_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + + let mut vaddr = m; + let mut paddr = p; + while lvl1_index(m) == lvl1_index(vaddr) { + lvl2_pt_kernel[lvl2_index(vaddr)] = block_descriptor(2, paddr, MT_NORMAL); + + vaddr += 1 << BLOCK_BITS_2MB; + paddr += 1 << BLOCK_BITS_2MB; + } - let lvl1_idx = aarch64::lvl1_index(uart_base); + serialise_page_table_to_paddr(&mut lvl2_pt_kernel) + }; - let pt_entry = aarch64::block_descriptor(1, uart_base, MT_DEVICE_nGnRnE); + // Then, the Level 1 Upr table. + let mut lvl1_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + lvl1_pt_kernel[lvl1_index(l)] = table_descriptor(lvl2_pt_paddr); - let start = 8 * lvl1_idx; - let end = 8 * (lvl1_idx + 1); - boot_lvl1_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + serialise_page_table_to_paddr(&mut lvl1_pt_kernel) + }; - let mut boot_lvl2_lower: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; + // Manufacture the loader page tables. + let loader_lvl1_pt_paddr = { + // First, the Level 2 Lwr table + let lvl2_pt_paddr = { + let mut lvl2_pt_loader = [0u64; PAGE_TABLE_ENTRIES]; - // 1GB lvl1 Table entry - let pt_entry = aarch64::table_descriptor(boot_lvl2_lower_addr); - let lvl1_idx = aarch64::lvl1_index(loader_start_addr); - let start = 8 * lvl1_idx; - let end = 8 * (lvl1_idx + 1); - boot_lvl1_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + // Identity mapped: vaddr == paddr. + let mut addr = s; + while addr < t { + lvl2_pt_loader[lvl2_index(addr)] = block_descriptor(2, addr, MT_NORMAL); - // map the loader 1:1 access into 2MB lvl2 Block entries for a 4KB granule - let lvl2_idx = aarch64::lvl2_index(loader_start_addr); - for i in lvl2_idx..=aarch64::lvl2_index(loader_end_addr) { - let entry_idx: u64 = - ((i - aarch64::lvl2_index(loader_start_addr)) << aarch64::BLOCK_BITS_2MB) as u64; + addr += 1 << BLOCK_BITS_2MB; + } - let pt_entry = - aarch64::block_descriptor(2, loader_start_addr + entry_idx, MT_DEVICE_nGnRnE); + // TODO: this is a complete hack specific to BCM2711/Raspberry Pi 4B and + // will be reworked with patches that re-do this loader mapping code. + if elf.find_symbol("cpus_release_addr").is_ok() { + // Make sure we don't override the loader mappings done above; + // and that this is located at 0x0. + assert!(s != 0); + assert!(i == 0); - let start = 8 * i; - let end = 8 * (i + 1); - boot_lvl2_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + lvl2_pt_loader[lvl2_index(0)] = block_descriptor(2, 0, MT_DEVICE_nGnRnE); + } - // TODO: this is a complete hack specific to BCM2711/Raspberry Pi 4B and - // will be removed with patches that re-do this loader mapping code. - if elf.find_symbol("cpus_release_addr").is_ok() { - let lvl2_idx = aarch64::lvl2_index(0); - // Make sure we don't override the loader mappings done above. - assert!(aarch64::lvl2_index(loader_start_addr) != lvl2_idx); - assert!(aarch64::lvl1_index(loader_start_addr) == aarch64::lvl1_index(0)); + serialise_page_table_to_paddr(&mut lvl2_pt_loader) + }; - let pt_entry = aarch64::block_descriptor(2, lvl2_idx as u64, MT_DEVICE_nGnRnE); + // Then, the Level 1 Lwr table. + let mut lvl1_pt_loader = [0u64; PAGE_TABLE_ENTRIES]; + lvl1_pt_loader[lvl1_index(i)] = table_descriptor(lvl2_pt_paddr); - let start = 8 * lvl2_idx; - let end = 8 * (lvl2_idx + 1); - boot_lvl2_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + // map optional UART MMIO in l1 1GB page, only available if CONFIG_PRINTING + if let Some(u) = u { + // UART no overlap with Loader. + assert!(lvl1_index(i) != lvl1_index(u)); + lvl1_pt_loader[lvl1_index(u)] = block_descriptor(1, u, MT_DEVICE_nGnRnE); + } - let boot_lvl0_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let pt_entry = aarch64::table_descriptor(boot_lvl1_upper_addr); - let idx = aarch64::lvl0_index(first_vaddr); - boot_lvl0_lower[8 * idx..8 * (idx + 1)].copy_from_slice(&pt_entry.to_le_bytes()); - } + serialise_page_table_to_paddr(&mut lvl1_pt_loader) + }; - let mut boot_lvl1_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let pt_entry = aarch64::table_descriptor(boot_lvl2_upper_addr); - let idx = aarch64::lvl1_index(first_vaddr); - boot_lvl1_upper[8 * idx..8 * (idx + 1)].copy_from_slice(&pt_entry.to_le_bytes()); - } + // Depending on whether we are in hypervisor mode, we either need to + // return the TTBR0_EL2 or TTBR[0,1]_EL1 values. We return u64::MAX + // so as to return garbage - an unaligned address outside of physical + // memory. + if config.hypervisor { + // Manufacture the Level 0 table, containing the kernel table + // and the RAM tables. - let mut boot_lvl2_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; + let mut ttbr0_el2_pt = [0u64; PAGE_TABLE_ENTRIES]; - let lvl2_idx = aarch64::lvl2_index(first_vaddr); - for i in lvl2_idx..512 { - let entry_idx: u64 = - ((i - aarch64::lvl2_index(first_vaddr)) << aarch64::BLOCK_BITS_2MB) as u64; + ttbr0_el2_pt[lvl0_index(k)] = table_descriptor(kernel_lvl1_pt_paddr); + ttbr0_el2_pt[lvl0_index(0)] = table_descriptor(loader_lvl1_pt_paddr); - let pt_entry = aarch64::block_descriptor(2, first_paddr + entry_idx, MT_NORMAL); + let ttbr0_el2 = serialise_page_table_to_paddr(&mut ttbr0_el2_pt); - let start = 8 * i; - let end = 8 * (i + 1); - boot_lvl2_upper[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + (ttbr0_el2, u64::MAX, u64::MAX) + } else { + let mut ttbr0_el1_pt = [0u64; PAGE_TABLE_ENTRIES]; + let mut ttbr1_el1_pt = [0u64; PAGE_TABLE_ENTRIES]; - vec![ - (boot_lvl0_lower_addr, boot_lvl0_lower_size, boot_lvl0_lower), - (boot_lvl1_lower_addr, boot_lvl1_lower_size, boot_lvl1_lower), - (boot_lvl0_upper_addr, boot_lvl0_upper_size, boot_lvl0_upper), - (boot_lvl1_upper_addr, boot_lvl1_upper_size, boot_lvl1_upper), - (boot_lvl2_upper_addr, boot_lvl2_upper_size, boot_lvl2_upper), - (boot_lvl2_lower_addr, boot_lvl2_lower_size, boot_lvl2_lower), - ] + // Kernel in TTBR1 (Upper) + ttbr1_el1_pt[lvl0_index(k)] = table_descriptor(kernel_lvl1_pt_paddr); + // Loader in TTBR0 (Lower) + ttbr0_el1_pt[lvl0_index(k)] = table_descriptor(loader_lvl1_pt_paddr); + + let ttbr0_el1 = serialise_page_table_to_paddr(&mut ttbr0_el1_pt); + let ttbr1_el1 = serialise_page_table_to_paddr(&mut ttbr1_el1_pt); + + (u64::MAX, ttbr0_el1, ttbr1_el1) + } } } diff --git a/tool/microkit/src/sel4.rs b/tool/microkit/src/sel4.rs index 641fa63c6..207f36c89 100644 --- a/tool/microkit/src/sel4.rs +++ b/tool/microkit/src/sel4.rs @@ -249,7 +249,7 @@ pub fn emulate_kernel_boot( } } -#[derive(Deserialize)] +#[derive(Deserialize, Debug)] pub struct PlatformConfigRegion { pub start: u64, pub end: u64, diff --git a/tool/microkit/src/util.rs b/tool/microkit/src/util.rs index 6f2c0e275..211c794d6 100644 --- a/tool/microkit/src/util.rs +++ b/tool/microkit/src/util.rs @@ -54,6 +54,14 @@ pub const fn round_down(n: u64, x: u64) -> u64 { } } +pub const fn align_up(n: u64, bits: u64) -> u64 { + round_up(n, 1 << bits) +} + +pub const fn align_down(n: u64, bits: u64) -> u64 { + round_down(n, 1 << bits) +} + pub fn is_power_of_two(n: u64) -> bool { assert!(n > 0); n & (n - 1) == 0 From 717e633bd510a5a359d19f7e16cef01d6aca3b3b Mon Sep 17 00:00:00 2001 From: Julia Vassiliki Date: Mon, 18 May 2026 18:28:16 +1000 Subject: [PATCH 3/4] loader(riscv64): apply the aarch64 treatment Signed-off-by: Julia Vassiliki --- loader/src/riscv/mmu.c | 12 +- tool/microkit/src/loader.rs | 262 ++++++++++++++++++++++++++---------- 2 files changed, 191 insertions(+), 83 deletions(-) diff --git a/loader/src/riscv/mmu.c b/loader/src/riscv/mmu.c index 7751b25e9..b40350ca7 100644 --- a/loader/src/riscv/mmu.c +++ b/loader/src/riscv/mmu.c @@ -8,15 +8,9 @@ #include #include "../arch.h" -#include "../cutil.h" - -/* Paging structures for kernel mapping */ -uint64_t boot_lvl1_pt[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_pt[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl3_pt[1 << 9] ALIGN(1 << 12); -/* Paging structures for identity mapping */ -uint64_t boot_lvl2_pt_loader[1 << 9] ALIGN(1 << 12); +/* Pointers to the top-level paging structures */ +uintptr_t riscv64_boot_lvl1_pt; /* * This is the encoding for the MODE field of the satp register when @@ -36,7 +30,7 @@ int arch_mmu_enable(int logical_cpu) asm volatile( "csrw satp, %0\n" : - : "r"(VM_MODE | (uintptr_t)boot_lvl1_pt >> RISCV_PGSHIFT) + : "r"(VM_MODE | riscv64_boot_lvl1_pt >> RISCV_PGSHIFT) : ); asm volatile("fence.i" ::: "memory"); diff --git a/tool/microkit/src/loader.rs b/tool/microkit/src/loader.rs index 024d54e1d..2b62c002d 100644 --- a/tool/microkit/src/loader.rs +++ b/tool/microkit/src/loader.rs @@ -280,6 +280,7 @@ pub mod aarch64 { } mod riscv64 { + pub(crate) const BLOCK_BITS_1GB: u64 = 30; pub(crate) const BLOCK_BITS_2MB: u64 = 21; pub(crate) const PAGE_BITS_4K: u64 = 12; @@ -541,7 +542,21 @@ impl<'a> Loader<'a> { ); } Arch::Riscv64 => { - todo!(); + let boot_lvl1_pt = Loader::riscv64_setup_pagetables( + config, + &loader_elf, + kernel_first_vaddr, + kernel_first_paddr, + page_tables_paddr_start, + &mut page_table_bytes, + ); + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "riscv64_boot_lvl1_pt", + boot_lvl1_pt + ); } Arch::X86_64 => unreachable!("x86_64 does not support creating a loader image"), }; @@ -666,95 +681,194 @@ impl<'a> Loader<'a> { } } + /// RISC-V 64 page tables for our purposes uses the Sv39 translation scheme + /// (3-level page tables). + /// + /// It is split into two halves: the Upper/Kernel part of the page tables, + /// which matches the format seL4 expects. The lower half contains an + /// identity mapped region for the loader. + /// + /// ```txt + /// (512 GiB) + /// 512 +---- Level 1 ---+ 2^39 + /// | | + /// | (empty) | + /// | | + /// k+1 +----------------+ (1 GiB) + /// | Level 2 Kernel | ----------> +---- Level 2 ---+ +-------------+ + /// k +----------------+ | | ----------> | 2 MiB block | + /// | | 511 |----------------| +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | 510 |----------------| +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | |----------------| +------------- + /// | | (...) (...) (...) Kernel Regions + /// | | |----------------| +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | l+1 |----------------| +-------------+ + /// | | | Level 3 Kernel | ----+ + /// | | l |----------------| | + /// | | | | | (2 MiB) + /// | | | | +-----> +-- Level 3 --+ +------------+ + /// | | | | | | ----------> | 4 KiB page | + /// | | | | 511 |-------------| +------------+ + /// | | | (empty) | | | ----------> | 4 KiB page | + /// | (empty) | | | |-------------| +------------+ + /// | | | | | | ----------> | 4 KiB page | + /// | | | | m |-------------| +------------+ p + /// | | | | | (empty) | + /// | | | | +-------------+ + /// | | | | + /// | | 0 +----------------+ + /// | | + /// | | + /// | | + /// | | + /// | | + /// s+1 +----------------+ (1 GiB) + /// | Level 2 Loader | ----------> +-- Level 2 --+ +-------------+ + /// s +----------------+ | | ----------> | 2 MiB block | + /// | | 511 +-------------+ +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | (empty) | 510 +-------------+ +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | |-------------| +-------------+ + /// 0 +----------------+ | | ----------> | 2 MiB block | + /// |-------------| +-------------+ + /// (...) (...) (...) Loader Regions + /// |-------------| +-------------+ + /// | | ----------> | 2 MiB block | + /// |-------------| +-------------+ + /// | | ----------> | 2 MiB block | + /// t +-------------+ +-------------+ + /// | | + /// | (empty) | + /// | | + /// +-------------+ + /// + /// + /// Where: + /// k = align_down(kernel_first_vaddr, 1GiB), + /// l = align_down(kernel_first_vaddr, 2MiB), + /// m = align_down(kernel_first_vaddr, 4KiB), + /// p = align_down(kernel_first_paddr, 4KiB), + /// + /// s = align_down(text_addr, 1GiB), + /// t = align_down(text_addr, 2MiB), + /// ``` + /// fn riscv64_setup_pagetables( config: &Config, elf: &ElfFile, - first_vaddr: u64, - first_paddr: u64, - ) -> Vec<(u64, u64, [u8; PAGE_TABLE_SIZE])> { + kernel_first_vaddr: u64, + kernel_first_paddr: u64, + page_tables_paddr_start: u64, + page_table_bytes: &mut Vec, + ) -> u64 { + use riscv64::{pt_index, pte_leaf, pte_next, BLOCK_BITS_1GB, BLOCK_BITS_2MB, PAGE_BITS_4K}; + let (text_addr, _) = grab_symbol!(elf, "_text"); - let (boot_lvl1_pt_addr, boot_lvl1_pt_size) = grab_symbol!(elf, "boot_lvl1_pt"); - let (boot_lvl2_pt_addr, boot_lvl2_pt_size) = grab_symbol!(elf, "boot_lvl2_pt"); - let (boot_lvl3_pt_addr, boot_lvl3_pt_size) = grab_symbol!(elf, "boot_lvl3_pt"); - let (boot_lvl2_pt_loader_addr, boot_lvl2_pt_loader_size) = - grab_symbol!(elf, "boot_lvl2_pt_loader"); // We map the loader using 2MB pages, so make sure the base is actually aligned. - assert!(text_addr.is_multiple_of(1 << riscv64::BLOCK_BITS_2MB)); + assert!(text_addr.is_multiple_of(1 << BLOCK_BITS_2MB)); - let num_pt_levels = config.riscv_pt_levels.unwrap().levels(); + const PAGE_TABLE_ENTRIES: usize = PAGE_TABLE_SIZE / mem::size_of::(); - let mut boot_lvl1_pt: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let text_index_lvl1 = riscv64::pt_index(num_pt_levels, text_addr, 1); - let pt_entry = riscv64::pte_next(boot_lvl2_pt_loader_addr); - let start = 8 * text_index_lvl1; - let end = start + 8; - boot_lvl1_pt[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + let mut serialise_page_table_to_paddr = { + let page_tables_paddr_start = { + let aligned_pt_paddr_start = + page_tables_paddr_start.next_multiple_of(PAGE_TABLE_SIZE as u64); + if aligned_pt_paddr_start != page_tables_paddr_start { + let alignment_diff = + (aligned_pt_paddr_start - page_tables_paddr_start) as usize; + page_table_bytes.resize(alignment_diff, 0); + } + + aligned_pt_paddr_start + }; + + // This maintains the current end of the PT array. + let mut next_pt_paddr = page_tables_paddr_start; - let mut boot_lvl2_pt_loader: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let text_index_lvl2 = riscv64::pt_index(num_pt_levels, text_addr, 2); - for (page, i) in (text_index_lvl2..512).enumerate() { - let start = 8 * i; - let end = start + 8; - let addr = text_addr + ((page as u64) << riscv64::BLOCK_BITS_2MB); - let pt_entry = riscv64::pte_leaf(addr); - boot_lvl2_pt_loader[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + move |page_table: &mut [u64; PAGE_TABLE_ENTRIES]| -> u64 { + let pt_paddr = next_pt_paddr; + page_table_bytes.extend(page_table.iter().flat_map(|pte| pte.to_le_bytes())); + next_pt_paddr += PAGE_TABLE_SIZE as u64; + page_table.fill(0); + pt_paddr } - } + }; - { - let index = riscv64::pt_index(num_pt_levels, first_vaddr, 1); - let start = 8 * index; - let end = start + 8; - boot_lvl1_pt[start..end] - .copy_from_slice(&riscv64::pte_next(boot_lvl2_pt_addr).to_le_bytes()); - } + let num_pt_levels = config.riscv_pt_levels.unwrap().levels(); + assert!(num_pt_levels == 3); - let mut boot_lvl3_pt: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - let mut boot_lvl2_pt: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let mut index_lvl2 = riscv64::pt_index(num_pt_levels, first_vaddr, 2); - if !first_vaddr.is_multiple_of(1 << riscv64::BLOCK_BITS_2MB) { - let index_lvl3 = riscv64::pt_index(num_pt_levels, first_vaddr, 3); - for (page, i) in (index_lvl3..512).enumerate() { - let start = 8 * i; - let end = start + 8; - let addr = first_paddr + ((page as u64) << riscv64::PAGE_BITS_4K); - assert!(addr.is_multiple_of(1 << riscv64::PAGE_BITS_4K)); - let pt_entry = riscv64::pte_leaf(addr); - boot_lvl3_pt[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + // Manufacture the constants as per the diagram. + let k = align_down(kernel_first_vaddr, BLOCK_BITS_1GB); + let l = align_down(kernel_first_vaddr, BLOCK_BITS_2MB); + let m = align_down(kernel_first_vaddr, PAGE_BITS_4K); + let p = align_down(kernel_first_paddr, PAGE_BITS_4K); + + let s = align_down(text_addr, BLOCK_BITS_1GB); + let t = align_down(text_addr, BLOCK_BITS_2MB); + + // Manufacture the kernel page tables + let kernel_lvl2_pt_paddr = { + let mut lvl2_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + + let mut paddr = p; + let index_l = pt_index(num_pt_levels, l, 2); + + lvl2_pt_kernel[index_l] = if kernel_first_vaddr.is_multiple_of(1 << BLOCK_BITS_2MB) { + assert!(paddr.is_multiple_of(1 << BLOCK_BITS_2MB)); + let pte = pte_leaf(paddr); + paddr += 1 << BLOCK_BITS_2MB; + pte + } else { + let mut lvl3_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + + let index_m = pt_index(num_pt_levels, m, 3); + + for index in index_m..512 { + lvl3_pt_kernel[index] = pte_leaf(paddr); + paddr += 1 << PAGE_BITS_4K; } - let start = 8 * index_lvl2; - let end = start + 8; - let lvl3_pt_entry = riscv64::pte_next(boot_lvl3_pt_addr); - assert!(boot_lvl3_pt_addr.is_multiple_of(1 << riscv64::PAGE_BITS_4K)); - boot_lvl2_pt[start..end].copy_from_slice(&lvl3_pt_entry.to_le_bytes()); - index_lvl2 += 1; + + let kernel_lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt_kernel); + pte_next(kernel_lvl3_pt_paddr) + }; + + for index in (index_l + 1)..512 { + lvl2_pt_kernel[index] = pte_leaf(paddr); + paddr += 1 << BLOCK_BITS_2MB; } - let first_paddr_aligned = align_up(first_paddr, riscv64::BLOCK_BITS_2MB); - for (page, i) in (index_lvl2..512).enumerate() { - let start = 8 * i; - let end = start + 8; - let addr = first_paddr_aligned + ((page as u64) << riscv64::BLOCK_BITS_2MB); - assert!(addr.is_multiple_of(1 << riscv64::BLOCK_BITS_2MB)); - let pt_entry = riscv64::pte_leaf(addr); - boot_lvl2_pt[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + + serialise_page_table_to_paddr(&mut lvl2_pt_kernel) + }; + + // Manufacture the loader page tables, which is relatively straightforward + let loader_lvl2_pt_paddr = { + let mut lvl2_pt_loader = [0u64; PAGE_TABLE_ENTRIES]; + + // Identity mapped, so vaddr == paddr. + let mut paddr = t; + + for index in pt_index(num_pt_levels, t, 2)..512 { + lvl2_pt_loader[index] = pte_leaf(paddr); + paddr += 1 << BLOCK_BITS_2MB; } - } - vec![ - (boot_lvl1_pt_addr, boot_lvl1_pt_size, boot_lvl1_pt), - (boot_lvl2_pt_addr, boot_lvl2_pt_size, boot_lvl2_pt), - (boot_lvl3_pt_addr, boot_lvl3_pt_size, boot_lvl3_pt), - ( - boot_lvl2_pt_loader_addr, - boot_lvl2_pt_loader_size, - boot_lvl2_pt_loader, - ), - ] + serialise_page_table_to_paddr(&mut lvl2_pt_loader) + }; + + // Manufacture the Level 1 table + let mut boot_lvl1_pt = [0u64; PAGE_TABLE_ENTRIES]; + + let index_s = pt_index(num_pt_levels, s, 1); + let index_k = pt_index(num_pt_levels, k, 1); + boot_lvl1_pt[index_k] = pte_next(kernel_lvl2_pt_paddr); + boot_lvl1_pt[index_s] = pte_next(loader_lvl2_pt_paddr); + + serialise_page_table_to_paddr(&mut boot_lvl1_pt) } /// AArch64 loader page tables have two variations: From a600a29789e986795ff59f86aee51be43f1021d1 Mon Sep 17 00:00:00 2001 From: Julia Vassiliki Date: Fri, 22 May 2026 10:48:45 +1000 Subject: [PATCH 4/4] [WIP] ram --- tool/microkit/src/loader.rs | 417 +++++++++++++++++++++++++++++------- tool/microkit/src/sel4.rs | 2 +- 2 files changed, 336 insertions(+), 83 deletions(-) diff --git a/tool/microkit/src/loader.rs b/tool/microkit/src/loader.rs index 2b62c002d..b07fdd8e7 100644 --- a/tool/microkit/src/loader.rs +++ b/tool/microkit/src/loader.rs @@ -4,9 +4,9 @@ // SPDX-License-Identifier: BSD-2-Clause // use crate::elf::{ElfFile, ElfSegmentData}; -use crate::sel4::{Arch, Config}; +use crate::sel4::{Arch, Config, PlatformConfigRegion}; use crate::uimage::uimage_serialise; -use crate::util::{align_down, align_up, mask, mb, round_up, struct_to_bytes}; +use crate::util::{align_down, mb, round_up, struct_to_bytes}; use std::cmp::min; use std::fs::File; use std::io::{BufWriter, Write}; @@ -21,6 +21,18 @@ macro_rules! grab_symbol { }; } +// XX: This could be generic on arbitrary if we could specify T:: implements from_le_bytes, +// but we can't. +fn read_symbol_maybe(elf: &ElfFile, symbol_name: &str) -> Option { + let (addr, size) = elf.find_symbol(symbol_name).ok()?; + + let symbol_bytes = elf.get_data(addr, size)?; + + assert!(mem::size_of::() == symbol_bytes.len()); + + Some(u64::from_le_bytes(symbol_bytes.try_into().ok()?)) +} + macro_rules! write_symbol { ($loader_image: expr, $image_vaddr: expr, $elf: expr, $symbol: literal, $symbol_var: expr) => { let (addr, size) = grab_symbol!($elf, $symbol); @@ -147,7 +159,7 @@ pub mod aarch64 { let shareability = if attr_index == s1_mair_attr_index::MT_NORMAL { // Match what the seL4 kernel uses for its page tables, which // is especially necessary for SMP booting which relies on it - // for coherency. + // for coherency. See the comment in seL4 `release_secondary_cpus()`. shareability_attributes::INNER_SHAREABLE } else { // Per $R_{PYFVQ}$: @@ -921,35 +933,8 @@ impl<'a> Loader<'a> { /// | | /// 1 +-------------+ (512 GiB) /// | Level 1 Lwr | ----------> +-- Level 1 --+ - /// 0 +-------------+ | | - /// | (empty) | - /// | | - /// u+1 +-------------+ +-------------+ - /// | uart_base | ----------> | 1 GiB block | - /// u +-------------+ +-------------+ - /// | | - /// | (empty) | - /// | | - /// i+1 +-------------+ (1 GiB) - /// | Level 2 Lwr | ----------> +-- Level 2 --+ - /// i +-------------+ | | - /// | | | (empty) | - /// | (empty) | | | - /// | | t +-------------+ +-------------+ - /// +-------------+ | | ----------> | 2 MiB block | - /// |-------------| +-------------+ - /// | | ----------> | 2 MiB block | - /// |-------------| +-------------+ - /// Loader Regions (...) (...) (...) - /// |-------------| +-------------+ - /// | | ----------> | 2 MiB block | - /// |-------------| +-------------+ - /// | | ----------> | 2 MiB block | - /// s +-------------+ +-------------+ - /// | | - /// | (empty) | - /// | | - /// +-------------+ + /// 0 +-------------+ TODO: RAM. + /// /// /// Where: /// k = align_down(kernel_first_vaddr, 512GiB), @@ -957,9 +942,6 @@ impl<'a> Loader<'a> { /// m = align_down(kernel_first_vaddr, 2MiB), /// p = align_down(kernel_first_paddr, 2MiB), /// u = align_down(uart_base, 1GiB), - /// i = align_down(loader_start_addr, 1GiB), - /// s = align_down(loader_start_addr, 2MiB), - /// t = align_up(loader_end_addr, 2MiB), /// ``` /// fn aarch64_setup_pagetables( @@ -1003,20 +985,49 @@ impl<'a> Loader<'a> { } }; - let (loader_start_addr, _) = grab_symbol!(elf, "_loader_start"); - let (loader_end_addr, _) = grab_symbol!(elf, "_loader_end"); - if lvl1_index(loader_start_addr) != lvl1_index(loader_end_addr) { - panic!("We only map 1GiB, but loader paddr range covers multiple GiB"); - } + let identity_mapped_regions = { + let ram_regions = config + .normal_regions + .as_ref() + .expect("AArch64 should have normal_regions"); + + // println!("{:#x?}", ram_regions); + + let mut regions: Vec<_> = ram_regions + .iter() + .cloned() + .map(|region| (region, MT_NORMAL)) + .collect(); + + // FIXME: Derive from the kernel build system. + if let Some(uart_base) = read_symbol_maybe(elf, "uart_addr") { + let uart_base = align_down(uart_base, PAGE_BITS_4KB); + regions.push(( + PlatformConfigRegion { + start: uart_base, + end: uart_base + (1 << PAGE_BITS_4KB), + }, + MT_DEVICE_nGnRnE, + )); + } + + // FIXME: This is currently assuming implementation details of the BCM2711/ + // Raspberry Pi 4B spin table implementation, as it is the only + // platform we have that uses spin tables. Specifically, that + // it is always located at the 0 page. + if elf.find_symbol("cpus_release_addr").is_ok() { + regions.push(( + PlatformConfigRegion { + start: 0x0, + end: 1 << PAGE_BITS_4KB, + }, + MT_DEVICE_nGnRnE, + )); + } - let uart_base = if let Ok((uart_addr, uart_addr_size)) = elf.find_symbol("uart_addr") { - let data = elf - .get_data(uart_addr, uart_addr_size) - .expect("uart_addr not initialized"); + regions.sort_by_key(|(region, _)| region.start); - Some(u64::from_le_bytes(data[0..8].try_into().unwrap())) - } else { - None + regions }; // Manufacture the constants as per the diagram. @@ -1024,10 +1035,6 @@ impl<'a> Loader<'a> { let l = align_down(kernel_first_vaddr, BLOCK_BITS_1GB); let m = align_down(kernel_first_vaddr, BLOCK_BITS_2MB); let p = align_down(kernel_first_paddr, BLOCK_BITS_2MB); - let i = align_down(loader_start_addr, BLOCK_BITS_1GB); - let u = uart_base.map(|addr| align_down(addr, BLOCK_BITS_1GB)); - let s = align_down(loader_start_addr, BLOCK_BITS_2MB); - let t = align_up(loader_end_addr, BLOCK_BITS_2MB); // Manufacture the kernel page tables, which is relatively straightforward. let kernel_lvl1_pt_paddr = { @@ -1054,46 +1061,291 @@ impl<'a> Loader<'a> { serialise_page_table_to_paddr(&mut lvl1_pt_kernel) }; - // Manufacture the loader page tables. - let loader_lvl1_pt_paddr = { - // First, the Level 2 Lwr table - let lvl2_pt_paddr = { - let mut lvl2_pt_loader = [0u64; PAGE_TABLE_ENTRIES]; + // Manufacture the RAM page tables, which is a little bit more complicated. + // We assume that normal RAM lies between 0 <= paddr < 512GiB, i.e. + // that lvl0_index(any ram region addr) = 0. + let ram_lvl1_pt_paddr = { + // Validation of assumptions about the identity mapped regions. + let mut previous_end = None; + for (region, _) in identity_mapped_regions.iter() { + assert!(lvl0_index(region.start) == 0); + assert!(lvl0_index(region.end - 1) == 0); + // This is probably an unnecessary assumption. + assert!(region.start.is_multiple_of(4096)); + assert!(region.end.is_multiple_of(4096)); + // This is definitely necessary. + assert!(region.start >= previous_end.unwrap_or(0)); + previous_end = Some(region.end); + } + + // We maintain three active page tables, which contain our previous + // known page table data. As we process regions in ascending order, + // once we have exceeded the bounds of the current reservation we + // can simply push to the page_table_bytes storage and insert into + // the parent PT the descriptor. + // When the current vaddr (/paddr, as identity mapped) exceeds the + // top value we rotate to a new PT. + + let mut lvl1_pt = [0u64; PAGE_TABLE_ENTRIES]; + let mut lvl2_pt = [0u64; PAGE_TABLE_ENTRIES]; + let mut lvl3_pt = [0u64; PAGE_TABLE_ENTRIES]; + // TODO: These should be defines. Note that the top is the size of 1 level of the next level up. + // TODO: LVL1_ENTRY_RANGE? idk + #[allow(unused_mut)] + let mut lvl1_vaddr_top = 1 << BLOCK_BITS_512GB; + let mut lvl2_vaddr_top = 1 << BLOCK_BITS_1GB; + let mut lvl3_vaddr_top = 1 << BLOCK_BITS_2MB; + + // TODO: Tests... + // This is similar to aligned_power_of_two_regions() for the kernel UT, + // but we restrict it such that the output always is either 1GB, 2MB, or 4KB + // pages. + + // Allowed externally for the final iteration + let mut base = 0u64; + for &(ref region, attr_index) in identity_mapped_regions.iter() { + // println!("RAM Region: {:#x}..{:#x}", base, region.end); + // println!( + // " - Current Lvl1: {:#x}..{:#x}, entries: {}", + // (lvl1_vaddr_top - (1 << BLOCK_BITS_512GB)), + // lvl1_vaddr_top, + // lvl1_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl2: {:#x}..{:#x}, entries: {}", + // (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), + // lvl2_vaddr_top, + // lvl2_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl3: {:#x}..{:#x}, entries: {}", + // (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB)), + // lvl3_vaddr_top, + // lvl3_pt.iter().filter(|&&v| v != 0).count() + // ); + + // Handle the fact that the regions are not contiguous and that + // we might need to skip PT. + + { + if region.start >= lvl3_vaddr_top { + if lvl3_pt != [0; _] { + let lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt); + // println!("[iter] Serialise lvl3 table: {lvl3_pt_paddr:#x} for to {:#x}..{lvl3_vaddr_top:#x}", (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB))); + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = table_descriptor(lvl3_pt_paddr); + } + + // TODO: just compute it. + while region.start >= lvl3_vaddr_top { + lvl3_vaddr_top += 1 << BLOCK_BITS_2MB; + } + } - // Identity mapped: vaddr == paddr. - let mut addr = s; - while addr < t { - lvl2_pt_loader[lvl2_index(addr)] = block_descriptor(2, addr, MT_NORMAL); + if region.start >= lvl2_vaddr_top { + if lvl2_pt != [0; _] { + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("[iter] Serialise lvl2 table: {lvl2_pt_paddr:#x} for to {:#x}..{lvl2_vaddr_top:#x}, base: {:#x} lvl1_index(base): {:#x}", (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), base, lvl1_index(base)); + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + } + + // TODO: just compute it. + while region.start >= lvl2_vaddr_top { + lvl2_vaddr_top += 1 << BLOCK_BITS_1GB; + } + } - addr += 1 << BLOCK_BITS_2MB; + if region.start >= lvl1_vaddr_top { + unreachable!( + "impossible as everything should fit here: {lvl1_vaddr_top:#x}" + ); + } } - // TODO: this is a complete hack specific to BCM2711/Raspberry Pi 4B and - // will be reworked with patches that re-do this loader mapping code. - if elf.find_symbol("cpus_release_addr").is_ok() { - // Make sure we don't override the loader mappings done above; - // and that this is located at 0x0. - assert!(s != 0); - assert!(i == 0); + // After serialising the old base, update the new one. + base = region.start; + + // Inner Loop: + // Invariant: the page tables in lvl1_pt, lvl2_pt, lvl3_pt + // are either (1) for the current address range, + // or (2) are empty and for a lower level than the current level. + // Also, the values in lvlXXX_vaddr_top are always correct (even if empty) + // Also contiguous within the loop. + // Loop entry: (1) holds by work at the start of each region + while base != region.end { + // Condition is !=, but assert that we never skip it. + assert!(base < region.end); + + let size_bits = region.end.wrapping_sub(base).ilog2(); + let align_bits = min( + size_bits, + // FIXME: Once MSRV is > 1.97, use .lowest_one() method. + if base == 0 { + size_bits + } else { + base.trailing_zeros() + }, + ); + + // Match the size and alignment of the current region to + // the valid PT region sizes. + let (level, bits) = match u64::from(align_bits) { + BLOCK_BITS_1GB.. => (1, BLOCK_BITS_1GB), + BLOCK_BITS_2MB.. => (2, BLOCK_BITS_2MB), + PAGE_BITS_4KB.. => (3, PAGE_BITS_4KB), + 0.. => panic!("impossible; regions should be aligned to 4K at least"), + }; + + let pt_region_size = 1u64 << bits; + let top = base + pt_region_size; + + // println!("- Aligned PT region: {:#x}..{:#x} (size_bits: {}, align_bits: {}, bits: {})", base, top, size_bits, align_bits, bits); + // println!( + // " - Current Lvl1: {:#x}..{:#x}, entries: {}", + // (lvl1_vaddr_top - (1 << BLOCK_BITS_512GB)), + // lvl1_vaddr_top, + // lvl1_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl2: {:#x}..{:#x}, entries: {}", + // (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), + // lvl2_vaddr_top, + // lvl2_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl3: {:#x}..{:#x}, entries: {}", + // (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB)), + // lvl3_vaddr_top, + // lvl3_pt.iter().filter(|&&v| v != 0).count() + // ); + + match level { + 1 => { + // If it belongs in Level 1 PT, then it must go in + // lvl1 pt. By the inavariant, base < lvl1_vaddr_top. + assert!(base < lvl1_vaddr_top); + // top is <= lvl1_vaddr_top (the case where it is the topmost entry) + assert!(top <= lvl1_vaddr_top); + + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = block_descriptor(1, base, attr_index); + + if top == lvl1_vaddr_top { + // Invariant maintenance: if the new top would be now equal + // the end of the page table's region top, we need a new + // page table object and add it to the list. + + // This should be possible to handle - we just need to break out of this loop + todo!("handle the case where top of lvl1 is occupied - this would be near the top of 512GiB"); + } + + // Invariant: Lower levels are empty. + assert!(lvl2_pt == [0; _]); + assert!(lvl3_pt == [0; _]); + // Invariant maintenance: vaddr_top is right range for current PT. + // it's empty so we need to increment the top to be current top (1G aligned) + 2MIB (512 lvl3 entries) + lvl3_vaddr_top = top + (1 << BLOCK_BITS_2MB); + // it's empty so we need to increment the top to be current top (1G aligned) + 1G (512 lvl2 entries) + lvl2_vaddr_top = top + (1 << BLOCK_BITS_1GB); + } + 2 => { + // If it is a 2MiB block, it must go in the Level 2 PT; + // by our invariants: base < lvl2_vaddr_top and top <= lvl2_vaddr_top + assert!(base < lvl2_vaddr_top); + assert!(top <= lvl2_vaddr_top); + + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = block_descriptor(2, base, attr_index); + + if top == lvl2_vaddr_top { + // Invariant maintenance: keep for current address range. + // As we're the top of the range, we can serialise the table. + + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("Serialise lvl2 table: {lvl2_pt_paddr:#x} up to {lvl2_vaddr_top:#x}"); + lvl2_vaddr_top += 1 << BLOCK_BITS_1GB; + + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + + if top == lvl1_vaddr_top { + todo!("handle the case where top of lvl1 is occupied - this would be near the top of 512GiB"); + } + } + + // Invariant: Lower levels are empty. + assert!(lvl3_pt == [0; _]); + // Invariant maintenance: vaddr_top is right range for current PT. + // it's empty so we need to increment the top to be current top (2MIB aligned) + 2MIB (512 lvl3 entries) + lvl3_vaddr_top = top + (1 << BLOCK_BITS_2MB); + } + 3 => { + // If it is a 4K page, it must go in the Level 3 PT; + // by our invariants: base < lvl3_vaddr_top and top <= lvl3_vaddr_top + assert!(base < lvl3_vaddr_top); + assert!(top <= lvl3_vaddr_top); + + assert!(lvl3_pt[lvl3_index(base)] == 0); + lvl3_pt[lvl3_index(base)] = page_descriptor(base, attr_index); + + if top == lvl3_vaddr_top { + // Invariant maintenance: keep for current address range. + // As we're the top of the range, we can serialise the table. + + let lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt); + // println!("Serialise lvl3 table: {lvl3_pt_paddr:#x} for to {:#x}..{lvl3_vaddr_top:#x}", (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB))); + lvl3_vaddr_top += 1 << BLOCK_BITS_2MB; + + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = table_descriptor(lvl3_pt_paddr); + + if top == lvl2_vaddr_top { + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("Serialise lvl2 table: {lvl2_pt_paddr:#x} for to {:#x}..{lvl2_vaddr_top:#x}", (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB))); + lvl2_vaddr_top += 1 << BLOCK_BITS_1GB; + + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + + if top == lvl1_vaddr_top { + todo!("handle the case where top of lvl1 is occupied - this would be near the top of 512GiB"); + } + } + } + + // Invariant: lower levels empty is vacuuously true + } + _ => unreachable!("level is 1..=3"), + } - lvl2_pt_loader[lvl2_index(0)] = block_descriptor(2, 0, MT_DEVICE_nGnRnE); + base = base + pt_region_size; } + } - serialise_page_table_to_paddr(&mut lvl2_pt_loader) - }; + // By the loop invariant, we know that anything before has been serialised. + // However, as we are at the end of the loop now, we might have + // page tables that have been partially filled out, and we need to + // serialise these. - // Then, the Level 1 Lwr table. - let mut lvl1_pt_loader = [0u64; PAGE_TABLE_ENTRIES]; - lvl1_pt_loader[lvl1_index(i)] = table_descriptor(lvl2_pt_paddr); + if lvl3_pt != [0; _] { + let lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt); + // println!("[end] Serialise lvl3 table: {lvl3_pt_paddr:#x}"); + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = table_descriptor(lvl3_pt_paddr); + } - // map optional UART MMIO in l1 1GB page, only available if CONFIG_PRINTING - if let Some(u) = u { - // UART no overlap with Loader. - assert!(lvl1_index(i) != lvl1_index(u)); - lvl1_pt_loader[lvl1_index(u)] = block_descriptor(1, u, MT_DEVICE_nGnRnE); + if lvl2_pt != [0; _] { + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("[end] Serialise lvl2 table: {lvl2_pt_paddr:#x} for to {:#x}..{lvl2_vaddr_top:#x}, base: {:#x} lvl1_index(base): {:#x}", (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), base, lvl1_index(base)); + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); } - serialise_page_table_to_paddr(&mut lvl1_pt_loader) + // the level1 pt should not be empty. lol. + assert!(lvl1_pt != [0; _]); + + // println!("New lvl1 table"); + serialise_page_table_to_paddr(&mut lvl1_pt) }; // Depending on whether we are in hypervisor mode, we either need to @@ -1106,8 +1358,9 @@ impl<'a> Loader<'a> { let mut ttbr0_el2_pt = [0u64; PAGE_TABLE_ENTRIES]; + assert!(lvl0_index(k) != lvl0_index(0)); ttbr0_el2_pt[lvl0_index(k)] = table_descriptor(kernel_lvl1_pt_paddr); - ttbr0_el2_pt[lvl0_index(0)] = table_descriptor(loader_lvl1_pt_paddr); + ttbr0_el2_pt[lvl0_index(0)] = table_descriptor(ram_lvl1_pt_paddr); let ttbr0_el2 = serialise_page_table_to_paddr(&mut ttbr0_el2_pt); @@ -1119,7 +1372,7 @@ impl<'a> Loader<'a> { // Kernel in TTBR1 (Upper) ttbr1_el1_pt[lvl0_index(k)] = table_descriptor(kernel_lvl1_pt_paddr); // Loader in TTBR0 (Lower) - ttbr0_el1_pt[lvl0_index(k)] = table_descriptor(loader_lvl1_pt_paddr); + ttbr0_el1_pt[lvl0_index(0)] = table_descriptor(ram_lvl1_pt_paddr); let ttbr0_el1 = serialise_page_table_to_paddr(&mut ttbr0_el1_pt); let ttbr1_el1 = serialise_page_table_to_paddr(&mut ttbr1_el1_pt); diff --git a/tool/microkit/src/sel4.rs b/tool/microkit/src/sel4.rs index 207f36c89..e43363699 100644 --- a/tool/microkit/src/sel4.rs +++ b/tool/microkit/src/sel4.rs @@ -249,7 +249,7 @@ pub fn emulate_kernel_boot( } } -#[derive(Deserialize, Debug)] +#[derive(Deserialize, Debug, Clone)] pub struct PlatformConfigRegion { pub start: u64, pub end: u64,