diff --git a/loader/src/aarch64/mmu.c b/loader/src/aarch64/mmu.c index 8ef6427ce..39b4676c8 100644 --- a/loader/src/aarch64/mmu.c +++ b/loader/src/aarch64/mmu.c @@ -12,18 +12,13 @@ #include "../cutil.h" #include "../uart.h" -void el1_mmu_enable(void); -void el2_mmu_enable(void); +void el1_mmu_enable(uint64_t aarch64_pt_ttbr0_el1, uint64_t aarch64_pt_ttbr1_el1); +void el2_mmu_enable(uint64_t aarch64_pt_ttbr0_el2); -/* Paging structures for kernel mapping */ -uint64_t boot_lvl0_upper[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl1_upper[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_upper[1 << 9] ALIGN(1 << 12); - -/* Paging structures for identity mapping */ -uint64_t boot_lvl0_lower[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl1_lower[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_lower[1 << 9] ALIGN(1 << 12); +/* Pointers to the top-level paging structures */ +uint64_t aarch64_pt_ttbr0_el1; +uint64_t aarch64_pt_ttbr1_el1; +uint64_t aarch64_pt_ttbr0_el2; int arch_mmu_enable(int logical_cpu) { @@ -37,9 +32,9 @@ int arch_mmu_enable(int logical_cpu) LDR_PRINT("INFO", logical_cpu, "enabling MMU\n"); el = current_el(); if (el == EL1) { - el1_mmu_enable(); + el1_mmu_enable(aarch64_pt_ttbr0_el1, aarch64_pt_ttbr1_el1); } else if (el == EL2) { - el2_mmu_enable(); + el2_mmu_enable(aarch64_pt_ttbr0_el2); } else { LDR_PRINT("ERROR", logical_cpu, "unknown EL for MMU enable\n"); } diff --git a/loader/src/aarch64/util64.S b/loader/src/aarch64/util64.S index 5137c76d9..fd94ebe36 100644 --- a/loader/src/aarch64/util64.S +++ b/loader/src/aarch64/util64.S @@ -302,14 +302,12 @@ BEGIN_FUNC(el1_mmu_disable) */ bl invalidate_icache - ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret END_FUNC(el1_mmu_disable) BEGIN_FUNC(el2_mmu_disable) stp x29, x30, [sp, #-16]! - stp x27, x28, [sp, #-16]! mov x29, sp /* Disable caches */ @@ -324,14 +322,18 @@ BEGIN_FUNC(el2_mmu_disable) */ bl invalidate_icache - ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret END_FUNC(el2_mmu_disable) +/* + * Enables the MMU for EL2. + * Takes two arguments the physical address for TTBR0_EL1 (x0) and TTBR1_EL1 (x1). + */ BEGIN_FUNC(el1_mmu_enable) stp x29, x30, [sp, #-16]! stp x27, x28, [sp, #-16]! + /* move caller-saved to callee-saved registers */ mov x29, sp mov x27, x0 mov x28, x1 @@ -359,10 +361,8 @@ BEGIN_FUNC(el1_mmu_enable) msr tcr_el1, x10 /* Setup page tables */ - adrp x8, boot_lvl0_lower - msr ttbr0_el1, x8 - adrp x8, boot_lvl0_upper - msr ttbr1_el1, x8 + msr ttbr0_el1, x27 /* argument 0 */ + msr ttbr1_el1, x28 /* argument 1 */ isb /* invalidate all TLB entries for EL1 */ @@ -372,14 +372,21 @@ BEGIN_FUNC(el1_mmu_enable) enable_mmu sctlr_el1, x8 + ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret - END_FUNC(el1_mmu_enable) +/* + * Enables the MMU for EL2. + * Takes one argument, the physical address for TTBR0_EL2 (x0). + */ BEGIN_FUNC(el2_mmu_enable) stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + /* move caller-saved to callee-saved registers */ mov x29, sp + mov x28, x0 /* Disable the MMU */ bl el2_mmu_disable @@ -403,8 +410,7 @@ BEGIN_FUNC(el2_mmu_enable) isb /* Setup page tables */ - adrp x8, boot_lvl0_lower - msr ttbr0_el2, x8 + msr ttbr0_el2, x28 /* argument 0 */ isb /* invalidate all TLB entries for EL2 */ @@ -423,9 +429,9 @@ BEGIN_FUNC(el2_mmu_enable) dsb ish isb + ldp x27, x28, [sp], #16 ldp x29, x30, [sp], #16 ret - END_FUNC(el2_mmu_enable) .extern arm_secondary_cpu_c_entry diff --git a/loader/src/riscv/mmu.c b/loader/src/riscv/mmu.c index 7751b25e9..b40350ca7 100644 --- a/loader/src/riscv/mmu.c +++ b/loader/src/riscv/mmu.c @@ -8,15 +8,9 @@ #include #include "../arch.h" -#include "../cutil.h" - -/* Paging structures for kernel mapping */ -uint64_t boot_lvl1_pt[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl2_pt[1 << 9] ALIGN(1 << 12); -uint64_t boot_lvl3_pt[1 << 9] ALIGN(1 << 12); -/* Paging structures for identity mapping */ -uint64_t boot_lvl2_pt_loader[1 << 9] ALIGN(1 << 12); +/* Pointers to the top-level paging structures */ +uintptr_t riscv64_boot_lvl1_pt; /* * This is the encoding for the MODE field of the satp register when @@ -36,7 +30,7 @@ int arch_mmu_enable(int logical_cpu) asm volatile( "csrw satp, %0\n" : - : "r"(VM_MODE | (uintptr_t)boot_lvl1_pt >> RISCV_PGSHIFT) + : "r"(VM_MODE | riscv64_boot_lvl1_pt >> RISCV_PGSHIFT) : ); asm volatile("fence.i" ::: "memory"); diff --git a/tool/microkit/src/loader.rs b/tool/microkit/src/loader.rs index ed208d7d1..b07fdd8e7 100644 --- a/tool/microkit/src/loader.rs +++ b/tool/microkit/src/loader.rs @@ -4,21 +4,52 @@ // SPDX-License-Identifier: BSD-2-Clause // use crate::elf::{ElfFile, ElfSegmentData}; -use crate::sel4::{Arch, Config}; +use crate::sel4::{Arch, Config, PlatformConfigRegion}; use crate::uimage::uimage_serialise; -use crate::util::{mb, round_up, struct_to_bytes}; +use crate::util::{align_down, mb, round_up, struct_to_bytes}; +use std::cmp::min; use std::fs::File; use std::io::{BufWriter, Write}; +use std::mem; use std::ops::Range; use std::path::Path; macro_rules! grab_symbol { - ($elf: expr, $symbol_name: literal) => { + ($elf: expr, $symbol_name: expr) => { $elf.find_symbol($symbol_name) .expect(concat!("Could not find '", $symbol_name, "' symbol")) }; } +// XX: This could be generic on arbitrary if we could specify T:: implements from_le_bytes, +// but we can't. +fn read_symbol_maybe(elf: &ElfFile, symbol_name: &str) -> Option { + let (addr, size) = elf.find_symbol(symbol_name).ok()?; + + let symbol_bytes = elf.get_data(addr, size)?; + + assert!(mem::size_of::() == symbol_bytes.len()); + + Some(u64::from_le_bytes(symbol_bytes.try_into().ok()?)) +} + +macro_rules! write_symbol { + ($loader_image: expr, $image_vaddr: expr, $elf: expr, $symbol: literal, $symbol_var: expr) => { + let (addr, size) = grab_symbol!($elf, $symbol); + let addr = usize::try_from(addr).expect("addr fits in usize"); + let size = usize::try_from(size).expect("size fits in usize"); + let image_vaddr = usize::try_from($image_vaddr).expect("vaddr fits in usize"); + + assert!(addr >= image_vaddr); + assert!(size == ::std::mem::size_of_val(&$symbol_var)); + + let offset: usize = (addr - image_vaddr); + assert!(offset <= $loader_image.len()); + + $loader_image[offset..(offset + size)].copy_from_slice(&$symbol_var.to_le_bytes()); + }; +} + const PAGE_TABLE_SIZE: usize = 4096; pub mod aarch64 { @@ -32,6 +63,7 @@ pub mod aarch64 { pub const LVL0_BITS: u64 = 9; pub const LVL1_BITS: u64 = 9; pub const LVL2_BITS: u64 = 9; + pub const LVL3_BITS: u64 = 9; pub fn lvl0_index(addr: u64) -> usize { let idx = (addr >> (BLOCK_BITS_2MB + LVL2_BITS + LVL1_BITS)) & mask(LVL0_BITS); @@ -48,6 +80,11 @@ pub mod aarch64 { idx as usize } + pub fn lvl3_index(addr: u64) -> usize { + let idx = (addr >> PAGE_BITS_4KB) & mask(LVL3_BITS); + idx as usize + } + /// Stage 1 translation table page/block descriptors have bits[4:2] containing /// AttrIndex[2:0]. The AttrIndex values depends on our configuration of /// the `MAIR_EL1` or `MAIR_EL2` registers done in util64.S; @@ -105,6 +142,11 @@ pub mod aarch64 { /// > and the level 2 descriptor n is 21. pub const BLOCK_BITS_2MB: u64 = 21; + // TODO: + + pub const BLOCK_BITS_512GB: u64 = 39; + pub const PAGE_BITS_4KB: u64 = 12; + /// Per "Table D8-52 Stage 1 VMSAv8-64 Block and Page descriptor fields" and /// "Figure D8-14 VMSAv8-64 Block descriptor formats" of ARM DDI0487L.b; /// specifically subfigure "4KB, 16KB, and 64KB granules, 48-bit OA" @@ -117,7 +159,7 @@ pub mod aarch64 { let shareability = if attr_index == s1_mair_attr_index::MT_NORMAL { // Match what the seL4 kernel uses for its page tables, which // is especially necessary for SMP booting which relies on it - // for coherency. + // for coherency. See the comment in seL4 `release_secondary_cpus()`. shareability_attributes::INNER_SHAREABLE } else { // Per $R_{PYFVQ}$: @@ -250,6 +292,7 @@ pub mod aarch64 { } mod riscv64 { + pub(crate) const BLOCK_BITS_1GB: u64 = 30; pub(crate) const BLOCK_BITS_2MB: u64 = 21; pub(crate) const PAGE_BITS_4K: u64 = 12; @@ -289,18 +332,18 @@ mod riscv64 { /// Checks that each region in the given list does not overlap with any other region. /// Panics upon finding an overlapping region -fn check_non_overlapping(regions: &Vec<(u64, &[u8])>) { +fn check_non_overlapping(regions: &Vec<(u64, u64)>) { let mut checked: Vec<(u64, u64)> = Vec::new(); - for (base, data) in regions { - let end = base + data.len() as u64; + for &(base, size) in regions.iter() { + let end = base + size; // Check that this does not overlap with any checked regions - for (b, e) in &checked { - if !(end <= *b || *base >= *e) { + for &(b, e) in checked.iter() { + if !(end <= b || base >= e) { panic!("Overlapping regions: [{base:x}..{end:x}) overlaps [{b:x}..{e:x})"); } } - checked.push((*base, end)); + checked.push((base, end)); } } @@ -330,6 +373,7 @@ pub struct Loader<'a> { header: LoaderHeader64, region_metadata: Vec, regions: Vec<(u64, &'a [u8])>, + page_table_bytes: Vec, word_size: usize, elf_machine: u16, entry: u64, @@ -427,28 +471,15 @@ impl<'a> Loader<'a> { panic!("INTERNAL: could not determine kernel_first_paddr"); }; - let pagetable_vars = match config.arch { - Arch::Aarch64 => Loader::aarch64_setup_pagetables( - config, - &loader_elf, - kernel_first_vaddr, - kernel_first_paddr, - ), - Arch::Riscv64 => Loader::riscv64_setup_pagetables( - config, - &loader_elf, - kernel_first_vaddr, - kernel_first_paddr, - ), - Arch::X86_64 => unreachable!("x86_64 does not support creating a loader image"), - }; - let image_segment = loader_elf .segments - .into_iter() + .iter() .find(|segment| segment.loadable) .expect("Did not find loadable segment"); + + // Called "vaddr" but due to 1:1 mapping vaddr == paddr. let image_vaddr = image_segment.virt_addr; + // We have to clone here as the image executable is part of this function return object, // and the loader ELF is deserialised in this scope, so its lifetime will be shorter than // the return object. @@ -458,14 +489,6 @@ impl<'a> Loader<'a> { panic!("The loader entry point must be the first byte in the image"); } - for (var_addr, var_size, var_data) in pagetable_vars { - let offset = var_addr - image_vaddr; - assert!(var_size == var_data.len() as u64); - assert!(offset > 0); - assert!(offset <= loader_image.len() as u64); - loader_image[offset as usize..(offset + var_size) as usize].copy_from_slice(&var_data); - } - let kernel_entry = kernel_elf.entry; // initial task virt + pv_offset == initial task physical, so @@ -477,11 +500,6 @@ impl<'a> Loader<'a> { ui_p_reg_start + (initial_task_vaddr_range.end - initial_task_vaddr_range.start); assert!(ui_p_reg_end > ui_p_reg_start); - // This clone isn't too bad as it is just a Vec<(u64, &[u8])> - let mut all_regions_with_loader = regions.clone(); - all_regions_with_loader.push((image_vaddr, &loader_image)); - check_non_overlapping(&all_regions_with_loader); - let mut region_metadata = Vec::new(); let mut offset: u64 = 0; for (addr, data) in ®ions { @@ -494,10 +512,77 @@ impl<'a> Loader<'a> { offset += data.len() as u64; } - let size = std::mem::size_of::() as u64 - + region_metadata.iter().fold(0_u64, |acc, x| { - acc + x.size + std::mem::size_of::() as u64 - }); + let partial_size = loader_image.len() as u64 + + mem::size_of::() as u64 + + (region_metadata.len() * mem::size_of::()) as u64 + + offset; + + let page_tables_paddr_start = image_vaddr + partial_size; + + let mut page_table_bytes = Vec::::new(); + match config.arch { + Arch::Aarch64 => { + let (ttbr0_el2, ttbr0_el1, ttbr1_el1) = Loader::aarch64_setup_pagetables( + config, + &loader_elf, + kernel_first_vaddr, + kernel_first_paddr, + page_tables_paddr_start, + &mut page_table_bytes, + ); + + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "aarch64_pt_ttbr0_el2", + ttbr0_el2 + ); + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "aarch64_pt_ttbr0_el1", + ttbr0_el1 + ); + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "aarch64_pt_ttbr1_el1", + ttbr1_el1 + ); + } + Arch::Riscv64 => { + let boot_lvl1_pt = Loader::riscv64_setup_pagetables( + config, + &loader_elf, + kernel_first_vaddr, + kernel_first_paddr, + page_tables_paddr_start, + &mut page_table_bytes, + ); + write_symbol!( + loader_image, + image_vaddr, + loader_elf, + "riscv64_boot_lvl1_pt", + boot_lvl1_pt + ); + } + Arch::X86_64 => unreachable!("x86_64 does not support creating a loader image"), + }; + + let size = partial_size + page_table_bytes.len() as u64; + + let mut all_regions_with_loader: Vec<_> = regions + .iter() + .map(|&(base, data)| (base, data.len() as u64)) + .collect(); + all_regions_with_loader.push((image_vaddr, size)); + check_non_overlapping(&all_regions_with_loader); + + // TODO: Check contained within real RAM. let header = LoaderHeader64 { magic, @@ -516,6 +601,7 @@ impl<'a> Loader<'a> { header, region_metadata, regions, + page_table_bytes, word_size: kernel_elf.word_size, elf_machine: kernel_elf.machine, entry: loader_elf.entry, @@ -539,6 +625,10 @@ impl<'a> Loader<'a> { bytes.extend_from_slice(data); } + bytes.extend_from_slice(&self.page_table_bytes); + + assert!(bytes.len() as u64 == self.header.size); + bytes } @@ -603,95 +693,194 @@ impl<'a> Loader<'a> { } } + /// RISC-V 64 page tables for our purposes uses the Sv39 translation scheme + /// (3-level page tables). + /// + /// It is split into two halves: the Upper/Kernel part of the page tables, + /// which matches the format seL4 expects. The lower half contains an + /// identity mapped region for the loader. + /// + /// ```txt + /// (512 GiB) + /// 512 +---- Level 1 ---+ 2^39 + /// | | + /// | (empty) | + /// | | + /// k+1 +----------------+ (1 GiB) + /// | Level 2 Kernel | ----------> +---- Level 2 ---+ +-------------+ + /// k +----------------+ | | ----------> | 2 MiB block | + /// | | 511 |----------------| +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | 510 |----------------| +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | |----------------| +------------- + /// | | (...) (...) (...) Kernel Regions + /// | | |----------------| +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | l+1 |----------------| +-------------+ + /// | | | Level 3 Kernel | ----+ + /// | | l |----------------| | + /// | | | | | (2 MiB) + /// | | | | +-----> +-- Level 3 --+ +------------+ + /// | | | | | | ----------> | 4 KiB page | + /// | | | | 511 |-------------| +------------+ + /// | | | (empty) | | | ----------> | 4 KiB page | + /// | (empty) | | | |-------------| +------------+ + /// | | | | | | ----------> | 4 KiB page | + /// | | | | m |-------------| +------------+ p + /// | | | | | (empty) | + /// | | | | +-------------+ + /// | | | | + /// | | 0 +----------------+ + /// | | + /// | | + /// | | + /// | | + /// | | + /// s+1 +----------------+ (1 GiB) + /// | Level 2 Loader | ----------> +-- Level 2 --+ +-------------+ + /// s +----------------+ | | ----------> | 2 MiB block | + /// | | 511 +-------------+ +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | (empty) | 510 +-------------+ +-------------+ + /// | | | | ----------> | 2 MiB block | + /// | | |-------------| +-------------+ + /// 0 +----------------+ | | ----------> | 2 MiB block | + /// |-------------| +-------------+ + /// (...) (...) (...) Loader Regions + /// |-------------| +-------------+ + /// | | ----------> | 2 MiB block | + /// |-------------| +-------------+ + /// | | ----------> | 2 MiB block | + /// t +-------------+ +-------------+ + /// | | + /// | (empty) | + /// | | + /// +-------------+ + /// + /// + /// Where: + /// k = align_down(kernel_first_vaddr, 1GiB), + /// l = align_down(kernel_first_vaddr, 2MiB), + /// m = align_down(kernel_first_vaddr, 4KiB), + /// p = align_down(kernel_first_paddr, 4KiB), + /// + /// s = align_down(text_addr, 1GiB), + /// t = align_down(text_addr, 2MiB), + /// ``` + /// fn riscv64_setup_pagetables( config: &Config, elf: &ElfFile, - first_vaddr: u64, - first_paddr: u64, - ) -> Vec<(u64, u64, [u8; PAGE_TABLE_SIZE])> { + kernel_first_vaddr: u64, + kernel_first_paddr: u64, + page_tables_paddr_start: u64, + page_table_bytes: &mut Vec, + ) -> u64 { + use riscv64::{pt_index, pte_leaf, pte_next, BLOCK_BITS_1GB, BLOCK_BITS_2MB, PAGE_BITS_4K}; + let (text_addr, _) = grab_symbol!(elf, "_text"); - let (boot_lvl1_pt_addr, boot_lvl1_pt_size) = grab_symbol!(elf, "boot_lvl1_pt"); - let (boot_lvl2_pt_addr, boot_lvl2_pt_size) = grab_symbol!(elf, "boot_lvl2_pt"); - let (boot_lvl3_pt_addr, boot_lvl3_pt_size) = grab_symbol!(elf, "boot_lvl3_pt"); - let (boot_lvl2_pt_loader_addr, boot_lvl2_pt_loader_size) = - grab_symbol!(elf, "boot_lvl2_pt_loader"); // We map the loader using 2MB pages, so make sure the base is actually aligned. - assert!(text_addr.is_multiple_of(1 << riscv64::BLOCK_BITS_2MB)); + assert!(text_addr.is_multiple_of(1 << BLOCK_BITS_2MB)); + + const PAGE_TABLE_ENTRIES: usize = PAGE_TABLE_SIZE / mem::size_of::(); + + let mut serialise_page_table_to_paddr = { + let page_tables_paddr_start = { + let aligned_pt_paddr_start = + page_tables_paddr_start.next_multiple_of(PAGE_TABLE_SIZE as u64); + if aligned_pt_paddr_start != page_tables_paddr_start { + let alignment_diff = + (aligned_pt_paddr_start - page_tables_paddr_start) as usize; + page_table_bytes.resize(alignment_diff, 0); + } - let num_pt_levels = config.riscv_pt_levels.unwrap().levels(); + aligned_pt_paddr_start + }; - let mut boot_lvl1_pt: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let text_index_lvl1 = riscv64::pt_index(num_pt_levels, text_addr, 1); - let pt_entry = riscv64::pte_next(boot_lvl2_pt_loader_addr); - let start = 8 * text_index_lvl1; - let end = start + 8; - boot_lvl1_pt[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + // This maintains the current end of the PT array. + let mut next_pt_paddr = page_tables_paddr_start; - let mut boot_lvl2_pt_loader: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let text_index_lvl2 = riscv64::pt_index(num_pt_levels, text_addr, 2); - for (page, i) in (text_index_lvl2..512).enumerate() { - let start = 8 * i; - let end = start + 8; - let addr = text_addr + ((page as u64) << riscv64::BLOCK_BITS_2MB); - let pt_entry = riscv64::pte_leaf(addr); - boot_lvl2_pt_loader[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + move |page_table: &mut [u64; PAGE_TABLE_ENTRIES]| -> u64 { + let pt_paddr = next_pt_paddr; + page_table_bytes.extend(page_table.iter().flat_map(|pte| pte.to_le_bytes())); + next_pt_paddr += PAGE_TABLE_SIZE as u64; + page_table.fill(0); + pt_paddr } - } + }; - { - let index = riscv64::pt_index(num_pt_levels, first_vaddr, 1); - let start = 8 * index; - let end = start + 8; - boot_lvl1_pt[start..end] - .copy_from_slice(&riscv64::pte_next(boot_lvl2_pt_addr).to_le_bytes()); - } + let num_pt_levels = config.riscv_pt_levels.unwrap().levels(); + assert!(num_pt_levels == 3); + + // Manufacture the constants as per the diagram. + let k = align_down(kernel_first_vaddr, BLOCK_BITS_1GB); + let l = align_down(kernel_first_vaddr, BLOCK_BITS_2MB); + let m = align_down(kernel_first_vaddr, PAGE_BITS_4K); + let p = align_down(kernel_first_paddr, PAGE_BITS_4K); + + let s = align_down(text_addr, BLOCK_BITS_1GB); + let t = align_down(text_addr, BLOCK_BITS_2MB); + + // Manufacture the kernel page tables + let kernel_lvl2_pt_paddr = { + let mut lvl2_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + + let mut paddr = p; + let index_l = pt_index(num_pt_levels, l, 2); - let mut boot_lvl3_pt: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - let mut boot_lvl2_pt: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let mut index_lvl2 = riscv64::pt_index(num_pt_levels, first_vaddr, 2); - if !first_vaddr.is_multiple_of(1 << riscv64::BLOCK_BITS_2MB) { - let index_lvl3 = riscv64::pt_index(num_pt_levels, first_vaddr, 3); - for (page, i) in (index_lvl3..512).enumerate() { - let start = 8 * i; - let end = start + 8; - let addr = first_paddr + ((page as u64) << riscv64::PAGE_BITS_4K); - assert!(addr.is_multiple_of(1 << riscv64::PAGE_BITS_4K)); - let pt_entry = riscv64::pte_leaf(addr); - boot_lvl3_pt[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + lvl2_pt_kernel[index_l] = if kernel_first_vaddr.is_multiple_of(1 << BLOCK_BITS_2MB) { + assert!(paddr.is_multiple_of(1 << BLOCK_BITS_2MB)); + let pte = pte_leaf(paddr); + paddr += 1 << BLOCK_BITS_2MB; + pte + } else { + let mut lvl3_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + + let index_m = pt_index(num_pt_levels, m, 3); + + for index in index_m..512 { + lvl3_pt_kernel[index] = pte_leaf(paddr); + paddr += 1 << PAGE_BITS_4K; } - let start = 8 * index_lvl2; - let end = start + 8; - let lvl3_pt_entry = riscv64::pte_next(boot_lvl3_pt_addr); - assert!(boot_lvl3_pt_addr.is_multiple_of(1 << riscv64::PAGE_BITS_4K)); - boot_lvl2_pt[start..end].copy_from_slice(&lvl3_pt_entry.to_le_bytes()); - index_lvl2 += 1; + + let kernel_lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt_kernel); + pte_next(kernel_lvl3_pt_paddr) + }; + + for index in (index_l + 1)..512 { + lvl2_pt_kernel[index] = pte_leaf(paddr); + paddr += 1 << BLOCK_BITS_2MB; } - let first_paddr_aligned = round_up(first_paddr, 1 << riscv64::BLOCK_BITS_2MB); - for (page, i) in (index_lvl2..512).enumerate() { - let start = 8 * i; - let end = start + 8; - let addr = first_paddr_aligned + ((page as u64) << riscv64::BLOCK_BITS_2MB); - assert!(addr.is_multiple_of(1 << riscv64::BLOCK_BITS_2MB)); - let pt_entry = riscv64::pte_leaf(addr); - boot_lvl2_pt[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + + serialise_page_table_to_paddr(&mut lvl2_pt_kernel) + }; + + // Manufacture the loader page tables, which is relatively straightforward + let loader_lvl2_pt_paddr = { + let mut lvl2_pt_loader = [0u64; PAGE_TABLE_ENTRIES]; + + // Identity mapped, so vaddr == paddr. + let mut paddr = t; + + for index in pt_index(num_pt_levels, t, 2)..512 { + lvl2_pt_loader[index] = pte_leaf(paddr); + paddr += 1 << BLOCK_BITS_2MB; } - } - vec![ - (boot_lvl1_pt_addr, boot_lvl1_pt_size, boot_lvl1_pt), - (boot_lvl2_pt_addr, boot_lvl2_pt_size, boot_lvl2_pt), - (boot_lvl3_pt_addr, boot_lvl3_pt_size, boot_lvl3_pt), - ( - boot_lvl2_pt_loader_addr, - boot_lvl2_pt_loader_size, - boot_lvl2_pt_loader, - ), - ] + serialise_page_table_to_paddr(&mut lvl2_pt_loader) + }; + + // Manufacture the Level 1 table + let mut boot_lvl1_pt = [0u64; PAGE_TABLE_ENTRIES]; + + let index_s = pt_index(num_pt_levels, s, 1); + let index_k = pt_index(num_pt_levels, k, 1); + boot_lvl1_pt[index_k] = pte_next(kernel_lvl2_pt_paddr); + boot_lvl1_pt[index_s] = pte_next(loader_lvl2_pt_paddr); + + serialise_page_table_to_paddr(&mut boot_lvl1_pt) } /// AArch64 loader page tables have two variations: @@ -744,35 +933,8 @@ impl<'a> Loader<'a> { /// | | /// 1 +-------------+ (512 GiB) /// | Level 1 Lwr | ----------> +-- Level 1 --+ - /// 0 +-------------+ | | - /// | (empty) | - /// | | - /// u+1 +-------------+ +-------------+ - /// | uart_base | ----------> | 1 GiB block | - /// u +-------------+ +-------------+ - /// | | - /// | (empty) | - /// | | - /// i+1 +-------------+ (1 GiB) - /// | Level 2 Lwr | ----------> +-- Level 2 --+ - /// i +-------------+ | | - /// | | | (empty) | - /// | (empty) | | | - /// | | t +-------------+ +-------------+ - /// +-------------+ | | ----------> | 2 MiB block | - /// |-------------| +-------------+ - /// | | ----------> | 2 MiB block | - /// |-------------| +-------------+ - /// Loader Regions (...) (...) (...) - /// |-------------| +-------------+ - /// | | ----------> | 2 MiB block | - /// |-------------| +-------------+ - /// | | ----------> | 2 MiB block | - /// s +-------------+ +-------------+ - /// | | - /// | (empty) | - /// | | - /// +-------------+ + /// 0 +-------------+ TODO: RAM. + /// /// /// Where: /// k = align_down(kernel_first_vaddr, 512GiB), @@ -780,131 +942,442 @@ impl<'a> Loader<'a> { /// m = align_down(kernel_first_vaddr, 2MiB), /// p = align_down(kernel_first_paddr, 2MiB), /// u = align_down(uart_base, 1GiB), - /// i = align_down(loader_start_addr, 1GiB), - /// s = align_down(loader_start_addr, 2MiB), - /// t = align_up(loader_end_addr, 2MiB), /// ``` /// fn aarch64_setup_pagetables( - _config: &Config, + config: &Config, elf: &ElfFile, - first_vaddr: u64, - first_paddr: u64, - ) -> Vec<(u64, u64, [u8; PAGE_TABLE_SIZE])> { - use aarch64::s1_mair_attr_index::{MT_DEVICE_nGnRnE, MT_NORMAL}; - - let (boot_lvl1_lower_addr, boot_lvl1_lower_size) = grab_symbol!(elf, "boot_lvl1_lower"); - let (boot_lvl1_upper_addr, boot_lvl1_upper_size) = grab_symbol!(elf, "boot_lvl1_upper"); - let (boot_lvl2_upper_addr, boot_lvl2_upper_size) = grab_symbol!(elf, "boot_lvl2_upper"); - let (boot_lvl0_lower_addr, boot_lvl0_lower_size) = grab_symbol!(elf, "boot_lvl0_lower"); - let (boot_lvl0_upper_addr, boot_lvl0_upper_size) = grab_symbol!(elf, "boot_lvl0_upper"); - let (boot_lvl2_lower_addr, boot_lvl2_lower_size) = grab_symbol!(elf, "boot_lvl2_lower"); - - let (loader_start_addr, _) = grab_symbol!(elf, "_loader_start"); - let (loader_end_addr, _) = grab_symbol!(elf, "_loader_end"); - - if aarch64::lvl1_index(loader_start_addr) != aarch64::lvl1_index(loader_end_addr) { - panic!("We only map 1GiB, but loader paddr range covers multiple GiB"); - } + kernel_first_vaddr: u64, + kernel_first_paddr: u64, + page_tables_paddr_start: u64, + page_table_bytes: &mut Vec, + ) -> (u64, u64, u64) { + use aarch64::{ + block_descriptor, lvl0_index, lvl1_index, lvl2_index, lvl3_index, page_descriptor, + s1_mair_attr_index::{MT_DEVICE_nGnRnE, MT_NORMAL}, + table_descriptor, BLOCK_BITS_1GB, BLOCK_BITS_2MB, BLOCK_BITS_512GB, PAGE_BITS_4KB, + }; - let mut boot_lvl0_lower: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let pt_entry = aarch64::table_descriptor(boot_lvl1_lower_addr); - boot_lvl0_lower[..8].copy_from_slice(&pt_entry.to_le_bytes()); - } + const PAGE_TABLE_ENTRIES: usize = PAGE_TABLE_SIZE / mem::size_of::(); - let mut boot_lvl1_lower: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; + let mut serialise_page_table_to_paddr = { + let page_tables_paddr_start = { + let aligned_pt_paddr_start = + page_tables_paddr_start.next_multiple_of(PAGE_TABLE_SIZE as u64); + if aligned_pt_paddr_start != page_tables_paddr_start { + let alignment_diff = + (aligned_pt_paddr_start - page_tables_paddr_start) as usize; + page_table_bytes.resize(alignment_diff, 0); + } - // map optional UART MMIO in l1 1GB page, only available if CONFIG_PRINTING - if let Ok((uart_addr, uart_addr_size)) = elf.find_symbol("uart_addr") { - let data = elf - .get_data(uart_addr, uart_addr_size) - .expect("uart_addr not initialized"); + aligned_pt_paddr_start + }; - let uart_base = u64::from_le_bytes(data[0..8].try_into().unwrap()); + // This maintains the current end of the PT array. + let mut next_pt_paddr = page_tables_paddr_start; - let lvl1_idx = aarch64::lvl1_index(uart_base); + move |page_table: &mut [u64; PAGE_TABLE_ENTRIES]| -> u64 { + let pt_paddr = next_pt_paddr; + page_table_bytes.extend(page_table.iter().flat_map(|pte| pte.to_le_bytes())); + next_pt_paddr += PAGE_TABLE_SIZE as u64; + page_table.fill(0); + pt_paddr + } + }; - let pt_entry = aarch64::block_descriptor(1, uart_base, MT_DEVICE_nGnRnE); + let identity_mapped_regions = { + let ram_regions = config + .normal_regions + .as_ref() + .expect("AArch64 should have normal_regions"); + + // println!("{:#x?}", ram_regions); + + let mut regions: Vec<_> = ram_regions + .iter() + .cloned() + .map(|region| (region, MT_NORMAL)) + .collect(); + + // FIXME: Derive from the kernel build system. + if let Some(uart_base) = read_symbol_maybe(elf, "uart_addr") { + let uart_base = align_down(uart_base, PAGE_BITS_4KB); + regions.push(( + PlatformConfigRegion { + start: uart_base, + end: uart_base + (1 << PAGE_BITS_4KB), + }, + MT_DEVICE_nGnRnE, + )); + } - let start = 8 * lvl1_idx; - let end = 8 * (lvl1_idx + 1); - boot_lvl1_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + // FIXME: This is currently assuming implementation details of the BCM2711/ + // Raspberry Pi 4B spin table implementation, as it is the only + // platform we have that uses spin tables. Specifically, that + // it is always located at the 0 page. + if elf.find_symbol("cpus_release_addr").is_ok() { + regions.push(( + PlatformConfigRegion { + start: 0x0, + end: 1 << PAGE_BITS_4KB, + }, + MT_DEVICE_nGnRnE, + )); + } - let mut boot_lvl2_lower: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; + regions.sort_by_key(|(region, _)| region.start); - // 1GB lvl1 Table entry - let pt_entry = aarch64::table_descriptor(boot_lvl2_lower_addr); - let lvl1_idx = aarch64::lvl1_index(loader_start_addr); - let start = 8 * lvl1_idx; - let end = 8 * (lvl1_idx + 1); - boot_lvl1_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); + regions + }; - // map the loader 1:1 access into 2MB lvl2 Block entries for a 4KB granule - let lvl2_idx = aarch64::lvl2_index(loader_start_addr); - for i in lvl2_idx..=aarch64::lvl2_index(loader_end_addr) { - let entry_idx: u64 = - ((i - aarch64::lvl2_index(loader_start_addr)) << aarch64::BLOCK_BITS_2MB) as u64; + // Manufacture the constants as per the diagram. + let k = align_down(kernel_first_vaddr, BLOCK_BITS_512GB); + let l = align_down(kernel_first_vaddr, BLOCK_BITS_1GB); + let m = align_down(kernel_first_vaddr, BLOCK_BITS_2MB); + let p = align_down(kernel_first_paddr, BLOCK_BITS_2MB); + + // Manufacture the kernel page tables, which is relatively straightforward. + let kernel_lvl1_pt_paddr = { + // First, the Level 2 Upr table. + let lvl2_pt_paddr = { + let mut lvl2_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + + let mut vaddr = m; + let mut paddr = p; + while lvl1_index(m) == lvl1_index(vaddr) { + lvl2_pt_kernel[lvl2_index(vaddr)] = block_descriptor(2, paddr, MT_NORMAL); + + vaddr += 1 << BLOCK_BITS_2MB; + paddr += 1 << BLOCK_BITS_2MB; + } - let pt_entry = - aarch64::block_descriptor(2, loader_start_addr + entry_idx, MT_DEVICE_nGnRnE); + serialise_page_table_to_paddr(&mut lvl2_pt_kernel) + }; - let start = 8 * i; - let end = 8 * (i + 1); - boot_lvl2_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + // Then, the Level 1 Upr table. + let mut lvl1_pt_kernel = [0u64; PAGE_TABLE_ENTRIES]; + lvl1_pt_kernel[lvl1_index(l)] = table_descriptor(lvl2_pt_paddr); - // TODO: this is a complete hack specific to BCM2711/Raspberry Pi 4B and - // will be removed with patches that re-do this loader mapping code. - if elf.find_symbol("cpus_release_addr").is_ok() { - let lvl2_idx = aarch64::lvl2_index(0); - // Make sure we don't override the loader mappings done above. - assert!(aarch64::lvl2_index(loader_start_addr) != lvl2_idx); - assert!(aarch64::lvl1_index(loader_start_addr) == aarch64::lvl1_index(0)); + serialise_page_table_to_paddr(&mut lvl1_pt_kernel) + }; - let pt_entry = aarch64::block_descriptor(2, lvl2_idx as u64, MT_DEVICE_nGnRnE); + // Manufacture the RAM page tables, which is a little bit more complicated. + // We assume that normal RAM lies between 0 <= paddr < 512GiB, i.e. + // that lvl0_index(any ram region addr) = 0. + let ram_lvl1_pt_paddr = { + // Validation of assumptions about the identity mapped regions. + let mut previous_end = None; + for (region, _) in identity_mapped_regions.iter() { + assert!(lvl0_index(region.start) == 0); + assert!(lvl0_index(region.end - 1) == 0); + // This is probably an unnecessary assumption. + assert!(region.start.is_multiple_of(4096)); + assert!(region.end.is_multiple_of(4096)); + // This is definitely necessary. + assert!(region.start >= previous_end.unwrap_or(0)); + previous_end = Some(region.end); + } - let start = 8 * lvl2_idx; - let end = 8 * (lvl2_idx + 1); - boot_lvl2_lower[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + // We maintain three active page tables, which contain our previous + // known page table data. As we process regions in ascending order, + // once we have exceeded the bounds of the current reservation we + // can simply push to the page_table_bytes storage and insert into + // the parent PT the descriptor. + // When the current vaddr (/paddr, as identity mapped) exceeds the + // top value we rotate to a new PT. + + let mut lvl1_pt = [0u64; PAGE_TABLE_ENTRIES]; + let mut lvl2_pt = [0u64; PAGE_TABLE_ENTRIES]; + let mut lvl3_pt = [0u64; PAGE_TABLE_ENTRIES]; + // TODO: These should be defines. Note that the top is the size of 1 level of the next level up. + // TODO: LVL1_ENTRY_RANGE? idk + #[allow(unused_mut)] + let mut lvl1_vaddr_top = 1 << BLOCK_BITS_512GB; + let mut lvl2_vaddr_top = 1 << BLOCK_BITS_1GB; + let mut lvl3_vaddr_top = 1 << BLOCK_BITS_2MB; + + // TODO: Tests... + // This is similar to aligned_power_of_two_regions() for the kernel UT, + // but we restrict it such that the output always is either 1GB, 2MB, or 4KB + // pages. + + // Allowed externally for the final iteration + let mut base = 0u64; + for &(ref region, attr_index) in identity_mapped_regions.iter() { + // println!("RAM Region: {:#x}..{:#x}", base, region.end); + // println!( + // " - Current Lvl1: {:#x}..{:#x}, entries: {}", + // (lvl1_vaddr_top - (1 << BLOCK_BITS_512GB)), + // lvl1_vaddr_top, + // lvl1_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl2: {:#x}..{:#x}, entries: {}", + // (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), + // lvl2_vaddr_top, + // lvl2_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl3: {:#x}..{:#x}, entries: {}", + // (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB)), + // lvl3_vaddr_top, + // lvl3_pt.iter().filter(|&&v| v != 0).count() + // ); + + // Handle the fact that the regions are not contiguous and that + // we might need to skip PT. - let boot_lvl0_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let pt_entry = aarch64::table_descriptor(boot_lvl1_upper_addr); - let idx = aarch64::lvl0_index(first_vaddr); - boot_lvl0_lower[8 * idx..8 * (idx + 1)].copy_from_slice(&pt_entry.to_le_bytes()); - } + { + if region.start >= lvl3_vaddr_top { + if lvl3_pt != [0; _] { + let lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt); + // println!("[iter] Serialise lvl3 table: {lvl3_pt_paddr:#x} for to {:#x}..{lvl3_vaddr_top:#x}", (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB))); + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = table_descriptor(lvl3_pt_paddr); + } + + // TODO: just compute it. + while region.start >= lvl3_vaddr_top { + lvl3_vaddr_top += 1 << BLOCK_BITS_2MB; + } + } - let mut boot_lvl1_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; - { - let pt_entry = aarch64::table_descriptor(boot_lvl2_upper_addr); - let idx = aarch64::lvl1_index(first_vaddr); - boot_lvl1_upper[8 * idx..8 * (idx + 1)].copy_from_slice(&pt_entry.to_le_bytes()); - } + if region.start >= lvl2_vaddr_top { + if lvl2_pt != [0; _] { + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("[iter] Serialise lvl2 table: {lvl2_pt_paddr:#x} for to {:#x}..{lvl2_vaddr_top:#x}, base: {:#x} lvl1_index(base): {:#x}", (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), base, lvl1_index(base)); + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + } + + // TODO: just compute it. + while region.start >= lvl2_vaddr_top { + lvl2_vaddr_top += 1 << BLOCK_BITS_1GB; + } + } - let mut boot_lvl2_upper: [u8; PAGE_TABLE_SIZE] = [0; PAGE_TABLE_SIZE]; + if region.start >= lvl1_vaddr_top { + unreachable!( + "impossible as everything should fit here: {lvl1_vaddr_top:#x}" + ); + } + } - let lvl2_idx = aarch64::lvl2_index(first_vaddr); - for i in lvl2_idx..512 { - let entry_idx: u64 = - ((i - aarch64::lvl2_index(first_vaddr)) << aarch64::BLOCK_BITS_2MB) as u64; + // After serialising the old base, update the new one. + base = region.start; + + // Inner Loop: + // Invariant: the page tables in lvl1_pt, lvl2_pt, lvl3_pt + // are either (1) for the current address range, + // or (2) are empty and for a lower level than the current level. + // Also, the values in lvlXXX_vaddr_top are always correct (even if empty) + // Also contiguous within the loop. + // Loop entry: (1) holds by work at the start of each region + while base != region.end { + // Condition is !=, but assert that we never skip it. + assert!(base < region.end); + + let size_bits = region.end.wrapping_sub(base).ilog2(); + let align_bits = min( + size_bits, + // FIXME: Once MSRV is > 1.97, use .lowest_one() method. + if base == 0 { + size_bits + } else { + base.trailing_zeros() + }, + ); + + // Match the size and alignment of the current region to + // the valid PT region sizes. + let (level, bits) = match u64::from(align_bits) { + BLOCK_BITS_1GB.. => (1, BLOCK_BITS_1GB), + BLOCK_BITS_2MB.. => (2, BLOCK_BITS_2MB), + PAGE_BITS_4KB.. => (3, PAGE_BITS_4KB), + 0.. => panic!("impossible; regions should be aligned to 4K at least"), + }; + + let pt_region_size = 1u64 << bits; + let top = base + pt_region_size; + + // println!("- Aligned PT region: {:#x}..{:#x} (size_bits: {}, align_bits: {}, bits: {})", base, top, size_bits, align_bits, bits); + // println!( + // " - Current Lvl1: {:#x}..{:#x}, entries: {}", + // (lvl1_vaddr_top - (1 << BLOCK_BITS_512GB)), + // lvl1_vaddr_top, + // lvl1_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl2: {:#x}..{:#x}, entries: {}", + // (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), + // lvl2_vaddr_top, + // lvl2_pt.iter().filter(|&&v| v != 0).count() + // ); + // println!( + // " - Current Lvl3: {:#x}..{:#x}, entries: {}", + // (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB)), + // lvl3_vaddr_top, + // lvl3_pt.iter().filter(|&&v| v != 0).count() + // ); + + match level { + 1 => { + // If it belongs in Level 1 PT, then it must go in + // lvl1 pt. By the inavariant, base < lvl1_vaddr_top. + assert!(base < lvl1_vaddr_top); + // top is <= lvl1_vaddr_top (the case where it is the topmost entry) + assert!(top <= lvl1_vaddr_top); + + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = block_descriptor(1, base, attr_index); + + if top == lvl1_vaddr_top { + // Invariant maintenance: if the new top would be now equal + // the end of the page table's region top, we need a new + // page table object and add it to the list. + + // This should be possible to handle - we just need to break out of this loop + todo!("handle the case where top of lvl1 is occupied - this would be near the top of 512GiB"); + } + + // Invariant: Lower levels are empty. + assert!(lvl2_pt == [0; _]); + assert!(lvl3_pt == [0; _]); + // Invariant maintenance: vaddr_top is right range for current PT. + // it's empty so we need to increment the top to be current top (1G aligned) + 2MIB (512 lvl3 entries) + lvl3_vaddr_top = top + (1 << BLOCK_BITS_2MB); + // it's empty so we need to increment the top to be current top (1G aligned) + 1G (512 lvl2 entries) + lvl2_vaddr_top = top + (1 << BLOCK_BITS_1GB); + } + 2 => { + // If it is a 2MiB block, it must go in the Level 2 PT; + // by our invariants: base < lvl2_vaddr_top and top <= lvl2_vaddr_top + assert!(base < lvl2_vaddr_top); + assert!(top <= lvl2_vaddr_top); + + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = block_descriptor(2, base, attr_index); + + if top == lvl2_vaddr_top { + // Invariant maintenance: keep for current address range. + // As we're the top of the range, we can serialise the table. + + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("Serialise lvl2 table: {lvl2_pt_paddr:#x} up to {lvl2_vaddr_top:#x}"); + lvl2_vaddr_top += 1 << BLOCK_BITS_1GB; + + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + + if top == lvl1_vaddr_top { + todo!("handle the case where top of lvl1 is occupied - this would be near the top of 512GiB"); + } + } + + // Invariant: Lower levels are empty. + assert!(lvl3_pt == [0; _]); + // Invariant maintenance: vaddr_top is right range for current PT. + // it's empty so we need to increment the top to be current top (2MIB aligned) + 2MIB (512 lvl3 entries) + lvl3_vaddr_top = top + (1 << BLOCK_BITS_2MB); + } + 3 => { + // If it is a 4K page, it must go in the Level 3 PT; + // by our invariants: base < lvl3_vaddr_top and top <= lvl3_vaddr_top + assert!(base < lvl3_vaddr_top); + assert!(top <= lvl3_vaddr_top); + + assert!(lvl3_pt[lvl3_index(base)] == 0); + lvl3_pt[lvl3_index(base)] = page_descriptor(base, attr_index); + + if top == lvl3_vaddr_top { + // Invariant maintenance: keep for current address range. + // As we're the top of the range, we can serialise the table. + + let lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt); + // println!("Serialise lvl3 table: {lvl3_pt_paddr:#x} for to {:#x}..{lvl3_vaddr_top:#x}", (lvl3_vaddr_top - (1 << BLOCK_BITS_2MB))); + lvl3_vaddr_top += 1 << BLOCK_BITS_2MB; + + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = table_descriptor(lvl3_pt_paddr); + + if top == lvl2_vaddr_top { + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("Serialise lvl2 table: {lvl2_pt_paddr:#x} for to {:#x}..{lvl2_vaddr_top:#x}", (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB))); + lvl2_vaddr_top += 1 << BLOCK_BITS_1GB; + + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + + if top == lvl1_vaddr_top { + todo!("handle the case where top of lvl1 is occupied - this would be near the top of 512GiB"); + } + } + } + + // Invariant: lower levels empty is vacuuously true + } + _ => unreachable!("level is 1..=3"), + } - let pt_entry = aarch64::block_descriptor(2, first_paddr + entry_idx, MT_NORMAL); + base = base + pt_region_size; + } + } - let start = 8 * i; - let end = 8 * (i + 1); - boot_lvl2_upper[start..end].copy_from_slice(&pt_entry.to_le_bytes()); - } + // By the loop invariant, we know that anything before has been serialised. + // However, as we are at the end of the loop now, we might have + // page tables that have been partially filled out, and we need to + // serialise these. + + if lvl3_pt != [0; _] { + let lvl3_pt_paddr = serialise_page_table_to_paddr(&mut lvl3_pt); + // println!("[end] Serialise lvl3 table: {lvl3_pt_paddr:#x}"); + assert!(lvl2_pt[lvl2_index(base)] == 0); + lvl2_pt[lvl2_index(base)] = table_descriptor(lvl3_pt_paddr); + } - vec![ - (boot_lvl0_lower_addr, boot_lvl0_lower_size, boot_lvl0_lower), - (boot_lvl1_lower_addr, boot_lvl1_lower_size, boot_lvl1_lower), - (boot_lvl0_upper_addr, boot_lvl0_upper_size, boot_lvl0_upper), - (boot_lvl1_upper_addr, boot_lvl1_upper_size, boot_lvl1_upper), - (boot_lvl2_upper_addr, boot_lvl2_upper_size, boot_lvl2_upper), - (boot_lvl2_lower_addr, boot_lvl2_lower_size, boot_lvl2_lower), - ] + if lvl2_pt != [0; _] { + let lvl2_pt_paddr = serialise_page_table_to_paddr(&mut lvl2_pt); + // println!("[end] Serialise lvl2 table: {lvl2_pt_paddr:#x} for to {:#x}..{lvl2_vaddr_top:#x}, base: {:#x} lvl1_index(base): {:#x}", (lvl2_vaddr_top - (1 << BLOCK_BITS_1GB)), base, lvl1_index(base)); + assert!(lvl1_pt[lvl1_index(base)] == 0); + lvl1_pt[lvl1_index(base)] = table_descriptor(lvl2_pt_paddr); + } + + // the level1 pt should not be empty. lol. + assert!(lvl1_pt != [0; _]); + + // println!("New lvl1 table"); + serialise_page_table_to_paddr(&mut lvl1_pt) + }; + + // Depending on whether we are in hypervisor mode, we either need to + // return the TTBR0_EL2 or TTBR[0,1]_EL1 values. We return u64::MAX + // so as to return garbage - an unaligned address outside of physical + // memory. + if config.hypervisor { + // Manufacture the Level 0 table, containing the kernel table + // and the RAM tables. + + let mut ttbr0_el2_pt = [0u64; PAGE_TABLE_ENTRIES]; + + assert!(lvl0_index(k) != lvl0_index(0)); + ttbr0_el2_pt[lvl0_index(k)] = table_descriptor(kernel_lvl1_pt_paddr); + ttbr0_el2_pt[lvl0_index(0)] = table_descriptor(ram_lvl1_pt_paddr); + + let ttbr0_el2 = serialise_page_table_to_paddr(&mut ttbr0_el2_pt); + + (ttbr0_el2, u64::MAX, u64::MAX) + } else { + let mut ttbr0_el1_pt = [0u64; PAGE_TABLE_ENTRIES]; + let mut ttbr1_el1_pt = [0u64; PAGE_TABLE_ENTRIES]; + + // Kernel in TTBR1 (Upper) + ttbr1_el1_pt[lvl0_index(k)] = table_descriptor(kernel_lvl1_pt_paddr); + // Loader in TTBR0 (Lower) + ttbr0_el1_pt[lvl0_index(0)] = table_descriptor(ram_lvl1_pt_paddr); + + let ttbr0_el1 = serialise_page_table_to_paddr(&mut ttbr0_el1_pt); + let ttbr1_el1 = serialise_page_table_to_paddr(&mut ttbr1_el1_pt); + + (u64::MAX, ttbr0_el1, ttbr1_el1) + } } } diff --git a/tool/microkit/src/sel4.rs b/tool/microkit/src/sel4.rs index 641fa63c6..e43363699 100644 --- a/tool/microkit/src/sel4.rs +++ b/tool/microkit/src/sel4.rs @@ -249,7 +249,7 @@ pub fn emulate_kernel_boot( } } -#[derive(Deserialize)] +#[derive(Deserialize, Debug, Clone)] pub struct PlatformConfigRegion { pub start: u64, pub end: u64, diff --git a/tool/microkit/src/util.rs b/tool/microkit/src/util.rs index 6f2c0e275..211c794d6 100644 --- a/tool/microkit/src/util.rs +++ b/tool/microkit/src/util.rs @@ -54,6 +54,14 @@ pub const fn round_down(n: u64, x: u64) -> u64 { } } +pub const fn align_up(n: u64, bits: u64) -> u64 { + round_up(n, 1 << bits) +} + +pub const fn align_down(n: u64, bits: u64) -> u64 { + round_down(n, 1 << bits) +} + pub fn is_power_of_two(n: u64) -> bool { assert!(n > 0); n & (n - 1) == 0