seL4 · dreamliner787-9 · Mar 25, 2026 · May 21, 2026 · Jun 4, 2026 · midnightveil
diff --git a/docs/manual.md b/docs/manual.md
@@ -649,6 +649,43 @@ To find the full list of possible faults that could occur and details regarding
 kind of fault, please see the 'Faults' section of the
 [seL4 reference manual](https://sel4.systems/Info/Docs/seL4-manual-latest.pdf).
 
+### x86 VCPU fault
+Please see the 'VMX BASIC EXIT REASONS' section of the
+[Intel® 64 and IA-32 Architectures Software Developer’s Manual Combined Volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D,
+and 4](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html) for a list of possible VM Exit reasons.
+
+These message registers contain data relating to the VM Exit:
+- `SEL4_VMENTER_CALL_EIP_MR`: Instruction Pointer,
+- `SEL4_VMENTER_CALL_CONTROL_PPC_MR`: Primary Processor Based VM Execution Controls,
+- `SEL4_VMENTER_CALL_INTERRUPT_INFO_MR`: VM Entry Interruption-Information,
+- `SEL4_VMENTER_FAULT_REASON_MR`: VM Exit reason,
+- `SEL4_VMENTER_FAULT_QUALIFICATION_MR`: VM Exit qualification,
+- `SEL4_VMENTER_FAULT_INSTRUCTION_LEN_MR`: Length of instruction that caused the VM Exit,
+- `SEL4_VMENTER_FAULT_GUEST_PHYSICAL_MR`: Guest Physical Address of the VM Exit,
+- `SEL4_VMENTER_FAULT_RFLAGS_MR`: Guest RFLAGS register,
+- `SEL4_VMENTER_FAULT_GUEST_INT_MR`: Guest interruptibility,
+- `SEL4_VMENTER_FAULT_CR3_MR`: Guest CR3.
+
+Some of these message registers may not contain valid data depending on the VM Exit reason,
+please consult the Intel SDM for more details.
+
+These message registers contain the guest general purpose registers at the time of VM Exit:
+- `SEL4_VMENTER_FAULT_EAX`
+- `SEL4_VMENTER_FAULT_EBX`
+- `SEL4_VMENTER_FAULT_ECX`
+- `SEL4_VMENTER_FAULT_EDX`
+- `SEL4_VMENTER_FAULT_ESI`
+- `SEL4_VMENTER_FAULT_EDI`
+- `SEL4_VMENTER_FAULT_EBP`
+- `SEL4_VMENTER_FAULT_R8`
+- `SEL4_VMENTER_FAULT_R9`
+- `SEL4_VMENTER_FAULT_R10`
+- `SEL4_VMENTER_FAULT_R11`
+- `SEL4_VMENTER_FAULT_R12`
+- `SEL4_VMENTER_FAULT_R13`
+- `SEL4_VMENTER_FAULT_R14`
+- `SEL4_VMENTER_FAULT_R15`
+
 ## `microkit_msginfo microkit_ppcall(microkit_channel ch, microkit_msginfo msginfo)`
 
 Performs a call to a protected procedure in a different PD.
@@ -826,6 +863,20 @@ virtual CPU with ID `vcpu`.
 Write the registers of a given virtual CPU with ID `vcpu`. The `regs` argument is the pointer to
 the struct of registers `seL4_VCPUContext` that are written from.
 
+## `void microkit_vcpu_x86_on(void)`
+
+Allow the PD to switch to guest execution mode with the bound vCPU every time `init()`, `notified()` or `fault()` return.
-Allow the PD to switch to guest execution mode with the bound vCPU every time `init()`, `notified()` or `fault()` return.
+Start running the vCPU bound to this PD.
+
+The vCPU runs after every return from a Microkit entrypoint; it does not run concurrently with the native thread.
-Allow the PD to switch to guest execution mode with the bound vCPU every time `init()`, `notified()` or `fault()` return.
+Start running the vCPU bound to this PD.
+
+The vCPU runs after every return from a Microkit entrypoint; it does not run concurrently with the native thread.
+
+The caller is responsible for initialising the VCPU's instruction pointer, Primary Processor-Based VM-Execution Controls,
+VM-Entry Interruption-Information Field and other architectural states to the corresponding VMCS fields via
-VM-Entry Interruption-Information Field and other architectural states to the corresponding VMCS fields via
+VM-Entry Interruption-Information Field, and other architectural states to the corresponding VMCS fields via
-VM-Entry Interruption-Information Field and other architectural states to the corresponding VMCS fields via
+VM-Entry Interruption-Information Field, and other architectural states to the corresponding VMCS fields via
+`microkit_vcpu_x86_write_vmcs()`. For more details, please see the 'Virtualisation' section of the
+[seL4 reference manual](https://sel4.systems/Info/Docs/seL4-manual-latest.pdf). For architectural details,
+please consult the [Intel SDM](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html).
+
+## `void microkit_vcpu_x86_off(void)`
+
+Stop the PD from switching to guest execution mode when `init()`, `notified()` or `fault()` return.
+
 ## `seL4_CPtr microkit_cspace_root_slot_to_cptr(seL4_Word slot)` {#libmicrokit_cspace_root_slot_to_cptr}
 
 Converts the slot identifier of the `<cspace>`'s capability element into an
@@ -948,6 +999,7 @@ The `virtual_machine` element has the following attributes:
 Additionally, it supports the following child elements:
 
 * `vcpu`: (one or more) Describes the virtual CPU that will be tied to the virtual machine.
+    * On x86-64, there is a limit of one VCPU per PD.
 * `map`: (zero or more) Describes mapping of memory regions into the virtual machine.
 
 The `vcpu` element has the following attributes:
@@ -1027,6 +1079,7 @@ The `end` element has the following attributes:
 * `id`: Channel identifier in the context of the named protection domain. Must be at least 0 and less than 63.
 * `pp`: (optional) Indicates that the protection domain for this end can perform a protected procedure call to the other end; defaults to false.
         Protected procedure calls can only be to PDs of strictly higher priority.
+        On x86-64, PDs with virtual machines cannot receive protected procedure calls.
 * `notify`: (optional) Indicates that the protection domain for this end can send a notification to the other end; defaults to true.
 * `setvar_id`: (optional) Specifies a symbol in the program image. This symbol will be rewritten with the channel identifier.
 

diff --git a/libmicrokit/include/microkit.h b/libmicrokit/include/microkit.h
@@ -10,6 +10,9 @@
 #pragma once
 
 #include <sel4/sel4.h>
+#ifdef CONFIG_VTX
+#include <sel4/arch/vmenter.h>
+#endif /* CONFIG_VTX */
 
 typedef unsigned int microkit_channel;
 typedef unsigned int microkit_child;
@@ -47,6 +50,15 @@ extern char microkit_name[MICROKIT_PD_NAME_LENGTH];
 extern seL4_Bool microkit_have_signal;
 extern seL4_CPtr microkit_signal_cap;
 extern seL4_MessageInfo_t microkit_signal_msg;
+#if defined(CONFIG_VTX)
+struct microkit_x86_vcpu_state {
+    seL4_Bool do_resume;
+    seL4_Word rip;
+    seL4_Word prim_proc_ctl;
+    seL4_Word irq_info;
+};
+extern struct microkit_x86_vcpu_state microkit_x86_vcpu_state;
+#endif
 
 /* Symbols for error checking libmicrokit API calls. Patched by the Microkit tool
  * to set bits corresponding to valid channels for this PD. */
@@ -190,13 +202,8 @@ static inline void microkit_vcpu_restart(microkit_child vcpu, seL4_Word entry_po
 {
     seL4_Error err;
     seL4_UserContext ctxt = {0};
-#if defined(CONFIG_ARCH_AARCH64)
     ctxt.pc = entry_point;
-#elif defined(CONFIG_ARCH_X86_64)
-    ctxt.rip = entry_point;
-#else
-#error "unknown architecture for 'microkit_vcpu_restart'"
-#endif
+
     err = seL4_TCB_WriteRegisters(
               BASE_VM_TCB_CAP + vcpu,
               seL4_True,
@@ -220,9 +227,7 @@ static inline void microkit_vcpu_stop(microkit_child vcpu)
         microkit_internal_crash(err);
     }
 }
-#endif
 
-#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT)
 static inline void microkit_vcpu_arm_inject_irq(microkit_child vcpu, seL4_Uint16 irq, seL4_Uint8 priority,
                                                 seL4_Uint8 group, seL4_Uint8 index)
 {
@@ -265,7 +270,7 @@ static inline void microkit_vcpu_arm_write_reg(microkit_child vcpu, seL4_Word re
         microkit_internal_crash(err);
     }
 }
-#endif
+#endif /* CONFIG_ARM_HYPERVISOR_SUPPORT */
 
 #if defined(CONFIG_ALLOW_SMC_CALLS)
 static inline void microkit_arm_smc_call(seL4_ARM_SMCContext *args, seL4_ARM_SMCContext *response)
@@ -277,7 +282,7 @@ static inline void microkit_arm_smc_call(seL4_ARM_SMCContext *args, seL4_ARM_SMC
         microkit_internal_crash(err);
     }
 }
-#endif
+#endif /* CONFIG_ALLOW_SMC_CALLS */
 
 #if defined(CONFIG_ARCH_X86_64)
 static inline void microkit_x86_ioport_write_8(microkit_ioport ioport_id, seL4_Word port_addr, seL4_Word data)
@@ -393,28 +398,85 @@ static inline seL4_Uint32 microkit_x86_ioport_read_32(microkit_ioport ioport_id,
 
     return ret.result;
 }
-#endif
 
-#if defined(CONFIG_ARCH_X86_64) && defined(CONFIG_VTX)
+#if defined(CONFIG_VTX)
+/* Architecturally defined identifiers for an x86 VCPU's VMCS fields,
+ * see seL4 source: `include/arch/x86/arch/object/vcpu.h`
+ * or Intel® 64 and IA-32 Architectures Software Developer’s Manual
+ * Combined Volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4
+ * Order Number: 325462-084US June 2024
+ * "Table B-8. Encodings for 32-Bit Control Fields (0100_00xx_xxxx_xxx0B)" and
+ * "Table B-14. Encodings for Natural-Width Guest-State Fields (0110_10xx_xxxx_xxx0B) (Contd.)".
+ * */
+#define VMX_GUEST_RIP 0x0000681E
+#define VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS 0x00004002
+#define VMX_CONTROL_ENTRY_INTERRUPTION_INFO 0x00004016
+
+/* Serve the guest's:
+ * - RIP,
+ * - Primary Processor Based VM Execution Controls, and
+ * - VM Entry Interruption-Information
+ * from local variables that were written to by `microkit_vcpu_x86_write_vmcs()`.
+ * The kernel will service reading from other fields.
+ */
 static inline seL4_Word microkit_vcpu_x86_read_vmcs(microkit_child vcpu, seL4_Word field)
 {
-    seL4_X86_VCPU_ReadVMCS_t ret;
-    ret = seL4_X86_VCPU_ReadVMCS(BASE_VCPU_CAP + vcpu, field);
-    if (ret.error != seL4_NoError) {
-        microkit_dbg_puts("microkit_x86_read_vmcs: error reading data\n");
-        microkit_internal_crash(ret.error);
-    }
-
-    return ret.value;
-}
-
+    seL4_Word value;
+
+    /* Assumes that a PD would only have access to 1 VCPU object. */
+    switch (field) {
+    case VMX_GUEST_RIP:
+        value = microkit_x86_vcpu_state.rip;
+        break;
+    case VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS:
+        value = microkit_x86_vcpu_state.prim_proc_ctl;
+        break;
+    case VMX_CONTROL_ENTRY_INTERRUPTION_INFO:
+        value = microkit_x86_vcpu_state.irq_info;
+        break;
+    default: {
+        seL4_X86_VCPU_ReadVMCS_t ret = seL4_X86_VCPU_ReadVMCS(BASE_VCPU_CAP + vcpu, field);
+        if (ret.error != seL4_NoError) {
+            microkit_dbg_puts("microkit_x86_read_vmcs: error reading data\n");
+            microkit_internal_crash(ret.error);
+        }
+        value = ret.value;
+        break;
+    }
+    }
+
+    return value;
+}
+
+/* These fields will be written to local variables rather than the actual VMCS:
+ * - RIP,
+ * - Primary Processor Based VM Execution Controls, and
+ * - VM Entry Interruption-Information.
+ * This is because they are passed to the kernel on `seL4_VMEnter()`,
+ * and the kernel then writes them to the VMCS for us.
+ * Writes to any other VMCS field go to the real VMCS immediately via a syscall.
+ */
 static inline void microkit_vcpu_x86_write_vmcs(microkit_child vcpu, seL4_Word field, seL4_Word value)
 {
-    seL4_X86_VCPU_WriteVMCS_t ret;
-    ret = seL4_X86_VCPU_WriteVMCS(BASE_VCPU_CAP + vcpu, field, value);
-    if (ret.error != seL4_NoError) {
-        microkit_dbg_puts("microkit_x86_write_vmcs: error writing data\n");
-        microkit_internal_crash(ret.error);
+    /* Assumes that a PD would only have access to 1 VCPU object. */
+    switch (field) {
+    case VMX_GUEST_RIP:
+        microkit_x86_vcpu_state.rip = value;
+        break;
+    case VMX_CONTROL_PRIMARY_PROCESSOR_CONTROLS:
+        microkit_x86_vcpu_state.prim_proc_ctl = value;
+        break;
+    case VMX_CONTROL_ENTRY_INTERRUPTION_INFO:
+        microkit_x86_vcpu_state.irq_info = value;
+        break;
+    default: {
+        seL4_X86_VCPU_WriteVMCS_t ret = seL4_X86_VCPU_WriteVMCS(BASE_VCPU_CAP + vcpu, field, value);
+        if (ret.error != seL4_NoError) {
+            microkit_dbg_puts("microkit_x86_write_vmcs: error writing data\n");
+            microkit_internal_crash(ret.error);
+        }
+        break;
+    }
     }
 }
 
@@ -479,7 +541,20 @@ static inline void microkit_vcpu_x86_write_regs(microkit_child vcpu, seL4_VCPUCo
     }
 }
 
-#endif
+static inline void microkit_vcpu_x86_on(void)
+{
+    /* On x86, a TCB can only have one bound VCPU at any given time.
+     * So we don't take a `microkit_child vcpu` here. */
+    microkit_x86_vcpu_state.do_resume = seL4_True;
+}
+
+static inline void microkit_vcpu_x86_off(void)
+{
+    microkit_x86_vcpu_state.do_resume = seL4_False;
+}
+
+#endif /* CONFIG_VTX */
+#endif /* CONFIG_ARCH_X86_64 */
 
 static inline void microkit_deferred_notify(microkit_channel ch)
 {