aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/x86_64.rs583
1 files changed, 566 insertions, 17 deletions
diff --git a/src/x86_64.rs b/src/x86_64.rs
index f30e2be..ff23b34 100644
--- a/src/x86_64.rs
+++ b/src/x86_64.rs
@@ -40,6 +40,7 @@ fn u64_to_usize(x: u64) -> usize {
/// host pointer `memory`. this region is used for "control structures"; page tables, GDT, IDT,
/// and stack. it is also the region where code to be executed is placed.
pub struct Vm {
+ settings: VmSettings,
vm: VmFd,
vcpu: VcpuFd,
supported_cpuid: CpuId,
@@ -84,10 +85,16 @@ enum Feature {
/// Hi16_ZMM in CPUID leaf D and corresponding bits in xcr0. if these bits are not set,
/// attempts to use instructions with zmm state may #UD.
StateAVX512,
+ /// support for Page Size Extensions. is relevant only for protected mode; among other things,
+ /// this bit indicates support for page directory entries mapping 4MiB of memory directly,
+ /// rather than mapping a 1024-entry block of PTEs.
+ Pse,
}
const CPUID_00000001_ECX_XSAVE: u32 = 1 << 26;
+const CPUID_00000001_EDX_PSE: u32 = 1 << 3;
+
const CPUID_0000000D_EAX_SSE: u32 = 1 << 1;
const CPUID_0000000D_EAX_AVX: u32 = 1 << 2;
const CPUID_0000000D_EAX_AVX512: u32 = (1 << 5) | (1 << 6) | (1 << 7);
@@ -461,6 +468,105 @@ fn test_xor_runs() {
}
#[test]
+/// smoke test for `IsaMode::Protected`: run a short 32-bit program through to `hlt` and check
+/// the resulting register state.
+///
+/// NOTE(review): the program starts with a VEX-encoded instruction but, unlike
+/// `test_vex_vandps_runs_32b`, this test does not check `Feature::StateAVX` first - confirm
+/// that is intended.
+fn test_protected_mode_runs() {
+ let settings = VmSettings::new(128 * 1024, IsaMode::Protected);
+ let mut vm = Vm::create_by_settings(settings).expect("can create vm");
+ let mut regs = vm.get_regs().expect("can get regs");
+
+ let buf = &[
+ 0xc5, 0xe0, 0x54, 0xc3, // vandps xmm0, xmm3, xmm3
+ 0x33, 0xc0, // xor eax, eax
+ 0x8b, 0x09, // mov ecx, [ecx]
+ 0xf4 // hlt
+ ];
+ vm.program(buf, &mut regs);
+
+ // seed rax with a nonzero value so the `xor eax, eax` is observable, and point ecx at guest
+ // address 4 for the load. the assertions below expect that load to read back zero.
+ regs.rax = 0x1234;
+ regs.rcx = 0x4;
+
+ vm.set_regs(&regs).expect("can set regs");
+
+ let res = vm.run().expect("can run vm");
+
+ match res {
+ VcpuExit::Hlt => {
+ // expected exit from the `0xf4` above.
+ }
+ other => {
+ panic!("unexpected exit: {:?}", other);
+ }
+ };
+
+ // both registers must have been overwritten by the program.
+ let regs_after = vm.get_regs().expect("can get regs");
+ assert_eq!(regs_after.rax, 0);
+ assert_eq!(regs_after.rcx, 0);
+}
+
+/// single-step `pusha` (opcode 0x60) in `IsaMode::Real`, first with the default operand size and
+/// then with an 0x66 operand-size prefix, checking how far `rsp` moves each time.
+///
+/// the starting stack pointer, `0x1000 - 0x80`, is what `Vm::create_by_settings` sets for
+/// `IsaMode::Real`.
+#[test]
+fn test_pusha_runs() {
+ let settings = VmSettings::new(128 * 1024, IsaMode::Real);
+ let mut vm = Vm::create_by_settings(settings).expect("can create vm");
+ let mut regs = vm.get_regs().expect("can get regs");
+
+ // first pass: bare `pusha`.
+ vm.program(&[0x60], &mut regs);
+
+ regs.rip = 0;
+ regs.rax = 0x1234;
+ eprintln!("{:?}", regs);
+
+ vm.set_regs(&regs).expect("can set regs");
+
+ vm.set_single_step(true).expect("can set single-step");
+ // the debug exit's `pc` is compared against the code's guest address plus the instruction
+ // length (one byte here), not against the segment-relative `ip`.
+ let expected_rip = vm.code_addr().0 + 1;
+
+ let res = vm.run().expect("can run vm");
+
+ match res {
+ VcpuExit::Debug { pc: rip_after, .. } => {
+ eprintln!("rip after: {:08x}", rip_after);
+ assert_eq!(expected_rip, rip_after);
+ }
+ other => {
+ panic!("unexpected exit: {:?}", other);
+ }
+ };
+
+ // `pusha` must not modify rax, and is expected to push eight 2-byte registers.
+ let regs_after = vm.get_regs().expect("can get regs");
+ assert_eq!(regs_after.rax, 0x1234);
+ assert_eq!(regs_after.rsp, 0x1000 - 0x80 - (8 * 2));
+
+ // second pass: the same instruction behind an operand-size prefix. rsp is reset to the
+ // initial value so the delta is measured from the same starting point.
+ let mut regs = vm.get_regs().expect("can get regs");
+
+ vm.program(&[0x66, 0x60], &mut regs);
+
+ regs.rip = 0;
+ regs.rax = 0x1234;
+ regs.rsp = 0x1000 - 0x80;
+ eprintln!("{:?}", regs);
+
+ vm.set_regs(&regs).expect("can set regs");
+
+ vm.set_single_step(true).expect("can set single-step");
+ // two bytes this time: prefix plus opcode.
+ let expected_rip = vm.code_addr().0 + 2;
+
+ let res = vm.run().expect("can run vm");
+
+ match res {
+ VcpuExit::Debug { pc: rip_after, .. } => {
+ eprintln!("rip after: {:08x}", rip_after);
+ assert_eq!(expected_rip, rip_after);
+ }
+ other => {
+ panic!("unexpected exit: {:?}", other);
+ }
+ };
+
+ // with the prefix, eight 4-byte registers are expected on the stack.
+ let regs_after = vm.get_regs().expect("can get regs");
+ assert_eq!(regs_after.rax, 0x1234);
+ assert_eq!(regs_after.rsp, 0x1000 - 0x80 - (8 * 4));
+}
+
+#[test]
fn test_syscall() {
let mut vm = Vm::create(128 * 1024).expect("can create vm");
let mut regs = vm.get_regs().expect("can get regs");
@@ -557,6 +663,40 @@ fn test_vex_vandps_runs() {
}
#[test]
+/// single-step one VEX-encoded `vandps` with a memory operand in `IsaMode::Protected` and check
+/// that execution advanced past all four bytes of the instruction.
+///
+/// panics (rather than skipping) when the host does not report AVX state support.
+fn test_vex_vandps_runs_32b() {
+ let settings = VmSettings::new(128 * 1024, IsaMode::Protected);
+ let mut vm = Vm::create_by_settings(settings).expect("can create vm");
+
+ if !vm.cpuid_supports(Feature::StateAVX) {
+ panic!("host CPU does not support AVX");
+ }
+
+ let mut regs = vm.get_regs().expect("can get regs");
+
+ // c5 e0 54 03: vandps xmm0, xmm3, [ebx]
+ vm.program(&[0xc5, 0xe0, 0x54, 0x03], &mut regs);
+
+ // point the memory operand at the code itself: it only needs to be a readable address.
+ regs.rbx = regs.rip;
+ let rip_before = regs.rip;
+
+ vm.set_regs(&regs).expect("can set regs");
+
+ vm.set_single_step(true).expect("can set single-step");
+
+ let res = vm.run().expect("can run vm");
+
+ // the instruction is four bytes long.
+ let expected_rip = rip_before + 4;
+ eprintln!("exit: {:?}", res);
+ match res {
+ VcpuExit::Debug { pc: rip_after, .. } => {
+ assert_eq!(expected_rip, rip_after);
+ }
+ other => {
+ panic!("unexpected exit: {:?}", other);
+ }
+ };
+}
+
+#[test]
fn test_evex_vandps_runs() {
let mut vm = Vm::create(128 * 1024).expect("can create vm");
@@ -635,8 +775,63 @@ fn kvm_hugepage_bug() {
panic!("no");
}
+/// a selector for the execution mode the VM should be initialized to.
+///
+/// different `IsaMode` will configure the VM wildly differently; generally any VM/vCPU state not
+/// directly required for the requested mode will be left untouched.
+///
+/// in all modes, CPUID leaves and xcr0 are set up to support any ISA extensions supported by the
+/// host CPU.
+///
+/// in all modes, an IDT is installed with interrupt handlers pointed to the 256 bytes from
+/// `interrupt_handlers_start()`.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum IsaMode {
+ /// request that the VM be configured to run x86-64 instructions, aka "AMD64", or "IA-32e" (and
+ /// specifically "IA-32e 64-bit mode") in some Intel nomenclature.
+ ///
+ /// this configures identity paging, selectors sufficient for long mode (with all vCPU
+ /// execution at CPL=0), prepares some MSRs for syscalls, and of course configures `cr0`
+ /// for long mode.
+ Long,
+ /// request that the VM be configured to run 32-bit instructions, with long mode neither
+ /// enabled nor active.
+ ///
+ /// this configures identity paging and selectors covering all 32-bit address space and with
+ /// CPL=0.
+ Protected,
+ /// request that the VM be configured to run 16-bit instructions.
+ ///
+ /// this configures code/data selectors covering all 24 bits of address space and an interrupt
+ /// descriptor table, and CPUID for any host-supported ISA extensions, but that's about it.
+ ///
+ /// NOTE(review): `configure_selectors_16b` sets segment limits of `0xfffff` with `g = 1`;
+ /// confirm that "24 bits" above is the intended description of that configuration.
+ Real,
+}
+
+/// the settings consumed by [`Vm::create_by_settings`]. see `VmSettings::new` for top-level
+/// configuration.
+pub struct VmSettings {
+ // requested size of guest memory, in bytes. `Vm::create_by_settings` rejects anything below
+ // 128KiB with `VmCreateError::TooSmall`.
+ mem_size: usize,
+ // execution mode the VM is initialized for; selects which paging/selector/IDT setup runs.
+ isa_mode: IsaMode,
+}
+
+impl VmSettings {
+ /// provide the bare-minimum configuration for a VM: the size of its memory and what execution
+ /// mode the resulting VM should be set for.
+ ///
+ /// VM control settings (IDT, `cs`, `ds`, other selectors, syscalls, page tables, etc) vary
+ /// substantially across different `IsaMode`. in all cases code can be written into the VM with
+ /// [`Vm::program()`], then run with [`Vm::run()`].
+ ///
+ /// `mem_size` is in bytes. nothing is validated here; the minimum-size check happens in
+ /// [`Vm::create_by_settings`].
+ pub fn new(mem_size: usize, isa_mode: IsaMode) -> Self {
+ Self { mem_size, isa_mode }
+ }
+}
+
impl Vm {
pub fn create(mem_size: usize) -> Result<Vm, VmCreateError> {
+ Self::create_by_settings(VmSettings::new(mem_size, IsaMode::Long))
+ }
+
+ pub fn create_by_settings(settings: VmSettings) -> Result<Vm, VmCreateError> {
let kvm = Kvm::new()
.map_err(|e| VmError::from_kvm("Kvm::new()", e))?;
@@ -646,14 +841,14 @@ impl Vm {
let supported_cpuid = kvm.get_supported_cpuid(KVM_MAX_CPUID_ENTRIES).unwrap();
// actual minimum is somewhere around 0x1a000 bytes, but 0x20_000 aka 128k will do
- if mem_size < 128 * 1024 {
+ if settings.mem_size < 128 * 1024 {
return Err(VmCreateError::TooSmall {
- requested: mem_size,
+ requested: settings.mem_size,
required: 128 * 1024,
});
}
- let mapping = Mapping::create_shared(0, mem_size, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE)?;
+ let mapping = Mapping::create_shared(0, settings.mem_size, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE)?;
let region = kvm_userspace_memory_region {
slot: 0,
@@ -674,6 +869,7 @@ impl Vm {
let mem_ceiling = mapping.size.get().try_into().unwrap();
let mut this = Vm {
+ settings,
vm,
vcpu,
supported_cpuid,
@@ -691,17 +887,48 @@ impl Vm {
assert!(this.cpuid_supports(Feature::Base));
this.cpuid_set(Feature::Base, true);
- unsafe {
- this.configure_identity_paging(Some(&mut vcpu_sregs));
- this.configure_selectors(&mut vcpu_sregs);
- this.configure_idt(&mut vcpu_regs, &mut vcpu_sregs);
- let mut xcrs = this.get_xcrs()?;
- this.configure_extensions(&mut vcpu_sregs, &mut xcrs);
- this.set_xcrs(&xcrs)?;
- this.configure_syscalls(&mut vcpu_sregs);
- }
+ match this.settings.isa_mode {
+ IsaMode::Long => {
+ unsafe {
+ this.configure_identity_paging(Some(&mut vcpu_sregs));
+ this.configure_selectors(&mut vcpu_sregs);
+ this.configure_idt(&mut vcpu_regs, &mut vcpu_sregs);
+ let mut xcrs = this.get_xcrs()?;
+ this.configure_extensions(&mut vcpu_sregs, &mut xcrs);
+ this.set_xcrs(&xcrs)?;
+ this.configure_syscalls(&mut vcpu_sregs);
+ }
+
+ vcpu_sregs.efer |= 0x0000_0500; // LME | LMA
+ }
+ IsaMode::Protected => {
+ unsafe {
+ this.configure_identity_paging_32b(Some(&mut vcpu_sregs));
+ this.configure_selectors_32b(&mut vcpu_sregs);
+ this.configure_idt_32b(&mut vcpu_regs, &mut vcpu_sregs);
+ let mut xcrs = this.get_xcrs()?;
+ this.configure_extensions(&mut vcpu_sregs, &mut xcrs);
+ this.set_xcrs(&xcrs)?;
- vcpu_sregs.efer |= 0x0000_0500; // LME | LMA
+ }
+ }
+ IsaMode::Real => {
+ unsafe {
+ this.configure_selectors_16b(&mut vcpu_sregs);
+ this.configure_idt_16b(&mut vcpu_regs, &mut vcpu_sregs);
+ let mut xcrs = this.get_xcrs()?;
+ this.configure_extensions(&mut vcpu_sregs, &mut xcrs);
+ this.set_xcrs(&xcrs)?;
+
+ // in 16-bit mode we've set cs and ds to cover the last 4kb of memory, starting
+ // at the same place we've written code to execute. there's not much memory to
+ // go around, and not a ton of flexibility in the asmlinator API, so uh ... the
+ // least annoying thing to do might be to just put the stack 0x80 bytes from
+ // the end?
+ vcpu_regs.rsp = 0x1000 - 0x80;
+ }
+ }
+ }
this.set_regs(&vcpu_regs)?;
this.set_sregs(&vcpu_sregs)?;
@@ -971,6 +1198,12 @@ impl Vm {
0x18
}
+ /// selector 0x20 is chosen arbitrarily for 16-bit interrupts, which are placed well away from
+ /// where selector 0x10 is pointed in real mode.
+ pub fn selector_cs_idt_16b(&self) -> u16 {
+ 0x20
+ }
+
fn map_containing_mut(&mut self, base: GuestAddress, size: u64) -> Option<&mut Mapping> {
let mapping = if self.memory.contains(base) {
&mut self.memory
@@ -1069,12 +1302,22 @@ impl Vm {
/// write `code` into guest memory and set `regs.rip` to the address of that code.
///
- /// the chosen code address is [`Self::code_addr`].
+ /// the chosen code address is [`Self::code_addr`]; this is the guest linear address the
+ /// provided code buffer is written to.
+ ///
+ /// if the VM is configured for `IsaMode::Long` or `IsaMode::Protected`, `rip` or `eip` is set
+ /// to this address as well. otherwise, the VM is configured for `IsaMode::Real` and `ip` is
+ /// set to `code_addr() & 0x0f` - in typical cases `ip` will be 0.
+ ///
pub fn program(&mut self, code: &[u8], regs: &mut kvm_regs) {
let addr = self.code_addr();
self.write_mem(addr, code);
- regs.rip = addr.0;
+ if self.settings.isa_mode != IsaMode::Real {
+ regs.rip = addr.0;
+ } else {
+ regs.rip = addr.0 & 0x000f;
+ }
}
fn gdt_entry_mut(&mut self, idx: u16) -> *mut u64 {
@@ -1096,8 +1339,8 @@ impl Vm {
}
}
- // note this returns a u32, but an IDT is four u32. the u32 this points at is the first of
- // the four for the entry.
+ // note this returns a u32, but a long-mode IDT is four u32. the u32 this points at is the
+ // first of the four for the entry.
fn idt_entry_mut(&mut self, idx: u8) -> *mut u32 {
let addr = GuestAddress(self.idt_addr().0 + (idx as u64 * 16));
let mapping = self.map_containing(addr, std::mem::size_of::<[u64; 2]>() as u64).unwrap();
@@ -1107,6 +1350,17 @@ impl Vm {
}
}
+ // note this returns a u32, but a legacy IDT entry is two u32 (8 bytes). the u32 this points
+ // at is the first of the two for the entry.
+ //
+ // panics if the entry does not lie inside a mapped region.
+ fn idt_entry_legacy_mut(&mut self, idx: u8) -> *mut u32 {
+ let addr = GuestAddress(self.idt_addr().0 + (idx as u64 * 8));
+ // bounds-check exactly one 8-byte legacy entry, matching the stride used above. (the
+ // 16-byte check used for long-mode entries would demand 8 bytes past the end of the
+ // last legacy entry.)
+ let mapping = self.map_containing(addr, std::mem::size_of::<[u32; 2]>() as u64).unwrap();
+
+ // SAFETY: `map_containing` returned this mapping for `addr`, so `addr` is a guest address
+ // the mapping can translate.
+ unsafe {
+ mapping.host_ptr(addr) as *mut u32
+ }
+ }
+
pub fn page_tables(&self) -> VmPageTables<'_> {
let base = self.page_table_addr();
@@ -1178,6 +1432,11 @@ impl Vm {
leaf.eax & CPUID_0000000D_EAX_AVX512 == CPUID_0000000D_EAX_AVX512
})
}
+ Feature::Pse => {
+ find_leaf(&self.supported_cpuid, 0x0000_0001, 0, |leaf| {
+ leaf.edx & CPUID_00000001_EDX_PSE == CPUID_00000001_EDX_PSE
+ })
+ }
}
}
@@ -1272,6 +1531,12 @@ impl Vm {
edited = true;
});
}
+ Feature::Pse => {
+ edit_leaf(&mut self.current_cpuid, 0x0000_0001, 0, |leaf| {
+ bit_set(&mut leaf.edx, CPUID_00000001_EDX_PSE, wanted);
+ edited = true;
+ });
+ }
}
assert!(edited);
@@ -1350,6 +1615,63 @@ impl Vm {
}
}
+ /// configure page tables for identity mapping of all memory from guest address zero up to the
+ /// end of added memory regions, rounded up to the next 4MiB.
+ ///
+ /// if `sregs` is provided, update `cr0`, `cr3`, and `cr4` in support of protected-mode paging.
+ /// this is a fixed pattern: if control registers have not been changed since `Vm::create` then
+ /// there will be no change to these control registers and `sregs` can be omitted.
+ ///
+ /// # Panics
+ ///
+ /// panics if the host does not report PSE support, or if guest memory extends past 4GiB: the
+ /// single page directory written here has only 1024 entries of 4MiB each.
+ ///
+ /// # Safety
+ ///
+ /// this writes page directory entries through a raw pointer into the VM's page table memory.
+ /// NOTE(review): the exact obligations on callers are not spelled out in this file; presumably
+ /// the page table region must be mapped and at least 4KiB - confirm.
+ pub unsafe fn configure_identity_paging_32b(&mut self, sregs: Option<&mut kvm_sregs>) {
+ // because we'll set PDEs to map 4M pages and cr3 points at a page-aligned block of 1024
+ // 4-byte PDEs, that gives us 4KiB of memory used to map 4GiB of address space. that's all
+ // of 32-bit, but guest memory is sized by the caller, so enforce the bound rather than
+ // assume it: past 4GiB the `as u32` below would truncate and the loop would write PDEs
+ // beyond the end of the directory.
+ assert!(
+ self.mem_ceiling() <= 1u64 << 32,
+ "32-bit identity paging can map at most 4GiB of guest memory"
+ );
+
+ assert!(self.cpuid_supports(Feature::Pse));
+ self.cpuid_set(Feature::Pse, true);
+
+ let pt = self.page_tables();
+
+ let mut mapped: u64 = 0;
+ // "pml4_mut" is really just the start of page table memory. we'll pun this in 32-bit with
+ // the knowledge it's really a block of PDEs.
+ let pd = pt.pml4_mut() as *mut u32;
+ let mut pde = pd;
+ let entry_bits: u32 =
+ 1 << 0 | // P
+ 1 << 1 | // RW
+ 1 << 2 | // user accesses allowed (everything is under privilege level 0 tho)
+ 0 << 3 | // PWT (TODO: configure PAT explicitly, but PAT0 is sufficient)
+ 0 << 4 | // PCD (TODO: configure PAT explicitly, but PAT0 is sufficient)
+ 0 << 5 | // Accessed
+ 0 << 6 | // Dirty
+ 1 << 7 | // Page size (1 implies 4M page)
+ 1 << 8 | // Global (if cr4.pge)
+ 0 << 9 |
+ 0 << 10 |
+ 0 << 11 | // for ordinary paging, ignored. for HLAT, ...
+ 0 << 12; // PAT (TODO: configure explicitly, but PAT0 is sufficient. verify MTRR sets PAT0 to WB?)
+
+ // one PDE per 4MiB of guest memory; each maps its own index, which is what makes this an
+ // identity mapping.
+ while mapped < self.mem_ceiling() {
+ let phys_num = (mapped as u32) >> 22;
+ let entry = entry_bits | (phys_num << 22);
+ // SAFETY: `mapped` stays below 4GiB (asserted above), so at most 1024 entries are
+ // written and `pde` never moves more than 4KiB past `pd`.
+ unsafe {
+ pde.write(entry);
+ pde = pde.offset(1);
+ }
+ mapped += 1 << 22;
+ }
+
+ // page size extensions; collaborates with page tables' PS bit to make 4MiB pages in 32-bit
+ // mode. see SDM section 2.5 "CONTROL REGISTERS".
+ const PSE: u64 = 1 << 4;
+
+ if let Some(sregs) = sregs {
+ sregs.cr0 |= 0x8000_0001; // cr0.PE | cr0.PG
+ sregs.cr3 = pt.pml4_addr().0 as u64;
+ sregs.cr4 |= PSE;
+ }
+ }
+
unsafe fn configure_selectors(&mut self, sregs: &mut kvm_sregs) {
// we have to set descriptor information directly. this avoids having to load selectors
// as the first instructions on the vCPU, which is simplifying. but if we want the
@@ -1399,6 +1721,119 @@ impl Vm {
}
}
+ /// configure selectors for 32-bit code execution. this is basically the same as 64-bit, but we
+ /// set a limit and set `cs.db` so that the default operand size is a normal 32-bit.
+ ///
+ /// besides filling in `sregs`, this writes matching `cs` and `ds` descriptors into the GDT in
+ /// guest memory and points `sregs.gdt` at it.
+ unsafe fn configure_selectors_32b(&mut self, sregs: &mut kvm_sregs) {
+ // we have to set descriptor information directly. this avoids having to load selectors
+ // as the first instructions on the vCPU, which is simplifying. but if we want the
+ // information in these selectors to match with anything in a GDT (i do!) we'll have to
+ // keep this initial state lined up with GDT entries ourselves.
+ //
+ // we could avoid setting up the GDT for the most part, but anything that might
+ // legitimately load the "valid" current segment selector would instead clobber the
+ // selector with zeroes.
+
+ sregs.cs.base = 0;
+ sregs.cs.limit = 0xffffffff;
+ sregs.cs.selector = self.selector_cs();
+ sregs.cs.type_ = 0b1011; // see SDM table 3-1 Code- and Data-Segment Types
+ sregs.cs.present = 1;
+ sregs.cs.dpl = 0;
+ sregs.cs.db = 1;
+ sregs.cs.s = 1;
+ sregs.cs.l = 0;
+ sregs.cs.g = 1;
+ sregs.cs.avl = 0;
+
+ sregs.ds.base = 0;
+ sregs.ds.limit = 0xffffffff;
+ sregs.ds.selector = self.selector_ds();
+ sregs.ds.type_ = 0b0011; // see SDM table 3-1 Code- and Data-Segment Types
+ sregs.ds.present = 1;
+ sregs.ds.dpl = 0;
+ sregs.ds.db = 1;
+ sregs.ds.s = 1;
+ sregs.ds.l = 0;
+ sregs.ds.g = 1;
+ sregs.ds.avl = 0;
+
+ // every other data segment register is a copy of ds.
+ sregs.es = sregs.ds;
+ sregs.fs = sregs.ds;
+ sregs.gs = sregs.ds;
+ // linux populates the vmcb cpl field with whatever's in ss.dpl. what the hell???
+ sregs.ss = sregs.ds;
+
+ sregs.gdt.base = self.gdt_addr().0;
+ sregs.gdt.limit = 256 * 8 - 1;
+
+ // mirror the register state into the GDT; a selector's descriptor index is selector >> 3.
+ unsafe {
+ self.gdt_entry_mut(self.selector_cs() >> 3).write(encode_segment(&sregs.cs));
+ self.gdt_entry_mut(self.selector_ds() >> 3).write(encode_segment(&sregs.ds));
+ }
+ }
+
+ /// configure selectors for 16-bit code execution.
+ ///
+ /// unlike other modes, this sets `cs` to execute code at the linear address given by
+ /// [`Self::code_addr`]. `ds` is configured to overlap with `cs`. this way, when executing
+ /// 16-bit code the VM can simply be configured to `ip = 0`, and code addresses match data
+ /// addresses. additionally, clear `cs.db` so that the default operand size is 16-bit.
+ ///
+ /// three GDT entries are written: a base-0 code segment at `selector_cs_idt_16b()` for
+ /// interrupt handlers, then the `code_addr()`-based `cs` and `ds`.
+ unsafe fn configure_selectors_16b(&mut self, sregs: &mut kvm_sregs) {
+ // we have to set descriptor information directly. this avoids having to load selectors
+ // as the first instructions on the vCPU, which is simplifying. but if we want the
+ // information in these selectors to match with anything in a GDT (i do!) we'll have to
+ // keep this initial state lined up with GDT entries ourselves.
+ //
+ // we could avoid setting up the GDT for the most part, but anything that might
+ // legitimately load the "valid" current segment selector would instead clobber the
+ // selector with zeroes.
+
+ sregs.cs.base = 0;
+ sregs.cs.limit = 0xfffff;
+ sregs.cs.selector = self.selector_cs();
+ sregs.cs.type_ = 0b1011; // see SDM table 3-1 Code- and Data-Segment Types
+ sregs.cs.present = 1;
+ sregs.cs.dpl = 0;
+ sregs.cs.db = 0;
+ sregs.cs.s = 1;
+ sregs.cs.l = 0;
+ sregs.cs.g = 1;
+ sregs.cs.avl = 0;
+
+ // while cs.base is still 0, encode this state as the descriptor for the interrupt-handler
+ // code selector. `sregs.cs` is only borrowed as a template here.
+ unsafe {
+ self.gdt_entry_mut(self.selector_cs_idt_16b() >> 3).write(encode_segment(&sregs.cs));
+ }
+
+ // and now adjust for the real cs for code execution to happen in..
+ sregs.cs.base = self.code_addr().0;
+
+ sregs.ds.base = self.code_addr().0;
+ sregs.ds.limit = 0xfffff;
+ sregs.ds.selector = self.selector_ds();
+ sregs.ds.type_ = 0b0011; // see SDM table 3-1 Code- and Data-Segment Types
+ sregs.ds.present = 1;
+ sregs.ds.dpl = 0;
+ sregs.ds.db = 0;
+ sregs.ds.s = 1;
+ sregs.ds.l = 0;
+ sregs.ds.g = 1;
+ sregs.ds.avl = 0;
+
+ // every other data segment register is a copy of ds.
+ sregs.es = sregs.ds;
+ sregs.fs = sregs.ds;
+ sregs.gs = sregs.ds;
+ // linux populates the vmcb cpl field with whatever's in ss.dpl. what the hell???
+ sregs.ss = sregs.ds;
+
+ sregs.gdt.base = self.gdt_addr().0;
+ sregs.gdt.limit = 256 * 8 - 1;
+
+ // mirror the final cs/ds register state into the GDT.
+ unsafe {
+ self.gdt_entry_mut(self.selector_cs() >> 3).write(encode_segment(&sregs.cs));
+ self.gdt_entry_mut(self.selector_ds() >> 3).write(encode_segment(&sregs.ds));
+ }
+ }
+
fn write_idt_entry(
&mut self,
intr_nr: u8,
@@ -1435,6 +1870,44 @@ impl Vm {
}
}
+ /// 16-bit/32-bit IDT entries, described in the APM as
+ ///
+ /// > Interrupt-Gate and Trap-Gate Descriptors—Legacy Mode
+ ///
+ /// have a different (smaller!) format.
+ ///
+ /// writes the 8-byte descriptor for interrupt `intr_nr`, targeting
+ /// `interrupt_handler_cs:interrupt_handler_addr`. panics if the handler address does not fit
+ /// in 32 bits.
+ fn write_idt_entry_legacy(
+ &mut self,
+ intr_nr: u8,
+ interrupt_handler_cs: u16,
+ interrupt_handler_addr: GuestAddress
+ ) {
+ assert!(interrupt_handler_addr.0 <= u32::MAX as u64);
+ let idt_ptr = self.idt_entry_legacy_mut(intr_nr);
+
+ // entries in the IDT, interrupt and trap descriptors (in the AMD APM, "interrupt-gate"
+ // and "trap-gate" descriptors), are described (in the AMD APM) by the figure titled
+ // "Interrupt-Gate and Trap-Gate Descriptors—Legacy Mode". reproduced here:
+ //
+ // 3 2 1 | 1 0
+ // 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ // |---------------------------------------------------------------|
+ // | target offset[31:16] |P|DPL|0| type | reserved, ignored | +4
+ // | target selector | target offset[15:0] | +0
+ // |---------------------------------------------------------------|
+ //
+ // unlike the long-mode layout there is no IST field and no second pair of words.
+ //
+ // descriptors are encoded with P set, DPL at 0, and type set to 0b1110. TODO: frankly
+ // i don't know the mechanical difference between type 0x0e and type 0x0f, but 0x0e
+ // works for now.
+ //
+ // NOTE(review): per the vendor manuals 0x0e is an interrupt gate (IF is cleared on entry)
+ // and 0x0f a trap gate (IF is left alone); both are the 32-bit gate types, and the same
+ // type is written here for 16-bit-mode callers too - confirm that is intended.
+ let idt_attr_bits = 0b1_00_0_1110_00000_000;
+ let low_hi = (interrupt_handler_addr.0 as u32 & 0xffff_0000) | idt_attr_bits;
+ let low_lo = (interrupt_handler_cs as u32) << 16 | (interrupt_handler_addr.0 as u32 & 0x0000_ffff);
+
+ unsafe {
+ idt_ptr.offset(0).write(low_lo);
+ idt_ptr.offset(1).write(low_hi);
+ }
+ }
+
fn configure_idt(&mut self, regs: &mut kvm_regs, sregs: &mut kvm_sregs) {
sregs.idt.base = self.idt_addr().0;
sregs.idt.limit = IDT_ENTRIES * 16 - 1; // IDT is 256 entries of 16 bytes each
@@ -1471,6 +1944,82 @@ impl Vm {
self.idt_configured = true;
}
+ /// IDT configuration in 32-bit mode uses the legacy 8-byte gate format rather than the 16-byte
+ /// long-mode one. handlers are entered through the same flat `cs` selector as ordinary code
+ /// (`selector_cs()`).
+ ///
+ /// this points `sregs.idt` at the table, writes all `IDT_ENTRIES` descriptors, fills the
+ /// handler region with `hlt`, and sets `regs.rsp` to `stack_addr()`.
+ fn configure_idt_32b(&mut self, regs: &mut kvm_regs, sregs: &mut kvm_sregs) {
+ sregs.idt.base = self.idt_addr().0;
+ sregs.idt.limit = IDT_ENTRIES * 8 - 1; // legacy IDT is 256 entries of 8 bytes each
+
+ // handler `i` is the single byte at `interrupt_handlers_start() + i`.
+ for i in 0..IDT_ENTRIES {
+ let interrupt_handler_addr = GuestAddress(self.interrupt_handlers_start().0 + i as u64);
+ self.write_idt_entry_legacy(
+ i.try_into().expect("<u8::MAX interrupts"),
+ self.selector_cs(),
+ interrupt_handler_addr
+ );
+ }
+
+ // all interrupt handlers are just `hlt`. their position is used to detect which
+ // exception/interrupt occurred.
+ unsafe {
+ std::slice::from_raw_parts_mut(
+ self.host_ptr(self.interrupt_handlers_start()),
+ IDT_ENTRIES as usize
+ ).fill(0xf4);
+ }
+
+ // finally, set `rsp` to a valid region so that the CPU can push necessary state (see
+ // AMD APM section "8.9.3 Interrupt Stack Frame") to actually enter the interrupt
+ // handler. if we didn't do this, rsp will probably be zero or something, underflow,
+ // page fault on push to 0xffffffff_ffffffff, and just triple fault.
+ //
+ // TODO: this is our option in 16- and 32-bit modes, but in long mode all the interrupt
+ // descriptors could set something in IST to switch stacks outright for exception
+ // handling. this might be nice to test rsp permutations in 64-bit code? alternatively
+ // we might just have to limit possible rsp permutations so as to be able to test in
+ // 16- and 32-bit modes anyway.
+ regs.rsp = self.stack_addr().0;
+ self.idt_configured = true;
+ }
+
+ /// IDT configuration in 16-bit mode is funky because the interrupt handlers live in a totally
+ /// different region of memory and need a different value in `cs`.
+ ///
+ /// the gates therefore target `selector_cs_idt_16b()` rather than `selector_cs()`. otherwise
+ /// this does the same work as the 32-bit variant: point `sregs.idt` at the table, write all
+ /// `IDT_ENTRIES` legacy descriptors, fill the handler region with `hlt`, and set `regs.rsp`.
+ fn configure_idt_16b(&mut self, regs: &mut kvm_regs, sregs: &mut kvm_sregs) {
+ sregs.idt.base = self.idt_addr().0;
+ sregs.idt.limit = IDT_ENTRIES * 8 - 1; // IDT is 256 entries of 8 bytes each
+
+ // handler `i` is the single byte at `interrupt_handlers_start() + i`.
+ for i in 0..IDT_ENTRIES {
+ let interrupt_handler_addr = GuestAddress(self.interrupt_handlers_start().0 + i as u64);
+ self.write_idt_entry_legacy(
+ i.try_into().expect("<u8::MAX interrupts"),
+ self.selector_cs_idt_16b(),
+ interrupt_handler_addr
+ );
+ }
+
+ // all interrupt handlers are just `hlt`. their position is used to detect which
+ // exception/interrupt occurred.
+ unsafe {
+ std::slice::from_raw_parts_mut(
+ self.host_ptr(self.interrupt_handlers_start()),
+ IDT_ENTRIES as usize
+ ).fill(0xf4);
+ }
+
+ // finally, set `rsp` to a valid region so that the CPU can push necessary state (see
+ // AMD APM section "8.9.3 Interrupt Stack Frame") to actually enter the interrupt
+ // handler. if we didn't do this, rsp will probably be zero or something, underflow,
+ // page fault on push to 0xffffffff_ffffffff, and just triple fault.
+ //
+ // TODO: this is our option in 16- and 32-bit modes, but in long mode all the interrupt
+ // descriptors could set something in IST to switch stacks outright for exception
+ // handling. this might be nice to test rsp permutations in 64-bit code? alternatively
+ // we might just have to limit possible rsp permutations so as to be able to test in
+ // 16- and 32-bit modes anyway.
+ //
+ // note: `Vm::create_by_settings` overwrites this value for `IsaMode::Real` right after
+ // calling this function.
+ regs.rsp = self.stack_addr().0;
+ self.idt_configured = true;
+ }
+
/// configure the vCPU for executing instructions in the hardware-supported extensions.
/// on a fresh vCPU, various extension may be "supported" but result in `#UD` when executed,
/// unless additional configuration is done (as this function does).