aboutsummaryrefslogtreecommitdiff
path: root/src/x86_64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/x86_64.rs')
-rw-r--r--src/x86_64.rs1023
1 files changed, 1023 insertions, 0 deletions
diff --git a/src/x86_64.rs b/src/x86_64.rs
new file mode 100644
index 0000000..da46380
--- /dev/null
+++ b/src/x86_64.rs
@@ -0,0 +1,1023 @@
+use core::fmt;
+use core::num::NonZero;
+use core::ptr::NonNull;
+use nix::sys::mman::{MapFlags, ProtFlags};
+
+use kvm_ioctls::{Kvm, VcpuFd, VmFd};
+use kvm_bindings::{
+ kvm_guest_debug, kvm_userspace_memory_region, kvm_segment,
+ KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP,
+};
+
+pub use kvm_bindings::{kvm_regs, kvm_sregs, kvm_debug_exit_arch};
+
// compile-time guard: the usize<->u64 casts below are only lossless when both types
// have the same width.
const _TARGET_IS_64BIT: () = {
    assert!(
        core::mem::size_of::<u64>() == core::mem::size_of::<usize>(),
        "asmlinator only supports 64-bit targets"
    );
};

/// lossless widening on 64-bit targets; guarded by `_TARGET_IS_64BIT`.
fn usize_to_u64(x: usize) -> u64 {
    let () = _TARGET_IS_64BIT;
    x as u64
}

/// lossless narrowing on 64-bit targets; guarded by `_TARGET_IS_64BIT`.
fn u64_to_usize(x: u64) -> usize {
    let () = _TARGET_IS_64BIT;
    x as usize
}
+
/// a test VM for running arbitrary instructions.
///
/// there is one CPU which is configured for long-mode execution. all memory is
/// identity-mapped with 1GiB pages. page tables are configured to cover 512 GiB of memory, but
/// much much less than that is actually allocated and usable through `memory.`
///
/// it is configured with `mem_size` bytes of memory at guest address 0, accessible through
/// host pointer `memory`. this region is used for "control structures"; page tables, GDT, IDT,
/// and stack. it is also the region where code to be executed is placed.
pub struct Vm {
    vm: VmFd,
    vcpu: VcpuFd,
    // set once `configure_idt` has run; `run()` uses it to translate `hlt`s in the
    // interrupt-stub region into `VcpuExit::Exception`.
    idt_configured: bool,
    // exclusive upper bound (gpa) across all mapped guest memory; drives how much
    // `configure_identity_paging` maps.
    mem_ceiling: u64,
    // slot-0 memory at gpa 0: control structures (page tables, GDT, IDT, stack) and code.
    memory: Mapping,
    // regions added via `add_memory`; each occupies its own KVM memory slot.
    aux_memories: Vec<Mapping>,
}
+
/// why the vCPU stopped; a simplified, borrow-carrying view of `kvm_ioctls::VcpuExit`.
#[derive(PartialEq)]
pub enum VcpuExit<'buf> {
    /// guest read at `addr` hit no memory slot; fill `buf` to supply the value.
    MmioRead { addr: u64, buf: &'buf mut [u8] },
    /// guest write at `addr` hit no memory slot; `buf` holds the written bytes.
    MmioWrite { addr: u64, buf: &'buf [u8] },
    /// `in` from `port`; fill `buf` to supply the value.
    IoIn { port: u16, buf: &'buf mut [u8] },
    /// `out` to `port`; `buf` holds the written bytes.
    IoOut { port: u16, buf: &'buf [u8] },
    /// debug trap (e.g. single-step); `pc` is the guest rip at the trap.
    Debug { pc: u64, info: kvm_debug_exit_arch },
    /// exception vector `nr`, detected via the `hlt`-stub IDT (see `configure_idt`).
    Exception { nr: u8 },
    Shutdown,
    Hlt,
}
+
+impl<'buf> fmt::Debug for VcpuExit<'buf> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use VcpuExit::*;
+ match self {
+ MmioRead { addr, buf } => {
+ let size = buf.len();
+ write!(f, "VcpuExit::MmioRead {{ addr: {addr:#08x}, size: {size} }}")
+ },
+ MmioWrite { addr, buf } => {
+ let size = buf.len();
+ write!(f, "VcpuExit::MmioWrite {{ addr: {addr:#08x}, size: {size} }}")
+ },
+ IoIn { port, buf } => {
+ let size = buf.len();
+ write!(f, "VcpuExit::IoIn {{ port: {port:#04x}, size: {size} }}")
+ },
+ IoOut { port, buf } => {
+ let size = buf.len();
+ write!(f, "VcpuExit::IoOut {{ port: {port:#04x}, size: {size} }}")
+ },
+ Debug { pc, info: _ } => {
+ write!(f, "VcpuExit::Debug {{ pc: {pc:#016x}, _ }}")
+ },
+ Exception { nr } => {
+ write!(f, "VcpuExit::Exception {{ nr: {nr} }}")
+ },
+ Shutdown => {
+ write!(f, "VcpuExit::Shutdown")
+ },
+ Hlt => {
+ write!(f, "VcpuExit::Hlt")
+ }
+ }
+ }
+}
+
+
// one gibibyte; the 1GiB-hugepage granularity used by the identity map.
const GB: u64 = 1 << 30;

// number of IDT entries in long mode (vectors 0..=255).
// TODO: cite APM/SDM
const IDT_ENTRIES: u16 = 256;

/// a guest-physical address.
#[derive(Copy, Clone)]
pub struct GuestAddress(pub u64);
+
/// a view of the VM's fixed two-page page-table area: a PML4 page followed by one PDPT page.
pub struct VmPageTables<'vm> {
    vm: &'vm Vm,
    // guest-physical address of the PML4 page; the PDPT follows one page later.
    base: GuestAddress,
}
+
impl<'vm> VmPageTables<'vm> {
    /// guest-physical address of the PML4 (page-map level 4) page.
    pub fn pml4_addr(&self) -> GuestAddress {
        self.base
    }

    /// guest-physical address of the PDPT page, laid out one page after the PML4.
    pub fn pdpt_addr(&self) -> GuestAddress {
        GuestAddress(self.base.0 + 0x1000)
    }

    /// host pointer to the first PML4 entry.
    pub fn pml4_mut(&self) -> *mut u64 {
        // SAFETY: creating VmPageTables implies we've asserted that we can form host pointers
        // for all addresses in the page tables.
        unsafe {
            self.vm.host_ptr(self.pml4_addr()) as *mut u64
        }
    }

    /// host pointer to the first PDPT entry.
    pub fn pdpt_mut(&self) -> *mut u64 {
        // SAFETY: creating VmPageTables implies we've asserted that we can form host pointers
        // for all addresses in the page tables.
        unsafe {
            self.vm.host_ptr(self.pdpt_addr()) as *mut u64
        }
    }
}
+
+fn encode_segment(seg: &kvm_segment) -> u64 {
+ let base = seg.base as u64;
+ let limit = seg.limit as u64;
+
+ let lim_low = limit & 0xffff;
+ let lim_high = (limit >> 16) & 0xf;
+ let addr_low = base & 0xffff;
+ let desc_low = lim_low | (addr_low << 16);
+
+ let base_mid = (base >> 16) & 0xff;
+ let base_high = (base >> 24) & 0xff;
+ let access_byte = (seg.type_ as u64)
+ | (seg.s as u64) << 4
+ | (seg.dpl as u64) << 5
+ | (seg.present as u64) << 7;
+ let flaglim_byte = lim_high
+ | (seg.avl as u64) << 4
+ | (seg.l as u64) << 5
+ | (seg.db as u64) << 6
+ | (seg.g as u64) << 7;
+ let desc_high = base_mid
+ | access_byte << 8
+ | flaglim_byte << 16
+ | base_high << 24;
+
+ desc_low | (desc_high << 32)
+}
+
/// errors from [`Vm::create`]; a superset of [`VmError`] (see the `From` impl below).
pub enum VmCreateError {
    /// the requested VM was smaller than `asmlinator`'s minimum allowable size.
    TooSmall { requested: usize, required: usize },
    /// the requested VM's memory size was not an even number of pages.
    BadSize { requested: usize, unit: usize },
    /// one of the several syscalls in setting up a new VM failed.
    ///
    /// this is most likely a permissions error, or `/dev/kvm` doesn't exist. otherwise, something
    /// interesting happened!
    ///
    /// this deserves better documentation, but i'm not aware of documentation for KVM ioctl
    /// failure modes.
    SyscallError { op: &'static str, err: nix::errno::Errno },
    /// `base` and `size` are not valid for mapping; either because of over/underflow, or overlaps
    /// with an existing mapping.
    InvalidMapping { base: GuestAddress, size: u64 }
}
+
+impl fmt::Debug for VmCreateError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ VmCreateError::TooSmall { requested, required } => {
+ write!(f, "requested memory size ({requested}) is too small, must be at least {required}")
+ }
+ VmCreateError::BadSize { requested, unit } => {
+ write!(f, "requested memory size ({requested}) is not a multiple of ({unit})")
+ }
+ VmCreateError::SyscallError { op, err } => {
+ write!(f, "error at {op}: {err}")
+ }
+ VmCreateError::InvalidMapping { base, size } => {
+ write!(f, "invalid mapping (gpa={:#08x}/size={:08x})", base.0, size)
+ }
+ }
+ }
+}
+
+impl fmt::Debug for VmError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ VmError::BadSize { requested, unit } => {
+ write!(f, "requested memory size ({requested}) is not a multiple of ({unit})")
+ }
+ VmError::SyscallError { op, err } => {
+ write!(f, "error at {op}: {err}")
+ }
+ VmError::InvalidMapping { base, size } => {
+ write!(f, "invalid mapping (gpa={:#08x}/size={:08x})", base.0, size)
+ }
+ }
+ }
+}
+
+
/// errors from operating an existing [`Vm`] (mapping memory, register access, running).
pub enum VmError {
    /// the requested VM's memory size was not an even number of pages.
    BadSize { requested: usize, unit: usize },
    /// one of the several syscalls in operating a VM failed.
    ///
    /// this deserves better documentation, but i'm not aware of documentation for KVM ioctl
    /// failure modes.
    SyscallError { op: &'static str, err: nix::errno::Errno },
    /// `base` and `size` are not valid for mapping; either because of over/underflow, or overlaps
    /// with an existing mapping.
    InvalidMapping { base: GuestAddress, size: u64 }
}
+
impl VmError {
    /// wrap a `kvm_ioctls` error (which carries a raw errno) as a `SyscallError` tagged
    /// with the operation name `op`.
    fn from_kvm(op: &'static str, err: kvm_ioctls::Error) -> Self {
        Self::SyscallError { op, err: nix::errno::Errno::from_raw(err.errno()) }
    }
}
+
// every VmError case maps 1:1 onto the same-named VmCreateError case, so `?` can
// promote VM-operation errors inside `Vm::create`.
impl From<VmError> for VmCreateError {
    fn from(other: VmError) -> Self {
        match other {
            VmError::BadSize { requested, unit } => VmCreateError::BadSize { requested, unit },
            VmError::SyscallError { op, err } => VmCreateError::SyscallError { op, err },
            VmError::InvalidMapping { base, size } => VmCreateError::InvalidMapping { base, size },
        }
    }
}
+
/// a `mmap`'d region, `munmap`'d on drop.
struct Mapping {
    // guest-physical address this region backs.
    guest_addr: usize,
    // host address returned by mmap; page-aligned (asserted in `create_shared`).
    addr: NonNull<core::ffi::c_void>,
    // length in bytes; a non-zero multiple of the 4096-byte page size.
    size: NonZero<usize>,
}
+
impl Drop for Mapping {
    fn drop(&mut self) {
        // SAFETY: `addr`/`size` describe exactly the region mmap returned in `create_shared`,
        // and nothing else unmaps it.
        let res = unsafe {
            nix::sys::mman::munmap(self.addr, self.size.get())
        };
        // munmap of a region we own can only fail on a programming error here.
        res.expect("can unmap a region we mapped");
    }
}
+
+impl Mapping {
    /// anonymously map `size` bytes of host memory to back guest memory at `guest_addr`.
    ///
    /// `size` must be a non-zero multiple of 4096. the mapping is MAP_SHARED so writes are
    /// visible to KVM once the region is registered as a memory slot.
    fn create_shared(guest_addr: usize, size: usize, prot: ProtFlags) -> Result<Self, VmError> {
        if size % 4096 != 0 {
            return Err(VmError::BadSize {
                requested: size,
                unit: 4096,
            });
        }

        // reject zero-size mappings (mmap with length 0 would fail anyway).
        let size = NonZero::new(size)
            .ok_or(VmError::BadSize {
                requested: 0,
                unit: 0,
            })?;

        let map_res = unsafe {
            nix::sys::mman::mmap_anonymous(
                None,
                size,
                prot,
                MapFlags::MAP_ANONYMOUS | MapFlags::MAP_SHARED,
            )
        };

        let map_addr = map_res
            .map_err(|e| VmError::SyscallError { op: "mmap", err: e })?;

        // look, mmap should only be in the business of returning page-aligned addresses but i
        // just wanna see it, you know...
        assert!(map_addr.as_ptr() as usize % 4096 == 0);

        Ok(Self {
            guest_addr,
            addr: map_addr,
            size,
        })
    }
+
+ /// SAFETY: the caller must not use the returned pointer to violate reference safety of the VM.
+ /// the pointer must not be turned into a reference while running the VM, etc.
+ ///
+ /// panics if `address` is not contained in this mapping.
+ unsafe fn host_ptr(&self, address: GuestAddress) -> *mut u8 {
+ let guest_addr: u64 = usize_to_u64(self.guest_addr);
+ let offset = address.0.checked_sub(guest_addr)
+ .expect("guest address is above mapping base");
+
+ let base = self.addr.as_ptr() as *mut u8;
+
+ unsafe {
+ base.offset(offset as isize)
+ }
+ }
+
    /// view `size` bytes of this mapping at guest address `base` as a mutable byte slice.
    ///
    /// SAFETY: the caller must ensure that this mapping covers `base` and that there are at least
    /// `size` bytes at `base` before the end of this mapping.
    unsafe fn slice_mut(&mut self, base: GuestAddress, size: u64) -> &mut [u8] {
        let ptr = unsafe { self.host_ptr(base) };

        unsafe {
            core::slice::from_raw_parts_mut(ptr, u64_to_usize(size))
        }
    }

    /// view `size` bytes of this mapping at guest address `base` as a shared byte slice.
    ///
    /// SAFETY: the caller must ensure that this mapping covers `base` and that there are at least
    /// `size` bytes at `base` before the end of this mapping.
    unsafe fn slice(&self, base: GuestAddress, size: u64) -> &[u8] {
        let ptr = unsafe { self.host_ptr(base) };

        unsafe {
            core::slice::from_raw_parts(ptr, u64_to_usize(size))
        }
    }
+
+ fn overlaps(&self, base: GuestAddress, index_end: GuestAddress) -> bool {
+ let map_base: u64 = usize_to_u64(self.guest_addr);
+ let map_end = map_base.checked_add(usize_to_u64(self.size.get())).unwrap();
+
+ let enclosed_by = base.0 <= map_base && index_end.0 >= map_end;
+ let contains_base = base.0 >= map_base && base.0 < map_end;
+ let contains_end = index_end.0 >= map_base && index_end.0 <= map_end;
+
+ enclosed_by || contains_base || contains_end
+ }
+
+ fn contains(&self, base: GuestAddress) -> bool {
+ let end = self.guest_addr.checked_add(self.size.get()).unwrap();
+
+ base.0 >= self.guest_addr as u64 && base.0 < end as u64
+ }
+
+ fn check_range(&self, base: GuestAddress, size: u64) -> bool {
+ let map_base: u64 = self.guest_addr.try_into().unwrap();
+ let Some(offset) = base.0.checked_sub(map_base) else {
+ return false;
+ };
+ let Some(end) = offset.checked_add(size) else {
+ return false;
+ };
+
+ end <= self.size.get().try_into().unwrap()
+ }
+}
+
// a range exactly spanning the whole mapping must be accepted (the end is exclusive).
#[test]
fn test_check_range_exact() {
    let mapping = Mapping::create_shared(0x4000, 0x1000, ProtFlags::PROT_READ).expect("can create mapping");
    assert!(mapping.check_range(GuestAddress(0x4000), 0x1000));
}
+
// single-step one `xor eax, eax` and check both the debug exit and the register effect.
#[test]
fn test_xor_runs() {
    let mut vm = Vm::create(128 * 1024).expect("can create vm");
    let mut regs = vm.get_regs().expect("can get regs");

    // 0x33 0xc0: `xor eax, eax`
    vm.program(&[0x33, 0xc0], &mut regs);

    regs.rax = 0x1234;
    let rip_before = regs.rip;

    vm.set_regs(&regs).expect("can set regs");

    vm.set_single_step(true).expect("can set single-step");

    let res = vm.run().expect("can run vm");

    // fix: the previous pattern `VcpuExit::Debug { pc: rip_after, .. }` *bound* `pc` to a
    // fresh `rip_after` variable instead of comparing against the local, so the assertion
    // could never fail. a match guard performs the actual comparison.
    let rip_after = rip_before + 2;
    assert!(matches!(res, VcpuExit::Debug { pc, .. } if pc == rip_after));

    let regs_after = vm.get_regs().expect("can get regs");
    assert_eq!(regs_after.rax, 0);
}
+
+impl Vm {
+ pub fn create(mem_size: usize) -> Result<Vm, VmCreateError> {
+ let kvm = Kvm::new()
+ .map_err(|e| VmError::from_kvm("Kvm::new()", e))?;
+
+ let vm = kvm.create_vm()
+ .map_err(|e| VmError::from_kvm("craete_vm", e))?;
+
+ // actual minimum is somewhere around 0x1a000 bytes, but 0x20_000 aka 128k will do
+ if mem_size < 128 * 1024 {
+ return Err(VmCreateError::TooSmall {
+ requested: mem_size,
+ required: 128 * 1024,
+ });
+ }
+
+ let mapping = Mapping::create_shared(0, mem_size, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE)?;
+
+ let region = kvm_userspace_memory_region {
+ slot: 0,
+ guest_phys_addr: 0x0000,
+ memory_size: mapping.size.get() as u64,
+ userspace_addr: mapping.addr.as_ptr() as u64,
+ flags: 0,
+ };
+
+ let set_res = unsafe { vm.set_user_memory_region(region) };
+ set_res.map_err(|e| VmError::from_kvm("set_user_memory_region", e))?;
+
+ let vcpu_res = vm.create_vcpu(0);
+ let vcpu = vcpu_res.map_err(|e| VmError::from_kvm("create_vcpu(0)", e))?;
+
+ let mem_ceiling = mapping.size.get().try_into().unwrap();
+
+ let mut this = Vm {
+ vm,
+ vcpu,
+ idt_configured: false,
+ memory: mapping,
+ aux_memories: Vec::new(),
+ mem_ceiling,
+ };
+
+ let mut vcpu_regs = this.get_regs()?;
+ let mut vcpu_sregs = this.get_sregs()?;
+
+ unsafe {
+ this.configure_identity_paging(Some(&mut vcpu_sregs));
+ this.configure_selectors(&mut vcpu_sregs);
+ this.configure_idt(&mut vcpu_regs, &mut vcpu_sregs);
+ }
+
+ vcpu_sregs.efer = 0x0000_0500; // LME | LMA
+
+ this.set_regs(&vcpu_regs)?;
+ this.set_sregs(&vcpu_sregs)?;
+
+ Ok(this)
+ }
+
    /// map and add a region of size `size` at guest-physical address `gpa`.
    ///
    /// this will not update page tables, so if the newly-added memory is not already mapped due to
    /// a previous `configure_identity_paging` call and it is not mapped due to explicit page table
    /// management, it will not yet be accessible by guest code.
    pub fn add_memory(&mut self, gpa: GuestAddress, size: u64) -> Result<(), VmError> {
        // reject regions that wrap the address space or overlap any existing mapping.
        let new_mapping_end = gpa.0.checked_add(size)
            .map(|addr| GuestAddress(addr))
            .ok_or_else(|| VmError::InvalidMapping { base: gpa, size })?;
        if self.memory.overlaps(gpa, new_mapping_end) {
            return Err(VmError::InvalidMapping { base: gpa, size });
        } else {
            for mapping in self.aux_memories.iter() {
                if mapping.overlaps(gpa, new_mapping_end) {
                    return Err(VmError::InvalidMapping { base: gpa, size });
                }
            }
        }

        let mapping = Mapping::create_shared(
            u64_to_usize(gpa.0),
            u64_to_usize(size),
            ProtFlags::PROT_READ | ProtFlags::PROT_WRITE
        )?;

        // slot 0 holds the main memory region, so aux region N lives in KVM slot N + 1.
        let used_slots: u32 = self.aux_memories.len().try_into()
            .map_err(|_| VmError::InvalidMapping { base: gpa, size })?;
        let next_slot = used_slots.checked_add(1)
            .ok_or_else(|| VmError::InvalidMapping { base: gpa, size })?;

        let region = kvm_userspace_memory_region {
            slot: next_slot,
            guest_phys_addr: gpa.0,
            memory_size: mapping.size.get() as u64,
            userspace_addr: mapping.addr.as_ptr() as u64,
            flags: 0,
        };

        // SAFETY: `mapping` is pushed into `aux_memories` below, so it stays mapped for the
        // lifetime of the VM.
        let set_res = unsafe { self.vm.set_user_memory_region(region) };
        set_res.map_err(|e| VmError::from_kvm("set_user_memory_region", e))?;

        self.aux_memories.push(mapping);

        // raise the ceiling so a later `configure_identity_paging` call covers this region.
        if new_mapping_end.0 > self.mem_ceiling {
            self.mem_ceiling = new_mapping_end.0;
        }

        Ok(())
    }
+
    /// read the vCPU's general-purpose registers.
    pub fn get_regs(&self) -> Result<kvm_regs, VmError> {
        self.vcpu.get_regs()
            .map_err(|e| VmError::from_kvm("get_regs", e))
    }

    /// read the vCPU's special registers (segments, control registers, descriptor tables).
    pub fn get_sregs(&self) -> Result<kvm_sregs, VmError> {
        self.vcpu.get_sregs()
            .map_err(|e| VmError::from_kvm("get_sregs", e))
    }

    /// write the vCPU's general-purpose registers.
    pub fn set_regs(&self, regs: &kvm_regs) -> Result<(), VmError> {
        self.vcpu.set_regs(regs)
            .map_err(|e| VmError::from_kvm("set_regs", e))
    }

    /// write the vCPU's special registers.
    pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<(), VmError> {
        self.vcpu.set_sregs(sregs)
            .map_err(|e| VmError::from_kvm("set_sregs", e))
    }

    /// has `configure_idt` run? (it has, for any `Vm` built via `Vm::create`.)
    pub fn idt_configured(&self) -> bool {
        self.idt_configured
    }
+
+ // TODO: seems like there's a KVM bug where if the VM is configured for single-step and the
+ // single-stepped instruction is a rmw to MMIO memory (or MMIO hugepages?), the single-step
+ // doesn't actually take effect. compare `0x33 0x00` and `0x31 0x00`. what the hell!
+ pub fn set_single_step(&mut self, active: bool) -> Result<(), VmError> {
+ let mut guest_debug = kvm_guest_debug::default();
+
+ if active {
+ guest_debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP
+ };
+
+ self.vcpu.set_guest_debug(&guest_debug)
+ .map_err(|e| VmError::from_kvm("set_guest_debug", e))
+ }
+
    /// run the vCPU until its next exit, translated into a [`VcpuExit`].
    ///
    /// when the IDT has been configured, a `hlt` exit whose rip lands inside the
    /// interrupt-stub region is reported as [`VcpuExit::Exception`] instead of `Hlt`.
    pub fn run<'vm>(&'vm mut self) -> Result<VcpuExit<'vm>, VmError> {
        let exit = self.vcpu.run()
            .map_err(|e| VmError::from_kvm("vcpu run", e))?;

        match exit {
            kvm_ioctls::VcpuExit::MmioRead(addr, buf) => {
                // `buf` is typed with a lifetime from the reborrow of self.vcpu for run() above.
                // this means it's a shorter lifetime than `'vm`, but since the resulting lifetime
                // is also `'vm` it *really* has the effect of disallowing any subsequent use of
                // `self`. these transmutes decouple the lifetime of `exit` from the lifetime of
                // `self` and returned `VcpuExit`, so other arms that don't involve lifetimes can
                // drop `exit()` and query the vcpu.
                //
                // SAFETY: this actually extends the lifetime of `buf` from the shorter transient
                // lifetime to `'vm` for the return type.
                let buf: &'vm mut [u8] = unsafe { core::mem::transmute(buf) };
                return Ok(VcpuExit::MmioRead { buf, addr });
            }
            kvm_ioctls::VcpuExit::MmioWrite(addr, buf) => {
                // see the same transmute in `MmioRead` for why this is load-bearing.
                //
                // SAFETY: this actually extends the lifetime of `buf` from the shorter transient
                // lifetime to `'vm` for the return type.
                let buf: &'vm [u8] = unsafe { core::mem::transmute(buf) };
                return Ok(VcpuExit::MmioWrite { buf, addr });
            }
            kvm_ioctls::VcpuExit::IoIn(port, buf) => {
                // see the same transmute in `MmioRead` for why this is load-bearing.
                //
                // SAFETY: this actually extends the lifetime of `buf` from the shorter transient
                // lifetime to `'vm` for the return type.
                let buf: &'vm mut [u8] = unsafe { core::mem::transmute(buf) };
                return Ok(VcpuExit::IoIn { port, buf });
            }
            kvm_ioctls::VcpuExit::IoOut(port, buf) => {
                // see the same transmute in `MmioRead` for why this is load-bearing.
                //
                // SAFETY: this actually extends the lifetime of `buf` from the shorter transient
                // lifetime to `'vm` for the return type.
                let buf: &'vm [u8] = unsafe { core::mem::transmute(buf) };
                return Ok(VcpuExit::IoOut { port, buf });
            }
            kvm_ioctls::VcpuExit::Debug(info) => {
                let pc = info.pc;
                return Ok(VcpuExit::Debug { pc, info });
            }
            kvm_ioctls::VcpuExit::Hlt => {
                let regs = self.get_regs()?;

                if self.idt_configured {
                    let intrs_start = self.interrupt_handlers_start().0;
                    // the stubs are one `hlt` byte per vector, so the region spans IDT_ENTRIES bytes.
                    let intrs_end = intrs_start + IDT_ENTRIES as u64;
                    // by the time we've exited, the `hlt` of the interrupt handler has completed,
                    // so rip is advanced by one. subtract back out to convert to an exception
                    // vector number.
                    let intr_addr = regs.rip - 1;

                    if intr_addr >= intrs_start && intr_addr < intrs_end {
                        let nr = intr_addr - intrs_start;
                        // because IDT_ENTRIES is 256, this should always be true..
                        assert!(nr < 256);
                        let nr = nr as u8;

                        return Ok(VcpuExit::Exception { nr });
                    }
                }

                Ok(VcpuExit::Hlt)
            }
            kvm_ioctls::VcpuExit::Shutdown => {
                return Ok(VcpuExit::Shutdown);
            }
            other => {
                panic!("unhandled VcpuExit kind: {other:?}");
            }
        }
    }
+
    /// get a pointer to host memory mapped to guest address `address`.
    ///
    /// panics if `address` is not a guest-physical address backed by host memory.
    ///
    /// # Safety
    ///
    /// the caller must not use the returned pointer to violate reference safety of the VM —
    /// in particular, it must not be turned into a reference while the VM is running (the
    /// same contract as `Mapping::host_ptr`).
    pub unsafe fn host_ptr(&self, address: GuestAddress) -> *mut u8 {
        let mapping = self.map_containing(address, 0)
            .expect("mapping for address exists");

        unsafe {
            mapping.host_ptr(address)
        }
    }
+
    /// guest-physical address of the GDT (one page).
    pub fn gdt_addr(&self) -> GuestAddress {
        GuestAddress(0x1000)
    }

    /// guest-physical address of the IDT (256 entries of 16 bytes).
    pub fn idt_addr(&self) -> GuestAddress {
        GuestAddress(0x2000)
    }

    /// guest-physical address of the 256 one-byte `hlt` interrupt stubs (see `configure_idt`).
    pub fn interrupt_handlers_start(&self) -> GuestAddress {
        GuestAddress(0x3000)
    }

    /// guest-physical address of the page tables: a PML4 page followed by a PDPT page.
    pub fn page_table_addr(&self) -> GuestAddress {
        GuestAddress(0x10000)
    }

    /// guest-physical address where `program` places code: the last page of slot-0 memory.
    pub fn code_addr(&self) -> GuestAddress {
        GuestAddress(self.memory.size.get() as u64 - 4096)
    }

    /// exclusive upper bound (gpa) across all mapped guest memory.
    pub fn mem_ceiling(&self) -> u64 {
        self.mem_ceiling
    }

    /// configuring the IDT implies the IDT might be used which means we want a stack pointer
    /// that can have at least 0x18 bytes pushed to it if an interrupt happens.
    pub fn stack_addr(&self) -> GuestAddress {
        // it would be nice to point the stack somewhere that we could get MMIO exits and see the
        // processor push words for the interrupt in real time, but that doesn't ... work.
        // instead, you end up in a loop somewhere around svm_vcpu_run (which you can ^C out of,
        // thankfully).
        //
        // so this picks some guest memory lower down.

        // stack grows *down* but if someone pops a lot of bytes from rsp we'd go up and
        // clobber the page tables. so leave a bit of space.
        GuestAddress(0x19800)
    }

    /// selector 0x10 is chosen arbitrarily for code.
    pub fn selector_cs(&self) -> u16 {
        0x10
    }

    /// selector 0x18 is chosen arbitrarily for data (all segments; ss, ds, es, etc).
    pub fn selector_ds(&self) -> u16 {
        0x18
    }
+
+ fn map_containing_mut(&mut self, base: GuestAddress, size: u64) -> Option<&mut Mapping> {
+ let mapping = if self.memory.contains(base) {
+ &mut self.memory
+ } else {
+ self.aux_memories.iter_mut()
+ .find(|map| map.contains(base))?
+ };
+
+ if !mapping.check_range(base, size) {
+ return None;
+ }
+
+ Some(mapping)
+ }
+
+ fn map_containing(&self, base: GuestAddress, size: u64) -> Option<&Mapping> {
+ let mapping = if self.memory.contains(base) {
+ &self.memory
+ } else {
+ self.aux_memories.iter()
+ .find(|map| map.contains(base))?
+ };
+
+ if !mapping.check_range(base, size) {
+ return None;
+ }
+
+ Some(mapping)
+ }
+
    /// write all of `data` into guest memory at guest-physical address `addr`.
    ///
    /// panics if `data` extends beyond the end of guest memory.
    pub fn write_mem(&mut self, addr: GuestAddress, data: &[u8]) {
        let mapping = self.map_containing(addr, data.len() as u64).expect("mapping is valid");

        // SAFETY: `check_range` above validates the range to copy, and... please do not
        // provide a slice of guest memory as what the guest should be programmed for...
        unsafe {
            std::ptr::copy_nonoverlapping(
                data.as_ptr(),
                mapping.host_ptr(addr),
                data.len()
            );
        }
    }
+
    /// read guest-physical memory at `addr` to `addr + buf.len()` into `buf`.
    ///
    /// panics if `addr + buf.len()` extends beyond the end of guest memory.
    pub fn read_mem(&mut self, addr: GuestAddress, buf: &mut [u8]) {
        let mapping = self.map_containing(addr, buf.len() as u64).expect("mapping is valid");

        // SAFETY: `check_range` above validates the range to copy, and... please do not
        // provide a slice of guest memory as what should be read into...
        unsafe {
            std::ptr::copy_nonoverlapping(
                mapping.host_ptr(addr) as *const _,
                buf.as_mut_ptr(),
                buf.len()
            );
        }
    }
+
    /// returns a slice of guest memory pointed to by guest-physical address `addr`, of size
    /// `size`.
    ///
    /// panics if `addr + size` is not enclosed in a single guest mapping. this crate doesn't
    /// support returning a single slice of adjacent guest memory regions (yet?), sorry.
    pub fn mem_slice_mut<'vm>(&'vm mut self, addr: GuestAddress, size: u64) -> &'vm mut [u8] {
        let mapping = self.map_containing_mut(addr, size).expect("mapping is valid");

        // SAFETY: we have an exclusive borrow of the VM, so it is not currently running, and there
        // is no other outstanding slice of guest memory. `map_containing_mut` has already ensured
        // that this mapping contains the whole range `[addr, addr + size)`.
        unsafe {
            mapping.slice_mut(addr, size)
        }
    }

    /// returns a slice of guest memory pointed to by guest-physical address `addr`, of size
    /// `size`.
    ///
    /// panics if `addr + size` is not enclosed in a single guest mapping. this crate doesn't
    /// support returning a single slice of adjacent guest memory regions (yet?), sorry.
    pub fn mem_slice<'vm>(&'vm self, addr: GuestAddress, size: u64) -> &'vm [u8] {
        let mapping = self.map_containing(addr, size).expect("mapping is valid");

        // SAFETY: we hold a *shared* borrow of the VM for `'vm`, so `run` (which needs `&mut
        // self`) cannot execute while this slice is live, and no `&mut` slice of guest memory
        // can be outstanding; other shared slices may coexist. `map_containing` has already
        // ensured that this mapping contains the whole range `[addr, addr + size)`.
        unsafe {
            mapping.slice(addr, size)
        }
    }
+
+ /// write `code` into guest memory and set `regs.rip` to the address of that code.
+ ///
+ /// the chosen code address is [`Self::code_addr`].
+ pub fn program(&mut self, code: &[u8], regs: &mut kvm_regs) {
+ let addr = self.code_addr();
+ self.write_mem(addr, code);
+
+ regs.rip = addr.0;
+ }
+
    /// host pointer to the 8-byte GDT descriptor at index `idx`.
    fn gdt_entry_mut(&mut self, idx: u16) -> *mut u64 {
        // one page at `gdt_addr()` is reserved for the GDT, so up to 512 descriptors fit
        // (the architectural maximum is larger):
        //
        // > 3.5.1 Segment Descriptor Tables
        // > A segment descriptor table is an array of segment descriptors (see Figure 3-10). A
        // > descriptor table is variable in length and can contain up to 8192 (2^13) 8-byte
        // > descriptors.

        assert!(idx < 4096 / 8);
        let addr = GuestAddress(self.gdt_addr().0 + (idx as u64 * 8));
        let mapping = self.map_containing(addr, std::mem::size_of::<u64>() as u64).unwrap();

        // SAFETY: idx * 8 can't overflow isize, and we've asserted the end of the pointer is
        // still inside the allocation (`self.memory`).
        unsafe {
            mapping.host_ptr(addr) as *mut u64
        }
    }
+
    // note this returns a `*mut u32`, but an IDT entry is four u32s. the u32 this points at
    // is the first of the four for the entry.
    fn idt_entry_mut(&mut self, idx: u8) -> *mut u32 {
        let addr = GuestAddress(self.idt_addr().0 + (idx as u64 * 16));
        let mapping = self.map_containing(addr, std::mem::size_of::<[u64; 2]>() as u64).unwrap();

        // SAFETY: `map_containing` above validated that the whole 16-byte entry lies inside
        // `mapping`.
        unsafe {
            mapping.host_ptr(addr) as *mut u32
        }
    }
+
    /// view of the VM's page-table pages; asserts they are backed by host memory so the
    /// `VmPageTables` accessors can form host pointers unconditionally.
    pub fn page_tables(&self) -> VmPageTables<'_> {
        let base = self.page_table_addr();

        // the page tables are really just two pages: a PML4 and a PDPT for its first 512G of
        // address space.
        assert!(self.map_containing(base, 0x2000).is_some());

        VmPageTables {
            vm: self,
            base,
        }
    }
+
    /// configure page tables for identity mapping of all memory from guest address zero up to the
    /// end of added memory regions, rounded up to the next GiB.
    ///
    /// if `sregs` is provided, update `cr0`, `cr3`, and `cr4` in support of protected-mode or
    /// long-mode paging. this is a fixed pattern: if control registers have not been changed since
    /// `Vm::create` then there will be no change to these control registers and `sregs` can be
    /// omitted.
    ///
    /// panics if the end of added memory regions is above 512 GiB.
    ///
    /// # Safety
    ///
    /// writes the page-table pages through raw pointers; the caller must ensure no other
    /// reference to that guest memory is live (e.g. a slice from `mem_slice`).
    pub unsafe fn configure_identity_paging(&mut self, sregs: Option<&mut kvm_sregs>) {
        let pt = self.page_tables();

        // we're only setting up one PDPT, which can have up to 512 PDPTE covering 1G each.
        assert!(self.mem_ceiling() <= 512 * GB);

        // TODO: expects 1G page support

        let pml4_ent =
            1 << 0 | // P
            1 << 1 | // RW
            1 << 2 | // user access allowed. but no user code will run so not strictly needed.
            0 << 3 | // PWT (TODO: configure PAT explicitly, but PAT0 is sufficient)
            0 << 4 | // PCD (TODO: configure PAT explicitly, but PAT0 is sufficient)
            0 << 5 | // A
            0 << 6 | // ignored
            0 << 7 | // PS (reserved must-be-0)
            0 << 11 | // R (for ordinary paging, ignored; for HLAT ...)
            pt.pdpt_addr().0;
        // SAFETY: `page_tables()` asserted the PML4 page is backed by host memory.
        unsafe {
            pt.pml4_mut().write(pml4_ent);
        }

        let mut mapped: u64 = 0;
        // we've set up the first PML4 to point to a PDPT, so we should actually set it up!
        let pdpt = pt.pdpt_mut();
        // PDPTEs start at the start of PDPT..
        let mut pdpte = pdpt;
        let entry_bits: u64 =
            1 << 0 | // P
            1 << 1 | // RW
            1 << 2 | // user accesses allowed (everything is under privilege level 0 tho)
            0 << 3 | // PWT (TODO: configure PAT explicitly, but PAT0 is sufficient)
            0 << 4 | // PCD (TODO: configure PAT explicitly, but PAT0 is sufficient)
            0 << 5 | // Accessed
            0 << 6 | // Dirty
            1 << 7 | // Page size (1 implies 1G page)
            1 << 8 | // Global (if cr4.pge)
            0 << 9 |
            0 << 10 |
            0 << 11 | // for ordinary paging, ignored. for HLAT, ...
            0 << 12; // PAT (TODO: configure explicitly, but PAT0 is sufficient. verify MTRR sets PAT0 to WB?)

        // one 1GiB PDPTE per GiB of guest memory, identity-mapped (gpa == va).
        while mapped < self.mem_ceiling() {
            let phys_num = mapped >> 30;
            let entry = entry_bits | (phys_num << 30);
            unsafe {
                pdpte.write(entry);
                pdpte = pdpte.offset(1);
            }
            // eprintln!("mapped 1g at {:08x}", mapped);
            mapped += 1 << 30;
        }

        if let Some(sregs) = sregs {
            sregs.cr0 |= 0x8000_0001; // cr0.PE | cr0.PG
            sregs.cr3 = pt.pml4_addr().0 as u64;
            sregs.cr4 |= 1 << 5; // enable PAE
        }
    }
+
    /// seed `sregs` with flat long-mode code/data segments and write matching descriptors
    /// into the GDT.
    ///
    /// SAFETY contract: writes the GDT page through raw pointers; the caller must ensure no
    /// other reference to that guest memory is live.
    unsafe fn configure_selectors(&mut self, sregs: &mut kvm_sregs) {
        // we have to set descriptor information directly. this avoids having to load selectors
        // as the first instructions on the vCPU, which is simplifying. but if we want the
        // information in these selectors to match with anything in a GDT (i do!) we'll have to
        // keep this initial state lined up with GDT entries ourselves.
        //
        // we could avoid setting up the GDT for the most part, but anything that might
        // legitimately load the "valid" current segment selector would instead clobber the
        // selector with zeroes.

        sregs.cs.base = 0;
        sregs.cs.limit = 0;
        sregs.cs.selector = self.selector_cs();
        sregs.cs.type_ = 0b1011; // see SDM table 3-1 Code- and Data-Segment Types
        sregs.cs.present = 1;
        sregs.cs.dpl = 0;
        sregs.cs.db = 0;
        sregs.cs.s = 1;
        sregs.cs.l = 1;
        sregs.cs.g = 0;
        sregs.cs.avl = 0;

        sregs.ds.base = 0;
        sregs.ds.limit = 0xffffffff;
        sregs.ds.selector = self.selector_ds();
        sregs.ds.type_ = 0b0011; // see SDM table 3-1 Code- and Data-Segment Types
        sregs.ds.present = 1;
        sregs.ds.dpl = 0;
        sregs.ds.db = 0;
        sregs.ds.s = 1;
        sregs.ds.l = 0;
        sregs.ds.g = 0;
        sregs.ds.avl = 0;

        sregs.es = sregs.ds;
        sregs.fs = sregs.ds;
        sregs.gs = sregs.ds;
        // linux populates the vmcb cpl field with whatever's in ss.dpl. what the hell???
        sregs.ss = sregs.ds;

        sregs.gdt.base = self.gdt_addr().0;
        // NOTE(review): limit covers 256 descriptors, though `gdt_entry_mut` permits up to
        // 512 — confirm whether the limit should track the page size.
        sregs.gdt.limit = 256 * 8 - 1;

        unsafe {
            self.gdt_entry_mut(self.selector_cs() >> 3).write(encode_segment(&sregs.cs));
            self.gdt_entry_mut(self.selector_ds() >> 3).write(encode_segment(&sregs.ds));
        }
    }
+
    /// write the 16-byte long-mode interrupt-gate descriptor for vector `intr_nr`, targeting
    /// `interrupt_handler_addr` in segment `interrupt_handler_cs`.
    fn write_idt_entry(
        &mut self,
        intr_nr: u8,
        interrupt_handler_cs: u16,
        interrupt_handler_addr: GuestAddress
    ) {
        let idt_ptr = self.idt_entry_mut(intr_nr);

        // entries in the IDT, interrupt and trap descriptors (in the AMD APM, "interrupt-gate"
        // and "trap-gate" descriptors), are described (in the AMD APM) by
        // "Figure 4-24. Interrupt-Gate and Trap-Gate Descriptors—Long Mode". reproduced here:
        //
        // 3                   2                   1 |                 1                   0
        // 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
        // |---------------------------------------------------------------|
        // |                            res,ign                            | +12
        // |                     target offset[63:32]                      | +8
        // | target offset[31:16] |P|DPL|0| type | res,ign |      IST      | +4
        // |    target selector   |           target offset[15:0]          | +0
        // |---------------------------------------------------------------|
        //
        // descriptors are encoded with P set, DPL at 0, and type set to 0b1110. TODO: frankly
        // i don't know the mechanical difference between type 0x0e and type 0x0f, but 0x0e
        // works for now.
        let idt_attr_bits = 0b1_00_0_1110_00000_000;
        let low_hi = (interrupt_handler_addr.0 as u32 & 0xffff_0000) | idt_attr_bits;
        let low_lo = (interrupt_handler_cs as u32) << 16 | (interrupt_handler_addr.0 as u32 & 0x0000_ffff);

        // SAFETY: `idt_entry_mut` validated that all 16 bytes of the entry lie inside guest
        // memory.
        unsafe {
            idt_ptr.offset(0).write(low_lo);
            idt_ptr.offset(1).write(low_hi);
            idt_ptr.offset(2).write((interrupt_handler_addr.0 >> 32) as u32);
            idt_ptr.offset(3).write(0); // reserved
        }
    }
+
    /// install a 256-entry IDT whose handlers are one-byte `hlt` stubs, so `run()` can map
    /// a `hlt` exit rip back to the exception vector that fired. also points `rsp` at a
    /// usable stack and records `idt_configured`.
    fn configure_idt(&mut self, regs: &mut kvm_regs, sregs: &mut kvm_sregs) {
        sregs.idt.base = self.idt_addr().0;
        sregs.idt.limit = IDT_ENTRIES * 16 - 1; // IDT is 256 entries of 16 bytes each

        for i in 0..IDT_ENTRIES {
            // one distinct handler byte per vector; the address encodes the vector number.
            let interrupt_handler_addr = GuestAddress(self.interrupt_handlers_start().0 + i as u64);
            self.write_idt_entry(
                i.try_into().expect("<u8::MAX interrupts"),
                self.selector_cs(),
                interrupt_handler_addr
            );
        }

        // all interrupt handlers are just `hlt`. their position is used to detect which
        // exception/interrupt occurred.
        // SAFETY: the stub region lies in slot-0 memory (host_ptr panics otherwise), and no
        // other reference to guest memory is live while creating the VM.
        unsafe {
            std::slice::from_raw_parts_mut(
                self.host_ptr(self.interrupt_handlers_start()),
                IDT_ENTRIES as usize
            ).fill(0xf4);
        }

        // finally, set `rsp` to a valid region so that the CPU can push necessary state (see
        // AMD APM section "8.9.3 Interrupt Stack Frame") to actually enter the interrupt
        // handler. if we didn't do this, rsp will probably be zero or something, underflow,
        // page fault on push to 0xffffffff_ffffffff, and just triple fault.
        //
        // TODO: this is our option in 16- and 32-bit modes, but in long mode all the interrupt
        // descriptors could set something in IST to switch stacks outright for exception
        // handling. this might be nice to test rsp permutations in 64-bit code? alternatively
        // we might just have to limit possible rsp permutations so as to be able to test in
        // 16- and 32-bit modes anyway.
        regs.rsp = self.stack_addr().0;
        self.idt_configured = true;
    }
+}