From 4e0cab8fa5461bb32274e2ebba588c964e5a3cd7 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 22 Apr 2026 05:33:43 +0000 Subject: support syscall and a corresponding exit kind --- src/x86_64.rs | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 3 deletions(-) (limited to 'src/x86_64.rs') diff --git a/src/x86_64.rs b/src/x86_64.rs index a04dfb5..f30e2be 100644 --- a/src/x86_64.rs +++ b/src/x86_64.rs @@ -6,8 +6,8 @@ use nix::sys::mman::{MapFlags, ProtFlags}; use kvm_ioctls::{Kvm, VcpuFd, VmFd}; use kvm_bindings::{ - kvm_cpuid_entry2, kvm_guest_debug, - kvm_userspace_memory_region, kvm_segment, CpuId, + kvm_cpuid_entry2, kvm_guest_debug, kvm_msr_entry, + kvm_userspace_memory_region, kvm_segment, CpuId, Msrs, KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_MAX_CPUID_ENTRIES, }; @@ -45,6 +45,7 @@ pub struct Vm { supported_cpuid: CpuId, current_cpuid: CpuId, idt_configured: bool, + syscall_configured: bool, mem_ceiling: u64, memory: Mapping, aux_memories: Vec, @@ -57,6 +58,12 @@ pub struct Vm { /// not yet sure, so this is not pub. #[derive(Copy, Clone, Debug)] enum Feature { + /// support for long mode and miscellaneous baseline instructions. + /// + /// `asmlinator` assumes these features are always supported. + Base, + /// support for syscall/sysret instructions. + Syscall, /// support for the xsave/xrstor instructions and at least xcr0. 
/// /// cpuid leaf eax=0x0000_0001 bit ecx[26], see APM @@ -87,7 +94,14 @@ const CPUID_0000000D_EAX_AVX512: u32 = (1 << 5) | (1 << 6) | (1 << 7); const CPUID_80000001_EDX_PDPE1GB: u32 = 1 << 26; +// AMD APM `System Instruction Support Indicated by CPUID Feature Bits` +const CPUID_00000001_EDX_MSR: u32 = 1 << 5; +const CPUID_00000007_EBX_CLSTAC: u32 = 1 << 20; +const CPUID_80000001_EDX_SYSCALL: u32 = 1 << 11; +const CPUID_80000001_EDX_LM: u32 = 1 << 29; + #[derive(PartialEq)] +#[non_exhaustive] pub enum VcpuExit<'buf> { MmioRead { addr: u64, buf: &'buf mut [u8] }, MmioWrite { addr: u64, buf: &'buf [u8] }, @@ -97,6 +111,7 @@ pub enum VcpuExit<'buf> { Exception { nr: u8 }, Shutdown, Hlt, + Syscall, } impl<'buf> fmt::Debug for VcpuExit<'buf> { @@ -130,6 +145,9 @@ impl<'buf> fmt::Debug for VcpuExit<'buf> { }, Hlt => { write!(f, "VcpuExit::Hlt") + }, + Syscall => { + write!(f, "VcpuExit::Syscall") } } } @@ -443,6 +461,42 @@ fn test_xor_runs() { } #[test] +fn test_syscall() { + let mut vm = Vm::create(128 * 1024).expect("can create vm"); + let mut regs = vm.get_regs().expect("can get regs"); + + vm.program(&[0x0f, 0x05], &mut regs); + eprintln!("rip before: {:08x}", regs.rip); + + vm.set_regs(&regs).expect("can set regs"); + +// vm.set_single_step(true).expect("can set single-step"); + + let res = vm.run().expect("can run vm"); + match res { + VcpuExit::Syscall => { /* expected */ } + VcpuExit::Debug { pc, .. } => { + if pc == vm.syscall_addr().0 { + panic!( + "VM exited at syscall target. \ + syscall hlt stub not executed. \ + is the VM being single-stepped?" 
+ ); + } + panic!("unexpected debug exit at rip={:08x}", pc); + } + other => { + panic!("unexpected exit: {:?}", other); + } + }; + + let regs_after = vm.get_regs().expect("can get regs"); + + let expected_rip = vm.syscall_addr().0 + 1; + assert_eq!(expected_rip, regs_after.rip); +} + +#[test] fn test_xorps_runs() { let mut vm = Vm::create(128 * 1024).expect("can create vm"); let mut regs = vm.get_regs().expect("can get regs"); @@ -625,6 +679,7 @@ impl Vm { supported_cpuid, current_cpuid, idt_configured: false, + syscall_configured: false, memory: mapping, aux_memories: Vec::new(), mem_ceiling, @@ -633,6 +688,9 @@ impl Vm { let mut vcpu_regs = this.get_regs()?; let mut vcpu_sregs = this.get_sregs()?; + assert!(this.cpuid_supports(Feature::Base)); + this.cpuid_set(Feature::Base, true); + unsafe { this.configure_identity_paging(Some(&mut vcpu_sregs)); this.configure_selectors(&mut vcpu_sregs); @@ -640,9 +698,10 @@ impl Vm { let mut xcrs = this.get_xcrs()?; this.configure_extensions(&mut vcpu_sregs, &mut xcrs); this.set_xcrs(&xcrs)?; + this.configure_syscalls(&mut vcpu_sregs); } - vcpu_sregs.efer = 0x0000_0500; // LME | LMA + vcpu_sregs.efer |= 0x0000_0500; // LME | LMA this.set_regs(&vcpu_regs)?; this.set_sregs(&vcpu_sregs)?; @@ -730,10 +789,21 @@ impl Vm { .map_err(|e| VmError::from_kvm("set_xcrs", e)) } + pub fn set_msrs(&self, msrs: &Msrs) -> Result<(), VmError> { + let n_set = self.vcpu.set_msrs(msrs) + .map_err(|e| VmError::from_kvm("set_msrs", e))?; + assert_eq!(msrs.as_slice().len(), n_set); + Ok(()) + } + pub fn idt_configured(&self) -> bool { self.idt_configured } + pub fn syscall_configured(&self) -> bool { + self.syscall_configured + } + // TODO: seems like there's a KVM bug where if the VM is configured for single-step and the // single-stepped instruction is a rmw to MMIO memory (or MMIO hugepages?), the single-step // doesn't actually take effect. compare `0x33 0x00` and `0x31 0x00`. what the hell! 
@@ -815,6 +885,16 @@ impl Vm { } } + if self.syscall_configured { + // the behavior of `syscall`, `hlt`, and `rip` is a little funky. similar to + // interrupt handlers, we typically exit with rip pointed immediately after + // `syscall_addr()` because we would syscall to `hlt`, execute the first `hlt`, + // advance `rip` by one byte, and exit to userland for the HLT. + if regs.rip == self.syscall_addr().0 + 1{ + return Ok(VcpuExit::Syscall); + } + } + Ok(VcpuExit::Hlt) } kvm_ioctls::VcpuExit::Shutdown => { @@ -850,6 +930,10 @@ impl Vm { GuestAddress(0x3000) } + pub fn syscall_addr(&self) -> GuestAddress { + GuestAddress(0x4000) + } + pub fn page_table_addr(&self) -> GuestAddress { GuestAddress(0x10000) } @@ -1052,6 +1136,23 @@ impl Vm { } match feature { + Feature::Base => { + let lm = find_leaf(&self.supported_cpuid, 0x8000_0001, 0, |leaf| { + leaf.edx & CPUID_80000001_EDX_LM != 0 + }); + let msr = find_leaf(&self.supported_cpuid, 0x0000_0001, 0, |leaf| { + leaf.edx & CPUID_00000001_EDX_MSR != 0 + }); + let clstac = find_leaf(&self.supported_cpuid, 0x0000_0007, 0, |leaf| { + leaf.ebx & CPUID_00000007_EBX_CLSTAC != 0 + }); + lm && msr && clstac + } + Feature::Syscall => { + find_leaf(&self.supported_cpuid, 0x8000_0001, 0, |leaf| { + leaf.edx & CPUID_80000001_EDX_SYSCALL != 0 + }) + } Feature::XSave => { find_leaf(&self.supported_cpuid, 0x0000_0001, 0, |leaf| { leaf.edx & CPUID_00000001_ECX_XSAVE != 0 @@ -1120,6 +1221,26 @@ impl Vm { let mut edited = false; match feature { + Feature::Base => { + edit_leaf(&mut self.current_cpuid, 0x8000_0001, 0, |leaf| { + bit_set(&mut leaf.edx, CPUID_80000001_EDX_LM, wanted); + edited = true; + }); + edit_leaf(&mut self.current_cpuid, 0x0000_0001, 0, |leaf| { + bit_set(&mut leaf.edx, CPUID_00000001_EDX_MSR, wanted); + edited = true; + }); + edit_leaf(&mut self.current_cpuid, 0x0000_0007, 0, |leaf| { + bit_set(&mut leaf.ebx, CPUID_00000007_EBX_CLSTAC, wanted); + edited = true; + }); + } + Feature::Syscall => { + edit_leaf(&mut 
self.current_cpuid, 0x8000_0001, 0, |leaf| { + bit_set(&mut leaf.edx, CPUID_80000001_EDX_SYSCALL, wanted); + edited = true; + }); + } Feature::XSave => { edit_leaf(&mut self.current_cpuid, 0x0000_0001, 0, |leaf| { bit_set(&mut leaf.ecx, CPUID_00000001_ECX_XSAVE, wanted); @@ -1456,4 +1577,33 @@ impl Vm { } } } + + fn configure_syscalls(&mut self, vcpu_sregs: &mut kvm_sregs) { + assert!(self.cpuid_supports(Feature::Syscall)); + self.cpuid_set(Feature::Syscall, true); + + // > System-Call Extension (SCE) Bit. + vcpu_sregs.efer |= 0x0000_0001; + + let msrs = Msrs::from_entries(&[ + kvm_msr_entry { + // LSTAR (C000_0082h) + index: 0xc000_0082, + data: self.syscall_addr().0, + reserved: 0, + }, + kvm_msr_entry { + // CSTAR (C000_0083h) + index: 0xc000_0083, + data: self.syscall_addr().0, + reserved: 0, + } + ]).unwrap(); + self.set_msrs(&msrs).unwrap(); + + // fill the syscall landing area with hlt to trap out immediately. + self.mem_slice_mut(self.syscall_addr(), 16).fill(0xf4); + + self.syscall_configured = true; + } } -- cgit v1.1