aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2026-04-22 05:33:43 +0000
committeriximeow <me@iximeow.net>2026-04-22 05:33:43 +0000
commit4e0cab8fa5461bb32274e2ebba588c964e5a3cd7 (patch)
tree5614a4eb7ac4db0c3b2c140210c43547df0d7c6a
parent6eb9d69ebf3472bb9fb39fe4b7144d92f031f105 (diff)
support syscall and a corresponding exit kind
-rw-r--r--CHANGELOG10
-rw-r--r--README.md3
-rw-r--r--src/x86_64.rs156
3 files changed, 160 insertions, 9 deletions
diff --git a/CHANGELOG b/CHANGELOG
index d6fe84f..999bba8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,10 +1,14 @@
-## 1.1.0
+## 2.0.0
asmlinator now sets up VMs to be able to execute SSE, AVX, and AVX512
instructions as supported by the host processor.
-VM setup also panics in the absence of 1GB page support, rather than proceeding
-with totally bogus page tables. this limits support to:
+additionally, asmlinator sets up IA32_LSTAR and prepares the syscall target
+address to receive syscalls. exits due to syscall are reported as the new
+`VcpuExit::Syscall`.
+
+VM setup now panics in the absence of 1GB page support, rather than proceeding
+with totally bogus page tables. this limits support to processor generations:
* AMD K10 (~Phenom X4 9500, 2007) or later,
* Intel Sandy Bridge-E (~i7-3930K, 2011) or later
diff --git a/README.md b/README.md
index b985f6c..1d229c6 100644
--- a/README.md
+++ b/README.md
@@ -49,9 +49,6 @@ require setting up an IDT, GDT, paging, ...
it'd be nice to set up aarch64 processors for code execution too. and
32-bit/16-bit x86. and to do all this on other OSes with other VM APIs.
-there should be an option to set up `syscall`/`sysenter` and handle such
-instructions as a `VcpuExit::Syscall`, but i won't need that for a bit.
-
it would probably be nice to expose a C ffi to embed this into other programs!
such an ffi interface should be straightforward. i haven't needed one yet.
diff --git a/src/x86_64.rs b/src/x86_64.rs
index a04dfb5..f30e2be 100644
--- a/src/x86_64.rs
+++ b/src/x86_64.rs
@@ -6,8 +6,8 @@ use nix::sys::mman::{MapFlags, ProtFlags};
use kvm_ioctls::{Kvm, VcpuFd, VmFd};
use kvm_bindings::{
- kvm_cpuid_entry2, kvm_guest_debug,
- kvm_userspace_memory_region, kvm_segment, CpuId,
+ kvm_cpuid_entry2, kvm_guest_debug, kvm_msr_entry,
+ kvm_userspace_memory_region, kvm_segment, CpuId, Msrs,
KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_MAX_CPUID_ENTRIES,
};
@@ -45,6 +45,7 @@ pub struct Vm {
supported_cpuid: CpuId,
current_cpuid: CpuId,
idt_configured: bool,
+ syscall_configured: bool,
mem_ceiling: u64,
memory: Mapping,
aux_memories: Vec<Mapping>,
@@ -57,6 +58,12 @@ pub struct Vm {
/// not yet sure, so this is not pub.
#[derive(Copy, Clone, Debug)]
enum Feature {
+ /// support for long mode and miscellaneous baseline instructions.
+ ///
+ /// `asmlinator` assumes these features are always supported.
+ Base,
+ /// support for syscall/sysret instructions.
+ Syscall,
/// support for the xsave/xrstor instructions and at least xcr0.
///
/// cpuid leaf eax=0x0000_0001 bit ecx[26], see APM
@@ -87,7 +94,14 @@ const CPUID_0000000D_EAX_AVX512: u32 = (1 << 5) | (1 << 6) | (1 << 7);
const CPUID_80000001_EDX_PDPE1GB: u32 = 1 << 26;
+// AMD APM `System Instruction Support Indicated by CPUID Feature Bits`: bit masks checked/set for `Feature::Base` and `Feature::Syscall`.
+const CPUID_00000001_EDX_MSR: u32 = 1 << 5; // leaf 0000_0001h edx[5]: rdmsr/wrmsr
+const CPUID_00000007_EBX_CLSTAC: u32 = 1 << 20; // leaf 0000_0007h ebx[20]: the SMAP bit, which gates clac/stac
+const CPUID_80000001_EDX_SYSCALL: u32 = 1 << 11; // leaf 8000_0001h edx[11]: syscall/sysret
+const CPUID_80000001_EDX_LM: u32 = 1 << 29; // leaf 8000_0001h edx[29]: long mode
+
#[derive(PartialEq)]
+#[non_exhaustive]
pub enum VcpuExit<'buf> {
MmioRead { addr: u64, buf: &'buf mut [u8] },
MmioWrite { addr: u64, buf: &'buf [u8] },
@@ -97,6 +111,7 @@ pub enum VcpuExit<'buf> {
Exception { nr: u8 },
Shutdown,
Hlt,
+ Syscall,
}
impl<'buf> fmt::Debug for VcpuExit<'buf> {
@@ -130,6 +145,9 @@ impl<'buf> fmt::Debug for VcpuExit<'buf> {
},
Hlt => {
write!(f, "VcpuExit::Hlt")
+ },
+ Syscall => {
+ write!(f, "VcpuExit::Syscall")
}
}
}
@@ -443,6 +461,42 @@ fn test_xor_runs() {
}
#[test]
+fn test_syscall() { // runs a lone `syscall` in a fresh VM and expects the exit to be reported as `VcpuExit::Syscall`.
+ let mut vm = Vm::create(128 * 1024).expect("can create vm");
+ let mut regs = vm.get_regs().expect("can get regs");
+ // 0x0f 0x05 is the encoding of a single `syscall` instruction.
+ vm.program(&[0x0f, 0x05], &mut regs);
+ eprintln!("rip before: {:08x}", regs.rip);
+
+ vm.set_regs(&regs).expect("can set regs");
+ // debugging aid, left disabled. NOTE(review): the Debug arm below suggests single-stepping exits at the syscall target before the hlt stub runs -- confirm.
+// vm.set_single_step(true).expect("can set single-step");
+
+ let res = vm.run().expect("can run vm");
+ match res {
+ VcpuExit::Syscall => { /* expected */ }
+ VcpuExit::Debug { pc, .. } => {
+ if pc == vm.syscall_addr().0 {
+ panic!(
+ "VM exited at syscall target. \
+ syscall hlt stub not executed. \
+ is the VM being single-stepped?"
+ );
+ }
+ panic!("unexpected debug exit at rip={:08x}", pc);
+ }
+ other => {
+ panic!("unexpected exit: {:?}", other);
+ }
+ };
+
+ let regs_after = vm.get_regs().expect("can get regs");
+ // the syscall target holds `hlt`; once it has executed, rip sits one byte past `syscall_addr()`.
+ let expected_rip = vm.syscall_addr().0 + 1;
+ assert_eq!(expected_rip, regs_after.rip);
+}
+
+#[test]
fn test_xorps_runs() {
let mut vm = Vm::create(128 * 1024).expect("can create vm");
let mut regs = vm.get_regs().expect("can get regs");
@@ -625,6 +679,7 @@ impl Vm {
supported_cpuid,
current_cpuid,
idt_configured: false,
+ syscall_configured: false,
memory: mapping,
aux_memories: Vec::new(),
mem_ceiling,
@@ -633,6 +688,9 @@ impl Vm {
let mut vcpu_regs = this.get_regs()?;
let mut vcpu_sregs = this.get_sregs()?;
+ assert!(this.cpuid_supports(Feature::Base));
+ this.cpuid_set(Feature::Base, true);
+
unsafe {
this.configure_identity_paging(Some(&mut vcpu_sregs));
this.configure_selectors(&mut vcpu_sregs);
@@ -640,9 +698,10 @@ impl Vm {
let mut xcrs = this.get_xcrs()?;
this.configure_extensions(&mut vcpu_sregs, &mut xcrs);
this.set_xcrs(&xcrs)?;
+ this.configure_syscalls(&mut vcpu_sregs);
}
- vcpu_sregs.efer = 0x0000_0500; // LME | LMA
+ vcpu_sregs.efer |= 0x0000_0500; // LME | LMA
this.set_regs(&vcpu_regs)?;
this.set_sregs(&vcpu_sregs)?;
@@ -730,10 +789,21 @@ impl Vm {
.map_err(|e| VmError::from_kvm("set_xcrs", e))
}
+ pub fn set_msrs(&self, msrs: &Msrs) -> Result<(), VmError> { // writes `msrs` to the vcpu; a failure of the underlying call is returned as a `VmError` tagged "set_msrs".
+ let n_set = self.vcpu.set_msrs(msrs)
+ .map_err(|e| VmError::from_kvm("set_msrs", e))?;
+ assert_eq!(msrs.as_slice().len(), n_set); // panics (rather than returning Err) if the returned count differs from the number of entries provided.
+ Ok(())
+ }
+
pub fn idt_configured(&self) -> bool {
self.idt_configured
}
+ pub fn syscall_configured(&self) -> bool { // reports the `syscall_configured` flag: false at construction, set to true at the end of `configure_syscalls`.
+ self.syscall_configured
+ }
+
// TODO: seems like there's a KVM bug where if the VM is configured for single-step and the
// single-stepped instruction is a rmw to MMIO memory (or MMIO hugepages?), the single-step
// doesn't actually take effect. compare `0x33 0x00` and `0x31 0x00`. what the hell!
@@ -815,6 +885,16 @@ impl Vm {
}
}
+ if self.syscall_configured {
+ // the behavior of `syscall`, `hlt`, and `rip` is a little funky. similar to
+ // interrupt handlers, we typically exit with rip pointed immediately after
+ // `syscall_addr()` because we would syscall to `hlt`, execute the first `hlt`,
+ // advance `rip` by one byte, and exit to userland for the HLT.
+ if regs.rip == self.syscall_addr().0 + 1{
+ return Ok(VcpuExit::Syscall);
+ }
+ }
+
Ok(VcpuExit::Hlt)
}
kvm_ioctls::VcpuExit::Shutdown => {
@@ -850,6 +930,10 @@ impl Vm {
GuestAddress(0x3000)
}
+ pub fn syscall_addr(&self) -> GuestAddress { // fixed guest address of the syscall landing area: written to LSTAR/CSTAR and filled with `hlt` by `configure_syscalls`.
+ GuestAddress(0x4000)
+ }
+
pub fn page_table_addr(&self) -> GuestAddress {
GuestAddress(0x10000)
}
@@ -1052,6 +1136,23 @@ impl Vm {
}
match feature {
+ Feature::Base => {
+ let lm = find_leaf(&self.supported_cpuid, 0x8000_0001, 0, |leaf| {
+ leaf.edx & CPUID_80000001_EDX_LM != 0
+ });
+ let msr = find_leaf(&self.supported_cpuid, 0x0000_0001, 0, |leaf| {
+ leaf.edx & CPUID_00000001_EDX_MSR != 0
+ });
+ let clstac = find_leaf(&self.supported_cpuid, 0x0000_0007, 0, |leaf| {
+ leaf.ebx & CPUID_00000007_EBX_CLSTAC != 0
+ });
+ lm && msr && clstac
+ }
+ Feature::Syscall => {
+ find_leaf(&self.supported_cpuid, 0x8000_0001, 0, |leaf| {
+ leaf.edx & CPUID_80000001_EDX_SYSCALL != 0
+ })
+ }
Feature::XSave => {
find_leaf(&self.supported_cpuid, 0x0000_0001, 0, |leaf| {
leaf.edx & CPUID_00000001_ECX_XSAVE != 0
@@ -1120,6 +1221,26 @@ impl Vm {
let mut edited = false;
match feature {
+ Feature::Base => {
+ edit_leaf(&mut self.current_cpuid, 0x8000_0001, 0, |leaf| {
+ bit_set(&mut leaf.edx, CPUID_80000001_EDX_LM, wanted);
+ edited = true;
+ });
+ edit_leaf(&mut self.current_cpuid, 0x0000_0001, 0, |leaf| {
+ bit_set(&mut leaf.edx, CPUID_00000001_EDX_MSR, wanted);
+ edited = true;
+ });
+ edit_leaf(&mut self.current_cpuid, 0x0000_0007, 0, |leaf| {
+ bit_set(&mut leaf.ebx, CPUID_00000007_EBX_CLSTAC, wanted);
+ edited = true;
+ });
+ }
+ Feature::Syscall => {
+ edit_leaf(&mut self.current_cpuid, 0x8000_0001, 0, |leaf| {
+ bit_set(&mut leaf.edx, CPUID_80000001_EDX_SYSCALL, wanted);
+ edited = true;
+ });
+ }
Feature::XSave => {
edit_leaf(&mut self.current_cpuid, 0x0000_0001, 0, |leaf| {
bit_set(&mut leaf.ecx, CPUID_00000001_ECX_XSAVE, wanted);
@@ -1456,4 +1577,33 @@ impl Vm {
}
}
}
+
+ fn configure_syscalls(&mut self, vcpu_sregs: &mut kvm_sregs) { // enables `syscall` for the guest: cpuid bit, EFER.SCE, LSTAR/CSTAR, and a `hlt` landing area. only `vcpu_sregs.efer` is edited here; applying the sregs is left to the caller.
+ assert!(self.cpuid_supports(Feature::Syscall)); // panics if the supported cpuid does not report syscall/sysret.
+ self.cpuid_set(Feature::Syscall, true);
+
+ // EFER bit 0 is the System-Call Extension (SCE) Bit.
+ vcpu_sregs.efer |= 0x0000_0001;
+ // point both syscall target MSRs at `syscall_addr()`.
+ let msrs = Msrs::from_entries(&[
+ kvm_msr_entry {
+ // LSTAR (C000_0082h)
+ index: 0xc000_0082,
+ data: self.syscall_addr().0,
+ reserved: 0,
+ },
+ kvm_msr_entry {
+ // CSTAR (C000_0083h)
+ index: 0xc000_0083,
+ data: self.syscall_addr().0,
+ reserved: 0,
+ }
+ ]).unwrap();
+ self.set_msrs(&msrs).unwrap(); // panics if the MSRs cannot be set; this function has no error return.
+
+ // fill the syscall landing area with hlt (0xf4) to trap out immediately.
+ self.mem_slice_mut(self.syscall_addr(), 16).fill(0xf4);
+ // from here on, a HLT exit with rip == syscall_addr() + 1 is reported as `VcpuExit::Syscall`.
+ self.syscall_configured = true;
+ }
}