From a0fd5a24cb0aa0b697f680c451d928cefe8323b4 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 21 May 2020 23:09:39 -0700 Subject: add sha, lzcnt, tsx, f16c, svm, movbe, adx, and prefetchw extensions also add builders to get decoders appropriate for specific microarchitectures from intel and amd * low-power architectures are not yet present --- src/long_mode/display.rs | 48 ++++ src/long_mode/mod.rs | 514 ++++++++++++++++++++++++++++++++++++++++-- src/long_mode/uarch.rs | 221 ++++++++++++++++++ src/protected_mode/display.rs | 48 ++++ src/protected_mode/mod.rs | 505 +++++++++++++++++++++++++++++++++++++++-- src/protected_mode/uarch.rs | 221 ++++++++++++++++++ 6 files changed, 1517 insertions(+), 40 deletions(-) create mode 100644 src/long_mode/uarch.rs create mode 100644 src/protected_mode/uarch.rs (limited to 'src') diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 49d1600..5318ebb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1076,6 +1076,30 @@ impl fmt::Display for Opcode { &Opcode::HSUBPD => write!(f, "hsubpd"), &Opcode::HADDPD => write!(f, "haddpd"), &Opcode::ADDSUBPD => write!(f, "addsubpd"), + &Opcode::XABORT => write!(f, "xabort"), + &Opcode::XBEGIN => write!(f, "xbegin"), + &Opcode::RDSEED => write!(f, "rdseed"), + &Opcode::LZCNT => write!(f, "lzcnt"), + &Opcode::CLGI => write!(f, "clgi"), + &Opcode::STGI => write!(f, "stgi"), + &Opcode::SKINIT => write!(f, "skinit"), + &Opcode::VMLOAD => write!(f, "vmload"), + &Opcode::VMMCALL => write!(f, "vmmcall"), + &Opcode::VMSAVE => write!(f, "vmsave"), + &Opcode::VMRUN => write!(f, "vmrun"), + &Opcode::INVLPGA => write!(f, "invlpga"), + &Opcode::MOVBE => write!(f, "movbe"), + &Opcode::ADCX => write!(f, "adcx"), + &Opcode::ADOX => write!(f, "adox"), + &Opcode::PREFETCHW => write!(f, "prefetchw"), + &Opcode::RDRAND => write!(f, "rdrand"), + &Opcode::SHA1RNDS4 => write!(f, "sha1rnds4"), + &Opcode::SHA1NEXTE => write!(f, "sha1nexte"), + &Opcode::SHA1MSG1 => write!(f, 
"sha1msg1"), + &Opcode::SHA1MSG2 => write!(f, "sha1msg2"), + &Opcode::SHA256RNDS2 => write!(f, "sha256rnds2"), + &Opcode::SHA256MSG1 => write!(f, "sha256msg1"), + &Opcode::SHA256MSG2 => write!(f, "sha256msg2"), &Opcode::Invalid => write!(f, "invalid"), } } @@ -1306,8 +1330,11 @@ impl > Colorize> Colorize { write!(out, "{}", colors.nop_op(self)) } /* Control flow */ @@ -1680,6 +1708,7 @@ impl > Colorize> Colorize> Colorize> Colorize { write!(out, "{}", colors.platform_op(self)) } + Opcode::RDSEED | + Opcode::RDRAND | + Opcode::SHA1RNDS4 | + Opcode::SHA1NEXTE | + Opcode::SHA1MSG1 | + Opcode::SHA1MSG2 | + Opcode::SHA256RNDS2 | + Opcode::SHA256MSG1 | + Opcode::SHA256MSG2 | Opcode::AESDEC | Opcode::AESDECLAST | Opcode::AESENC | diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index e0a1fdf..20abe1f 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -1,5 +1,6 @@ mod vex; mod display; +pub mod uarch; use core::hint::unreachable_unchecked; @@ -73,7 +74,6 @@ impl RegSpec { #[inline] fn gp_from_parts(num: u8, extended: bool, width: u8, rex: bool) -> RegSpec { -// println!("from_parts width: {}, num: {}, extended: {}", width, num, extended); RegSpec { num: num + if extended { 0b1000 } else { 0 }, bank: width_to_gp_reg_bank(width, rex) @@ -129,6 +129,22 @@ impl RegSpec { } #[inline] + pub fn esp() -> RegSpec { + RegSpec { + num: 4, + bank: RegisterBank::D + } + } + + #[inline] + pub fn sp() -> RegSpec { + RegSpec { + num: 4, + bank: RegisterBank::W + } + } + + #[inline] pub fn fs() -> RegSpec { RegSpec { bank: RegisterBank::S, num: 3 } } @@ -779,12 +795,17 @@ pub enum Opcode { XGETBV, XSETBV, VMFUNC, + XABORT, + XBEGIN, XEND, XTEST, ENCLU, RDPKRU, WRPKRU, + RDSEED, + RDRAND, + ADDPS, ADDPD, ANDNPS, @@ -1330,6 +1351,31 @@ pub enum Opcode { PHADDW, HSUBPD, HADDPD, + + SHA1RNDS4, + SHA1NEXTE, + SHA1MSG1, + SHA1MSG2, + SHA256RNDS2, + SHA256MSG1, + SHA256MSG2, + + LZCNT, + CLGI, + STGI, + SKINIT, + VMLOAD, + VMMCALL, + VMSAVE, + VMRUN, + INVLPGA, + + MOVBE, + + 
ADCX, + ADOX, + + PREFETCHW, } #[derive(Debug)] @@ -1500,6 +1546,14 @@ pub struct InstDecoder { // 53. intel quirks // 54. amd quirks // 55. avx (intel ?, amd ?) + // 56. amd-v/svm + // 57. lahfsahf + // 58. cmov + // 59. f16c + // 60. fma4 + // 61. prefetchw + // 62. tsx + // 63. lzcnt flags: u64, } @@ -1586,6 +1640,12 @@ impl InstDecoder { self } + pub fn with_sse4(self) -> Self { + self + .with_sse4_1() + .with_sse4_2() + } + pub fn movbe(&self) -> bool { self.flags & (1 << 8) != 0 } @@ -1658,6 +1718,9 @@ impl InstDecoder { self } + /// `bmi2` indicates support for the `BZHI`, `MULX`, `PDEP`, `PEXT`, `RORX`, `SARX`, `SHRX`, + /// and `SHLX` instructions. `bmi2` is implemented in all x86_64 chips that implement `bmi`, + /// except the amd `piledriver` and `steamroller` microarchitectures. pub fn bmi2(&self) -> bool { self.flags & (1 << 16) != 0 } @@ -2018,6 +2081,94 @@ impl InstDecoder { self } + pub fn svm(&self) -> bool { + self.flags & (1 << 56) != 0 + } + + pub fn with_svm(mut self) -> Self { + self.flags |= 1 << 56; + self + } + + /// `lahfsahf` is only unset for early revisions of 64-bit amd and intel chips. unfortunately + /// the clearest documentation on when these instructions were reintroduced into 64-bit + /// architectures seems to be + /// [wikipedia](https://en.wikipedia.org/wiki/X86-64#Older_implementations): + /// ``` + /// Early AMD64 and Intel 64 CPUs lacked LAHF and SAHF instructions in 64-bit mode. AMD + /// introduced these instructions (also in 64-bit mode) with their Athlon 64, Opteron and + /// Turion 64 revision D processors in March 2005[48][49][50] while Intel introduced the + /// instructions with the Pentium 4 G1 stepping in December 2005. The 64-bit version of Windows + /// 8.1 requires this feature.[47] + /// ``` + /// + /// this puts reintroduction of these instructions somewhere in the middle of prescott and k8 + /// lifecycles, for intel and amd respectively. 
because there is no specific uarch where these + /// features become enabled, prescott and k8 default to not supporting these instructions, + /// where later uarches support these instructions. + pub fn lahfsahf(&self) -> bool { + self.flags & (1 << 57) != 0 + } + + pub fn with_lahfsahf(mut self) -> Self { + self.flags |= 1 << 57; + self + } + + pub fn cmov(&self) -> bool { + self.flags & (1 << 58) != 0 + } + + pub fn with_cmov(mut self) -> Self { + self.flags |= 1 << 58; + self + } + + pub fn f16c(&self) -> bool { + self.flags & (1 << 59) != 0 + } + + pub fn with_f16c(mut self) -> Self { + self.flags |= 1 << 59; + self + } + + pub fn fma4(&self) -> bool { + self.flags & (1 << 60) != 0 + } + + pub fn with_fma4(mut self) -> Self { + self.flags |= 1 << 60; + self + } + + pub fn prefetchw(&self) -> bool { + self.flags & (1 << 61) != 0 + } + + pub fn with_prefetchw(mut self) -> Self { + self.flags |= 1 << 61; + self + } + + pub fn tsx(&self) -> bool { + self.flags & (1 << 62) != 0 + } + + pub fn with_tsx(mut self) -> Self { + self.flags |= 1 << 62; + self + } + + pub fn lzcnt(&self) -> bool { + self.flags & (1 << 63) != 0 + } + + pub fn with_lzcnt(mut self) -> Self { + self.flags |= 1 << 63; + self + } + /// Optionally reject or reinterpret instruction according to the decoder's /// declared extensions. fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> { @@ -2150,21 +2301,15 @@ impl InstDecoder { return Err(DecodeError::InvalidOpcode); } } - // AVX... 
- /* // TODO Opcode::XABORT | - Opcode::XACQUIRE | - Opcode::XRELEASE | Opcode::XBEGIN | Opcode::XEND | Opcode::XTEST => { if !self.tsx() { inst.opcode = Opcode::Invalid; - return Err(()); + return Err(DecodeError::InvalidOpcode); } } - */ - /* // TODO Opcode::SHA1MSG1 | Opcode::SHA1MSG2 | Opcode::SHA1NEXTE | @@ -2174,9 +2319,9 @@ impl InstDecoder { Opcode::SHA256RNDS2 => { if !self.sha() { inst.opcode = Opcode::Invalid; - return Err(()); + return Err(DecodeError::InvalidOpcode); } - }*/ + } Opcode::ENCLV | Opcode::ENCLS | Opcode::ENCLU => { @@ -2185,6 +2330,7 @@ impl InstDecoder { return Err(DecodeError::InvalidOpcode); } } + // AVX... Opcode::VMOVDDUP | Opcode::VPSHUFLW | Opcode::VHADDPS | @@ -2216,7 +2362,6 @@ impl InstDecoder { Opcode::VCVTDQ2PD | Opcode::VCVTDQ2PS | Opcode::VCVTPD2PS | - Opcode::VCVTPH2PS | Opcode::VCVTPS2DQ | Opcode::VCVTPS2PD | Opcode::VCVTSS2SD | @@ -2224,7 +2369,6 @@ impl InstDecoder { Opcode::VCVTSI2SD | Opcode::VCVTSD2SI | Opcode::VCVTSD2SS | - Opcode::VCVTPS2PH | Opcode::VCVTSS2SI | Opcode::VCVTTPD2DQ | Opcode::VCVTTPS2DQ | @@ -2527,6 +2671,124 @@ impl InstDecoder { return Err(DecodeError::InvalidOpcode); } } + Opcode::MOVBE => { + if !self.movbe() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::POPCNT => { + /* + * from the intel SDM: + * ``` + * Before an application attempts to use the POPCNT instruction, it must check that + * the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1) and POPCNT + * (if CPUID.01H:ECX.POPCNT[bit 23] = 1). + * ``` + */ + if self.intel_quirks() && (!self.sse4_2() || !self.popcnt()) { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } else if !self.popcnt() { + /* + * elsewhere from the amd APM: + * `Instruction Subsets and CPUID Feature Flags` on page 507 indicates that + * popcnt is present when the popcnt bit is reported by cpuid. 
this seems to be + * the less quirky default, so `intel_quirks` is considered the outlier, and + * before this default. + * */ + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::LZCNT => { + /* + * amd APM, `LZCNT` page 212: + * LZCNT is an Advanced Bit Manipulation (ABM) instruction. Support for the LZCNT + * instruction is indicated by CPUID Fn8000_0001_ECX[ABM] = 1. + * + * meanwhile the intel SDM simply states: + * ``` + * CPUID.EAX=80000001H:ECX.LZCNT[bit 5]: if 1 indicates the processor supports the + * LZCNT instruction. + * ``` + * + * so that's considered the less-quirky (default) case here. + * */ + if self.amd_quirks() && !self.abm() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } else if !self.lzcnt() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::ADCX | + Opcode::ADOX => { + if !self.adx() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::VMRUN | + Opcode::VMLOAD | + Opcode::VMSAVE | + Opcode::CLGI | + Opcode::VMMCALL | + Opcode::INVLPGA => { + if !self.svm() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::STGI | + Opcode::SKINIT => { + if !self.svm() || !self.skinit() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::LAHF | + Opcode::SAHF => { + if !self.lahfsahf() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::VCVTPS2PH | + Opcode::VCVTPH2PS => { + /* + * from intel SDM: + * ``` + * 14.4.1 Detection of F16C Instructions Application using float 16 instruction + * must follow a detection sequence similar to AVX to ensure: • The OS has + * enabled YMM state management support, • The processor support AVX as + * indicated by the CPUID feature flag, i.e. CPUID.01H:ECX.AVX[bit 28] = 1. 
• + * The processor support 16-bit floating-point conversion instructions via a + * CPUID feature flag (CPUID.01H:ECX.F16C[bit 29] = 1). + * ``` + * + * TODO: only the VEX-coded variant of this instruction should be gated on `f16c`. + * the EVEX-coded variant should be gated on `avx512f` or `avx512vl` if not + * EVEX.512-coded. + */ + if !self.avx() || !self.f16c() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::RDRAND => { + if !self.rdrand() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::RDSEED => { + if !self.rdseed() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } other => { if !self.bmi1() { if BMI1.contains(&other) { @@ -2871,11 +3133,13 @@ impl PrefixRex { pub enum OperandCode { ModRM_0x0f00, ModRM_0x0f01, + ModRM_0x0f0d, ModRM_0x0fae, ModRM_0x0fba, ModRM_0xf238, ModRM_0xf30fc7, ModRM_0x660f38, + ModRM_0xf30f38, ModRM_0x660f3a, CVT_AA, CVT_DA, @@ -3686,7 +3950,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), + OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xf30f38), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3827,7 +4091,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), 
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), + OpcodeRecord(Interpretation::Instruction(Opcode::LZCNT), OperandCode::Gv_Ev), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), // 0xc0 @@ -3950,7 +4214,7 @@ const OPCODE_0F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::UD2), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::NOP), OperandCode::Ev), + OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0x0f0d), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), // 0x10 @@ -5104,8 +5368,28 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, }; instruction.operand_count = 2; }, - _op @ OperandCode::ModRM_0xc6_Eb_Ib | - _op @ OperandCode::ModRM_0xc7_Ev_Iv => { + op @ OperandCode::ModRM_0xc6_Eb_Ib | + op @ OperandCode::ModRM_0xc7_Ev_Iv => { + if modrm == 0xf8 { + if op == OperandCode::ModRM_0xc6_Eb_Ib { + instruction.opcode = Opcode::XABORT; + instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u64; + instruction.operands[0] = OperandSpec::ImmI8; + instruction.operand_count = 1; + return Ok(()); + } else { + instruction.opcode = Opcode::XBEGIN; + instruction.disp = if opwidth == 2 { + read_imm_signed(&mut bytes_iter, 2, length)? as i16 as i64 as u64 + } else { + read_imm_signed(&mut bytes_iter, 4, length)? 
as i32 as i64 as u64 + }; + instruction.modrm_mmm = RegSpec::rip(); + instruction.operands[0] = OperandSpec::RegDisp; + instruction.operand_count = 1; + return Ok(()); + } + } if (modrm & 0b00111000) != 0 { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string()); @@ -5490,6 +5774,115 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.opcode = Opcode::MOVD; } } + OperandCode::ModRM_0x0f0d => { + let modrm = read_modrm(&mut bytes_iter, length)?; + let r = modrm & 0b111; + + let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes); + + match r { + 1 => { + instruction.opcode = Opcode::PREFETCHW; + } + _ => { + instruction.opcode = Opcode::NOP; + } + } + instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, opwidth, length)?; + instruction.operand_count = 1; + } + OperandCode::ModRM_0x0f38 => { + let opcode = read_modrm(&mut bytes_iter, length)?; + + let high = opcode >> 4; + let low = opcode & 0xf; + + let operands = match high { + 0 => { + // PqQq + OperandCode::G_E_mm + }, + 1 => { + // PqQq + OperandCode::G_E_mm + }, + 0xc => { + // Vdq,Wdq + OperandCode::G_E_xmm + } + 0xf => { + match low { + 0 => OperandCode::Gv_Ev, + 1 => OperandCode::Ev_Gv, + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + } + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }; + instruction.opcode = match opcode { + 0xc8 => Opcode::SHA1NEXTE, + 0xc9 => Opcode::SHA1MSG1, + 0xca => Opcode::SHA1MSG2, + 0xcb => Opcode::SHA256RNDS2, + 0xcc => Opcode::SHA256MSG1, + 0xcd => Opcode::SHA256MSG2, + 0xf0 | 0xf1 => Opcode::MOVBE, + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }; + + return read_operands(decoder, bytes_iter, instruction, operands, length); + }, + OperandCode::ModRM_0x0f3a => { + }, + OperandCode::ModRM_0x0fc7 => 
{ + let modrm = read_modrm(&mut bytes_iter, length)?; + if modrm >> 6 == 0b11 { + match (modrm >> 3) & 0b111 { + 0b111 => { + instruction.opcode = Opcode::RDSEED; + instruction.operand_count = 1; + instruction.operands[0] = OperandSpec::RegRRR; + let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes); + instruction.modrm_rrr = + RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().r(), match opwidth { + 8 => RegisterBank::Q, + 4 => RegisterBank::D, + 2 => RegisterBank::W, + _ => unreachable!() + }); + } + 0b110 => { + instruction.opcode = Opcode::RDRAND; + instruction.operand_count = 1; + instruction.operands[0] = OperandSpec::RegRRR; + let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes); + instruction.modrm_rrr = + RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().r(), match opwidth { + 8 => RegisterBank::Q, + 4 => RegisterBank::D, + 2 => RegisterBank::W, + _ => unreachable!() + }); + } + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + } else { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }, OperandCode::ModRM_0x0f71 => { instruction.operand_count = 2; @@ -5604,6 +5997,19 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; instruction.operand_count = 2; } + OperandCode::ModRM_0xf30f38 => { + let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; + match op { + 0xf6 => { + instruction.opcode = Opcode::ADOX; + return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length); + } + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }; + } OperandCode::ModRM_0x660f38 => { let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; match op { @@ -5612,6 +6018,10 @@ fn 
unlikely_operands>(decoder: &InstDecoder, mut bytes_iter 0xdd => { instruction.opcode = Opcode::AESENCLAST; } 0xde => { instruction.opcode = Opcode::AESDEC; } 0xdf => { instruction.opcode = Opcode::AESDECLAST; } + 0xf6 => { + instruction.opcode = Opcode::ADCX; + return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length); + } _ => { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); @@ -5630,6 +6040,21 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter OperandCode::ModRM_0x660f3a => { let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; match op { + 0xcc => { + instruction.opcode = Opcode::SHA1RNDS4; + + let modrm = read_modrm(&mut bytes_iter, length)?; + instruction.modrm_rrr = + RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X); + + + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; + instruction.imm = + read_imm_unsigned(&mut bytes_iter, 1, length)?; + instruction.operands[2] = OperandSpec::ImmU8; + instruction.operand_count = 3; + } 0xdf => { instruction.opcode = Opcode::AESKEYGENASSIST; // read operands right here right now @@ -6214,10 +6639,59 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } else if r == 3 { let mod_bits = modrm >> 6; + let m = modrm & 7; if mod_bits == 0b11 { - instruction.opcode = Opcode::Invalid; - instruction.operand_count = 0; - return Err(DecodeError::InvalidOperand); + match m { + 0b000 => { + instruction.opcode = Opcode::VMRUN; + instruction.operand_count = 1; + instruction.modrm_rrr = RegSpec::rax(); + instruction.operands[0] = OperandSpec::RegRRR; + }, + 0b001 => { + instruction.opcode = Opcode::VMMCALL; + instruction.operand_count = 0; + }, + 0b010 => { + instruction.opcode = Opcode::VMLOAD; + instruction.operand_count = 1; + instruction.modrm_rrr = RegSpec::rax(); + instruction.operands[0] = 
OperandSpec::RegRRR; + }, + 0b011 => { + instruction.opcode = Opcode::VMSAVE; + instruction.operand_count = 1; + instruction.modrm_rrr = RegSpec::rax(); + instruction.operands[0] = OperandSpec::RegRRR; + }, + 0b100 => { + instruction.opcode = Opcode::STGI; + instruction.operand_count = 0; + }, + 0b101 => { + instruction.opcode = Opcode::CLGI; + instruction.operand_count = 0; + }, + 0b110 => { + instruction.opcode = Opcode::SKINIT; + instruction.operand_count = 1; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.modrm_rrr = RegSpec::eax(); + }, + 0b111 => { + instruction.opcode = Opcode::INVLPGA; + instruction.operand_count = 2; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = OperandSpec::RegMMM; + instruction.modrm_rrr = RegSpec::rax(); + instruction.modrm_mmm = RegSpec::ecx(); + }, + _ => { + instruction.opcode = Opcode::Invalid; + instruction.operand_count = 0; + return Err(DecodeError::InvalidOperand); + } + } } else { instruction.opcode = Opcode::LIDT; instruction.operand_count = 1; diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs new file mode 100644 index 0000000..b2b1201 --- /dev/null +++ b/src/long_mode/uarch.rs @@ -0,0 +1,221 @@ +pub mod amd { + //! most information about instruction set extensions for microarchitectures here was sourced + //! from https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview and + //! https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features. these mappings are best-effort + //! but fairly unused, so a critical eye should be kept towards these decoders rejecting + //! instructions they should not, or incorrectly accepting instructions. + //! + //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, + //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension + //! reportedly function correctly (agner p217). + //! + //! 
[agner](https://www.agner.org/optimize/microarchitecture.pdf) + //! as retrieved 2020 may 19 + //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5` + + use long_mode::InstDecoder; + + /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later + /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of + /// support - SSE2 and no later. + pub fn k8() -> InstDecoder { + InstDecoder::minimal() + } + + /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through + /// to SSE4.2a, as well as consistent `cmov` support, among other features. + pub fn k10() -> InstDecoder { + k8() + .with_cmov() + .with_cmpxchg16b() + .with_svm() + .with_abm() + .with_lahfsahf() + .with_sse3() + .with_ssse3() + .with_sse4() + .with_sse4_2() + .with_sse4a() + } + + /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX + /// support among other extensions, and are notable for including `AESNI`. + pub fn bulldozer() -> InstDecoder { + k10() + .with_bmi1() + .with_aesni() + .with_pclmulqdq() + .with_f16c() + .with_avx() + .with_fma4() + .with_xop() + } + + /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. + pub fn piledriver() -> InstDecoder { + bulldozer() + .with_tbm() + .with_fma3() + .with_fma4() + } + + /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver` + /// cores, these cores do not support `TBM` or `FMA3`. + pub fn steamroller() -> InstDecoder { + bulldozer() + } + + /// `Excavator` was the successor to `Steamroller`, launched in 2015. + pub fn excavator() -> InstDecoder { + steamroller() + .with_movbe() + .with_bmi2() + .with_rdrand() + .with_avx() + .with_xop() + .with_bmi2() + .with_sha() + .with_rdrand() + .with_avx2() + } + + /// `Zen` was the successor to `Excavator`, launched in 2017. 
`Zen` cores extend SIMD + /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, + /// SHA, RDSEED, and other extensions. + pub fn zen() -> InstDecoder { + k10() + .with_avx() + .with_avx2() + .with_bmi1() + .with_aesni() + .with_pclmulqdq() + .with_f16c() + .with_movbe() + .with_bmi2() + .with_rdrand() + .with_adx() + .with_sha() + .with_rdseed() + .with_fma3() + // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? + } +} + +pub mod intel { + //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out! + + use long_mode::InstDecoder; + + /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the + /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000 + /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed + /// included SSE3. + pub fn netburst() -> InstDecoder { + InstDecoder::minimal() + .with_cmov() + .with_sse3() + } + + /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with + /// processors using this architecture shipped under the names "Merom", "Conroe", and + /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused + /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines, + /// `Core 2 *` processors used the `Core` architecture. + pub fn core() -> InstDecoder { + netburst() + .with_ssse3() + .with_sse4() + } + + /// `Peryn` was the successor to `Core`, launched in early 2008. it added SSE4.1, along with + /// virtualization extensions. + pub fn peryn() -> InstDecoder { + core() + .with_sse4_1() + } + + /// `Nehalem` was the successor to `Peryn`, launched in late 2008. not to be confused with the + /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores. + /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction. 
+ pub fn nehalem() -> InstDecoder { + peryn() + .with_sse4_2() + .with_popcnt() + } + + /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL + /// extensions. + pub fn westmere() -> InstDecoder { + nehalem() + .with_aesni() + .with_pclmulqdq() + } + + /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX + /// instructions. + pub fn sandybridge() -> InstDecoder { + westmere() + .with_avx() + } + + /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C + /// extensions for 16-bit floating point conversion, and the RDRAND instruction. + pub fn ivybridge() -> InstDecoder { + sandybridge() + .with_f16c() + .with_rdrand() + } + + /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction + /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3. + pub fn haswell() -> InstDecoder { + ivybridge() + .with_bmi1() + .with_bmi2() + .with_abm() + .with_fma3() + .with_avx2() + } + + /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores + /// were shipped as `E7-48xx/E7-88xx v3` models of processors. + pub fn haswell_ex() -> InstDecoder { + haswell() + .with_tsx() + } + + /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED, + /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because + /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be + /// reported as an errata (for example, the `Broadwell-Y` line of parts). + pub fn broadwell() -> InstDecoder { + haswell_ex() + .with_adx() + .with_rdseed() + .with_prefetchw() + } + + /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX + /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product + /// lines. 
+ /// + /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest + /// common denominator: if you want a `Skylake` decoder with AVX512, something like the + /// following: + /// ``` + /// InstDecoder::skylake().with_avx512_f().with_avx512_dq() + /// ``` + /// is likely your best option. + pub fn skylake() -> InstDecoder { + broadwell() + .with_mpx() + .with_sgx() + } + + /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to + /// x86_64 implementaiton beyond `skylake`. + pub fn kabylake() -> InstDecoder { + skylake() + } + // ice lake is shipping so that should probably be included... +} diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index d072991..ed8d44c 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -1063,6 +1063,30 @@ impl fmt::Display for Opcode { &Opcode::HSUBPD => write!(f, "hsubpd"), &Opcode::HADDPD => write!(f, "haddpd"), &Opcode::ADDSUBPD => write!(f, "addsubpd"), + &Opcode::XABORT => write!(f, "xabort"), + &Opcode::XBEGIN => write!(f, "xbegin"), + &Opcode::RDSEED => write!(f, "rdseed"), + &Opcode::LZCNT => write!(f, "lzcnt"), + &Opcode::CLGI => write!(f, "clgi"), + &Opcode::STGI => write!(f, "stgi"), + &Opcode::SKINIT => write!(f, "skinit"), + &Opcode::VMLOAD => write!(f, "vmload"), + &Opcode::VMMCALL => write!(f, "vmmcall"), + &Opcode::VMSAVE => write!(f, "vmsave"), + &Opcode::VMRUN => write!(f, "vmrun"), + &Opcode::INVLPGA => write!(f, "invlpga"), + &Opcode::MOVBE => write!(f, "movbe"), + &Opcode::ADCX => write!(f, "adcx"), + &Opcode::ADOX => write!(f, "adox"), + &Opcode::PREFETCHW => write!(f, "prefetchw"), + &Opcode::RDRAND => write!(f, "rdrand"), + &Opcode::SHA1RNDS4 => write!(f, "sha1rnds4"), + &Opcode::SHA1NEXTE => write!(f, "sha1nexte"), + &Opcode::SHA1MSG1 => write!(f, "sha1msg1"), + &Opcode::SHA1MSG2 => write!(f, "sha1msg2"), + &Opcode::SHA256RNDS2 => write!(f, "sha256rnds2"), + &Opcode::SHA256MSG1 => 
write!(f, "sha256msg1"), + &Opcode::SHA256MSG2 => write!(f, "sha256msg2"), &Opcode::Invalid => write!(f, "invalid"), } } @@ -1293,8 +1317,11 @@ impl > Colorize> Colorize { write!(out, "{}", colors.nop_op(self)) } /* Control flow */ @@ -1667,6 +1695,7 @@ impl > Colorize> Colorize> Colorize> Colorize { write!(out, "{}", colors.platform_op(self)) } + Opcode::RDSEED | + Opcode::RDRAND | + Opcode::SHA1RNDS4 | + Opcode::SHA1NEXTE | + Opcode::SHA1MSG1 | + Opcode::SHA1MSG2 | + Opcode::SHA256RNDS2 | + Opcode::SHA256MSG1 | + Opcode::SHA256MSG2 | Opcode::AESDEC | Opcode::AESDECLAST | Opcode::AESENC | diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 58d7a85..89c485f 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -1,5 +1,6 @@ mod vex; mod display; +pub mod uarch; use core::hint::unreachable_unchecked; @@ -120,6 +121,14 @@ impl RegSpec { } #[inline] + pub fn sp() -> RegSpec { + RegSpec { + num: 4, + bank: RegisterBank::W + } + } + + #[inline] pub fn fs() -> RegSpec { RegSpec { bank: RegisterBank::S, num: 3 } } @@ -738,12 +747,17 @@ pub enum Opcode { XGETBV, XSETBV, VMFUNC, + XABORT, + XBEGIN, XEND, XTEST, ENCLU, RDPKRU, WRPKRU, + RDSEED, + RDRAND, + ADDPS, ADDPD, ANDNPS, @@ -1289,6 +1303,31 @@ pub enum Opcode { PHADDW, HSUBPD, HADDPD, + + SHA1RNDS4, + SHA1NEXTE, + SHA1MSG1, + SHA1MSG2, + SHA256RNDS2, + SHA256MSG1, + SHA256MSG2, + + LZCNT, + CLGI, + STGI, + SKINIT, + VMLOAD, + VMMCALL, + VMSAVE, + VMRUN, + INVLPGA, + + MOVBE, + + ADCX, + ADOX, + + PREFETCHW, } #[derive(Debug)] @@ -1457,6 +1496,14 @@ pub struct InstDecoder { // 53. intel quirks // 54. amd quirks // 55. avx (intel ?, amd ?) + // 56. amd-v/svm + // 57. lahfsahf + // 58. cmov + // 59. f16c + // 60. fma4 + // 61. prefetchw + // 62. tsx + // 63. 
lzcnt flags: u64, } @@ -1543,6 +1590,12 @@ impl InstDecoder { self } + pub fn with_sse4(self) -> Self { + self + .with_sse4_1() + .with_sse4_2() + } + pub fn movbe(&self) -> bool { self.flags & (1 << 8) != 0 } @@ -1615,6 +1668,9 @@ impl InstDecoder { self } + /// `bmi2` indicates support for the `BZHI`, `MULX`, `PDEP`, `PEXT`, `RORX`, `SARX`, `SHRX`, + /// and `SHLX` instructions. `bmi2` is implemented in all x86_64 chips that implement `bmi`, + /// except the amd `piledriver` and `steamroller` microarchitectures. pub fn bmi2(&self) -> bool { self.flags & (1 << 16) != 0 } @@ -1975,6 +2031,94 @@ impl InstDecoder { self } + pub fn svm(&self) -> bool { + self.flags & (1 << 56) != 0 + } + + pub fn with_svm(mut self) -> Self { + self.flags |= 1 << 56; + self + } + + /// `lahfsahf` is only unset for early revisions of 64-bit amd and intel chips. unfortunately + /// the clearest documentation on when these instructions were reintroduced into 64-bit + /// architectures seems to be + /// [wikipedia](https://en.wikipedia.org/wiki/X86-64#Older_implementations): + /// ``` + /// Early AMD64 and Intel 64 CPUs lacked LAHF and SAHF instructions in 64-bit mode. AMD + /// introduced these instructions (also in 64-bit mode) with their Athlon 64, Opteron and + /// Turion 64 revision D processors in March 2005[48][49][50] while Intel introduced the + /// instructions with the Pentium 4 G1 stepping in December 2005. The 64-bit version of Windows + /// 8.1 requires this feature.[47] + /// ``` + /// + /// this puts reintroduction of these instructions somewhere in the middle of prescott and k8 + /// lifecycles, for intel and amd respectively. because there is no specific uarch where these + /// features become enabled, prescott and k8 default to not supporting these instructions, + /// where later uarches support these instructions. 
+ pub fn lahfsahf(&self) -> bool { + self.flags & (1 << 57) != 0 + } + + pub fn with_lahfsahf(mut self) -> Self { + self.flags |= 1 << 57; + self + } + + pub fn cmov(&self) -> bool { + self.flags & (1 << 58) != 0 + } + + pub fn with_cmov(mut self) -> Self { + self.flags |= 1 << 58; + self + } + + pub fn f16c(&self) -> bool { + self.flags & (1 << 59) != 0 + } + + pub fn with_f16c(mut self) -> Self { + self.flags |= 1 << 59; + self + } + + pub fn fma4(&self) -> bool { + self.flags & (1 << 60) != 0 + } + + pub fn with_fma4(mut self) -> Self { + self.flags |= 1 << 60; + self + } + + pub fn prefetchw(&self) -> bool { + self.flags & (1 << 61) != 0 + } + + pub fn with_prefetchw(mut self) -> Self { + self.flags |= 1 << 61; + self + } + + pub fn tsx(&self) -> bool { + self.flags & (1 << 62) != 0 + } + + pub fn with_tsx(mut self) -> Self { + self.flags |= 1 << 62; + self + } + + pub fn lzcnt(&self) -> bool { + self.flags & (1 << 63) != 0 + } + + pub fn with_lzcnt(mut self) -> Self { + self.flags |= 1 << 63; + self + } + /// Optionally reject or reinterpret instruction according to the decoder's /// declared extensions. fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> { @@ -2107,21 +2251,15 @@ impl InstDecoder { return Err(DecodeError::InvalidOpcode); } } - // AVX... 
- /* // TODO Opcode::XABORT | - Opcode::XACQUIRE | - Opcode::XRELEASE | Opcode::XBEGIN | Opcode::XEND | Opcode::XTEST => { if !self.tsx() { inst.opcode = Opcode::Invalid; - return Err(()); + return Err(DecodeError::InvalidOpcode); } } - */ - /* // TODO Opcode::SHA1MSG1 | Opcode::SHA1MSG2 | Opcode::SHA1NEXTE | @@ -2131,9 +2269,9 @@ impl InstDecoder { Opcode::SHA256RNDS2 => { if !self.sha() { inst.opcode = Opcode::Invalid; - return Err(()); + return Err(DecodeError::InvalidOpcode); } - }*/ + } Opcode::ENCLV | Opcode::ENCLS | Opcode::ENCLU => { @@ -2173,7 +2311,6 @@ impl InstDecoder { Opcode::VCVTDQ2PD | Opcode::VCVTDQ2PS | Opcode::VCVTPD2PS | - Opcode::VCVTPH2PS | Opcode::VCVTPS2DQ | Opcode::VCVTPS2PD | Opcode::VCVTSS2SD | @@ -2181,7 +2318,6 @@ impl InstDecoder { Opcode::VCVTSI2SD | Opcode::VCVTSD2SI | Opcode::VCVTSD2SS | - Opcode::VCVTPS2PH | Opcode::VCVTSS2SI | Opcode::VCVTTPD2DQ | Opcode::VCVTTPS2DQ | @@ -2484,6 +2620,124 @@ impl InstDecoder { return Err(DecodeError::InvalidOpcode); } } + Opcode::MOVBE => { + if !self.movbe() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::POPCNT => { + /* + * from the intel SDM: + * ``` + * Before an application attempts to use the POPCNT instruction, it must check that + * the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1) and POPCNT + * (if CPUID.01H:ECX.POPCNT[bit 23] = 1). + * ``` + */ + if self.intel_quirks() && (!self.sse4_2() || !self.popcnt()) { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } else if !self.popcnt() { + /* + * elsewhere from the amd APM: + * `Instruction Subsets and CPUID Feature Flags` on page 507 indicates that + * popcnt is present when the popcnt bit is reported by cpuid. this seems to be + * the less quirky default, so `intel_quirks` is considered the outlier, and + * before this default. 
+ * */ + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::LZCNT => { + /* + * amd APM, `LZCNT` page 212: + * LZCNT is an Advanced Bit Manipulation (ABM) instruction. Support for the LZCNT + * instruction is indicated by CPUID Fn8000_0001_ECX[ABM] = 1. + * + * meanwhile the intel SDM simply states: + * ``` + * CPUID.EAX=80000001H:ECX.LZCNT[bit 5]: if 1 indicates the processor supports the + * LZCNT instruction. + * ``` + * + * so that's considered the less-quirky (default) case here. + * */ + if self.amd_quirks() && !self.abm() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } else if !self.lzcnt() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::ADCX | + Opcode::ADOX => { + if !self.adx() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::VMRUN | + Opcode::VMLOAD | + Opcode::VMSAVE | + Opcode::CLGI | + Opcode::VMMCALL | + Opcode::INVLPGA => { + if !self.svm() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::STGI | + Opcode::SKINIT => { + if !self.svm() || !self.skinit() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::LAHF | + Opcode::SAHF => { + if !self.lahfsahf() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::VCVTPS2PH | + Opcode::VCVTPH2PS => { + /* + * from intel SDM: + * ``` + * 14.4.1 Detection of F16C Instructions Application using float 16 instruction + * must follow a detection sequence similar to AVX to ensure: • The OS has + * enabled YMM state management support, • The processor support AVX as + * indicated by the CPUID feature flag, i.e. CPUID.01H:ECX.AVX[bit 28] = 1. • + * The processor support 16-bit floating-point conversion instructions via a + * CPUID feature flag (CPUID.01H:ECX.F16C[bit 29] = 1). 
+ * ``` + * + * TODO: only the VEX-coded variant of this instruction should be gated on `f16c`. + * the EVEX-coded variant should be gated on `avx512f` or `avx512vl` if not + * EVEX.512-coded. + */ + if !self.avx() || !self.f16c() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::RDRAND => { + if !self.rdrand() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + Opcode::RDSEED => { + if !self.rdseed() { + inst.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } other => { if !self.bmi1() { if BMI1.contains(&other) { @@ -2808,11 +3062,13 @@ impl Prefixes { pub enum OperandCode { ModRM_0x0f00, ModRM_0x0f01, + ModRM_0x0f0d, ModRM_0x0fae, ModRM_0x0fba, ModRM_0xf238, ModRM_0xf30fc7, ModRM_0x660f38, + ModRM_0xf30f38, ModRM_0x660f3a, CVT_AA, CVT_DA, @@ -3625,7 +3881,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), + OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xf30f38), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3766,7 +4022,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), + 
OpcodeRecord(Interpretation::Instruction(Opcode::LZCNT), OperandCode::Gv_Ev), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), // 0xc0 @@ -3889,7 +4145,7 @@ const OPCODE_0F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::UD2), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::NOP), OperandCode::Ev), + OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0x0f0d), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), // 0x10 @@ -5104,8 +5360,28 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, instruction.operands[1] = read_M(&mut bytes_iter, instruction, modrm, length)?; } }, - _op @ OperandCode::ModRM_0xc6_Eb_Ib | - _op @ OperandCode::ModRM_0xc7_Ev_Iv => { + op @ OperandCode::ModRM_0xc6_Eb_Ib | + op @ OperandCode::ModRM_0xc7_Ev_Iv => { + if modrm == 0xf8 { + if op == OperandCode::ModRM_0xc6_Eb_Ib { + instruction.opcode = Opcode::XABORT; + instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u32; + instruction.operands[0] = OperandSpec::ImmI8; + instruction.operand_count = 1; + return Ok(()); + } else { + instruction.opcode = Opcode::XBEGIN; + instruction.disp = if opwidth == 2 { + read_imm_signed(&mut bytes_iter, 2, length)? as i16 as i32 as u32 + } else { + read_imm_signed(&mut bytes_iter, 4, length)? 
as i32 as u32 + }; + instruction.modrm_mmm = RegSpec::eip(); + instruction.operands[0] = OperandSpec::RegDisp; + instruction.operand_count = 1; + return Ok(()); + } + } if (modrm & 0b00111000) != 0 { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string()); @@ -5113,7 +5389,8 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, instruction.operands[0] = mem_oper; instruction.opcode = Opcode::MOV; - instruction.imm = read_imm_signed(&mut bytes_iter, opwidth, length)? as u32; + let numwidth = if opwidth == 8 { 4 } else { opwidth }; + instruction.imm = read_imm_signed(&mut bytes_iter, numwidth, length)? as u32; instruction.operands[1] = match opwidth { 1 => OperandSpec::ImmI8, 2 => OperandSpec::ImmI16, @@ -5468,6 +5745,113 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; } + OperandCode::ModRM_0x0f0d => { + let modrm = read_modrm(&mut bytes_iter, length)?; + let r = modrm & 0b111; + + let opwidth = imm_width_from_prefixes(SizeCode::vd, instruction.prefixes); + + match r { + 1 => { + instruction.opcode = Opcode::PREFETCHW; + } + _ => { + instruction.opcode = Opcode::NOP; + } + } + instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, opwidth, length)?; + instruction.operand_count = 1; + } + OperandCode::ModRM_0x0f38 => { + let opcode = read_modrm(&mut bytes_iter, length)?; + + let high = opcode >> 4; + let low = opcode & 0xf; + + let operands = match high { + 0 => { + // PqQq + OperandCode::G_E_mm + }, + 1 => { + // PqQq + OperandCode::G_E_mm + }, + 0xc => { + // Vdq,Wdq + OperandCode::G_E_xmm + } + 0xf => { + match low { + 0 => OperandCode::Gv_Ev, + 1 => OperandCode::Ev_Gv, + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + } + _ => { + instruction.opcode = Opcode::Invalid; + 
return Err(DecodeError::InvalidOpcode); + } + }; + instruction.opcode = match opcode { + 0xc8 => Opcode::SHA1NEXTE, + 0xc9 => Opcode::SHA1MSG1, + 0xca => Opcode::SHA1MSG2, + 0xcb => Opcode::SHA256RNDS2, + 0xcc => Opcode::SHA256MSG1, + 0xcd => Opcode::SHA256MSG2, + 0xf0 | 0xf1 => Opcode::MOVBE, + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }; + + return read_operands(decoder, bytes_iter, instruction, operands, length); + }, + OperandCode::ModRM_0x0f3a => { + }, + OperandCode::ModRM_0x0fc7 => { + let modrm = read_modrm(&mut bytes_iter, length)?; + if modrm >> 6 == 0b11 { + match (modrm >> 3) & 0b111 { + 0b111 => { + instruction.opcode = Opcode::RDSEED; + instruction.operand_count = 1; + instruction.operands[0] = OperandSpec::RegRRR; + let opwidth = imm_width_from_prefixes(SizeCode::vd, instruction.prefixes); + instruction.modrm_rrr = + RegSpec::from_parts(modrm & 7, match opwidth { + 4 => RegisterBank::D, + 2 => RegisterBank::W, + _ => unreachable!() + }); + } + 0b110 => { + instruction.opcode = Opcode::RDRAND; + instruction.operand_count = 1; + instruction.operands[0] = OperandSpec::RegRRR; + let opwidth = imm_width_from_prefixes(SizeCode::vd, instruction.prefixes); + instruction.modrm_rrr = + RegSpec::from_parts(modrm & 7, match opwidth { + 4 => RegisterBank::D, + 2 => RegisterBank::W, + _ => unreachable!() + }); + } + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + } + } else { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }, OperandCode::ModRM_0x0f71 => { instruction.operand_count = 2; @@ -5582,6 +5966,19 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; instruction.operand_count = 2; } + OperandCode::ModRM_0xf30f38 => { + let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; + match op { + 
0xf6 => { + instruction.opcode = Opcode::ADOX; + return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length); + } + _ => { + instruction.opcode = Opcode::Invalid; + return Err(DecodeError::InvalidOpcode); + } + }; + } OperandCode::ModRM_0x660f38 => { let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; match op { @@ -5590,6 +5987,10 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter 0xdd => { instruction.opcode = Opcode::AESENCLAST; } 0xde => { instruction.opcode = Opcode::AESDEC; } 0xdf => { instruction.opcode = Opcode::AESDECLAST; } + 0xf6 => { + instruction.opcode = Opcode::ADCX; + return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length); + } _ => { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); @@ -5608,6 +6009,21 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter OperandCode::ModRM_0x660f3a => { let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; match op { + 0xcc => { + instruction.opcode = Opcode::SHA1RNDS4; + + let modrm = read_modrm(&mut bytes_iter, length)?; + instruction.modrm_rrr = + RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::X); + + + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; + instruction.imm = + read_imm_unsigned(&mut bytes_iter, 1, length)?; + instruction.operands[2] = OperandSpec::ImmU8; + instruction.operand_count = 3; + } 0xdf => { instruction.opcode = Opcode::AESKEYGENASSIST; // read operands right here right now @@ -6170,10 +6586,59 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } else if r == 3 { let mod_bits = modrm >> 6; + let m = modrm & 7; if mod_bits == 0b11 { - instruction.opcode = Opcode::Invalid; - instruction.operand_count = 0; - return Err(DecodeError::InvalidOperand); + match m { + 0b000 => { + instruction.opcode = Opcode::VMRUN; + 
instruction.operand_count = 1; + instruction.modrm_rrr = RegSpec::eax(); + instruction.operands[0] = OperandSpec::RegRRR; + }, + 0b001 => { + instruction.opcode = Opcode::VMMCALL; + instruction.operand_count = 0; + }, + 0b010 => { + instruction.opcode = Opcode::VMLOAD; + instruction.operand_count = 1; + instruction.modrm_rrr = RegSpec::eax(); + instruction.operands[0] = OperandSpec::RegRRR; + }, + 0b011 => { + instruction.opcode = Opcode::VMSAVE; + instruction.operand_count = 1; + instruction.modrm_rrr = RegSpec::eax(); + instruction.operands[0] = OperandSpec::RegRRR; + }, + 0b100 => { + instruction.opcode = Opcode::STGI; + instruction.operand_count = 0; + }, + 0b101 => { + instruction.opcode = Opcode::CLGI; + instruction.operand_count = 0; + }, + 0b110 => { + instruction.opcode = Opcode::SKINIT; + instruction.operand_count = 1; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.modrm_rrr = RegSpec::eax(); + }, + 0b111 => { + instruction.opcode = Opcode::INVLPGA; + instruction.operand_count = 2; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = OperandSpec::RegMMM; + instruction.modrm_rrr = RegSpec::eax(); + instruction.modrm_mmm = RegSpec::ecx(); + }, + _ => { + instruction.opcode = Opcode::Invalid; + instruction.operand_count = 0; + return Err(DecodeError::InvalidOperand); + } + } } else { instruction.opcode = Opcode::LIDT; instruction.operand_count = 1; diff --git a/src/protected_mode/uarch.rs b/src/protected_mode/uarch.rs new file mode 100644 index 0000000..b2b1201 --- /dev/null +++ b/src/protected_mode/uarch.rs @@ -0,0 +1,221 @@ +pub mod amd { + //! most information about instruction set extensions for microarchitectures here was sourced + //! from https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview and + //! https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features. these mappings are best-effort + //! but fairly unused, so a critical eye should be kept towards these decoders rejecting + //! 
instructions they should not, or incorrectly accepting instructions. + //! + //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, + //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension + //! reportedly function correctly (agner p217). + //! + //! [agner](https://www.agner.org/optimize/microarchitecture.pdf) + //! as retrieved 2020 may 19 + //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5` + + use long_mode::InstDecoder; + + /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later + /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of + /// support - SSE2 and no later. + pub fn k8() -> InstDecoder { + InstDecoder::minimal() + } + + /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through + /// to SSE4.2a, as well as consistent `cmov` support, among other features. + pub fn k10() -> InstDecoder { + k8() + .with_cmov() + .with_cmpxchg16b() + .with_svm() + .with_abm() + .with_lahfsahf() + .with_sse3() + .with_ssse3() + .with_sse4() + .with_sse4_2() + .with_sse4a() + } + + /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX + /// support among other extensions, and are notable for including `AESNI`. + pub fn bulldozer() -> InstDecoder { + k10() + .with_bmi1() + .with_aesni() + .with_pclmulqdq() + .with_f16c() + .with_avx() + .with_fma4() + .with_xop() + } + + /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. + pub fn piledriver() -> InstDecoder { + bulldozer() + .with_tbm() + .with_fma3() + .with_fma4() + } + + /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver` + /// cores, these cores do not support `TBM` or `FMA3`. + pub fn steamroller() -> InstDecoder { + bulldozer() + } + + /// `Excavator` was the successor to `Steamroller`, launched in 2015. 
+ pub fn excavator() -> InstDecoder { + steamroller() + .with_movbe() + .with_bmi2() + .with_rdrand() + .with_avx() + .with_xop() + .with_bmi2() + .with_sha() + .with_rdrand() + .with_avx2() + } + + /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD + /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, + /// SHA, RDSEED, and other extensions. + pub fn zen() -> InstDecoder { + k10() + .with_avx() + .with_avx2() + .with_bmi1() + .with_aesni() + .with_pclmulqdq() + .with_f16c() + .with_movbe() + .with_bmi2() + .with_rdrand() + .with_adx() + .with_sha() + .with_rdseed() + .with_fma3() + // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? + } +} + +pub mod intel { + //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out! + + use long_mode::InstDecoder; + + /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the + /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000 + /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed + /// included SSE3. + pub fn netburst() -> InstDecoder { + InstDecoder::minimal() + .with_cmov() + .with_sse3() + } + + /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with + /// processors using this architecture shipped under the names "Merom", "Conroe", and + /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused + /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines, + /// `Core 2 *` processors used the `Core` architecture. + pub fn core() -> InstDecoder { + netburst() + .with_ssse3() + .with_sse4() + } + + /// `Penryn` was the successor to `Core`, launched in early 2008. it added SSE4.1, along with + /// virtualization extensions. 
+ pub fn peryn() -> InstDecoder { + core() + .with_sse4_1() + } + + /// `Nehalem` was the successor to `Penryn`, launched in late 2008. not to be confused with the + /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores. + /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction. + pub fn nehalem() -> InstDecoder { + peryn() + .with_sse4_2() + .with_popcnt() + } + + /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL + /// extensions. + pub fn westmere() -> InstDecoder { + nehalem() + .with_aesni() + .with_pclmulqdq() + } + + /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX + /// instructions. + pub fn sandybridge() -> InstDecoder { + westmere() + .with_avx() + } + + /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C + /// extensions for 16-bit floating point conversion, and the RDRAND instruction. + pub fn ivybridge() -> InstDecoder { + sandybridge() + .with_f16c() + .with_rdrand() + } + + /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction + /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3. + pub fn haswell() -> InstDecoder { + ivybridge() + .with_bmi1() + .with_bmi2() + .with_abm() + .with_fma3() + .with_avx2() + } + + /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores + /// were shipped as `E7-48xx/E7-88xx v3` models of processors. + pub fn haswell_ex() -> InstDecoder { + haswell() + .with_tsx() + } + + /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED, + /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because + /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be + /// reported as an erratum (for example, the `Broadwell-Y` line of parts). 
+ pub fn broadwell() -> InstDecoder { + haswell_ex() + .with_adx() + .with_rdseed() + .with_prefetchw() + } + + /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX + /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product + /// lines. + /// + /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest + /// common denominator: if you want a `Skylake` decoder with AVX512, something like the + /// following: + /// ``` + /// InstDecoder::skylake().with_avx512_f().with_avx512_dq() + /// ``` + /// is likely your best option. + pub fn skylake() -> InstDecoder { + broadwell() + .with_mpx() + .with_sgx() + } + + /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to + /// x86_64 implementation beyond `skylake`. + pub fn kabylake() -> InstDecoder { + skylake() + } + // ice lake is shipping so that should probably be included... +} -- cgit v1.1