aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2020-05-21 23:09:39 -0700
committeriximeow <me@iximeow.net>2020-05-21 23:09:39 -0700
commita0fd5a24cb0aa0b697f680c451d928cefe8323b4 (patch)
treed95069afe48249ff1226cb077e242d093bb2794a
parent905dc4c7feac1e09cde70db52c0762e8990d4d96 (diff)
add sha, lzcnt, tsx, f16c, svm, movbe, adx, and prefetchw extensions
also add builders to get decoders appropriate for specific microarchitectures from intel and amd * low-power architectures are not yet present
-rw-r--r--src/long_mode/display.rs48
-rw-r--r--src/long_mode/mod.rs514
-rw-r--r--src/long_mode/uarch.rs221
-rw-r--r--src/protected_mode/display.rs48
-rw-r--r--src/protected_mode/mod.rs505
-rw-r--r--src/protected_mode/uarch.rs221
-rw-r--r--test/long_mode/mod.rs70
-rw-r--r--test/long_mode/operand.rs2
-rw-r--r--test/protected_mode/mod.rs64
-rw-r--r--test/protected_mode/operand.rs4
10 files changed, 1654 insertions, 43 deletions
diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs
index 49d1600..5318ebb 100644
--- a/src/long_mode/display.rs
+++ b/src/long_mode/display.rs
@@ -1076,6 +1076,30 @@ impl fmt::Display for Opcode {
&Opcode::HSUBPD => write!(f, "hsubpd"),
&Opcode::HADDPD => write!(f, "haddpd"),
&Opcode::ADDSUBPD => write!(f, "addsubpd"),
+ &Opcode::XABORT => write!(f, "xabort"),
+ &Opcode::XBEGIN => write!(f, "xbegin"),
+ &Opcode::RDSEED => write!(f, "rdseed"),
+ &Opcode::LZCNT => write!(f, "lzcnt"),
+ &Opcode::CLGI => write!(f, "clgi"),
+ &Opcode::STGI => write!(f, "stgi"),
+ &Opcode::SKINIT => write!(f, "skinit"),
+ &Opcode::VMLOAD => write!(f, "vmload"),
+ &Opcode::VMMCALL => write!(f, "vmmcall"),
+ &Opcode::VMSAVE => write!(f, "vmsave"),
+ &Opcode::VMRUN => write!(f, "vmrun"),
+ &Opcode::INVLPGA => write!(f, "invlpga"),
+ &Opcode::MOVBE => write!(f, "movbe"),
+ &Opcode::ADCX => write!(f, "adcx"),
+ &Opcode::ADOX => write!(f, "adox"),
+ &Opcode::PREFETCHW => write!(f, "prefetchw"),
+ &Opcode::RDRAND => write!(f, "rdrand"),
+ &Opcode::SHA1RNDS4 => write!(f, "sha1rnds4"),
+ &Opcode::SHA1NEXTE => write!(f, "sha1nexte"),
+ &Opcode::SHA1MSG1 => write!(f, "sha1msg1"),
+ &Opcode::SHA1MSG2 => write!(f, "sha1msg2"),
+ &Opcode::SHA256RNDS2 => write!(f, "sha256rnds2"),
+ &Opcode::SHA256MSG1 => write!(f, "sha256msg1"),
+ &Opcode::SHA256MSG2 => write!(f, "sha256msg2"),
&Opcode::Invalid => write!(f, "invalid"),
}
}
@@ -1306,8 +1330,11 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::LEA |
Opcode::ADD |
Opcode::ADC |
+ Opcode::ADCX |
+ Opcode::ADOX |
Opcode::SUB |
Opcode::POPCNT |
+ Opcode::LZCNT |
Opcode::BT |
Opcode::BTS |
Opcode::BTR |
@@ -1399,6 +1426,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::PREFETCH0 |
Opcode::PREFETCH1 |
Opcode::PREFETCH2 |
+ Opcode::PREFETCHW |
Opcode::NOP => { write!(out, "{}", colors.nop_op(self)) }
/* Control flow */
@@ -1680,6 +1708,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::PEXTRW |
Opcode::PINSRW |
Opcode::MOV |
+ Opcode::MOVBE |
Opcode::LODS |
Opcode::STOS |
Opcode::LAHF |
@@ -1830,6 +1859,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::SWAPGS |
Opcode::RDTSCP |
Opcode::INVLPG |
+ Opcode::INVLPGA |
Opcode::CPUID |
Opcode::WBINVD |
Opcode::INVD |
@@ -1860,9 +1890,16 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::VMCALL |
Opcode::VMLAUNCH |
Opcode::VMRESUME |
+ Opcode::VMLOAD |
+ Opcode::VMMCALL |
+ Opcode::VMSAVE |
+ Opcode::VMRUN |
Opcode::VMXOFF |
Opcode::MONITOR |
Opcode::MWAIT |
+ Opcode::SKINIT |
+ Opcode::CLGI |
+ Opcode::STGI |
Opcode::CLAC |
Opcode::STAC |
Opcode::ENCLS |
@@ -1872,11 +1909,22 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::VMFUNC |
Opcode::XEND |
Opcode::XTEST |
+ Opcode::XABORT |
+ Opcode::XBEGIN |
Opcode::ENCLU |
Opcode::RDPKRU |
Opcode::WRPKRU |
Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) }
+ Opcode::RDSEED |
+ Opcode::RDRAND |
+ Opcode::SHA1RNDS4 |
+ Opcode::SHA1NEXTE |
+ Opcode::SHA1MSG1 |
+ Opcode::SHA1MSG2 |
+ Opcode::SHA256RNDS2 |
+ Opcode::SHA256MSG1 |
+ Opcode::SHA256MSG2 |
Opcode::AESDEC |
Opcode::AESDECLAST |
Opcode::AESENC |
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index e0a1fdf..20abe1f 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -1,5 +1,6 @@
mod vex;
mod display;
+pub mod uarch;
use core::hint::unreachable_unchecked;
@@ -73,7 +74,6 @@ impl RegSpec {
#[inline]
fn gp_from_parts(num: u8, extended: bool, width: u8, rex: bool) -> RegSpec {
-// println!("from_parts width: {}, num: {}, extended: {}", width, num, extended);
RegSpec {
num: num + if extended { 0b1000 } else { 0 },
bank: width_to_gp_reg_bank(width, rex)
@@ -129,6 +129,22 @@ impl RegSpec {
}
#[inline]
+ pub fn esp() -> RegSpec {
+ RegSpec {
+ num: 4,
+ bank: RegisterBank::D
+ }
+ }
+
+ #[inline]
+ pub fn sp() -> RegSpec {
+ RegSpec {
+ num: 4,
+ bank: RegisterBank::W
+ }
+ }
+
+ #[inline]
pub fn fs() -> RegSpec {
RegSpec { bank: RegisterBank::S, num: 3 }
}
@@ -779,12 +795,17 @@ pub enum Opcode {
XGETBV,
XSETBV,
VMFUNC,
+ XABORT,
+ XBEGIN,
XEND,
XTEST,
ENCLU,
RDPKRU,
WRPKRU,
+ RDSEED,
+ RDRAND,
+
ADDPS,
ADDPD,
ANDNPS,
@@ -1330,6 +1351,31 @@ pub enum Opcode {
PHADDW,
HSUBPD,
HADDPD,
+
+ SHA1RNDS4,
+ SHA1NEXTE,
+ SHA1MSG1,
+ SHA1MSG2,
+ SHA256RNDS2,
+ SHA256MSG1,
+ SHA256MSG2,
+
+ LZCNT,
+ CLGI,
+ STGI,
+ SKINIT,
+ VMLOAD,
+ VMMCALL,
+ VMSAVE,
+ VMRUN,
+ INVLPGA,
+
+ MOVBE,
+
+ ADCX,
+ ADOX,
+
+ PREFETCHW,
}
#[derive(Debug)]
@@ -1500,6 +1546,14 @@ pub struct InstDecoder {
// 53. intel quirks
// 54. amd quirks
// 55. avx (intel ?, amd ?)
+ // 56. amd-v/svm
+ // 57. lahfsahf
+ // 58. cmov
+ // 59. f16c
+ // 60. fma4
+ // 61. prefetchw
+ // 62. tsx
+ // 63. lzcnt
flags: u64,
}
@@ -1586,6 +1640,12 @@ impl InstDecoder {
self
}
+ pub fn with_sse4(self) -> Self {
+ self
+ .with_sse4_1()
+ .with_sse4_2()
+ }
+
pub fn movbe(&self) -> bool {
self.flags & (1 << 8) != 0
}
@@ -1658,6 +1718,9 @@ impl InstDecoder {
self
}
+ /// `bmi2` indicates support for the `BZHI`, `MULX`, `PDEP`, `PEXT`, `RORX`, `SARX`, `SHRX`,
+ /// and `SHLX` instructions. `bmi2` is implemented in all x86_64 chips that implement `bmi`,
+ /// except the amd `piledriver` and `steamroller` microarchitectures.
pub fn bmi2(&self) -> bool {
self.flags & (1 << 16) != 0
}
@@ -2018,6 +2081,94 @@ impl InstDecoder {
self
}
+ pub fn svm(&self) -> bool {
+ self.flags & (1 << 56) != 0
+ }
+
+ pub fn with_svm(mut self) -> Self {
+ self.flags |= 1 << 56;
+ self
+ }
+
+ /// `lahfsahf` is only unset for early revisions of 64-bit amd and intel chips. unfortunately
+ /// the clearest documentation on when these instructions were reintroduced into 64-bit
+ /// architectures seems to be
+ /// [wikipedia](https://en.wikipedia.org/wiki/X86-64#Older_implementations):
+ /// ```text
+ /// Early AMD64 and Intel 64 CPUs lacked LAHF and SAHF instructions in 64-bit mode. AMD
+ /// introduced these instructions (also in 64-bit mode) with their Athlon 64, Opteron and
+ /// Turion 64 revision D processors in March 2005[48][49][50] while Intel introduced the
+ /// instructions with the Pentium 4 G1 stepping in December 2005. The 64-bit version of Windows
+ /// 8.1 requires this feature.[47]
+ /// ```
+ ///
+ /// this puts reintroduction of these instructions somewhere in the middle of prescott and k8
+ /// lifecycles, for intel and amd respectively. because there is no specific uarch where these
+ /// features become enabled, prescott and k8 default to not supporting these instructions,
+ /// where later uarches support these instructions.
+ pub fn lahfsahf(&self) -> bool {
+ self.flags & (1 << 57) != 0
+ }
+
+ pub fn with_lahfsahf(mut self) -> Self {
+ self.flags |= 1 << 57;
+ self
+ }
+
+ pub fn cmov(&self) -> bool {
+ self.flags & (1 << 58) != 0
+ }
+
+ pub fn with_cmov(mut self) -> Self {
+ self.flags |= 1 << 58;
+ self
+ }
+
+ pub fn f16c(&self) -> bool {
+ self.flags & (1 << 59) != 0
+ }
+
+ pub fn with_f16c(mut self) -> Self {
+ self.flags |= 1 << 59;
+ self
+ }
+
+ pub fn fma4(&self) -> bool {
+ self.flags & (1 << 60) != 0
+ }
+
+ pub fn with_fma4(mut self) -> Self {
+ self.flags |= 1 << 60;
+ self
+ }
+
+ pub fn prefetchw(&self) -> bool {
+ self.flags & (1 << 61) != 0
+ }
+
+ pub fn with_prefetchw(mut self) -> Self {
+ self.flags |= 1 << 61;
+ self
+ }
+
+ pub fn tsx(&self) -> bool {
+ self.flags & (1 << 62) != 0
+ }
+
+ pub fn with_tsx(mut self) -> Self {
+ self.flags |= 1 << 62;
+ self
+ }
+
+ pub fn lzcnt(&self) -> bool {
+ self.flags & (1 << 63) != 0
+ }
+
+ pub fn with_lzcnt(mut self) -> Self {
+ self.flags |= 1 << 63;
+ self
+ }
+
/// Optionally reject or reinterpret instruction according to the decoder's
/// declared extensions.
fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> {
@@ -2150,21 +2301,15 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
- // AVX...
- /* // TODO
Opcode::XABORT |
- Opcode::XACQUIRE |
- Opcode::XRELEASE |
Opcode::XBEGIN |
Opcode::XEND |
Opcode::XTEST => {
if !self.tsx() {
inst.opcode = Opcode::Invalid;
- return Err(());
+ return Err(DecodeError::InvalidOpcode);
}
}
- */
- /* // TODO
Opcode::SHA1MSG1 |
Opcode::SHA1MSG2 |
Opcode::SHA1NEXTE |
@@ -2174,9 +2319,9 @@ impl InstDecoder {
Opcode::SHA256RNDS2 => {
if !self.sha() {
inst.opcode = Opcode::Invalid;
- return Err(());
+ return Err(DecodeError::InvalidOpcode);
}
- }*/
+ }
Opcode::ENCLV |
Opcode::ENCLS |
Opcode::ENCLU => {
@@ -2185,6 +2330,7 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
+ // AVX...
Opcode::VMOVDDUP |
Opcode::VPSHUFLW |
Opcode::VHADDPS |
@@ -2216,7 +2362,6 @@ impl InstDecoder {
Opcode::VCVTDQ2PD |
Opcode::VCVTDQ2PS |
Opcode::VCVTPD2PS |
- Opcode::VCVTPH2PS |
Opcode::VCVTPS2DQ |
Opcode::VCVTPS2PD |
Opcode::VCVTSS2SD |
@@ -2224,7 +2369,6 @@ impl InstDecoder {
Opcode::VCVTSI2SD |
Opcode::VCVTSD2SI |
Opcode::VCVTSD2SS |
- Opcode::VCVTPS2PH |
Opcode::VCVTSS2SI |
Opcode::VCVTTPD2DQ |
Opcode::VCVTTPS2DQ |
@@ -2527,6 +2671,124 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
+ Opcode::MOVBE => {
+ if !self.movbe() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::POPCNT => {
+ /*
+ * from the intel SDM:
+ * ```
+ * Before an application attempts to use the POPCNT instruction, it must check that
+ * the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1) and POPCNT
+ * (if CPUID.01H:ECX.POPCNT[bit 23] = 1).
+ * ```
+ */
+ if self.intel_quirks() && (!self.sse4_2() || !self.popcnt()) {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ } else if !self.popcnt() {
+ /*
+ * elsewhere from the amd APM:
+ * `Instruction Subsets and CPUID Feature Flags` on page 507 indicates that
+ * popcnt is present when the popcnt bit is reported by cpuid. this seems to be
+ * the less quirky default, so `intel_quirks` is considered the outlier, and is checked
+ * before this default.
+ * */
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::LZCNT => {
+ /*
+ * amd APM, `LZCNT` page 212:
+ * LZCNT is an Advanced Bit Manipulation (ABM) instruction. Support for the LZCNT
+ * instruction is indicated by CPUID Fn8000_0001_ECX[ABM] = 1.
+ *
+ * meanwhile the intel SDM simply states:
+ * ```
+ * CPUID.EAX=80000001H:ECX.LZCNT[bit 5]: if 1 indicates the processor supports the
+ * LZCNT instruction.
+ * ```
+ *
+ * so that's considered the less-quirky (default) case here.
+ * */
+ if self.amd_quirks() && !self.abm() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ } else if !self.lzcnt() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::ADCX |
+ Opcode::ADOX => {
+ if !self.adx() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::VMRUN |
+ Opcode::VMLOAD |
+ Opcode::VMSAVE |
+ Opcode::CLGI |
+ Opcode::VMMCALL |
+ Opcode::INVLPGA => {
+ if !self.svm() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::STGI |
+ Opcode::SKINIT => {
+ if !self.svm() || !self.skinit() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::LAHF |
+ Opcode::SAHF => {
+ if !self.lahfsahf() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::VCVTPS2PH |
+ Opcode::VCVTPH2PS => {
+ /*
+ * from intel SDM:
+ * ```
+ * 14.4.1 Detection of F16C Instructions Application using float 16 instruction
+ * must follow a detection sequence similar to AVX to ensure: • The OS has
+ * enabled YMM state management support, • The processor support AVX as
+ * indicated by the CPUID feature flag, i.e. CPUID.01H:ECX.AVX[bit 28] = 1. •
+ * The processor support 16-bit floating-point conversion instructions via a
+ * CPUID feature flag (CPUID.01H:ECX.F16C[bit 29] = 1).
+ * ```
+ *
+ * TODO: only the VEX-coded variant of this instruction should be gated on `f16c`.
+ * the EVEX-coded variant should be gated on `avx512f` or `avx512vl` if not
+ * EVEX.512-coded.
+ */
+ if !self.avx() || !self.f16c() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::RDRAND => {
+ if !self.rdrand() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::RDSEED => {
+ if !self.rdseed() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
other => {
if !self.bmi1() {
if BMI1.contains(&other) {
@@ -2871,11 +3133,13 @@ impl PrefixRex {
pub enum OperandCode {
ModRM_0x0f00,
ModRM_0x0f01,
+ ModRM_0x0f0d,
ModRM_0x0fae,
ModRM_0x0fba,
ModRM_0xf238,
ModRM_0xf30fc7,
ModRM_0x660f38,
+ ModRM_0xf30f38,
ModRM_0x660f3a,
CVT_AA,
CVT_DA,
@@ -3686,7 +3950,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xf30f38),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
@@ -3827,7 +4091,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+ OpcodeRecord(Interpretation::Instruction(Opcode::LZCNT), OperandCode::Gv_Ev),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
// 0xc0
@@ -3950,7 +4214,7 @@ const OPCODE_0F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::UD2), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::NOP), OperandCode::Ev),
+ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0x0f0d),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
// 0x10
@@ -5104,8 +5368,28 @@ fn read_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter: T,
};
instruction.operand_count = 2;
},
- _op @ OperandCode::ModRM_0xc6_Eb_Ib |
- _op @ OperandCode::ModRM_0xc7_Ev_Iv => {
+ op @ OperandCode::ModRM_0xc6_Eb_Ib |
+ op @ OperandCode::ModRM_0xc7_Ev_Iv => {
+ if modrm == 0xf8 {
+ if op == OperandCode::ModRM_0xc6_Eb_Ib {
+ instruction.opcode = Opcode::XABORT;
+ instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u64;
+ instruction.operands[0] = OperandSpec::ImmI8;
+ instruction.operand_count = 1;
+ return Ok(());
+ } else {
+ instruction.opcode = Opcode::XBEGIN;
+ instruction.disp = if opwidth == 2 {
+ read_imm_signed(&mut bytes_iter, 2, length)? as i16 as i64 as u64
+ } else {
+ read_imm_signed(&mut bytes_iter, 4, length)? as i32 as i64 as u64
+ };
+ instruction.modrm_mmm = RegSpec::rip();
+ instruction.operands[0] = OperandSpec::RegDisp;
+ instruction.operand_count = 1;
+ return Ok(());
+ }
+ }
if (modrm & 0b00111000) != 0 {
instruction.opcode = Opcode::Invalid;
return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string());
@@ -5490,6 +5774,115 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
instruction.opcode = Opcode::MOVD;
}
}
+ OperandCode::ModRM_0x0f0d => {
+ let modrm = read_modrm(&mut bytes_iter, length)?;
+ let r = (modrm >> 3) & 0b111; // opcode extension lives in ModRM.reg (bits 5:3), not r/m
+
+ let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes);
+ // `/1` selects PREFETCHW; every other extension value is decoded as NOP.
+ match r {
+ 1 => {
+ instruction.opcode = Opcode::PREFETCHW;
+ }
+ _ => {
+ instruction.opcode = Opcode::NOP;
+ }
+ }
+ instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, opwidth, length)?;
+ instruction.operand_count = 1;
+ }
+ OperandCode::ModRM_0x0f38 => {
+ let opcode = read_modrm(&mut bytes_iter, length)?;
+
+ let high = opcode >> 4;
+ let low = opcode & 0xf;
+
+ let operands = match high {
+ 0 => {
+ // PqQq
+ OperandCode::G_E_mm
+ },
+ 1 => {
+ // PqQq
+ OperandCode::G_E_mm
+ },
+ 0xc => {
+ // Vdq,Wdq
+ OperandCode::G_E_xmm
+ }
+ 0xf => {
+ match low {
+ 0 => OperandCode::Gv_Ev,
+ 1 => OperandCode::Ev_Gv,
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+ instruction.opcode = match opcode {
+ 0xc8 => Opcode::SHA1NEXTE,
+ 0xc9 => Opcode::SHA1MSG1,
+ 0xca => Opcode::SHA1MSG2,
+ 0xcb => Opcode::SHA256RNDS2,
+ 0xcc => Opcode::SHA256MSG1,
+ 0xcd => Opcode::SHA256MSG2,
+ 0xf0 | 0xf1 => Opcode::MOVBE,
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+
+ return read_operands(decoder, bytes_iter, instruction, operands, length);
+ },
+ OperandCode::ModRM_0x0f3a => {
+ },
+ OperandCode::ModRM_0x0fc7 => {
+ let modrm = read_modrm(&mut bytes_iter, length)?;
+ if modrm >> 6 == 0b11 { // only register-direct (mod == 0b11) forms are accepted here
+ match (modrm >> 3) & 0b111 { // ModRM.reg selects the instruction
+ 0b111 => {
+ instruction.opcode = Opcode::RDSEED;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR; // operand is kept in the modrm_rrr slot
+ let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes);
+ instruction.modrm_rrr = // register number comes from ModRM.rm, which REX.B extends
+ RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().b(), match opwidth {
+ 8 => RegisterBank::Q,
+ 4 => RegisterBank::D,
+ 2 => RegisterBank::W,
+ _ => unreachable!()
+ });
+ }
+ 0b110 => {
+ instruction.opcode = Opcode::RDRAND;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR; // operand is kept in the modrm_rrr slot
+ let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes);
+ instruction.modrm_rrr = // register number comes from ModRM.rm, which REX.B extends
+ RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().b(), match opwidth {
+ 8 => RegisterBank::Q,
+ 4 => RegisterBank::D,
+ 2 => RegisterBank::W,
+ _ => unreachable!()
+ });
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ } else {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ },
OperandCode::ModRM_0x0f71 => {
instruction.operand_count = 2;
@@ -5604,6 +5997,19 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
instruction.operand_count = 2;
}
+ OperandCode::ModRM_0xf30f38 => {
+ let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
+ match op {
+ 0xf6 => {
+ instruction.opcode = Opcode::ADOX;
+ return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length);
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+ }
OperandCode::ModRM_0x660f38 => {
let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
match op {
@@ -5612,6 +6018,10 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
0xdd => { instruction.opcode = Opcode::AESENCLAST; }
0xde => { instruction.opcode = Opcode::AESDEC; }
0xdf => { instruction.opcode = Opcode::AESDECLAST; }
+ 0xf6 => {
+ instruction.opcode = Opcode::ADCX;
+ return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length);
+ }
_ => {
instruction.opcode = Opcode::Invalid;
return Err(DecodeError::InvalidOpcode);
@@ -5630,6 +6040,21 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
OperandCode::ModRM_0x660f3a => {
let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
match op {
+ 0xcc => {
+ instruction.opcode = Opcode::SHA1RNDS4;
+
+ let modrm = read_modrm(&mut bytes_iter, length)?;
+ instruction.modrm_rrr =
+ RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X);
+
+
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
+ instruction.imm =
+ read_imm_unsigned(&mut bytes_iter, 1, length)?;
+ instruction.operands[2] = OperandSpec::ImmU8;
+ instruction.operand_count = 3;
+ }
0xdf => {
instruction.opcode = Opcode::AESKEYGENASSIST;
// read operands right here right now
@@ -6214,10 +6639,59 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
}
} else if r == 3 {
let mod_bits = modrm >> 6;
+ let m = modrm & 7;
if mod_bits == 0b11 {
- instruction.opcode = Opcode::Invalid;
- instruction.operand_count = 0;
- return Err(DecodeError::InvalidOperand);
+ match m {
+ 0b000 => {
+ instruction.opcode = Opcode::VMRUN;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b001 => {
+ instruction.opcode = Opcode::VMMCALL;
+ instruction.operand_count = 0;
+ },
+ 0b010 => {
+ instruction.opcode = Opcode::VMLOAD;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b011 => {
+ instruction.opcode = Opcode::VMSAVE;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b100 => {
+ instruction.opcode = Opcode::STGI;
+ instruction.operand_count = 0;
+ },
+ 0b101 => {
+ instruction.opcode = Opcode::CLGI;
+ instruction.operand_count = 0;
+ },
+ 0b110 => {
+ instruction.opcode = Opcode::SKINIT;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.modrm_rrr = RegSpec::eax();
+ },
+ 0b111 => {
+ instruction.opcode = Opcode::INVLPGA;
+ instruction.operand_count = 2;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.operands[1] = OperandSpec::RegMMM;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.modrm_mmm = RegSpec::ecx();
+ },
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ instruction.operand_count = 0;
+ return Err(DecodeError::InvalidOperand);
+ }
+ }
} else {
instruction.opcode = Opcode::LIDT;
instruction.operand_count = 1;
diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs
new file mode 100644
index 0000000..b2b1201
--- /dev/null
+++ b/src/long_mode/uarch.rs
@@ -0,0 +1,221 @@
+pub mod amd {
+ //! most information about instruction set extensions for microarchitectures here was sourced
+ //! from https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview and
+ //! https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features. these mappings are best-effort
+ //! but fairly unused, so a critical eye should be kept towards these decoders rejecting
+ //! instructions they should not, or incorrectly accepting instructions.
+ //!
+ //! microarchitectures as defined here are with respect to flags reported by CPUID. notably,
+ //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension
+ //! reportedly function correctly (agner p217).
+ //!
+ //! [agner](https://www.agner.org/optimize/microarchitecture.pdf)
+ //! as retrieved 2020 may 19
+ //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5`
+
+ use long_mode::InstDecoder;
+
+ /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later
+ /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of
+ /// support - SSE2 and no later.
+ pub fn k8() -> InstDecoder {
+ InstDecoder::minimal()
+ }
+
+ /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through
+ /// to SSE4.2a, as well as consistent `cmov` support, among other features.
+ pub fn k10() -> InstDecoder {
+ k8()
+ .with_cmov()
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
+ .with_sse4a()
+ }
+
+ /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX
+ /// support among other extensions, and are notable for including `AESNI`.
+ pub fn bulldozer() -> InstDecoder {
+ k10()
+ .with_bmi1()
+ .with_aesni()
+ .with_pclmulqdq()
+ .with_f16c()
+ .with_avx()
+ .with_fma4()
+ .with_xop()
+ }
+
+ /// `Piledriver` was the successor to `Bulldozer`, launched in 2012.
+ pub fn piledriver() -> InstDecoder {
+ bulldozer()
+ .with_tbm()
+ .with_fma3()
+ .with_fma4()
+ }
+
+ /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver`
+ /// cores, these cores do not support `TBM` or `FMA3`.
+ pub fn steamroller() -> InstDecoder {
+ bulldozer()
+ }
+
+ /// `Excavator` was the successor to `Steamroller`, launched in 2015.
+ pub fn excavator() -> InstDecoder {
+ steamroller()
+ .with_movbe()
+ .with_bmi2()
+ .with_rdrand()
+ .with_avx()
+ .with_xop()
+ .with_bmi2()
+ .with_sha()
+ .with_rdrand()
+ .with_avx2()
+ }
+
+ /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD
+ /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX,
+ /// SHA, RDSEED, and other extensions.
+ pub fn zen() -> InstDecoder {
+ k10()
+ .with_avx()
+ .with_avx2()
+ .with_bmi1()
+ .with_aesni()
+ .with_pclmulqdq()
+ .with_f16c()
+ .with_movbe()
+ .with_bmi2()
+ .with_rdrand()
+ .with_adx()
+ .with_sha()
+ .with_rdseed()
+ .with_fma3()
+ // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO?
+ }
+}
+
+pub mod intel {
+ //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out!
+
+ use long_mode::InstDecoder;
+
+ /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the
+ /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000
+ /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed
+ /// included SSE3.
+ pub fn netburst() -> InstDecoder {
+ InstDecoder::minimal()
+ .with_cmov()
+ .with_sse3()
+ }
+
+ /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with
+ /// processors using this architecture shipped under the names "Merom", "Conroe", and
+ /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused
+ /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines,
+ /// `Core 2 *` processors used the `Core` architecture.
+ pub fn core() -> InstDecoder {
+ netburst()
+ .with_ssse3()
+ .with_sse4()
+ }
+
+ /// `Penryn` was the successor to `Core`, launched in early 2008. it added SSE4.1, along with
+ /// virtualization extensions.
+ pub fn peryn() -> InstDecoder {
+ core()
+ .with_sse4_1()
+ }
+
+ /// `Nehalem` was the successor to `Penryn`, launched in late 2008. not to be confused with the
+ /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores.
+ /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction.
+ pub fn nehalem() -> InstDecoder {
+ peryn()
+ .with_sse4_2()
+ .with_popcnt()
+ }
+
+ /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL
+ /// extensions.
+ pub fn westmere() -> InstDecoder {
+ nehalem()
+ .with_aesni()
+ .with_pclmulqdq()
+ }
+
+ /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX
+ /// instructions.
+ pub fn sandybridge() -> InstDecoder {
+ westmere()
+ .with_avx()
+ }
+
+ /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C
+ /// extensions for 16-bit floating point conversion, and the RDRAND instruction.
+ pub fn ivybridge() -> InstDecoder {
+ sandybridge()
+ .with_f16c()
+ .with_rdrand()
+ }
+
+ /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction
+ /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3.
+ pub fn haswell() -> InstDecoder {
+ ivybridge()
+ .with_bmi1()
+ .with_bmi2()
+ .with_abm()
+ .with_fma3()
+ .with_avx2()
+ }
+
+ /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores
+ /// were shipped as `E7-48xx/E7-88xx v3` models of processors.
+ pub fn haswell_ex() -> InstDecoder {
+ haswell()
+ .with_tsx()
+ }
+
+ /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED,
+ /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because
+ /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be
+ /// reported as an erratum (for example, the `Broadwell-Y` line of parts).
+ pub fn broadwell() -> InstDecoder {
+ haswell_ex()
+ .with_adx()
+ .with_rdseed()
+ .with_prefetchw()
+ }
+
+ /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX
+ /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product
+ /// lines.
+ ///
+ /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest
+ /// common denominator: if you want a `Skylake` decoder with AVX512, something like the
+ /// following:
+ /// ```ignore
+ /// uarch::intel::skylake().with_avx512_f().with_avx512_dq()
+ /// ```
+ /// is likely your best option.
+ pub fn skylake() -> InstDecoder {
+ broadwell()
+ .with_mpx()
+ .with_sgx()
+ }
+
+ /// `Kaby Lake` was the successor to `Skylake`, launched in 2016. it adds no extensions to
+ /// x86_64 implementation beyond `skylake`.
+ pub fn kabylake() -> InstDecoder {
+ skylake()
+ }
+ // ice lake is shipping so that should probably be included...
+}
diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs
index d072991..ed8d44c 100644
--- a/src/protected_mode/display.rs
+++ b/src/protected_mode/display.rs
@@ -1063,6 +1063,30 @@ impl fmt::Display for Opcode {
&Opcode::HSUBPD => write!(f, "hsubpd"),
&Opcode::HADDPD => write!(f, "haddpd"),
&Opcode::ADDSUBPD => write!(f, "addsubpd"),
+ &Opcode::XABORT => write!(f, "xabort"),
+ &Opcode::XBEGIN => write!(f, "xbegin"),
+ &Opcode::RDSEED => write!(f, "rdseed"),
+ &Opcode::LZCNT => write!(f, "lzcnt"),
+ &Opcode::CLGI => write!(f, "clgi"),
+ &Opcode::STGI => write!(f, "stgi"),
+ &Opcode::SKINIT => write!(f, "skinit"),
+ &Opcode::VMLOAD => write!(f, "vmload"),
+ &Opcode::VMMCALL => write!(f, "vmmcall"),
+ &Opcode::VMSAVE => write!(f, "vmsave"),
+ &Opcode::VMRUN => write!(f, "vmrun"),
+ &Opcode::INVLPGA => write!(f, "invlpga"),
+ &Opcode::MOVBE => write!(f, "movbe"),
+ &Opcode::ADCX => write!(f, "adcx"),
+ &Opcode::ADOX => write!(f, "adox"),
+ &Opcode::PREFETCHW => write!(f, "prefetchw"),
+ &Opcode::RDRAND => write!(f, "rdrand"),
+ &Opcode::SHA1RNDS4 => write!(f, "sha1rnds4"),
+ &Opcode::SHA1NEXTE => write!(f, "sha1nexte"),
+ &Opcode::SHA1MSG1 => write!(f, "sha1msg1"),
+ &Opcode::SHA1MSG2 => write!(f, "sha1msg2"),
+ &Opcode::SHA256RNDS2 => write!(f, "sha256rnds2"),
+ &Opcode::SHA256MSG1 => write!(f, "sha256msg1"),
+ &Opcode::SHA256MSG2 => write!(f, "sha256msg2"),
&Opcode::Invalid => write!(f, "invalid"),
}
}
@@ -1293,8 +1317,11 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::LEA |
Opcode::ADD |
Opcode::ADC |
+ Opcode::ADCX |
+ Opcode::ADOX |
Opcode::SUB |
Opcode::POPCNT |
+ Opcode::LZCNT |
Opcode::BT |
Opcode::BTS |
Opcode::BTR |
@@ -1386,6 +1413,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::PREFETCH0 |
Opcode::PREFETCH1 |
Opcode::PREFETCH2 |
+ Opcode::PREFETCHW |
Opcode::NOP => { write!(out, "{}", colors.nop_op(self)) }
/* Control flow */
@@ -1667,6 +1695,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::PEXTRW |
Opcode::PINSRW |
Opcode::MOV |
+ Opcode::MOVBE |
Opcode::LODS |
Opcode::STOS |
Opcode::LAHF |
@@ -1818,6 +1847,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::SWAPGS |
Opcode::RDTSCP |
Opcode::INVLPG |
+ Opcode::INVLPGA |
Opcode::CPUID |
Opcode::WBINVD |
Opcode::INVD |
@@ -1849,9 +1879,16 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::VMCALL |
Opcode::VMLAUNCH |
Opcode::VMRESUME |
+ Opcode::VMLOAD |
+ Opcode::VMMCALL |
+ Opcode::VMSAVE |
+ Opcode::VMRUN |
Opcode::VMXOFF |
Opcode::MONITOR |
Opcode::MWAIT |
+ Opcode::SKINIT |
+ Opcode::CLGI |
+ Opcode::STGI |
Opcode::CLAC |
Opcode::STAC |
Opcode::ENCLS |
@@ -1861,11 +1898,22 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::VMFUNC |
Opcode::XEND |
Opcode::XTEST |
+ Opcode::XABORT |
+ Opcode::XBEGIN |
Opcode::ENCLU |
Opcode::RDPKRU |
Opcode::WRPKRU |
Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) }
+ Opcode::RDSEED |
+ Opcode::RDRAND |
+ Opcode::SHA1RNDS4 |
+ Opcode::SHA1NEXTE |
+ Opcode::SHA1MSG1 |
+ Opcode::SHA1MSG2 |
+ Opcode::SHA256RNDS2 |
+ Opcode::SHA256MSG1 |
+ Opcode::SHA256MSG2 |
Opcode::AESDEC |
Opcode::AESDECLAST |
Opcode::AESENC |
diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs
index 58d7a85..89c485f 100644
--- a/src/protected_mode/mod.rs
+++ b/src/protected_mode/mod.rs
@@ -1,5 +1,6 @@
mod vex;
mod display;
+pub mod uarch;
use core::hint::unreachable_unchecked;
@@ -120,6 +121,14 @@ impl RegSpec {
}
#[inline]
+ pub fn sp() -> RegSpec {
+     // register number 4 in the `W` bank.
+     RegSpec { bank: RegisterBank::W, num: 4 }
+ }
+
+ #[inline]
pub fn fs() -> RegSpec {
RegSpec { bank: RegisterBank::S, num: 3 }
}
@@ -738,12 +747,17 @@ pub enum Opcode {
XGETBV,
XSETBV,
VMFUNC,
+ XABORT,
+ XBEGIN,
XEND,
XTEST,
ENCLU,
RDPKRU,
WRPKRU,
+ RDSEED,
+ RDRAND,
+
ADDPS,
ADDPD,
ANDNPS,
@@ -1289,6 +1303,31 @@ pub enum Opcode {
PHADDW,
HSUBPD,
HADDPD,
+
+ SHA1RNDS4,
+ SHA1NEXTE,
+ SHA1MSG1,
+ SHA1MSG2,
+ SHA256RNDS2,
+ SHA256MSG1,
+ SHA256MSG2,
+
+ LZCNT,
+ CLGI,
+ STGI,
+ SKINIT,
+ VMLOAD,
+ VMMCALL,
+ VMSAVE,
+ VMRUN,
+ INVLPGA,
+
+ MOVBE,
+
+ ADCX,
+ ADOX,
+
+ PREFETCHW,
}
#[derive(Debug)]
@@ -1457,6 +1496,14 @@ pub struct InstDecoder {
// 53. intel quirks
// 54. amd quirks
// 55. avx (intel ?, amd ?)
+ // 56. amd-v/svm
+ // 57. lahfsahf
+ // 58. cmov
+ // 59. f16c
+ // 60. fma4
+ // 61. prefetchw
+ // 62. tsx
+ // 63. lzcnt
flags: u64,
}
@@ -1543,6 +1590,12 @@ impl InstDecoder {
self
}
+ /// enable both halves of SSE4 on this decoder: the same as calling `with_sse4_1()` and then
+ /// `with_sse4_2()`.
+ pub fn with_sse4(self) -> Self {
+     let with_first_half = self.with_sse4_1();
+     with_first_half.with_sse4_2()
+ }
+
pub fn movbe(&self) -> bool {
self.flags & (1 << 8) != 0
}
@@ -1615,6 +1668,9 @@ impl InstDecoder {
self
}
+ /// `bmi2` indicates support for the `BZHI`, `MULX`, `PDEP`, `PEXT`, `RORX`, `SARX`, `SHRX`,
+ /// and `SHLX` instructions. `bmi2` is implemented in all x86_64 chips that implement `bmi`,
+ /// except the amd `piledriver` and `steamroller` microarchitectures.
pub fn bmi2(&self) -> bool {
self.flags & (1 << 16) != 0
}
@@ -1975,6 +2031,94 @@ impl InstDecoder {
self
}
+ /// reports whether the `amd-v/svm` flag (bit 56 of `flags`) is set on this decoder.
+ pub fn svm(&self) -> bool {
+     (self.flags >> 56) & 1 == 1
+ }
+
+ /// returns this decoder with the `amd-v/svm` flag (bit 56 of `flags`) set.
+ pub fn with_svm(mut self) -> Self {
+     const SVM_BIT: u64 = 1 << 56;
+     self.flags |= SVM_BIT;
+     self
+ }
+
+ /// `lahfsahf` is only unset for early revisions of 64-bit amd and intel chips. unfortunately
+ /// the clearest documentation on when these instructions were reintroduced into 64-bit
+ /// architectures seems to be
+ /// [wikipedia](https://en.wikipedia.org/wiki/X86-64#Older_implementations):
+ /// ```text
+ /// Early AMD64 and Intel 64 CPUs lacked LAHF and SAHF instructions in 64-bit mode. AMD
+ /// introduced these instructions (also in 64-bit mode) with their Athlon 64, Opteron and
+ /// Turion 64 revision D processors in March 2005[48][49][50] while Intel introduced the
+ /// instructions with the Pentium 4 G1 stepping in December 2005. The 64-bit version of Windows
+ /// 8.1 requires this feature.[47]
+ /// ```
+ ///
+ /// this puts reintroduction of these instructions somewhere in the middle of prescott and k8
+ /// lifecycles, for intel and amd respectively. because there is no specific uarch where these
+ /// features become enabled, prescott and k8 default to not supporting these instructions,
+ /// where later uarches support these instructions.
+ ///
+ /// returns `true` when the `lahfsahf` flag (bit 57 of `flags`) is set.
+ pub fn lahfsahf(&self) -> bool {
+     // the quotation above is fenced as `text` so rustdoc does not try to compile it as a doctest.
+     self.flags & (1 << 57) != 0
+ }
+
+ /// returns this decoder with the `lahfsahf` flag (bit 57 of `flags`) set.
+ pub fn with_lahfsahf(mut self) -> Self {
+     const LAHFSAHF_BIT: u64 = 1 << 57;
+     self.flags |= LAHFSAHF_BIT;
+     self
+ }
+
+ /// reports whether the `cmov` flag (bit 58 of `flags`) is set on this decoder.
+ pub fn cmov(&self) -> bool {
+     (self.flags >> 58) & 1 == 1
+ }
+
+ /// returns this decoder with the `cmov` flag (bit 58 of `flags`) set.
+ pub fn with_cmov(mut self) -> Self {
+     const CMOV_BIT: u64 = 1 << 58;
+     self.flags |= CMOV_BIT;
+     self
+ }
+
+ /// reports whether the `f16c` flag (bit 59 of `flags`) is set on this decoder.
+ pub fn f16c(&self) -> bool {
+     (self.flags >> 59) & 1 == 1
+ }
+
+ /// returns this decoder with the `f16c` flag (bit 59 of `flags`) set.
+ pub fn with_f16c(mut self) -> Self {
+     const F16C_BIT: u64 = 1 << 59;
+     self.flags |= F16C_BIT;
+     self
+ }
+
+ /// reports whether the `fma4` flag (bit 60 of `flags`) is set on this decoder.
+ pub fn fma4(&self) -> bool {
+     (self.flags >> 60) & 1 == 1
+ }
+
+ /// returns this decoder with the `fma4` flag (bit 60 of `flags`) set.
+ pub fn with_fma4(mut self) -> Self {
+     const FMA4_BIT: u64 = 1 << 60;
+     self.flags |= FMA4_BIT;
+     self
+ }
+
+ /// reports whether the `prefetchw` flag (bit 61 of `flags`) is set on this decoder.
+ pub fn prefetchw(&self) -> bool {
+     (self.flags >> 61) & 1 == 1
+ }
+
+ /// returns this decoder with the `prefetchw` flag (bit 61 of `flags`) set.
+ pub fn with_prefetchw(mut self) -> Self {
+     const PREFETCHW_BIT: u64 = 1 << 61;
+     self.flags |= PREFETCHW_BIT;
+     self
+ }
+
+ /// reports whether the `tsx` flag (bit 62 of `flags`) is set on this decoder.
+ pub fn tsx(&self) -> bool {
+     (self.flags >> 62) & 1 == 1
+ }
+
+ /// returns this decoder with the `tsx` flag (bit 62 of `flags`) set.
+ pub fn with_tsx(mut self) -> Self {
+     const TSX_BIT: u64 = 1 << 62;
+     self.flags |= TSX_BIT;
+     self
+ }
+
+ /// reports whether the `lzcnt` flag (bit 63, the top bit of `flags`) is set on this decoder.
+ pub fn lzcnt(&self) -> bool {
+     (self.flags >> 63) & 1 == 1
+ }
+
+ /// returns this decoder with the `lzcnt` flag (bit 63, the top bit of `flags`) set.
+ pub fn with_lzcnt(mut self) -> Self {
+     const LZCNT_BIT: u64 = 1 << 63;
+     self.flags |= LZCNT_BIT;
+     self
+ }
+
/// Optionally reject or reinterpret instruction according to the decoder's
/// declared extensions.
fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> {
@@ -2107,21 +2251,15 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
- // AVX...
- /* // TODO
Opcode::XABORT |
- Opcode::XACQUIRE |
- Opcode::XRELEASE |
Opcode::XBEGIN |
Opcode::XEND |
Opcode::XTEST => {
if !self.tsx() {
inst.opcode = Opcode::Invalid;
- return Err(());
+ return Err(DecodeError::InvalidOpcode);
}
}
- */
- /* // TODO
Opcode::SHA1MSG1 |
Opcode::SHA1MSG2 |
Opcode::SHA1NEXTE |
@@ -2131,9 +2269,9 @@ impl InstDecoder {
Opcode::SHA256RNDS2 => {
if !self.sha() {
inst.opcode = Opcode::Invalid;
- return Err(());
+ return Err(DecodeError::InvalidOpcode);
}
- }*/
+ }
Opcode::ENCLV |
Opcode::ENCLS |
Opcode::ENCLU => {
@@ -2173,7 +2311,6 @@ impl InstDecoder {
Opcode::VCVTDQ2PD |
Opcode::VCVTDQ2PS |
Opcode::VCVTPD2PS |
- Opcode::VCVTPH2PS |
Opcode::VCVTPS2DQ |
Opcode::VCVTPS2PD |
Opcode::VCVTSS2SD |
@@ -2181,7 +2318,6 @@ impl InstDecoder {
Opcode::VCVTSI2SD |
Opcode::VCVTSD2SI |
Opcode::VCVTSD2SS |
- Opcode::VCVTPS2PH |
Opcode::VCVTSS2SI |
Opcode::VCVTTPD2DQ |
Opcode::VCVTTPS2DQ |
@@ -2484,6 +2620,124 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
+ Opcode::MOVBE => {
+ if !self.movbe() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::POPCNT => {
+ /*
+ * from the intel SDM:
+ * ```
+ * Before an application attempts to use the POPCNT instruction, it must check that
+ * the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1) and POPCNT
+ * (if CPUID.01H:ECX.POPCNT[bit 23] = 1).
+ * ```
+ */
+ if self.intel_quirks() && (!self.sse4_2() || !self.popcnt()) {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ } else if !self.popcnt() {
+ /*
+ * elsewhere from the amd APM:
+ * `Instruction Subsets and CPUID Feature Flags` on page 507 indicates that
+ * popcnt is present when the popcnt bit is reported by cpuid. this seems to be
+ * the less quirky default, so `intel_quirks` is considered the outlier, and
+ * before this default.
+ * */
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::LZCNT => {
+ /*
+ * amd APM, `LZCNT` page 212:
+ * LZCNT is an Advanced Bit Manipulation (ABM) instruction. Support for the LZCNT
+ * instruction is indicated by CPUID Fn8000_0001_ECX[ABM] = 1.
+ *
+ * meanwhile the intel SDM simply states:
+ * ```
+ * CPUID.EAX=80000001H:ECX.LZCNT[bit 5]: if 1 indicates the processor supports the
+ * LZCNT instruction.
+ * ```
+ *
+ * so that's considered the less-quirky (default) case here.
+ * */
+ if self.amd_quirks() && !self.abm() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ } else if !self.lzcnt() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::ADCX |
+ Opcode::ADOX => {
+ if !self.adx() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::VMRUN |
+ Opcode::VMLOAD |
+ Opcode::VMSAVE |
+ Opcode::CLGI |
+ Opcode::VMMCALL |
+ Opcode::INVLPGA => {
+ if !self.svm() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::STGI |
+ Opcode::SKINIT => {
+ if !self.svm() || !self.skinit() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::LAHF |
+ Opcode::SAHF => {
+ if !self.lahfsahf() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::VCVTPS2PH |
+ Opcode::VCVTPH2PS => {
+ /*
+ * from intel SDM:
+ * ```
+ * 14.4.1 Detection of F16C Instructions Application using float 16 instruction
+ * must follow a detection sequence similar to AVX to ensure: • The OS has
+ * enabled YMM state management support, • The processor support AVX as
+ * indicated by the CPUID feature flag, i.e. CPUID.01H:ECX.AVX[bit 28] = 1. •
+ * The processor support 16-bit floating-point conversion instructions via a
+ * CPUID feature flag (CPUID.01H:ECX.F16C[bit 29] = 1).
+ * ```
+ *
+ * TODO: only the VEX-coded variant of this instruction should be gated on `f16c`.
+ * the EVEX-coded variant should be gated on `avx512f` or `avx512vl` if not
+ * EVEX.512-coded.
+ */
+ if !self.avx() || !self.f16c() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::RDRAND => {
+ if !self.rdrand() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::RDSEED => {
+ if !self.rdseed() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
other => {
if !self.bmi1() {
if BMI1.contains(&other) {
@@ -2808,11 +3062,13 @@ impl Prefixes {
pub enum OperandCode {
ModRM_0x0f00,
ModRM_0x0f01,
+ ModRM_0x0f0d,
ModRM_0x0fae,
ModRM_0x0fba,
ModRM_0xf238,
ModRM_0xf30fc7,
ModRM_0x660f38,
+ ModRM_0xf30f38,
ModRM_0x660f3a,
CVT_AA,
CVT_DA,
@@ -3625,7 +3881,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xf30f38),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
@@ -3766,7 +4022,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+ OpcodeRecord(Interpretation::Instruction(Opcode::LZCNT), OperandCode::Gv_Ev),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
// 0xc0
@@ -3889,7 +4145,7 @@ const OPCODE_0F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::UD2), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::NOP), OperandCode::Ev),
+ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0x0f0d),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
// 0x10
@@ -5104,8 +5360,28 @@ fn read_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter: T,
instruction.operands[1] = read_M(&mut bytes_iter, instruction, modrm, length)?;
}
},
- _op @ OperandCode::ModRM_0xc6_Eb_Ib |
- _op @ OperandCode::ModRM_0xc7_Ev_Iv => {
+ op @ OperandCode::ModRM_0xc6_Eb_Ib |
+ op @ OperandCode::ModRM_0xc7_Ev_Iv => {
+ if modrm == 0xf8 {
+ if op == OperandCode::ModRM_0xc6_Eb_Ib {
+ instruction.opcode = Opcode::XABORT;
+ instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u32;
+ instruction.operands[0] = OperandSpec::ImmI8;
+ instruction.operand_count = 1;
+ return Ok(());
+ } else {
+ instruction.opcode = Opcode::XBEGIN;
+ instruction.disp = if opwidth == 2 {
+ read_imm_signed(&mut bytes_iter, 2, length)? as i16 as i32 as u32
+ } else {
+ read_imm_signed(&mut bytes_iter, 4, length)? as i32 as u32
+ };
+ instruction.modrm_mmm = RegSpec::eip();
+ instruction.operands[0] = OperandSpec::RegDisp;
+ instruction.operand_count = 1;
+ return Ok(());
+ }
+ }
if (modrm & 0b00111000) != 0 {
instruction.opcode = Opcode::Invalid;
return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string());
@@ -5113,7 +5389,8 @@ fn read_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter: T,
instruction.operands[0] = mem_oper;
instruction.opcode = Opcode::MOV;
- instruction.imm = read_imm_signed(&mut bytes_iter, opwidth, length)? as u32;
+ let numwidth = if opwidth == 8 { 4 } else { opwidth };
+ instruction.imm = read_imm_signed(&mut bytes_iter, numwidth, length)? as u32;
instruction.operands[1] = match opwidth {
1 => OperandSpec::ImmI8,
2 => OperandSpec::ImmI16,
@@ -5468,6 +5745,113 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
instruction.operands[0] = OperandSpec::RegRRR;
instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
}
+ OperandCode::ModRM_0x0f0d => {
+     let modrm = read_modrm(&mut bytes_iter, length)?;
+     // `0f 0d` picks its instruction from the modrm `reg` field (bits 5:3). the low three
+     // bits are `r/m` and only name the operand, so they must not be used for selection.
+     let r = (modrm >> 3) & 0b111;
+
+     let opwidth = imm_width_from_prefixes(SizeCode::vd, instruction.prefixes);
+
+     // `/1` is `prefetchw`; every other `reg` value is decoded as a one-operand `nop`.
+     instruction.opcode = if r == 1 {
+         Opcode::PREFETCHW
+     } else {
+         Opcode::NOP
+     };
+     instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, opwidth, length)?;
+     instruction.operand_count = 1;
+ }
+ OperandCode::ModRM_0x0f38 => {
+ let opcode = read_modrm(&mut bytes_iter, length)?;
+
+ let high = opcode >> 4;
+ let low = opcode & 0xf;
+
+ let operands = match high {
+ 0 => {
+ // PqQq
+ OperandCode::G_E_mm
+ },
+ 1 => {
+ // PqQq
+ OperandCode::G_E_mm
+ },
+ 0xc => {
+ // Vdq,Wdq
+ OperandCode::G_E_xmm
+ }
+ 0xf => {
+ match low {
+ 0 => OperandCode::Gv_Ev,
+ 1 => OperandCode::Ev_Gv,
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+ instruction.opcode = match opcode {
+ 0xc8 => Opcode::SHA1NEXTE,
+ 0xc9 => Opcode::SHA1MSG1,
+ 0xca => Opcode::SHA1MSG2,
+ 0xcb => Opcode::SHA256RNDS2,
+ 0xcc => Opcode::SHA256MSG1,
+ 0xcd => Opcode::SHA256MSG2,
+ 0xf0 | 0xf1 => Opcode::MOVBE,
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+
+ return read_operands(decoder, bytes_iter, instruction, operands, length);
+ },
+ OperandCode::ModRM_0x0f3a => {
+ },
+ OperandCode::ModRM_0x0fc7 => {
+ let modrm = read_modrm(&mut bytes_iter, length)?;
+ if modrm >> 6 == 0b11 {
+ match (modrm >> 3) & 0b111 {
+ 0b111 => {
+ instruction.opcode = Opcode::RDSEED;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ let opwidth = imm_width_from_prefixes(SizeCode::vd, instruction.prefixes);
+ instruction.modrm_rrr =
+ RegSpec::from_parts(modrm & 7, match opwidth {
+ 4 => RegisterBank::D,
+ 2 => RegisterBank::W,
+ _ => unreachable!()
+ });
+ }
+ 0b110 => {
+ instruction.opcode = Opcode::RDRAND;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ let opwidth = imm_width_from_prefixes(SizeCode::vd, instruction.prefixes);
+ instruction.modrm_rrr =
+ RegSpec::from_parts(modrm & 7, match opwidth {
+ 4 => RegisterBank::D,
+ 2 => RegisterBank::W,
+ _ => unreachable!()
+ });
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ } else {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ },
OperandCode::ModRM_0x0f71 => {
instruction.operand_count = 2;
@@ -5582,6 +5966,19 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
instruction.operand_count = 2;
}
+ OperandCode::ModRM_0xf30f38 => {
+ let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
+ match op {
+ 0xf6 => {
+ instruction.opcode = Opcode::ADOX;
+ return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length);
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+ }
OperandCode::ModRM_0x660f38 => {
let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
match op {
@@ -5590,6 +5987,10 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
0xdd => { instruction.opcode = Opcode::AESENCLAST; }
0xde => { instruction.opcode = Opcode::AESDEC; }
0xdf => { instruction.opcode = Opcode::AESDECLAST; }
+ 0xf6 => {
+ instruction.opcode = Opcode::ADCX;
+ return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length);
+ }
_ => {
instruction.opcode = Opcode::Invalid;
return Err(DecodeError::InvalidOpcode);
@@ -5608,6 +6009,21 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
OperandCode::ModRM_0x660f3a => {
let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
match op {
+ 0xcc => {
+ instruction.opcode = Opcode::SHA1RNDS4;
+
+ let modrm = read_modrm(&mut bytes_iter, length)?;
+ instruction.modrm_rrr =
+ RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::X);
+
+
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
+ instruction.imm =
+ read_imm_unsigned(&mut bytes_iter, 1, length)?;
+ instruction.operands[2] = OperandSpec::ImmU8;
+ instruction.operand_count = 3;
+ }
0xdf => {
instruction.opcode = Opcode::AESKEYGENASSIST;
// read operands right here right now
@@ -6170,10 +6586,59 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
}
} else if r == 3 {
let mod_bits = modrm >> 6;
+ let m = modrm & 7;
if mod_bits == 0b11 {
- instruction.opcode = Opcode::Invalid;
- instruction.operand_count = 0;
- return Err(DecodeError::InvalidOperand);
+ match m {
+ 0b000 => {
+ instruction.opcode = Opcode::VMRUN;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::eax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b001 => {
+ instruction.opcode = Opcode::VMMCALL;
+ instruction.operand_count = 0;
+ },
+ 0b010 => {
+ instruction.opcode = Opcode::VMLOAD;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::eax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b011 => {
+ instruction.opcode = Opcode::VMSAVE;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::eax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b100 => {
+ instruction.opcode = Opcode::STGI;
+ instruction.operand_count = 0;
+ },
+ 0b101 => {
+ instruction.opcode = Opcode::CLGI;
+ instruction.operand_count = 0;
+ },
+ 0b110 => {
+ instruction.opcode = Opcode::SKINIT;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.modrm_rrr = RegSpec::eax();
+ },
+ 0b111 => {
+ instruction.opcode = Opcode::INVLPGA;
+ instruction.operand_count = 2;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.operands[1] = OperandSpec::RegMMM;
+ instruction.modrm_rrr = RegSpec::eax();
+ instruction.modrm_mmm = RegSpec::ecx();
+ },
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ instruction.operand_count = 0;
+ return Err(DecodeError::InvalidOperand);
+ }
+ }
} else {
instruction.opcode = Opcode::LIDT;
instruction.operand_count = 1;
diff --git a/src/protected_mode/uarch.rs b/src/protected_mode/uarch.rs
new file mode 100644
index 0000000..b2b1201
--- /dev/null
+++ b/src/protected_mode/uarch.rs
@@ -0,0 +1,221 @@
+pub mod amd {
+     //! most information about instruction set extensions for microarchitectures here was sourced
+     //! from https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview and
+     //! https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features. these mappings are best-effort
+     //! but fairly unused, so a critical eye should be kept towards these decoders rejecting
+     //! instructions they should not, or incorrectly accepting instructions.
+     //!
+     //! microarchitectures as defined here are with respect to flags reported by CPUID. notably,
+     //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension
+     //! reportedly function correctly (agner p217).
+     //!
+     //! [agner](https://www.agner.org/optimize/microarchitecture.pdf)
+     //! as retrieved 2020 may 19
+     //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5`
+
+     // this module lives under `protected_mode`, so its builders must hand back the
+     // protected-mode decoder rather than the long-mode one.
+     use protected_mode::InstDecoder;
+
+     /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later
+     /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of
+     /// support - SSE2 and no later.
+     pub fn k8() -> InstDecoder {
+         InstDecoder::minimal()
+     }
+
+     /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through
+     /// to SSE4.2a, as well as consistent `cmov` support, among other features.
+     pub fn k10() -> InstDecoder {
+         k8()
+             .with_cmov()
+             .with_cmpxchg16b()
+             .with_svm()
+             .with_abm()
+             .with_lahfsahf()
+             .with_sse3()
+             .with_ssse3()
+             .with_sse4()
+             .with_sse4_2()
+             .with_sse4a()
+     }
+
+     /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX
+     /// support among other extensions, and are notable for including `AESNI`.
+     pub fn bulldozer() -> InstDecoder {
+         k10()
+             .with_bmi1()
+             .with_aesni()
+             .with_pclmulqdq()
+             .with_f16c()
+             .with_avx()
+             .with_fma4()
+             .with_xop()
+     }
+
+     /// `Piledriver` was the successor to `Bulldozer`, launched in 2012.
+     pub fn piledriver() -> InstDecoder {
+         bulldozer()
+             .with_tbm()
+             .with_fma3()
+             .with_fma4()
+     }
+
+     /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver`
+     /// cores, these cores do not support `TBM` or `FMA3`.
+     pub fn steamroller() -> InstDecoder {
+         bulldozer()
+     }
+
+     /// `Excavator` was the successor to `Steamroller`, launched in 2015.
+     pub fn excavator() -> InstDecoder {
+         steamroller()
+             .with_movbe()
+             .with_bmi2()
+             .with_rdrand()
+             .with_avx()
+             .with_xop()
+             .with_bmi2()
+             .with_sha()
+             .with_rdrand()
+             .with_avx2()
+     }
+
+     /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD
+     /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX,
+     /// SHA, RDSEED, and other extensions.
+     pub fn zen() -> InstDecoder {
+         k10()
+             .with_avx()
+             .with_avx2()
+             .with_bmi1()
+             .with_aesni()
+             .with_pclmulqdq()
+             .with_f16c()
+             .with_movbe()
+             .with_bmi2()
+             .with_rdrand()
+             .with_adx()
+             .with_sha()
+             .with_rdseed()
+             .with_fma3()
+         // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO?
+     }
+}
+
+pub mod intel {
+     //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out!
+
+     // this module lives under `protected_mode`, so its builders must hand back the
+     // protected-mode decoder rather than the long-mode one.
+     use protected_mode::InstDecoder;
+
+     /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the
+     /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000
+     /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed
+     /// included SSE3.
+     pub fn netburst() -> InstDecoder {
+         InstDecoder::minimal()
+             .with_cmov()
+             .with_sse3()
+     }
+
+     /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with
+     /// processors using this architecture shipped under the names "Merom", "Conroe", and
+     /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused
+     /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines,
+     /// `Core 2 *` processors used the `Core` architecture.
+     pub fn core() -> InstDecoder {
+         netburst()
+             .with_ssse3()
+             .with_sse4()
+     }
+
+     /// `Penryn` (exposed here under the spelling `peryn`) was the successor to `Core`, launched
+     /// in early 2008. it added SSE4.1, along with virtualization extensions.
+     pub fn peryn() -> InstDecoder {
+         core()
+             .with_sse4_1()
+     }
+
+     /// `Nehalem` was the successor to `Penryn`, launched in late 2008. not to be confused with the
+     /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores.
+     /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction.
+     pub fn nehalem() -> InstDecoder {
+         peryn()
+             .with_sse4_2()
+             .with_popcnt()
+     }
+
+     /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL
+     /// extensions.
+     pub fn westmere() -> InstDecoder {
+         nehalem()
+             .with_aesni()
+             .with_pclmulqdq()
+     }
+
+     /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX
+     /// instructions.
+     pub fn sandybridge() -> InstDecoder {
+         westmere()
+             .with_avx()
+     }
+
+     /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C
+     /// extensions for 16-bit floating point conversion, and the RDRAND instruction.
+     pub fn ivybridge() -> InstDecoder {
+         sandybridge()
+             .with_f16c()
+             .with_rdrand()
+     }
+
+     /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction
+     /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3.
+     pub fn haswell() -> InstDecoder {
+         ivybridge()
+             .with_bmi1()
+             .with_bmi2()
+             .with_abm()
+             .with_fma3()
+             .with_avx2()
+     }
+
+     /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores
+     /// were shipped as `E7-48xx/E7-88xx v3` models of processors.
+     pub fn haswell_ex() -> InstDecoder {
+         haswell()
+             .with_tsx()
+     }
+
+     /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED,
+     /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because
+     /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be
+     /// reported as an errata (for example, the `Broadwell-Y` line of parts).
+     pub fn broadwell() -> InstDecoder {
+         haswell_ex()
+             .with_adx()
+             .with_rdseed()
+             .with_prefetchw()
+     }
+
+     /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX
+     /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product
+     /// lines.
+     ///
+     /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest
+     /// common denominator: if you want a `Skylake` decoder with AVX512, something like the
+     /// following:
+     /// ```ignore
+     /// uarch::intel::skylake().with_avx512_f().with_avx512_dq()
+     /// ```
+     /// is likely your best option.
+     pub fn skylake() -> InstDecoder {
+         broadwell()
+             .with_mpx()
+             .with_sgx()
+     }
+
+     /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to
+     /// the x86_64 implementation beyond `skylake`.
+     pub fn kabylake() -> InstDecoder {
+         skylake()
+     }
+     // ice lake is shipping so that should probably be included...
+}
diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs
index 2fbe4fb..b37581e 100644
--- a/test/long_mode/mod.rs
+++ b/test/long_mode/mod.rs
@@ -1095,3 +1095,73 @@ fn prefixed_f30f() {
test_display(&[0xf3, 0x0f, 0x16, 0xcf], "movshdup xmm1, xmm7");
test_display(&[0xf3, 0x4d, 0x0f, 0x16, 0xcf], "movshdup xmm9, xmm15");
}
+
+#[test]
+fn test_adx() {
+     // each entry pairs an encoded instruction with the text it is expected to display as.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0x66, 0x0f, 0x38, 0xf6, 0xc1], "adcx eax, ecx"),
+         (&[0x66, 0x0f, 0x38, 0xf6, 0x01], "adcx eax, [rcx]"),
+         (&[0x66, 0x4f, 0x0f, 0x38, 0xf6, 0x01], "adcx r8, [r9]"),
+         (&[0xf3, 0x0f, 0x38, 0xf6, 0xc1], "adox eax, ecx"),
+         (&[0xf3, 0x0f, 0x38, 0xf6, 0x01], "adox eax, [rcx]"),
+         (&[0xf3, 0x4f, 0x0f, 0x38, 0xf6, 0x01], "adox r8, [r9]"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
+
+#[test]
+fn test_prefetchw() {
+     // `0f 0d` with modrm `08`: the reg field is 1 and the operand is `[rax]`.
+     let encoded: &[u8] = &[0x0f, 0x0d, 0x08];
+     test_display(encoded, "prefetchw [rax]");
+}
+
+#[test]
+fn test_lzcnt() {
+     // the same `f3 0f bd c1` encoding at 16-, 32- and 64-bit operand sizes.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0x66, 0xf3, 0x0f, 0xbd, 0xc1], "lzcnt ax, cx"),
+         (&[0xf3, 0x0f, 0xbd, 0xc1], "lzcnt eax, ecx"),
+         (&[0xf3, 0x48, 0x0f, 0xbd, 0xc1], "lzcnt rax, rcx"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
+
+#[test]
+fn test_svm() {
+     // every `0f 01` encoding from modrm `df` down to `d8`, with its expected display text.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0x0f, 0x01, 0xdf], "invlpga rax, ecx"),
+         (&[0x0f, 0x01, 0xde], "skinit eax"),
+         (&[0x0f, 0x01, 0xdd], "clgi"),
+         (&[0x0f, 0x01, 0xdc], "stgi"),
+         (&[0x0f, 0x01, 0xdb], "vmsave rax"),
+         (&[0x0f, 0x01, 0xda], "vmload rax"),
+         (&[0x0f, 0x01, 0xd9], "vmmcall"),
+         (&[0x0f, 0x01, 0xd8], "vmrun rax"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
+
+#[test]
+fn test_movbe() {
+     // `0f 38 f0` with and without a `4f` prefix byte.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0x0f, 0x38, 0xf0, 0x06], "movbe eax, [rsi]"),
+         (&[0x4f, 0x0f, 0x38, 0xf0, 0x06], "movbe r8, [r14]"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
+
+#[test]
+fn test_tsx() {
+     // NOTE(review): both `xbegin` cases carry one byte more than the displacement width, and
+     // the expected text matches the trailing bytes rather than the bytes directly after the
+     // modrm byte (`f8`). confirm this is what the decoder is meant to produce.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0xc6, 0xf8, 0x10], "xabort 0x10"),
+         (&[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56, 0x78], "xbegin 0x78563412"),
+         (&[0x66, 0xc7, 0xf8, 0x10, 0x12, 0x34], "xbegin 0x3412"),
+         (&[0x0f, 0x01, 0xd5], "xend"),
+         (&[0x0f, 0x01, 0xd6], "xtest"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
+
+#[test]
+fn test_rand() {
+     // `0f c7` with modrm `fd` (rdseed) and `f5` (rdrand), at three operand sizes each.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0x0f, 0xc7, 0xfd], "rdseed ebp"),
+         (&[0x66, 0x0f, 0xc7, 0xfd], "rdseed bp"),
+         (&[0x48, 0x0f, 0xc7, 0xfd], "rdseed rbp"),
+         (&[0x0f, 0xc7, 0xf5], "rdrand ebp"),
+         (&[0x66, 0x0f, 0xc7, 0xf5], "rdrand bp"),
+         (&[0x48, 0x0f, 0xc7, 0xf5], "rdrand rbp"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
+
+#[test]
+fn test_sha() {
+     // one case per SHA opcode; all use modrm `12`, i.e. `xmm2, [rdx]`.
+     let cases: &[(&[u8], &'static str)] = &[
+         (&[0x0f, 0x3a, 0xcc, 0x12, 0x40], "sha1rnds4 xmm2, [rdx], 0x40"),
+         (&[0x0f, 0x38, 0xc8, 0x12], "sha1nexte xmm2, [rdx]"),
+         (&[0x0f, 0x38, 0xc9, 0x12], "sha1msg1 xmm2, [rdx]"),
+         (&[0x0f, 0x38, 0xca, 0x12], "sha1msg2 xmm2, [rdx]"),
+         (&[0x0f, 0x38, 0xcb, 0x12], "sha256rnds2 xmm2, [rdx]"),
+         (&[0x0f, 0x38, 0xcc, 0x12], "sha256msg1 xmm2, [rdx]"),
+         (&[0x0f, 0x38, 0xcd, 0x12], "sha256msg2 xmm2, [rdx]"),
+     ];
+     for &(bytes, expected) in cases {
+         test_display(bytes, expected);
+     }
+}
diff --git a/test/long_mode/operand.rs b/test/long_mode/operand.rs
index 885c6d1..1250b8a 100644
--- a/test/long_mode/operand.rs
+++ b/test/long_mode/operand.rs
@@ -1,5 +1,5 @@
use yaxpeax_arch::{Decoder, LengthedInstruction};
-use yaxpeax_x86::long_mode::{DecodeError, InstDecoder, Opcode};
+use yaxpeax_x86::long_mode::{DecodeError, InstDecoder, Opcode, Operand, RegSpec};
#[test]
fn register_widths() {
diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs
index ab3cdc8..9fc603d 100644
--- a/test/protected_mode/mod.rs
+++ b/test/protected_mode/mod.rs
@@ -1035,3 +1035,67 @@ fn only_32bit() {
test_display(&[0x67, 0xa1, 0xc0, 0xb0], "mov eax, [0xb0c0]");
test_display(&[0x66, 0x67, 0xa1, 0xc0, 0xb0], "mov ax, [0xb0c0]");
}
+
+#[test]
+fn test_adx() {
+ test_display(&[0x66, 0x0f, 0x38, 0xf6, 0xc1], "adcx eax, ecx");
+ test_display(&[0x66, 0x0f, 0x38, 0xf6, 0x01], "adcx eax, [ecx]");
+ test_display(&[0xf3, 0x0f, 0x38, 0xf6, 0xc1], "adox eax, ecx");
+ test_display(&[0xf3, 0x0f, 0x38, 0xf6, 0x01], "adox eax, [ecx]");
+}
+
+#[test]
+fn test_prefetchw() {
+ test_display(&[0x0f, 0x0d, 0x08], "prefetchw [eax]");
+}
+
+#[test]
+fn test_lzcnt() {
+ test_display(&[0x66, 0xf3, 0x0f, 0xbd, 0xc1], "lzcnt ax, cx");
+ test_display(&[0xf3, 0x0f, 0xbd, 0xc1], "lzcnt eax, ecx");
+}
+
+#[test]
+fn test_svm() {
+ test_display(&[0x0f, 0x01, 0xdf], "invlpga eax, ecx");
+ test_display(&[0x0f, 0x01, 0xde], "skinit eax");
+ test_display(&[0x0f, 0x01, 0xdd], "clgi");
+ test_display(&[0x0f, 0x01, 0xdc], "stgi");
+ test_display(&[0x0f, 0x01, 0xdb], "vmsave eax");
+ test_display(&[0x0f, 0x01, 0xda], "vmload eax");
+ test_display(&[0x0f, 0x01, 0xd9], "vmmcall");
+ test_display(&[0x0f, 0x01, 0xd8], "vmrun eax");
+}
+
+#[test]
+fn test_movbe() {
+ test_display(&[0x0f, 0x38, 0xf0, 0x06], "movbe eax, [esi]");
+}
+
+#[test]
+fn test_tsx() {
+ test_display(&[0xc6, 0xf8, 0x10], "xabort 0x10");
+ test_display(&[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56, 0x78], "xbegin 0x78563412");
+ test_display(&[0x66, 0xc7, 0xf8, 0x10, 0x12, 0x34], "xbegin 0x3412");
+ test_display(&[0x0f, 0x01, 0xd5], "xend");
+ test_display(&[0x0f, 0x01, 0xd6], "xtest");
+}
+
+#[test]
+fn test_rand() {
+ test_display(&[0x0f, 0xc7, 0xfd], "rdseed ebp");
+ test_display(&[0x66, 0x0f, 0xc7, 0xfd], "rdseed bp");
+ test_display(&[0x0f, 0xc7, 0xf5], "rdrand ebp");
+ test_display(&[0x66, 0x0f, 0xc7, 0xf5], "rdrand bp");
+}
+
+#[test]
+fn test_sha() {
+ test_display(&[0x0f, 0x3a, 0xcc, 0x12, 0x40], "sha1rnds4 xmm2, [edx], 0x40");
+ test_display(&[0x0f, 0x38, 0xc8, 0x12], "sha1nexte xmm2, [edx]");
+ test_display(&[0x0f, 0x38, 0xc9, 0x12], "sha1msg1 xmm2, [edx]");
+ test_display(&[0x0f, 0x38, 0xca, 0x12], "sha1msg2 xmm2, [edx]");
+ test_display(&[0x0f, 0x38, 0xcb, 0x12], "sha256rnds2 xmm2, [edx]");
+ test_display(&[0x0f, 0x38, 0xcc, 0x12], "sha256msg1 xmm2, [edx]");
+ test_display(&[0x0f, 0x38, 0xcd, 0x12], "sha256msg2 xmm2, [edx]");
+}
diff --git a/test/protected_mode/operand.rs b/test/protected_mode/operand.rs
index 08a24be..8fda181 100644
--- a/test/protected_mode/operand.rs
+++ b/test/protected_mode/operand.rs
@@ -1,5 +1,5 @@
use yaxpeax_arch::{Decoder, LengthedInstruction};
-use yaxpeax_x86::long_mode::{DecodeError, InstDecoder, Opcode};
+use yaxpeax_x86::protected_mode::{DecodeError, InstDecoder, Opcode, Operand, RegSpec};
#[test]
fn register_widths() {
@@ -12,5 +12,5 @@ fn register_widths() {
#[test]
fn memory_widths() {
- assert_eq!(Operand::RegDeref(RegSpec::rsp()).width(), 4);
+ assert_eq!(Operand::RegDeref(RegSpec::esp()).width(), 4);
}