aboutsummaryrefslogtreecommitdiff
path: root/src/long_mode
diff options
context:
space:
mode:
Diffstat (limited to 'src/long_mode')
-rw-r--r--src/long_mode/display.rs48
-rw-r--r--src/long_mode/mod.rs514
-rw-r--r--src/long_mode/uarch.rs221
3 files changed, 763 insertions, 20 deletions
diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs
index 49d1600..5318ebb 100644
--- a/src/long_mode/display.rs
+++ b/src/long_mode/display.rs
@@ -1076,6 +1076,30 @@ impl fmt::Display for Opcode {
&Opcode::HSUBPD => write!(f, "hsubpd"),
&Opcode::HADDPD => write!(f, "haddpd"),
&Opcode::ADDSUBPD => write!(f, "addsubpd"),
+ &Opcode::XABORT => write!(f, "xabort"),
+ &Opcode::XBEGIN => write!(f, "xbegin"),
+ &Opcode::RDSEED => write!(f, "rdseed"),
+ &Opcode::LZCNT => write!(f, "lzcnt"),
+ &Opcode::CLGI => write!(f, "clgi"),
+ &Opcode::STGI => write!(f, "stgi"),
+ &Opcode::SKINIT => write!(f, "skinit"),
+ &Opcode::VMLOAD => write!(f, "vmload"),
+ &Opcode::VMMCALL => write!(f, "vmmcall"),
+ &Opcode::VMSAVE => write!(f, "vmsave"),
+ &Opcode::VMRUN => write!(f, "vmrun"),
+ &Opcode::INVLPGA => write!(f, "invlpga"),
+ &Opcode::MOVBE => write!(f, "movbe"),
+ &Opcode::ADCX => write!(f, "adcx"),
+ &Opcode::ADOX => write!(f, "adox"),
+ &Opcode::PREFETCHW => write!(f, "prefetchw"),
+ &Opcode::RDRAND => write!(f, "rdrand"),
+ &Opcode::SHA1RNDS4 => write!(f, "sha1rnds4"),
+ &Opcode::SHA1NEXTE => write!(f, "sha1nexte"),
+ &Opcode::SHA1MSG1 => write!(f, "sha1msg1"),
+ &Opcode::SHA1MSG2 => write!(f, "sha1msg2"),
+ &Opcode::SHA256RNDS2 => write!(f, "sha256rnds2"),
+ &Opcode::SHA256MSG1 => write!(f, "sha256msg1"),
+ &Opcode::SHA256MSG2 => write!(f, "sha256msg2"),
&Opcode::Invalid => write!(f, "invalid"),
}
}
@@ -1306,8 +1330,11 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::LEA |
Opcode::ADD |
Opcode::ADC |
+ Opcode::ADCX |
+ Opcode::ADOX |
Opcode::SUB |
Opcode::POPCNT |
+ Opcode::LZCNT |
Opcode::BT |
Opcode::BTS |
Opcode::BTR |
@@ -1399,6 +1426,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::PREFETCH0 |
Opcode::PREFETCH1 |
Opcode::PREFETCH2 |
+ Opcode::PREFETCHW |
Opcode::NOP => { write!(out, "{}", colors.nop_op(self)) }
/* Control flow */
@@ -1680,6 +1708,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::PEXTRW |
Opcode::PINSRW |
Opcode::MOV |
+ Opcode::MOVBE |
Opcode::LODS |
Opcode::STOS |
Opcode::LAHF |
@@ -1830,6 +1859,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::SWAPGS |
Opcode::RDTSCP |
Opcode::INVLPG |
+ Opcode::INVLPGA |
Opcode::CPUID |
Opcode::WBINVD |
Opcode::INVD |
@@ -1860,9 +1890,16 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::VMCALL |
Opcode::VMLAUNCH |
Opcode::VMRESUME |
+ Opcode::VMLOAD |
+ Opcode::VMMCALL |
+ Opcode::VMSAVE |
+ Opcode::VMRUN |
Opcode::VMXOFF |
Opcode::MONITOR |
Opcode::MWAIT |
+ Opcode::SKINIT |
+ Opcode::CLGI |
+ Opcode::STGI |
Opcode::CLAC |
Opcode::STAC |
Opcode::ENCLS |
@@ -1872,11 +1909,22 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color
Opcode::VMFUNC |
Opcode::XEND |
Opcode::XTEST |
+ Opcode::XABORT |
+ Opcode::XBEGIN |
Opcode::ENCLU |
Opcode::RDPKRU |
Opcode::WRPKRU |
Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) }
+ Opcode::RDSEED |
+ Opcode::RDRAND |
+ Opcode::SHA1RNDS4 |
+ Opcode::SHA1NEXTE |
+ Opcode::SHA1MSG1 |
+ Opcode::SHA1MSG2 |
+ Opcode::SHA256RNDS2 |
+ Opcode::SHA256MSG1 |
+ Opcode::SHA256MSG2 |
Opcode::AESDEC |
Opcode::AESDECLAST |
Opcode::AESENC |
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index e0a1fdf..20abe1f 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -1,5 +1,6 @@
mod vex;
mod display;
+pub mod uarch;
use core::hint::unreachable_unchecked;
@@ -73,7 +74,6 @@ impl RegSpec {
#[inline]
fn gp_from_parts(num: u8, extended: bool, width: u8, rex: bool) -> RegSpec {
-// println!("from_parts width: {}, num: {}, extended: {}", width, num, extended);
RegSpec {
num: num + if extended { 0b1000 } else { 0 },
bank: width_to_gp_reg_bank(width, rex)
@@ -129,6 +129,22 @@ impl RegSpec {
}
#[inline]
+    /// Builds the `RegSpec` named `esp`: register number 4 in bank `RegisterBank::D`.
+    pub fn esp() -> RegSpec {
+        RegSpec { bank: RegisterBank::D, num: 4 }
+    }
+
+    /// Builds the `RegSpec` named `sp`: register number 4 in bank `RegisterBank::W`.
+    #[inline]
+    pub fn sp() -> RegSpec {
+        RegSpec { bank: RegisterBank::W, num: 4 }
+    }
+
+ #[inline]
pub fn fs() -> RegSpec {
RegSpec { bank: RegisterBank::S, num: 3 }
}
@@ -779,12 +795,17 @@ pub enum Opcode {
XGETBV,
XSETBV,
VMFUNC,
+ XABORT,
+ XBEGIN,
XEND,
XTEST,
ENCLU,
RDPKRU,
WRPKRU,
+ RDSEED,
+ RDRAND,
+
ADDPS,
ADDPD,
ANDNPS,
@@ -1330,6 +1351,31 @@ pub enum Opcode {
PHADDW,
HSUBPD,
HADDPD,
+
+ SHA1RNDS4,
+ SHA1NEXTE,
+ SHA1MSG1,
+ SHA1MSG2,
+ SHA256RNDS2,
+ SHA256MSG1,
+ SHA256MSG2,
+
+ LZCNT,
+ CLGI,
+ STGI,
+ SKINIT,
+ VMLOAD,
+ VMMCALL,
+ VMSAVE,
+ VMRUN,
+ INVLPGA,
+
+ MOVBE,
+
+ ADCX,
+ ADOX,
+
+ PREFETCHW,
}
#[derive(Debug)]
@@ -1500,6 +1546,14 @@ pub struct InstDecoder {
// 53. intel quirks
// 54. amd quirks
// 55. avx (intel ?, amd ?)
+ // 56. amd-v/svm
+ // 57. lahfsahf
+ // 58. cmov
+ // 59. f16c
+ // 60. fma4
+ // 61. prefetchw
+ // 62. tsx
+ // 63. lzcnt
flags: u64,
}
@@ -1586,6 +1640,12 @@ impl InstDecoder {
self
}
+    /// Convenience builder that chains `with_sse4_1()` and then `with_sse4_2()` on this decoder
+    /// and returns the result.
+    pub fn with_sse4(self) -> Self {
+        let with_4_1 = self.with_sse4_1();
+        with_4_1.with_sse4_2()
+    }
+
pub fn movbe(&self) -> bool {
self.flags & (1 << 8) != 0
}
@@ -1658,6 +1718,9 @@ impl InstDecoder {
self
}
+ /// `bmi2` indicates support for the `BZHI`, `MULX`, `PDEP`, `PEXT`, `RORX`, `SARX`, `SHRX`,
+ /// and `SHLX` instructions. `bmi2` is implemented in all x86_64 chips that implement `bmi`,
+ /// except the amd `piledriver` and `steamroller` microarchitectures.
pub fn bmi2(&self) -> bool {
self.flags & (1 << 16) != 0
}
@@ -2018,6 +2081,94 @@ impl InstDecoder {
self
}
+    /// Reports whether flag bit 56 (listed as `amd-v/svm` in the decoder's flag table) is set.
+    pub fn svm(&self) -> bool {
+        const SVM: u64 = 1 << 56;
+        (self.flags & SVM) != 0
+    }
+
+    /// Sets flag bit 56 (`amd-v/svm`) and hands the decoder back for builder-style chaining.
+    pub fn with_svm(mut self) -> Self {
+        const SVM: u64 = 1 << 56;
+        self.flags |= SVM;
+        self
+    }
+
+    /// `lahfsahf` is only unset for early revisions of 64-bit amd and intel chips. unfortunately
+    /// the clearest documentation on when these instructions were reintroduced into 64-bit
+    /// architectures seems to be
+    /// [wikipedia](https://en.wikipedia.org/wiki/X86-64#Older_implementations):
+    /// ```text
+    /// Early AMD64 and Intel 64 CPUs lacked LAHF and SAHF instructions in 64-bit mode. AMD
+    /// introduced these instructions (also in 64-bit mode) with their Athlon 64, Opteron and
+    /// Turion 64 revision D processors in March 2005[48][49][50] while Intel introduced the
+    /// instructions with the Pentium 4 G1 stepping in December 2005. The 64-bit version of Windows
+    /// 8.1 requires this feature.[47]
+    /// ```
+    ///
+    /// this puts reintroduction of these instructions somewhere in the middle of prescott and k8
+    /// lifecycles, for intel and amd respectively. because there is no specific uarch where these
+    /// features become enabled, prescott and k8 default to not supporting these instructions,
+    /// where later uarches support these instructions.
+    ///
+    /// returns `true` when flag bit 57 is set.
+    pub fn lahfsahf(&self) -> bool {
+        self.flags & (1 << 57) != 0
+    }
+
+    /// Sets flag bit 57 (`lahfsahf`) and hands the decoder back for builder-style chaining.
+    pub fn with_lahfsahf(mut self) -> Self {
+        const LAHFSAHF: u64 = 1 << 57;
+        self.flags |= LAHFSAHF;
+        self
+    }
+
+    /// Reports whether flag bit 58 (listed as `cmov` in the decoder's flag table) is set.
+    pub fn cmov(&self) -> bool {
+        const CMOV: u64 = 1 << 58;
+        (self.flags & CMOV) != 0
+    }
+
+    /// Sets flag bit 58 (`cmov`) and hands the decoder back for builder-style chaining.
+    pub fn with_cmov(mut self) -> Self {
+        const CMOV: u64 = 1 << 58;
+        self.flags |= CMOV;
+        self
+    }
+
+    /// Reports whether flag bit 59 (listed as `f16c` in the decoder's flag table) is set.
+    pub fn f16c(&self) -> bool {
+        const F16C: u64 = 1 << 59;
+        (self.flags & F16C) != 0
+    }
+
+    /// Sets flag bit 59 (`f16c`) and hands the decoder back for builder-style chaining.
+    pub fn with_f16c(mut self) -> Self {
+        const F16C: u64 = 1 << 59;
+        self.flags |= F16C;
+        self
+    }
+
+    /// Reports whether flag bit 60 (listed as `fma4` in the decoder's flag table) is set.
+    pub fn fma4(&self) -> bool {
+        const FMA4: u64 = 1 << 60;
+        (self.flags & FMA4) != 0
+    }
+
+    /// Sets flag bit 60 (`fma4`) and hands the decoder back for builder-style chaining.
+    pub fn with_fma4(mut self) -> Self {
+        const FMA4: u64 = 1 << 60;
+        self.flags |= FMA4;
+        self
+    }
+
+    /// Reports whether flag bit 61 (listed as `prefetchw` in the decoder's flag table) is set.
+    pub fn prefetchw(&self) -> bool {
+        const PREFETCHW: u64 = 1 << 61;
+        (self.flags & PREFETCHW) != 0
+    }
+
+    /// Sets flag bit 61 (`prefetchw`) and hands the decoder back for builder-style chaining.
+    pub fn with_prefetchw(mut self) -> Self {
+        const PREFETCHW: u64 = 1 << 61;
+        self.flags |= PREFETCHW;
+        self
+    }
+
+    /// Reports whether flag bit 62 (listed as `tsx` in the decoder's flag table) is set.
+    pub fn tsx(&self) -> bool {
+        const TSX: u64 = 1 << 62;
+        (self.flags & TSX) != 0
+    }
+
+    /// Sets flag bit 62 (`tsx`) and hands the decoder back for builder-style chaining.
+    pub fn with_tsx(mut self) -> Self {
+        const TSX: u64 = 1 << 62;
+        self.flags |= TSX;
+        self
+    }
+
+    /// Reports whether flag bit 63 (listed as `lzcnt` in the decoder's flag table) is set.
+    pub fn lzcnt(&self) -> bool {
+        const LZCNT: u64 = 1 << 63;
+        (self.flags & LZCNT) != 0
+    }
+
+    /// Sets flag bit 63 (`lzcnt`) and hands the decoder back for builder-style chaining.
+    pub fn with_lzcnt(mut self) -> Self {
+        const LZCNT: u64 = 1 << 63;
+        self.flags |= LZCNT;
+        self
+    }
+
/// Optionally reject or reinterpret instruction according to the decoder's
/// declared extensions.
fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> {
@@ -2150,21 +2301,15 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
- // AVX...
- /* // TODO
Opcode::XABORT |
- Opcode::XACQUIRE |
- Opcode::XRELEASE |
Opcode::XBEGIN |
Opcode::XEND |
Opcode::XTEST => {
if !self.tsx() {
inst.opcode = Opcode::Invalid;
- return Err(());
+ return Err(DecodeError::InvalidOpcode);
}
}
- */
- /* // TODO
Opcode::SHA1MSG1 |
Opcode::SHA1MSG2 |
Opcode::SHA1NEXTE |
@@ -2174,9 +2319,9 @@ impl InstDecoder {
Opcode::SHA256RNDS2 => {
if !self.sha() {
inst.opcode = Opcode::Invalid;
- return Err(());
+ return Err(DecodeError::InvalidOpcode);
}
- }*/
+ }
Opcode::ENCLV |
Opcode::ENCLS |
Opcode::ENCLU => {
@@ -2185,6 +2330,7 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
+ // AVX...
Opcode::VMOVDDUP |
Opcode::VPSHUFLW |
Opcode::VHADDPS |
@@ -2216,7 +2362,6 @@ impl InstDecoder {
Opcode::VCVTDQ2PD |
Opcode::VCVTDQ2PS |
Opcode::VCVTPD2PS |
- Opcode::VCVTPH2PS |
Opcode::VCVTPS2DQ |
Opcode::VCVTPS2PD |
Opcode::VCVTSS2SD |
@@ -2224,7 +2369,6 @@ impl InstDecoder {
Opcode::VCVTSI2SD |
Opcode::VCVTSD2SI |
Opcode::VCVTSD2SS |
- Opcode::VCVTPS2PH |
Opcode::VCVTSS2SI |
Opcode::VCVTTPD2DQ |
Opcode::VCVTTPS2DQ |
@@ -2527,6 +2671,124 @@ impl InstDecoder {
return Err(DecodeError::InvalidOpcode);
}
}
+ Opcode::MOVBE => {
+ if !self.movbe() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::POPCNT => {
+ /*
+ * from the intel SDM:
+ * ```
+ * Before an application attempts to use the POPCNT instruction, it must check that
+ * the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1) and POPCNT
+ * (if CPUID.01H:ECX.POPCNT[bit 23] = 1).
+ * ```
+ */
+ if self.intel_quirks() && (!self.sse4_2() || !self.popcnt()) {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ } else if !self.popcnt() {
+ /*
+ * elsewhere from the amd APM:
+ * `Instruction Subsets and CPUID Feature Flags` on page 507 indicates that
+ * popcnt is present when the popcnt bit is reported by cpuid. this seems to be
+ * the less quirky default, so `intel_quirks` is considered the outlier, and
+ * before this default.
+ * */
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::LZCNT => {
+ /*
+ * amd APM, `LZCNT` page 212:
+ * LZCNT is an Advanced Bit Manipulation (ABM) instruction. Support for the LZCNT
+ * instruction is indicated by CPUID Fn8000_0001_ECX[ABM] = 1.
+ *
+ * meanwhile the intel SDM simply states:
+ * ```
+ * CPUID.EAX=80000001H:ECX.LZCNT[bit 5]: if 1 indicates the processor supports the
+ * LZCNT instruction.
+ * ```
+ *
+ * so that's considered the less-quirky (default) case here.
+ * */
+ if self.amd_quirks() && !self.abm() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ } else if !self.lzcnt() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::ADCX |
+ Opcode::ADOX => {
+ if !self.adx() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::VMRUN |
+ Opcode::VMLOAD |
+ Opcode::VMSAVE |
+ Opcode::CLGI |
+ Opcode::VMMCALL |
+ Opcode::INVLPGA => {
+ if !self.svm() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::STGI |
+ Opcode::SKINIT => {
+ if !self.svm() || !self.skinit() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::LAHF |
+ Opcode::SAHF => {
+ if !self.lahfsahf() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::VCVTPS2PH |
+ Opcode::VCVTPH2PS => {
+ /*
+ * from intel SDM:
+ * ```
+ * 14.4.1 Detection of F16C Instructions Application using float 16 instruction
+ * must follow a detection sequence similar to AVX to ensure: • The OS has
+ * enabled YMM state management support, • The processor support AVX as
+ * indicated by the CPUID feature flag, i.e. CPUID.01H:ECX.AVX[bit 28] = 1. •
+ * The processor support 16-bit floating-point conversion instructions via a
+ * CPUID feature flag (CPUID.01H:ECX.F16C[bit 29] = 1).
+ * ```
+ *
+ * TODO: only the VEX-coded variant of this instruction should be gated on `f16c`.
+ * the EVEX-coded variant should be gated on `avx512f` or `avx512vl` if not
+ * EVEX.512-coded.
+ */
+ if !self.avx() || !self.f16c() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::RDRAND => {
+ if !self.rdrand() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ Opcode::RDSEED => {
+ if !self.rdseed() {
+ inst.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
other => {
if !self.bmi1() {
if BMI1.contains(&other) {
@@ -2871,11 +3133,13 @@ impl PrefixRex {
pub enum OperandCode {
ModRM_0x0f00,
ModRM_0x0f01,
+ ModRM_0x0f0d,
ModRM_0x0fae,
ModRM_0x0fba,
ModRM_0xf238,
ModRM_0xf30fc7,
ModRM_0x660f38,
+ ModRM_0xf30f38,
ModRM_0x660f3a,
CVT_AA,
CVT_DA,
@@ -3686,7 +3950,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xf30f38),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
@@ -3827,7 +4091,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+ OpcodeRecord(Interpretation::Instruction(Opcode::LZCNT), OperandCode::Gv_Ev),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
// 0xc0
@@ -3950,7 +4214,7 @@ const OPCODE_0F_MAP: [OpcodeRecord; 256] = [
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::UD2), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
- OpcodeRecord(Interpretation::Instruction(Opcode::NOP), OperandCode::Ev),
+ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0x0f0d),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
// 0x10
@@ -5104,8 +5368,28 @@ fn read_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter: T,
};
instruction.operand_count = 2;
},
- _op @ OperandCode::ModRM_0xc6_Eb_Ib |
- _op @ OperandCode::ModRM_0xc7_Ev_Iv => {
+ op @ OperandCode::ModRM_0xc6_Eb_Ib |
+ op @ OperandCode::ModRM_0xc7_Ev_Iv => {
+ if modrm == 0xf8 {
+ if op == OperandCode::ModRM_0xc6_Eb_Ib {
+ instruction.opcode = Opcode::XABORT;
+ instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u64;
+ instruction.operands[0] = OperandSpec::ImmI8;
+ instruction.operand_count = 1;
+ return Ok(());
+ } else {
+ instruction.opcode = Opcode::XBEGIN;
+ instruction.disp = if opwidth == 2 {
+ read_imm_signed(&mut bytes_iter, 2, length)? as i16 as i64 as u64
+ } else {
+ read_imm_signed(&mut bytes_iter, 4, length)? as i32 as i64 as u64
+ };
+ instruction.modrm_mmm = RegSpec::rip();
+ instruction.operands[0] = OperandSpec::RegDisp;
+ instruction.operand_count = 1;
+ return Ok(());
+ }
+ }
if (modrm & 0b00111000) != 0 {
instruction.opcode = Opcode::Invalid;
return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string());
@@ -5490,6 +5774,115 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
instruction.opcode = Opcode::MOVD;
}
}
+        OperandCode::ModRM_0x0f0d => {
+            // `0f 0d` is a group opcode: the operation is selected by the ModRM `reg` field
+            // (bits 5:3), with `/1` being `prefetchw`. every other selector is decoded as a
+            // `nop` with one `E` operand. the selector must come from `reg`, not from `r/m`
+            // (bits 2:0), which only describes the operand.
+            let modrm = read_modrm(&mut bytes_iter, length)?;
+            let r = (modrm >> 3) & 0b111;
+
+            let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes);
+
+            instruction.opcode = match r {
+                1 => Opcode::PREFETCHW,
+                _ => Opcode::NOP,
+            };
+            instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, opwidth, length)?;
+            instruction.operand_count = 1;
+        }
+ OperandCode::ModRM_0x0f38 => {
+ let opcode = read_modrm(&mut bytes_iter, length)?;
+
+ let high = opcode >> 4;
+ let low = opcode & 0xf;
+
+ let operands = match high {
+ 0 => {
+ // PqQq
+ OperandCode::G_E_mm
+ },
+ 1 => {
+ // PqQq
+ OperandCode::G_E_mm
+ },
+ 0xc => {
+ // Vdq,Wdq
+ OperandCode::G_E_xmm
+ }
+ 0xf => {
+ match low {
+ 0 => OperandCode::Gv_Ev,
+ 1 => OperandCode::Ev_Gv,
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ }
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+ instruction.opcode = match opcode {
+ 0xc8 => Opcode::SHA1NEXTE,
+ 0xc9 => Opcode::SHA1MSG1,
+ 0xca => Opcode::SHA1MSG2,
+ 0xcb => Opcode::SHA256RNDS2,
+ 0xcc => Opcode::SHA256MSG1,
+ 0xcd => Opcode::SHA256MSG2,
+ 0xf0 | 0xf1 => Opcode::MOVBE,
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+
+ return read_operands(decoder, bytes_iter, instruction, operands, length);
+ },
+ OperandCode::ModRM_0x0f3a => {
+ },
+        OperandCode::ModRM_0x0fc7 => {
+            // only the register-direct forms are decoded here: `/6` is `rdrand` and `/7` is
+            // `rdseed`. anything else under this operand code is rejected as invalid.
+            let modrm = read_modrm(&mut bytes_iter, length)?;
+            if modrm >> 6 != 0b11 {
+                instruction.opcode = Opcode::Invalid;
+                return Err(DecodeError::InvalidOpcode);
+            }
+
+            instruction.opcode = match (modrm >> 3) & 0b111 {
+                0b111 => Opcode::RDSEED,
+                0b110 => Opcode::RDRAND,
+                _ => {
+                    instruction.opcode = Opcode::Invalid;
+                    return Err(DecodeError::InvalidOpcode);
+                }
+            };
+            instruction.operand_count = 1;
+            instruction.operands[0] = OperandSpec::RegRRR;
+
+            let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes);
+            let bank = match opwidth {
+                8 => RegisterBank::Q,
+                4 => RegisterBank::D,
+                2 => RegisterBank::W,
+                _ => unreachable!()
+            };
+            // the destination register number is taken from the ModRM `r/m` bits, so the
+            // matching extension bit is REX.B; REX.R extends the `reg` field, which holds the
+            // `/6` or `/7` selector here. the register is still stored in `modrm_rrr` and
+            // reported through `OperandSpec::RegRRR`.
+            instruction.modrm_rrr =
+                RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().b(), bank);
+        },
OperandCode::ModRM_0x0f71 => {
instruction.operand_count = 2;
@@ -5604,6 +5997,19 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
instruction.operand_count = 2;
}
+ OperandCode::ModRM_0xf30f38 => {
+ let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
+ match op {
+ 0xf6 => {
+ instruction.opcode = Opcode::ADOX;
+ return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length);
+ }
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ return Err(DecodeError::InvalidOpcode);
+ }
+ };
+ }
OperandCode::ModRM_0x660f38 => {
let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
match op {
@@ -5612,6 +6018,10 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
0xdd => { instruction.opcode = Opcode::AESENCLAST; }
0xde => { instruction.opcode = Opcode::AESDEC; }
0xdf => { instruction.opcode = Opcode::AESDECLAST; }
+ 0xf6 => {
+ instruction.opcode = Opcode::ADCX;
+ return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length);
+ }
_ => {
instruction.opcode = Opcode::Invalid;
return Err(DecodeError::InvalidOpcode);
@@ -5630,6 +6040,21 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
OperandCode::ModRM_0x660f3a => {
let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;
match op {
+ 0xcc => {
+ instruction.opcode = Opcode::SHA1RNDS4;
+
+ let modrm = read_modrm(&mut bytes_iter, length)?;
+ instruction.modrm_rrr =
+ RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X);
+
+
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;
+ instruction.imm =
+ read_imm_unsigned(&mut bytes_iter, 1, length)?;
+ instruction.operands[2] = OperandSpec::ImmU8;
+ instruction.operand_count = 3;
+ }
0xdf => {
instruction.opcode = Opcode::AESKEYGENASSIST;
// read operands right here right now
@@ -6214,10 +6639,59 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
}
} else if r == 3 {
let mod_bits = modrm >> 6;
+ let m = modrm & 7;
if mod_bits == 0b11 {
- instruction.opcode = Opcode::Invalid;
- instruction.operand_count = 0;
- return Err(DecodeError::InvalidOperand);
+ match m {
+ 0b000 => {
+ instruction.opcode = Opcode::VMRUN;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b001 => {
+ instruction.opcode = Opcode::VMMCALL;
+ instruction.operand_count = 0;
+ },
+ 0b010 => {
+ instruction.opcode = Opcode::VMLOAD;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b011 => {
+ instruction.opcode = Opcode::VMSAVE;
+ instruction.operand_count = 1;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.operands[0] = OperandSpec::RegRRR;
+ },
+ 0b100 => {
+ instruction.opcode = Opcode::STGI;
+ instruction.operand_count = 0;
+ },
+ 0b101 => {
+ instruction.opcode = Opcode::CLGI;
+ instruction.operand_count = 0;
+ },
+ 0b110 => {
+ instruction.opcode = Opcode::SKINIT;
+ instruction.operand_count = 1;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.modrm_rrr = RegSpec::eax();
+ },
+ 0b111 => {
+ instruction.opcode = Opcode::INVLPGA;
+ instruction.operand_count = 2;
+ instruction.operands[0] = OperandSpec::RegRRR;
+ instruction.operands[1] = OperandSpec::RegMMM;
+ instruction.modrm_rrr = RegSpec::rax();
+ instruction.modrm_mmm = RegSpec::ecx();
+ },
+ _ => {
+ instruction.opcode = Opcode::Invalid;
+ instruction.operand_count = 0;
+ return Err(DecodeError::InvalidOperand);
+ }
+ }
} else {
instruction.opcode = Opcode::LIDT;
instruction.operand_count = 1;
diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs
new file mode 100644
index 0000000..b2b1201
--- /dev/null
+++ b/src/long_mode/uarch.rs
@@ -0,0 +1,221 @@
+pub mod amd {
+    //! most information about instruction set extensions for microarchitectures here was sourced
+    //! from https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview and
+    //! https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features. these mappings are best-effort
+    //! but fairly unused, so a critical eye should be kept towards these decoders rejecting
+    //! instructions they should not, or incorrectly accepting instructions.
+    //!
+    //! microarchitectures as defined here are with respect to flags reported by CPUID. notably,
+    //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension
+    //! reportedly function correctly (agner p217).
+    //!
+    //! [agner](https://www.agner.org/optimize/microarchitecture.pdf)
+    //! as retrieved 2020 may 19
+    //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5`
+    //!
+    //! each decoder is built by chaining `with_*` builders onto an earlier decoder, so a
+    //! builder only needs to appear once along a chain.
+
+    use long_mode::InstDecoder;
+
+    /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later
+    /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of
+    /// support - SSE2 and no later.
+    pub fn k8() -> InstDecoder {
+        InstDecoder::minimal()
+    }
+
+    /// `k10` was the successor to `k8`, launched in 2007. this decoder enables `cmov`,
+    /// `cmpxchg16b`, SVM, ABM, `lahf`/`sahf`, SSE3, SSSE3, SSE4.1, SSE4.2, and SSE4a on top of
+    /// `k8`.
+    ///
+    /// `lzcnt` is enabled alongside `abm`: the decoder gates the `LZCNT` opcode on the `lzcnt`
+    /// flag, and on AMD parts `LZCNT` support is what the ABM CPUID bit reports.
+    ///
+    /// NOTE(review): shipping K10 parts are generally documented with SSE4a but without
+    /// SSSE3/SSE4.1/SSE4.2 - confirm whether those belong here or on `bulldozer`.
+    /// NOTE(review): `popcnt` is not enabled by any decoder in this module - confirm whether it
+    /// should be enabled together with `abm`.
+    pub fn k10() -> InstDecoder {
+        k8()
+            .with_cmov()
+            .with_cmpxchg16b()
+            .with_svm()
+            .with_abm()
+            .with_lzcnt()
+            .with_lahfsahf()
+            .with_sse3()
+            .with_ssse3()
+            // `with_sse4` already covers both SSE4.1 and SSE4.2.
+            .with_sse4()
+            .with_sse4a()
+    }
+
+    /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX
+    /// support among other extensions, and are notable for including `AESNI`.
+    pub fn bulldozer() -> InstDecoder {
+        k10()
+            .with_bmi1()
+            .with_aesni()
+            .with_pclmulqdq()
+            .with_f16c()
+            .with_avx()
+            .with_fma4()
+            .with_xop()
+    }
+
+    /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. this decoder adds TBM and
+    /// FMA3 on top of `bulldozer` (which already enables FMA4).
+    pub fn piledriver() -> InstDecoder {
+        bulldozer()
+            .with_tbm()
+            .with_fma3()
+    }
+
+    /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver`
+    /// cores, these cores do not support `TBM` or `FMA3`.
+    pub fn steamroller() -> InstDecoder {
+        bulldozer()
+    }
+
+    /// `Excavator` was the successor to `Steamroller`, launched in 2015. this decoder adds MOVBE,
+    /// BMI2, RDRAND, SHA, and AVX2 on top of `steamroller`; AVX and XOP are inherited from
+    /// `bulldozer`.
+    pub fn excavator() -> InstDecoder {
+        steamroller()
+            .with_movbe()
+            .with_bmi2()
+            .with_rdrand()
+            .with_sha()
+            .with_avx2()
+    }
+
+    /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD
+    /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX,
+    /// SHA, RDSEED, and other extensions.
+    ///
+    /// because FMA4, TBM, and XOP are dropped, this decoder is built from `k10` rather than from
+    /// `excavator`.
+    pub fn zen() -> InstDecoder {
+        k10()
+            .with_avx()
+            .with_avx2()
+            .with_bmi1()
+            .with_aesni()
+            .with_pclmulqdq()
+            .with_f16c()
+            .with_movbe()
+            .with_bmi2()
+            .with_rdrand()
+            .with_adx()
+            .with_sha()
+            .with_rdseed()
+            .with_fma3()
+        // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO?
+    }
+}
+
+pub mod intel {
+    //! sourced by walking wikipedia pages. seriously! this stuff is kinda hard to figure out!
+    //!
+    //! each decoder is built by chaining `with_*` builders onto the decoder for the preceding
+    //! microarchitecture, so every function only lists what that generation adds.
+
+    use long_mode::InstDecoder;
+
+    /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the
+    /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000
+    /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed
+    /// included SSE3.
+    pub fn netburst() -> InstDecoder {
+        InstDecoder::minimal()
+            .with_cmov()
+            .with_sse3()
+    }
+
+    /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSSE3, with
+    /// processors using this architecture shipped under the names "Merom", "Conroe", and
+    /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused
+    /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines,
+    /// `Core 2 *` processors used the `Core` architecture.
+    ///
+    /// SSE4.1 and SSE4.2 are deliberately not enabled here: they are added by `peryn` and
+    /// `nehalem` respectively. `lahf`/`sahf` in 64-bit mode are enabled from this decoder on,
+    /// since only the earliest 64-bit parts (`netburst`) default to lacking them.
+    pub fn core() -> InstDecoder {
+        netburst()
+            .with_lahfsahf()
+            .with_ssse3()
+    }
+
+    /// `Penryn` (spelled `peryn` in this API) was the successor to `Core`, launched in early
+    /// 2008. it added SSE4.1, along with virtualization extensions.
+    pub fn peryn() -> InstDecoder {
+        core()
+            .with_sse4_1()
+    }
+
+    /// `Nehalem` was the successor to `Penryn`, launched in late 2008. not to be confused with the
+    /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores.
+    /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction.
+    pub fn nehalem() -> InstDecoder {
+        peryn()
+            .with_sse4_2()
+            .with_popcnt()
+    }
+
+    /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL
+    /// extensions.
+    pub fn westmere() -> InstDecoder {
+        nehalem()
+            .with_aesni()
+            .with_pclmulqdq()
+    }
+
+    /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX
+    /// instructions.
+    pub fn sandybridge() -> InstDecoder {
+        westmere()
+            .with_avx()
+    }
+
+    /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C
+    /// extensions for 16-bit floating point conversion, and the RDRAND instruction.
+    pub fn ivybridge() -> InstDecoder {
+        sandybridge()
+            .with_f16c()
+            .with_rdrand()
+    }
+
+    /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction
+    /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3.
+    ///
+    /// `lzcnt` is enabled together with `abm` because the decoder gates the `LZCNT` opcode on
+    /// the `lzcnt` flag; without it this decoder would reject `LZCNT`.
+    ///
+    /// NOTE(review): `movbe` is not enabled here although Haswell parts are generally listed
+    /// with MOVBE - confirm and add `.with_movbe()` if intended.
+    pub fn haswell() -> InstDecoder {
+        ivybridge()
+            .with_bmi1()
+            .with_bmi2()
+            .with_abm()
+            .with_lzcnt()
+            .with_fma3()
+            .with_avx2()
+    }
+
+    /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores
+    /// were shipped as `E7-48xx/E7-88xx v3` models of processors.
+    pub fn haswell_ex() -> InstDecoder {
+        haswell()
+            .with_tsx()
+    }
+
+    /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED,
+    /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because
+    /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be
+    /// reported as an erratum (for example, the `Broadwell-Y` line of parts).
+    pub fn broadwell() -> InstDecoder {
+        haswell_ex()
+            .with_adx()
+            .with_rdseed()
+            .with_prefetchw()
+    }
+
+    /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX
+    /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product
+    /// lines.
+    ///
+    /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest
+    /// common denominator: if you want a `Skylake` decoder with AVX512, something like the
+    /// following:
+    /// ```ignore
+    /// skylake().with_avx512_f().with_avx512_dq()
+    /// ```
+    /// is likely your best option.
+    pub fn skylake() -> InstDecoder {
+        broadwell()
+            .with_mpx()
+            .with_sgx()
+    }
+
+    /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to
+    /// x86_64 implementation beyond `skylake`.
+    pub fn kabylake() -> InstDecoder {
+        skylake()
+    }
+    // ice lake is shipping so that should probably be included...
+}