diff options
| author | iximeow <me@iximeow.net> | 2020-05-21 23:09:39 -0700 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2020-05-21 23:09:39 -0700 | 
| commit | a0fd5a24cb0aa0b697f680c451d928cefe8323b4 (patch) | |
| tree | d95069afe48249ff1226cb077e242d093bb2794a /src/long_mode | |
| parent | 905dc4c7feac1e09cde70db52c0762e8990d4d96 (diff) | |
add sha, lzcnt, tsx, f16c, svm, movbe, adx, and prefetchw extensions
also add builders to get decoders appropriate for specific
microarchitectures from intel and amd
* low-power architectures are not yet present
Diffstat (limited to 'src/long_mode')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/long_mode/display.rs | 48 |
| -rw-r--r-- | src/long_mode/mod.rs | 514 |
| -rw-r--r-- | src/long_mode/uarch.rs | 221 |
3 files changed, 763 insertions, 20 deletions
| diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 49d1600..5318ebb 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -1076,6 +1076,30 @@ impl fmt::Display for Opcode {              &Opcode::HSUBPD => write!(f, "hsubpd"),              &Opcode::HADDPD => write!(f, "haddpd"),              &Opcode::ADDSUBPD => write!(f, "addsubpd"), +            &Opcode::XABORT => write!(f, "xabort"), +            &Opcode::XBEGIN => write!(f, "xbegin"), +            &Opcode::RDSEED => write!(f, "rdseed"), +            &Opcode::LZCNT => write!(f, "lzcnt"), +            &Opcode::CLGI => write!(f, "clgi"), +            &Opcode::STGI => write!(f, "stgi"), +            &Opcode::SKINIT => write!(f, "skinit"), +            &Opcode::VMLOAD => write!(f, "vmload"), +            &Opcode::VMMCALL => write!(f, "vmmcall"), +            &Opcode::VMSAVE => write!(f, "vmsave"), +            &Opcode::VMRUN => write!(f, "vmrun"), +            &Opcode::INVLPGA => write!(f, "invlpga"), +            &Opcode::MOVBE => write!(f, "movbe"), +            &Opcode::ADCX => write!(f, "adcx"), +            &Opcode::ADOX => write!(f, "adox"), +            &Opcode::PREFETCHW => write!(f, "prefetchw"), +            &Opcode::RDRAND => write!(f, "rdrand"), +            &Opcode::SHA1RNDS4 => write!(f, "sha1rnds4"), +            &Opcode::SHA1NEXTE => write!(f, "sha1nexte"), +            &Opcode::SHA1MSG1 => write!(f, "sha1msg1"), +            &Opcode::SHA1MSG2 => write!(f, "sha1msg2"), +            &Opcode::SHA256RNDS2 => write!(f, "sha256rnds2"), +            &Opcode::SHA256MSG1 => write!(f, "sha256msg1"), +            &Opcode::SHA256MSG2 => write!(f, "sha256msg2"),              &Opcode::Invalid => write!(f, "invalid"),          }      } @@ -1306,8 +1330,11 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color              Opcode::LEA |              Opcode::ADD |              Opcode::ADC | +            Opcode::ADCX | +            Opcode::ADOX |     
         Opcode::SUB |              Opcode::POPCNT | +            Opcode::LZCNT |              Opcode::BT |              Opcode::BTS |              Opcode::BTR | @@ -1399,6 +1426,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color              Opcode::PREFETCH0 |              Opcode::PREFETCH1 |              Opcode::PREFETCH2 | +            Opcode::PREFETCHW |              Opcode::NOP => { write!(out, "{}", colors.nop_op(self)) }              /* Control flow */ @@ -1680,6 +1708,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color              Opcode::PEXTRW |              Opcode::PINSRW |              Opcode::MOV | +            Opcode::MOVBE |              Opcode::LODS |              Opcode::STOS |              Opcode::LAHF | @@ -1830,6 +1859,7 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color              Opcode::SWAPGS |              Opcode::RDTSCP |              Opcode::INVLPG | +            Opcode::INVLPGA |              Opcode::CPUID |              Opcode::WBINVD |              Opcode::INVD | @@ -1860,9 +1890,16 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color              Opcode::VMCALL |              Opcode::VMLAUNCH |              Opcode::VMRESUME | +            Opcode::VMLOAD | +            Opcode::VMMCALL | +            Opcode::VMSAVE | +            Opcode::VMRUN |              Opcode::VMXOFF |              Opcode::MONITOR |              Opcode::MWAIT | +            Opcode::SKINIT | +            Opcode::CLGI | +            Opcode::STGI |              Opcode::CLAC |              Opcode::STAC |              Opcode::ENCLS | @@ -1872,11 +1909,22 @@ impl <T: fmt::Write, Color: fmt::Display, Y: YaxColors<Color>> Colorize<T, Color              Opcode::VMFUNC |              Opcode::XEND |              Opcode::XTEST | +            Opcode::XABORT | +            Opcode::XBEGIN |              Opcode::ENCLU |              
Opcode::RDPKRU |              Opcode::WRPKRU |              Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) } +            Opcode::RDSEED | +            Opcode::RDRAND | +            Opcode::SHA1RNDS4 | +            Opcode::SHA1NEXTE | +            Opcode::SHA1MSG1 | +            Opcode::SHA1MSG2 | +            Opcode::SHA256RNDS2 | +            Opcode::SHA256MSG1 | +            Opcode::SHA256MSG2 |              Opcode::AESDEC |              Opcode::AESDECLAST |              Opcode::AESENC | diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index e0a1fdf..20abe1f 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -1,5 +1,6 @@  mod vex;  mod display; +pub mod uarch;  use core::hint::unreachable_unchecked; @@ -73,7 +74,6 @@ impl RegSpec {      #[inline]      fn gp_from_parts(num: u8, extended: bool, width: u8, rex: bool) -> RegSpec { -//        println!("from_parts width: {}, num: {}, extended: {}", width, num, extended);          RegSpec {              num: num + if extended { 0b1000 } else { 0 },              bank: width_to_gp_reg_bank(width, rex) @@ -129,6 +129,22 @@ impl RegSpec {      }      #[inline] +    pub fn esp() -> RegSpec { +        RegSpec { +            num: 4, +            bank: RegisterBank::D +        } +    } + +    #[inline] +    pub fn sp() -> RegSpec { +        RegSpec { +            num: 4, +            bank: RegisterBank::W +        } +    } + +    #[inline]      pub fn fs() -> RegSpec {          RegSpec { bank: RegisterBank::S, num: 3 }      } @@ -779,12 +795,17 @@ pub enum Opcode {      XGETBV,      XSETBV,      VMFUNC, +    XABORT, +    XBEGIN,      XEND,      XTEST,      ENCLU,      RDPKRU,      WRPKRU, +    RDSEED, +    RDRAND, +      ADDPS,      ADDPD,      ANDNPS, @@ -1330,6 +1351,31 @@ pub enum Opcode {      PHADDW,      HSUBPD,      HADDPD, + +    SHA1RNDS4, +    SHA1NEXTE, +    SHA1MSG1, +    SHA1MSG2, +    SHA256RNDS2, +    SHA256MSG1, +    SHA256MSG2, + +    LZCNT, +    CLGI, +    STGI, +    SKINIT, 
+    VMLOAD, +    VMMCALL, +    VMSAVE, +    VMRUN, +    INVLPGA, + +    MOVBE, + +    ADCX, +    ADOX, + +    PREFETCHW,  }  #[derive(Debug)] @@ -1500,6 +1546,14 @@ pub struct InstDecoder {      // 53. intel quirks      // 54. amd quirks      // 55. avx (intel ?, amd ?) +    // 56. amd-v/svm +    // 57. lahfsahf +    // 58. cmov +    // 59. f16c +    // 60. fma4 +    // 61. prefetchw +    // 62. tsx +    // 63. lzcnt      flags: u64,  } @@ -1586,6 +1640,12 @@ impl InstDecoder {          self      } +    pub fn with_sse4(self) -> Self { +        self +            .with_sse4_1() +            .with_sse4_2() +    } +      pub fn movbe(&self) -> bool {          self.flags & (1 << 8) != 0      } @@ -1658,6 +1718,9 @@ impl InstDecoder {          self      } +    /// `bmi2` indicates support for the `BZHI`, `MULX`, `PDEP`, `PEXT`, `RORX`, `SARX`, `SHRX`, +    /// and `SHLX` instructions. `bmi2` is implemented in all x86_64 chips that implement `bmi`, +    /// except the amd `piledriver` and `steamroller` microarchitectures.      pub fn bmi2(&self) -> bool {          self.flags & (1 << 16) != 0      } @@ -2018,6 +2081,94 @@ impl InstDecoder {          self      } +    pub fn svm(&self) -> bool { +        self.flags & (1 << 56) != 0 +    } + +    pub fn with_svm(mut self) -> Self { +        self.flags |= 1 << 56; +        self +    } + +    /// `lahfsahf` is only unset for early revisions of 64-bit amd and intel chips. unfortunately +    /// the clearest documentation on when these instructions were reintroduced into 64-bit +    /// architectures seems to be +    /// [wikipedia](https://en.wikipedia.org/wiki/X86-64#Older_implementations): +    /// ``` +    /// Early AMD64 and Intel 64 CPUs lacked LAHF and SAHF instructions in 64-bit mode. 
AMD +    /// introduced these instructions (also in 64-bit mode) with their Athlon 64, Opteron and +    /// Turion 64 revision D processors in March 2005[48][49][50] while Intel introduced the +    /// instructions with the Pentium 4 G1 stepping in December 2005. The 64-bit version of Windows +    /// 8.1 requires this feature.[47] +    /// ``` +    /// +    /// this puts reintroduction of these instructions somewhere in the middle of prescott and k8 +    /// lifecycles, for intel and amd respectively. because there is no specific uarch where these +    /// features become enabled, prescott and k8 default to not supporting these instructions, +    /// where later uarches support these instructions. +    pub fn lahfsahf(&self) -> bool { +        self.flags & (1 << 57) != 0 +    } + +    pub fn with_lahfsahf(mut self) -> Self { +        self.flags |= 1 << 57; +        self +    } + +    pub fn cmov(&self) -> bool { +        self.flags & (1 << 58) != 0 +    } + +    pub fn with_cmov(mut self) -> Self { +        self.flags |= 1 << 58; +        self +    } + +    pub fn f16c(&self) -> bool { +        self.flags & (1 << 59) != 0 +    } + +    pub fn with_f16c(mut self) -> Self { +        self.flags |= 1 << 59; +        self +    } + +    pub fn fma4(&self) -> bool { +        self.flags & (1 << 60) != 0 +    } + +    pub fn with_fma4(mut self) -> Self { +        self.flags |= 1 << 60; +        self +    } + +    pub fn prefetchw(&self) -> bool { +        self.flags & (1 << 61) != 0 +    } + +    pub fn with_prefetchw(mut self) -> Self { +        self.flags |= 1 << 61; +        self +    } + +    pub fn tsx(&self) -> bool { +        self.flags & (1 << 62) != 0 +    } + +    pub fn with_tsx(mut self) -> Self { +        self.flags |= 1 << 62; +        self +    } + +    pub fn lzcnt(&self) -> bool { +        self.flags & (1 << 63) != 0 +    } + +    pub fn with_lzcnt(mut self) -> Self { +        self.flags |= 1 << 63; +        self +    } +      /// Optionally reject or 
reinterpret instruction according to the decoder's      /// declared extensions.      fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> { @@ -2150,21 +2301,15 @@ impl InstDecoder {                      return Err(DecodeError::InvalidOpcode);                  }              } -            // AVX... -            /* // TODO              Opcode::XABORT | -            Opcode::XACQUIRE | -            Opcode::XRELEASE |              Opcode::XBEGIN |              Opcode::XEND |              Opcode::XTEST => {                  if !self.tsx() {                      inst.opcode = Opcode::Invalid; -                    return Err(()); +                    return Err(DecodeError::InvalidOpcode);                  }              } -            */ -            /* // TODO              Opcode::SHA1MSG1 |              Opcode::SHA1MSG2 |              Opcode::SHA1NEXTE | @@ -2174,9 +2319,9 @@ impl InstDecoder {              Opcode::SHA256RNDS2 => {                  if !self.sha() {                      inst.opcode = Opcode::Invalid; -                    return Err(()); +                    return Err(DecodeError::InvalidOpcode);                  } -            }*/ +            }              Opcode::ENCLV |              Opcode::ENCLS |              Opcode::ENCLU => { @@ -2185,6 +2330,7 @@ impl InstDecoder {                      return Err(DecodeError::InvalidOpcode);                  }              } +            // AVX...              
Opcode::VMOVDDUP |              Opcode::VPSHUFLW |              Opcode::VHADDPS | @@ -2216,7 +2362,6 @@ impl InstDecoder {              Opcode::VCVTDQ2PD |              Opcode::VCVTDQ2PS |              Opcode::VCVTPD2PS | -            Opcode::VCVTPH2PS |              Opcode::VCVTPS2DQ |              Opcode::VCVTPS2PD |              Opcode::VCVTSS2SD | @@ -2224,7 +2369,6 @@ impl InstDecoder {              Opcode::VCVTSI2SD |              Opcode::VCVTSD2SI |              Opcode::VCVTSD2SS | -            Opcode::VCVTPS2PH |              Opcode::VCVTSS2SI |              Opcode::VCVTTPD2DQ |              Opcode::VCVTTPS2DQ | @@ -2527,6 +2671,124 @@ impl InstDecoder {                      return Err(DecodeError::InvalidOpcode);                  }              } +            Opcode::MOVBE => { +                if !self.movbe() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::POPCNT => { +                /* +                 * from the intel SDM: +                 * ``` +                 * Before an application attempts to use the POPCNT instruction, it must check that +                 * the processor supports SSE4.2 (if CPUID.01H:ECX.SSE4_2[bit 20] = 1) and POPCNT +                 * (if CPUID.01H:ECX.POPCNT[bit 23] = 1). +                 * ``` +                 */ +                if self.intel_quirks() && (!self.sse4_2() || !self.popcnt()) { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } else if !self.popcnt() { +                    /* +                     * elsewhere from the amd APM: +                     * `Instruction Subsets and CPUID Feature Flags` on page 507 indicates that +                     * popcnt is present when the popcnt bit is reported by cpuid. 
this seems to be +                     * the less quirky default, so `intel_quirks` is considered the outlier, and +                     * before this default. +                     * */ +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::LZCNT => { +                /* +                 * amd APM, `LZCNT` page 212: +                 * LZCNT is an Advanced Bit Manipulation (ABM) instruction. Support for the LZCNT +                 * instruction is indicated by CPUID Fn8000_0001_ECX[ABM] = 1. +                 * +                 * meanwhile the intel SDM simply states: +                 * ``` +                 * CPUID.EAX=80000001H:ECX.LZCNT[bit 5]: if 1 indicates the processor supports the +                 * LZCNT instruction. +                 * ``` +                 * +                 * so that's considered the less-quirky (default) case here. +                 * */ +                if self.amd_quirks() && !self.abm() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } else if !self.lzcnt() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::ADCX | +            Opcode::ADOX => { +                if !self.adx() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::VMRUN | +            Opcode::VMLOAD | +            Opcode::VMSAVE | +            Opcode::CLGI | +            Opcode::VMMCALL | +            Opcode::INVLPGA => { +                if !self.svm() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::STGI | +           
 Opcode::SKINIT => { +                if !self.svm() || !self.skinit() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::LAHF | +            Opcode::SAHF => { +                if !self.lahfsahf() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::VCVTPS2PH | +            Opcode::VCVTPH2PS => { +                /* +                 * from intel SDM: +                 * ``` +                 * 14.4.1 Detection of F16C Instructions Application using float 16 instruction +                 *    must follow a detection sequence similar to AVX to ensure: • The OS has +                 *    enabled YMM state management support, • The processor support AVX as +                 *    indicated by the CPUID feature flag, i.e. CPUID.01H:ECX.AVX[bit 28] = 1.  • +                 *    The processor support 16-bit floating-point conversion instructions via a +                 *    CPUID feature flag (CPUID.01H:ECX.F16C[bit 29] = 1). +                 * ``` +                 * +                 * TODO: only the VEX-coded variant of this instruction should be gated on `f16c`. +                 * the EVEX-coded variant should be gated on `avx512f` or `avx512vl` if not +                 * EVEX.512-coded. 
+                 */ +                if !self.avx() || !self.f16c() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::RDRAND => { +                if !self.rdrand() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            } +            Opcode::RDSEED => { +                if !self.rdseed() { +                    inst.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            }              other => {                  if !self.bmi1() {                      if BMI1.contains(&other) { @@ -2871,11 +3133,13 @@ impl PrefixRex {  pub enum OperandCode {      ModRM_0x0f00,      ModRM_0x0f01, +    ModRM_0x0f0d,      ModRM_0x0fae,      ModRM_0x0fba,      ModRM_0xf238,      ModRM_0xf30fc7,      ModRM_0x660f38, +    ModRM_0xf30f38,      ModRM_0x660f3a,      CVT_AA,      CVT_DA, @@ -3686,7 +3950,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), -    OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), +    OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xf30f38),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3827,7 +4091,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      
OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), -    OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), +    OpcodeRecord(Interpretation::Instruction(Opcode::LZCNT), OperandCode::Gv_Ev),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),  // 0xc0 @@ -3950,7 +4214,7 @@ const OPCODE_0F_MAP: [OpcodeRecord; 256] = [      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::UD2), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), -    OpcodeRecord(Interpretation::Instruction(Opcode::NOP), OperandCode::Ev), +    OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0x0f0d),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),  // 0x10 @@ -5104,8 +5368,28 @@ fn read_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter: T,              };              instruction.operand_count = 2;          }, -        _op @ OperandCode::ModRM_0xc6_Eb_Ib | -        _op @ OperandCode::ModRM_0xc7_Ev_Iv => { +        op @ OperandCode::ModRM_0xc6_Eb_Ib | +        op @ OperandCode::ModRM_0xc7_Ev_Iv => { +            if modrm == 0xf8 { +                if op == OperandCode::ModRM_0xc6_Eb_Ib { +                    instruction.opcode = Opcode::XABORT; +                    instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? 
as u64; +                    instruction.operands[0] = OperandSpec::ImmI8; +                    instruction.operand_count = 1; +                    return Ok(()); +                } else { +                    instruction.opcode = Opcode::XBEGIN; +                    instruction.disp = if opwidth == 2 { +                        read_imm_signed(&mut bytes_iter, 2, length)? as i16 as i64 as u64 +                    } else { +                        read_imm_signed(&mut bytes_iter, 4, length)? as i32 as i64 as u64 +                    }; +                    instruction.modrm_mmm = RegSpec::rip(); +                    instruction.operands[0] = OperandSpec::RegDisp; +                    instruction.operand_count = 1; +                    return Ok(()); +                } +            }              if (modrm & 0b00111000) != 0 {                  instruction.opcode = Opcode::Invalid;                  return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string()); @@ -5490,6 +5774,115 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter                  instruction.opcode = Opcode::MOVD;              }          } +        OperandCode::ModRM_0x0f0d => { +            let modrm = read_modrm(&mut bytes_iter, length)?; +            let r = modrm & 0b111; + +            let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes); + +            match r { +                1 => { +                    instruction.opcode = Opcode::PREFETCHW; +                } +                _ => { +                    instruction.opcode = Opcode::NOP; +                } +            } +            instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, opwidth, length)?; +            instruction.operand_count = 1; +        } +        OperandCode::ModRM_0x0f38 => { +            let opcode = read_modrm(&mut bytes_iter, length)?; + +            let high = opcode >> 4; +            let low = opcode & 0xf; + 
+            let operands = match high { +                0 => { +                    // PqQq +                    OperandCode::G_E_mm +                }, +                1 => { +                    // PqQq +                    OperandCode::G_E_mm +                }, +                0xc => { +                    // Vdq,Wdq +                    OperandCode::G_E_xmm +                } +                0xf => { +                    match low { +                        0 => OperandCode::Gv_Ev, +                        1 => OperandCode::Ev_Gv, +                        _ => { +                            instruction.opcode = Opcode::Invalid; +                            return Err(DecodeError::InvalidOpcode); +                        } +                    } +                } +                _ => { +                    instruction.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            }; +            instruction.opcode = match opcode { +                0xc8 => Opcode::SHA1NEXTE, +                0xc9 => Opcode::SHA1MSG1, +                0xca => Opcode::SHA1MSG2, +                0xcb => Opcode::SHA256RNDS2, +                0xcc => Opcode::SHA256MSG1, +                0xcd => Opcode::SHA256MSG2, +                0xf0 | 0xf1 => Opcode::MOVBE, +                _ => { +                    instruction.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            }; + +            return read_operands(decoder, bytes_iter, instruction, operands, length); +        }, +        OperandCode::ModRM_0x0f3a => { +        }, +        OperandCode::ModRM_0x0fc7 => { +            let modrm = read_modrm(&mut bytes_iter, length)?; +            if modrm >> 6 == 0b11 { +                match (modrm >> 3) & 0b111 { +                    0b111 => { +                        instruction.opcode = Opcode::RDSEED; +                        instruction.operand_count = 1; +      
                  instruction.operands[0] = OperandSpec::RegRRR; +                        let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes); +                        instruction.modrm_rrr = +                            RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().r(), match opwidth { +                                8 => RegisterBank::Q, +                                4 => RegisterBank::D, +                                2 => RegisterBank::W, +                                _ => unreachable!() +                            }); +                    } +                    0b110 => { +                        instruction.opcode = Opcode::RDRAND; +                        instruction.operand_count = 1; +                        instruction.operands[0] = OperandSpec::RegRRR; +                        let opwidth = imm_width_from_prefixes_64(SizeCode::vq, instruction.prefixes); +                        instruction.modrm_rrr = +                            RegSpec::from_parts(modrm & 7, instruction.prefixes.rex().r(), match opwidth { +                                8 => RegisterBank::Q, +                                4 => RegisterBank::D, +                                2 => RegisterBank::W, +                                _ => unreachable!() +                            }); +                    } +                    _ => { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOpcode); +                    } +                } +            } else { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOpcode); +            } +        },          OperandCode::ModRM_0x0f71 => {              instruction.operand_count = 2; @@ -5604,6 +5997,19 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter              instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?;  
            instruction.operand_count = 2;          } +        OperandCode::ModRM_0xf30f38 => { +            let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; +            match op { +                0xf6 => { +                    instruction.opcode = Opcode::ADOX; +                    return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length); +                } +                _ => { +                    instruction.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            }; +        }          OperandCode::ModRM_0x660f38 => {              let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;              match op { @@ -5612,6 +6018,10 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter                  0xdd => { instruction.opcode = Opcode::AESENCLAST; }                  0xde => { instruction.opcode = Opcode::AESDEC; }                  0xdf => { instruction.opcode = Opcode::AESDECLAST; } +                0xf6 => { +                    instruction.opcode = Opcode::ADCX; +                    return read_operands(decoder, bytes_iter, instruction, OperandCode::Gv_Ev, length); +                }                  _ => {                      instruction.opcode = Opcode::Invalid;                      return Err(DecodeError::InvalidOpcode); @@ -5630,6 +6040,21 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter          OperandCode::ModRM_0x660f3a => {              let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?;              match op { +                0xcc => { +                    instruction.opcode = Opcode::SHA1RNDS4; + +                    let modrm = read_modrm(&mut bytes_iter, length)?; +                    instruction.modrm_rrr = +                        RegSpec::from_parts((modrm >> 3) & 
7, instruction.prefixes.rex().r(), RegisterBank::X); + + +                    instruction.operands[0] = OperandSpec::RegRRR; +                    instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; +                    instruction.imm = +                        read_imm_unsigned(&mut bytes_iter, 1, length)?; +                    instruction.operands[2] = OperandSpec::ImmU8; +                    instruction.operand_count = 3; +                }                  0xdf => {                      instruction.opcode = Opcode::AESKEYGENASSIST;                      // read operands right here right now @@ -6214,10 +6639,59 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter                  }              } else if r == 3 {                  let mod_bits = modrm >> 6; +                let m = modrm & 7;                  if mod_bits == 0b11 { -                    instruction.opcode = Opcode::Invalid; -                    instruction.operand_count = 0; -                    return Err(DecodeError::InvalidOperand); +                    match m { +                        0b000 => { +                            instruction.opcode = Opcode::VMRUN; +                            instruction.operand_count = 1; +                            instruction.modrm_rrr = RegSpec::rax(); +                            instruction.operands[0] = OperandSpec::RegRRR; +                        }, +                        0b001 => { +                            instruction.opcode = Opcode::VMMCALL; +                            instruction.operand_count = 0; +                        }, +                        0b010 => { +                            instruction.opcode = Opcode::VMLOAD; +                            instruction.operand_count = 1; +                            instruction.modrm_rrr = RegSpec::rax(); +                            instruction.operands[0] = OperandSpec::RegRRR; +                        }, +                        
0b011 => { +                            instruction.opcode = Opcode::VMSAVE; +                            instruction.operand_count = 1; +                            instruction.modrm_rrr = RegSpec::rax(); +                            instruction.operands[0] = OperandSpec::RegRRR; +                        }, +                        0b100 => { +                            instruction.opcode = Opcode::STGI; +                            instruction.operand_count = 0; +                        }, +                        0b101 => { +                            instruction.opcode = Opcode::CLGI; +                            instruction.operand_count = 0; +                        }, +                        0b110 => { +                            instruction.opcode = Opcode::SKINIT; +                            instruction.operand_count = 1; +                            instruction.operands[0] = OperandSpec::RegRRR; +                            instruction.modrm_rrr = RegSpec::eax(); +                        }, +                        0b111 => { +                            instruction.opcode = Opcode::INVLPGA; +                            instruction.operand_count = 2; +                            instruction.operands[0] = OperandSpec::RegRRR; +                            instruction.operands[1] = OperandSpec::RegMMM; +                            instruction.modrm_rrr = RegSpec::rax(); +                            instruction.modrm_mmm = RegSpec::ecx(); +                        }, +                        _ => { +                            instruction.opcode = Opcode::Invalid; +                            instruction.operand_count = 0; +                            return Err(DecodeError::InvalidOperand); +                        } +                    }                  } else {                      instruction.opcode = Opcode::LIDT;                      instruction.operand_count = 1; diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs new file mode 100644 
index 0000000..b2b1201 --- /dev/null +++ b/src/long_mode/uarch.rs @@ -0,0 +1,221 @@ +pub mod amd { +    //! most information about instruction set extensions for microarchitectures here was sourced +    //! from https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview and +    //! https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features. these mappings are best-effort +    //! but fairly unused, so a critical eye should be kept towards these decoders rejecting +    //! instructions they should not, or incorrectly accepting instructions. +    //! +    //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, +    //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension +    //! reportedly function correctly (agner p217). +    //! +    //! [agner](https://www.agner.org/optimize/microarchitecture.pdf) +    //! as retrieved 2020 may 19 +    //! `sha256: 87ff152ae18c017dcbfb9f7ee6e88a9f971f6250fd15a70a3dd87c3546323bd5` + +    use long_mode::InstDecoder; + +    /// `k8` was the first AMD microarchitecture to implement x86_64, launched in 2003. while later +    /// `k8`-based processors supported SSE3, these predefined decoders pick the lower end of +    /// support - SSE2 and no later. +    pub fn k8() -> InstDecoder { +        InstDecoder::minimal() +    } + +    /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through +    /// to SSE4.2a, as well as consistent `cmov` support, among other features. +    pub fn k10() -> InstDecoder { +        k8() +            .with_cmov() +            .with_cmpxchg16b() +            .with_svm() +            .with_abm() +            .with_lahfsahf() +            .with_sse3() +            .with_ssse3() +            .with_sse4() +            .with_sse4_2() +            .with_sse4a() +    } + +    /// `Bulldozer` was the successor to `K10`, launched in 2011. 
`Bulldozer` cores include AVX +    /// support among other extensions, and are notable for including `AESNI`. +    pub fn bulldozer() -> InstDecoder { +        k10() +            .with_bmi1() +            .with_aesni() +            .with_pclmulqdq() +            .with_f16c() +            .with_avx() +            .with_fma4() +            .with_xop() +    } + +    /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. +    pub fn piledriver() -> InstDecoder { +        bulldozer() +            .with_tbm() +            .with_fma3() +            .with_fma4() +    } + +    /// `Steamroller` was the successor to `Piledriver`, launched in 2014. unlike `Piledriver` +    /// cores, these cores do not support `TBM` or `FMA3`. +    pub fn steamroller() -> InstDecoder { +        bulldozer() +    } + +    /// `Excavator` was the successor to `Steamroller`, launched in 2015. +    pub fn excavator() -> InstDecoder { +        steamroller() +            .with_movbe() +            .with_bmi2() +            .with_rdrand() +            .with_avx() +            .with_xop() +            .with_bmi2() +            .with_sha() +            .with_rdrand() +            .with_avx2() +    } + +    /// `Zen` was the successor to `Excavator`, launched in 2017. `Zen` cores extend SIMD +    /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, +    /// SHA, RDSEED, and other extensions. +    pub fn zen() -> InstDecoder { +        k10() +            .with_avx() +            .with_avx2() +            .with_bmi1() +            .with_aesni() +            .with_pclmulqdq() +            .with_f16c() +            .with_movbe() +            .with_bmi2() +            .with_rdrand() +            .with_adx() +            .with_sha() +            .with_rdseed() +            .with_fma3() +            // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? +    } +} + +pub mod intel { +    //! sourced by walking wikipedia pages. seriously! 
this stuff is kinda hard to figure out! + +    use long_mode::InstDecoder; + +    /// `Netburst` was the first Intel microarchitecture to implement x86_64, beginning with the +    /// `Prescott` family launched in 2004. while the wider `Netburst` family launched in 2000 +    /// with only SSE2, the first `x86_64`-supporting incarnation was `Prescott` which indeed +    /// included SSE3. +    pub fn netburst() -> InstDecoder { +        InstDecoder::minimal() +            .with_cmov() +            .with_sse3() +    } + +    /// `Core` was the successor to `Netburst`, launched in 2006. it included up to SSE4, with +    /// processors using this architecture shipped under the names "Merom", "Conroe", and +    /// "Woodcrest", for mobile, desktop, and server processors respectively. not to be confused +    /// with the later `Nehalem` microarchitecture that introduced the `Core i*` product lines, +    /// `Core 2 *` processors used the `Core` architecture. +    pub fn core() -> InstDecoder { +        netburst() +            .with_ssse3() +            .with_sse4() +    } + +    /// `Peryn` was the successor to `Core`, launched in early 2008. it added SSE4.1, along with +    /// virtualization extensions. +    pub fn peryn() -> InstDecoder { +        core() +            .with_sse4_1() +    } + +    /// `Nehalem` was the successor to `Peryn`, launched in late 2008. not to be confused with the +    /// earlier `Core` microarchitecture, the `Core i*` products were based on `Nehalem` cores. +    /// `Nehalem` added SSE4.2 extensions, along with the `POPCNT` instruction. +    pub fn nehalem() -> InstDecoder { +        peryn() +            .with_sse4_2() +            .with_popcnt() +    } + +    /// `Westmere` was the successor to `Nehalem`, launched in 2010. it added AES-NI and CLMUL +    /// extensions. 
+    pub fn westmere() -> InstDecoder { +        nehalem() +            .with_aesni() +            .with_pclmulqdq() +    } + +    /// `Sandy Bridge` was the successor to `Westmere`, launched in 2011. it added AVX +    /// instructions. +    pub fn sandybridge() -> InstDecoder { +        westmere() +            .with_avx() +    } + +    /// `Ivy Bridge` was the successor to `Sandy Bridge`, launched in 2012. it added F16C +    /// extensions for 16-bit floating point conversion, and the RDRAND instruction. +    pub fn ivybridge() -> InstDecoder { +        sandybridge() +            .with_f16c() +            .with_rdrand() +    } + +    /// `Haswell` was the successor to `Ivy Bridge`, launched in 2013. it added several instruction +    /// set extensions: AVX2, BMI1, BMI2, ABM, and FMA3. +    pub fn haswell() -> InstDecoder { +        ivybridge() +            .with_bmi1() +            .with_bmi2() +            .with_abm() +            .with_fma3() +            .with_avx2() +    } + +    /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores +    /// were shipped as `E7-48xx/E7-88xx v3` models of processors. +    pub fn haswell_ex() -> InstDecoder { +        haswell() +            .with_tsx() +    } + +    /// `Broadwell` was the successor to `Haswell`, launched in late 2014. it added ADX, RDSEED, +    /// and PREFETCHW, as well as broadly rolling out TSX. TSX is enabled on this decoder because +    /// some chips of this microarchitecture rolled out with TSX, and lack of TSX seems to be +    /// reported as an errata (for example, the `Broadwell-Y` line of parts). +    pub fn broadwell() -> InstDecoder { +        haswell_ex() +            .with_adx() +            .with_rdseed() +            .with_prefetchw() +    } + +    /// `Skylake` was the successor to `Broadwell`, launched in mid 2015. it added MPX and SGX +    /// extensions, as well as a mixed rollout of AVX512 in different subsets for different product +    /// lines. 
+    /// +    /// AVX512 is not enabled on this decoder by default because there doesn't seem to be a lowest +    /// common denominator: if you want a `Skylake` decoder with AVX512, something like the +    /// following: +    /// ``` +    /// InstDecoder::skylake().with_avx512_f().with_avx512_dq() +    /// ``` +    /// is likely your best option. +    pub fn skylake() -> InstDecoder { +        broadwell() +            .with_mpx() +            .with_sgx() +    } + +    /// `Kaby Lake` was the successor to `Sky Lake`, launched in 2016. it adds no extensions to +    /// x86_64 implementaiton beyond `skylake`. +    pub fn kabylake() -> InstDecoder { +        skylake() +    } +    // ice lake is shipping so that should probably be included... +} | 
