diff options
| -rw-r--r-- | src/long_mode/display.rs | 581 | ||||
| -rw-r--r-- | src/long_mode/mod.rs | 307 | ||||
| -rw-r--r-- | src/long_mode/vex.rs | 815 | ||||
| -rw-r--r-- | test/long_mode/mod.rs | 998 | 
4 files changed, 2561 insertions, 140 deletions
| diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index f84ff7a..02d8382 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -664,6 +664,7 @@ const MNEMONICS: &[&'static str] = &[      "xorpd",      "vmovddup",      "vpshuflw", +    "vpshufhw",      "vhaddps",      "vhsubps",      "vaddsubps", @@ -835,6 +836,7 @@ const MNEMONICS: &[&'static str] = &[      "vpabsd",      "vpabsw",      "vpackssdw", +    "vpackusdw",      "vpacksswb",      "vpackuswb",      "vpaddb", @@ -846,12 +848,12 @@ const MNEMONICS: &[&'static str] = &[      "vpaddusw",      "vpaddw",      "vpalignr", -    "vandps",      "vandpd", -    "vorps", +    "vandps",      "vorpd", -    "vandnps", +    "vorps",      "vandnpd", +    "vandnps",      "vpand",      "vpandn",      "vpavgb", @@ -872,6 +874,8 @@ const MNEMONICS: &[&'static str] = &[      "vpcmpgtd",      "vpcmpgtq",      "vpcmpgtw", +    "vpcmpestri", +    "vpcmpestrm",      "vpcmpistri",      "vpcmpistrm",      "vperm2f128", @@ -893,7 +897,7 @@ const MNEMONICS: &[&'static str] = &[      "vphaddd",      "vphaddsw",      "vphaddw", -    "vphaddubsw", +    "vpmaddubsw",      "vphminposuw",      "vphsubd",      "vphsubsw", @@ -911,8 +915,11 @@ const MNEMONICS: &[&'static str] = &[      "vpmaxub",      "vpmaxuw",      "vpmaxud", +    "vpminsb",      "vpminsw",      "vpminsd", +    "vpminub", +    "vpminuw",      "vpminud",      "vpmovmskb",      "vpmovsxbd", @@ -1001,6 +1008,9 @@ const MNEMONICS: &[&'static str] = &[      "vxorpd",      "vxorps",      "vzeroupper", +    "vzeroall", +    "vldmxcsr", +    "vstmxcsr",      "pclmulqdq",      "aeskeygenassist",      "aesimc", @@ -1315,6 +1325,287 @@ const MNEMONICS: &[&'static str] = &[      // TSXLDTRK      "xsusldtrk",      "xresldtrk", + +    // AVX512F +    "valignd", +    "valignq", +    "vblendmpd", +    "vblendmps", +    "vcompresspd", +    "vcompressps", +    "vcvtpd2udq", +    "vcvttpd2udq", +    "vcvtps2udq", +    "vcvttps2udq", +    "vcvtqq2pd", +    
"vcvtqq2ps", +    "vcvtsd2usi", +    "vcvttsd2usi", +    "vcvtss2usi", +    "vcvttss2usi", +    "vcvtudq2pd", +    "vcvtudq2ps", +    "vcvtusi2usd", +    "vcvtusi2uss", +    "vexpandpd", +    "vexpandps", +    "vextractf32x4", +    "vextractf64x4", +    "vextracti32x4", +    "vextracti64x4", +    "vfixupimmpd", +    "vfixupimmps", +    "vfixupimmsd", +    "vfixupimmss", +    "vgetexppd", +    "vgetexpps", +    "vgetexpsd", +    "vgetexpss", +    "vgetmantpd", +    "vgetmantps", +    "vgetmantsd", +    "vgetmantss", +    "vinsertf32x4", +    "vinsertf64x4", +    "vmovdqa32", +    "vmovdqa64", +    "vmovdqu32", +    "vmovdqu64", +    "vpblendmd", +    "vpblendmq", +    "vpcmpd", +    "vpcmpud", +    "vpcmpq", +    "vpcmpuq", +    "vpcompressq", +    "vpcompressd", +    "vpermi2d", +    "vpermi2q", +    "vpermi2pd", +    "vpermi2ps", +    "vpermt2d", +    "vpermt2q", +    "vpermt2pd", +    "vpermt2ps", +    "vpmaxsq", +    "vpmaxuq", +    "vpminsq", +    "vpminuq", +    "vpmovsqb", +    "vpmovusqb", +    "vpmovsqw", +    "vpmovusqw", +    "vpmovsqd", +    "vpmovusqd", +    "vpmovsdb", +    "vpmovusdb", +    "vpmovsdw", +    "vpmovusdw", +    "vprold", +    "vprolq", +    "vprolvd", +    "vprolvq", +    "vprord", +    "vprorq", +    "vprorrd", +    "vprorrq", +    "vpscatterdd", +    "vpscatterdq", +    "vpscatterqd", +    "vpscatterqq", +    "vpsraq", +    "vpsravq", +    "vptestnmd", +    "vptestnmq", +    "vpterlogd", +    "vpterlogq", +    "vptestmd", +    "vptestmq", +    "vrcp14pd", +    "vrcp14ps", +    "vrcp14sd", +    "vrcp14ss", +    "vrndscalepd", +    "vrndscaleps", +    "vrndcsalesd", +    "vrndscaless", +    "vrsqrt14pd", +    "vrsqrt14ps", +    "vrsqrt14sd", +    "vrsqrt14ss", +    "vscaledpd", +    "vscaledps", +    "vscaledsd", +    "vscaledss", +    "vscatterdd", +    "vscatterdq", +    "vscatterqd", +    "vscatterqq", +    "vshuff32x4", +    "vshuff64x2", +    "vshufi32x4", +    "vshufi64x2", + +    // AVX512DQ +    "vcvttpd2qq", +    "vcvtpd2qq", +  
  "vcvttpd2uqq", +    "vcvtpd2uqq", +    "vcvttps2qq", +    "vcvtps2qq", +    "vcvttps2uqq", +    "vcvtps2uqq", +    "vcvtuqq2pd", +    "vcvtuqq2ps", +    "vextractf64x2", +    "vextracti64x2", +    "vfpclasspd", +    "vfpclassps", +    "vfpclasssd", +    "vfpclassss", +    "vinsertf64x2", +    "vinserti64x2", +    "vpmovm2d", +    "vpmovm2q", +    "vpmovb2d", +    "vpmovq2m", +    "vpmulllq", +    "vrangepd", +    "vrangeps", +    "vrangesd", +    "vrangess", +    "vreducepd", +    "vreduceps", +    "vreducesd", +    "vreducess", + +    // AVX512BW +    "vdbpsadbw", +    "vmovdqu8", +    "vmovdqu16", +    "vpblendmb", +    "vpblendmw", +    "vpcmpb", +    "vpcmpub", +    "vpcmpw", +    "vpcmpuw", +    "vpermw", +    "vpermi2b", +    "vpermi2w", +    "vpmovm2b", +    "vpmovm2w", +    "vpmovb2m", +    "vpmovw2m", +    "vpmovswb", +    "vpmovuswb", +    "vpsllvw", +    "vpsravw", +    "vpsrlvw", +    "vptestnmb", +    "vptestnmw", +    "vptestmb", +    "vptestmw", + +    // AVX512CD +    "vpbroadcastm", +    "vpconflictd", +    "vpconflictq", +    "vplzcntd", +    "vplzcntq", + +    "kunpckbw", +    "kunpckwd", +    "kunpckdq", + +    "kaddb", +    "kandb", +    "kandnb", +    "kmovb", +    "knotb", +    "korb", +    "kortestb", +    "kshiftlb", +    "kshiftrb", +    "ktestb", +    "kxnorb", +    "kxorb", +    "kaddw", +    "kandw", +    "kandnw", +    "kmovw", +    "knotw", +    "korw", +    "kortestw", +    "kshiftlw", +    "kshiftrw", +    "ktestw", +    "kxnorw", +    "kxorw", +    "kaddd", +    "kandd", +    "kandnd", +    "kmovd", +    "knotd", +    "kord", +    "kortestd", +    "kshiftld", +    "kshiftrd", +    "ktestd", +    "kxnord", +    "kxord", +    "kaddq", +    "kandq", +    "kandnq", +    "kmovq", +    "knotq", +    "korq", +    "kortestq", +    "kshiftlq", +    "kshiftrq", +    "ktestq", +    "kxnorq", +    "kxorq", + +    // AVX512ER +    "vexp2pd", +    "vexp2ps", +    "vexp2sd", +    "vexp2ss", +    "vrcp28pd", +    "vrcp28ps", +    "vrcp28sd", +   
 "vrcp28ss", +    "vrsqrt28pd", +    "vrsqrt28ps", +    "vrsqrt28sd", +    "vrsqrt28ss", + +    // AVX512PF +    "vgatherpf0dpd", +    "vgatherpf0dps", +    "vgatherpf0qpd", +    "vgatherpf0qps", +    "vgatherpf1dpd", +    "vgatherpf1dps", +    "vgatherpf1qpd", +    "vgatherpf1qps", +    "vscatterpf0dpd", +    "vscatterpf0dps", +    "vscatterpf0qpd", +    "vscatterpf0qps", +    "vscatterpf1dpd", +    "vscatterpf1dps", +    "vscatterpf1qpd", +    "vscatterpf1qps", + +    // MPX +    "bndmk", +    "bndcl", +    "bndcu", +    "bndcn", +    "bndmov", +    "bndldx", +    "bndstx",  ];  impl Opcode { @@ -1325,6 +1616,11 @@ impl Opcode {      }  } +    // AVX512CD + + +    // MPX +  impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {      fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result {          match self { @@ -1408,6 +1704,7 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VMULPS |              Opcode::VMULSD |              Opcode::VMULSS | +            Opcode::VPMULLLQ |              Opcode::VPABSB |              Opcode::VPABSD |              Opcode::VPABSW | @@ -1451,6 +1748,34 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VPSUBW |              Opcode::VROUNDPD |              Opcode::VROUNDPS | +            Opcode::VEXP2PD | +            Opcode::VEXP2PS | +            Opcode::VEXP2SD | +            Opcode::VEXP2SS | +            Opcode::VRCP28PD | +            Opcode::VRCP28PS | +            Opcode::VRCP28SD | +            Opcode::VRCP28SS | +            Opcode::VRCP14PD | +            Opcode::VRCP14PS | +            Opcode::VRCP14SD | +            Opcode::VRCP14SS | +            Opcode::VRNDSCALEPD | +            Opcode::VRNDSCALEPS | +            Opcode::VRNDCSALESD | +            Opcode::VRNDSCALESS | +            Opcode::VRSQRT14PD | +            Opcode::VRSQRT14PS | +            Opcode::VRSQRT14SD | +            Opcode::VRSQRT14SS | +            Opcode::VSCALEDPD 
| +            Opcode::VSCALEDPS | +            Opcode::VSCALEDSD | +            Opcode::VSCALEDSS | +            Opcode::VRSQRT28PD | +            Opcode::VRSQRT28PS | +            Opcode::VRSQRT28SD | +            Opcode::VRSQRT28SS |              Opcode::VRSQRTPS |              Opcode::VSQRTPD |              Opcode::VSQRTPS | @@ -1470,13 +1795,14 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VSQRTSS |              Opcode::VPSADBW |              Opcode::VMPSADBW | +            Opcode::VDBPSADBW |              Opcode::VPHADDD |              Opcode::VPHADDSW |              Opcode::VPHADDW |              Opcode::VPHSUBD |              Opcode::VPHSUBSW |              Opcode::VPHSUBW | -            Opcode::VPHADDUBSW | +            Opcode::VPMADDUBSW |              Opcode::VPMADDWD |              Opcode::VDPPD |              Opcode::VDPPS | @@ -1499,6 +1825,19 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VPSLLVD |              Opcode::VPSLLVQ |              Opcode::VPSLLW | +            Opcode::VPROLD | +            Opcode::VPROLQ | +            Opcode::VPROLVD | +            Opcode::VPROLVQ | +            Opcode::VPRORD | +            Opcode::VPRORQ | +            Opcode::VPRORRD | +            Opcode::VPRORRQ | +            Opcode::VPSLLVW | +            Opcode::VPSRAQ | +            Opcode::VPSRAVQ | +            Opcode::VPSRAVW | +            Opcode::VPSRLVW |              Opcode::VPSRAD |              Opcode::VPSRAVD |              Opcode::VPSRAW | @@ -1584,6 +1923,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::SUB |              Opcode::POPCNT |              Opcode::LZCNT | +            Opcode::VPLZCNTD | +            Opcode::VPLZCNTQ |              Opcode::BT |              Opcode::BTS |              Opcode::BTR | @@ -1702,6 +2043,42 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::FXTRACT |           
   Opcode::FYL2X |              Opcode::FYL2XP1 | +            Opcode::KADDB | +            Opcode::KANDB | +            Opcode::KANDNB | +            Opcode::KNOTB | +            Opcode::KORB | +            Opcode::KSHIFTLB | +            Opcode::KSHIFTRB | +            Opcode::KXNORB | +            Opcode::KXORB | +            Opcode::KADDW | +            Opcode::KANDW | +            Opcode::KANDNW | +            Opcode::KNOTW | +            Opcode::KORW | +            Opcode::KSHIFTLW | +            Opcode::KSHIFTRW | +            Opcode::KXNORW | +            Opcode::KXORW | +            Opcode::KADDD | +            Opcode::KANDD | +            Opcode::KANDND | +            Opcode::KNOTD | +            Opcode::KORD | +            Opcode::KSHIFTLD | +            Opcode::KSHIFTRD | +            Opcode::KXNORD | +            Opcode::KXORD | +            Opcode::KADDQ | +            Opcode::KANDQ | +            Opcode::KANDNQ | +            Opcode::KNOTQ | +            Opcode::KORQ | +            Opcode::KSHIFTLQ | +            Opcode::KSHIFTRQ | +            Opcode::KXNORQ | +            Opcode::KXORQ |              Opcode::IMUL => { write!(out, "{}", colors.arithmetic_op(self)) }              Opcode::POPF |              Opcode::PUSHF | @@ -1778,12 +2155,43 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VCVTSS2SI |              Opcode::VCVTTSD2SI |              Opcode::VCVTTSS2SI | +            Opcode::VCVTPD2UDQ | +            Opcode::VCVTTPD2UDQ | +            Opcode::VCVTPS2UDQ | +            Opcode::VCVTTPS2UDQ | +            Opcode::VCVTQQ2PD | +            Opcode::VCVTQQ2PS | +            Opcode::VCVTSD2USI | +            Opcode::VCVTTSD2USI | +            Opcode::VCVTSS2USI | +            Opcode::VCVTTSS2USI | +            Opcode::VCVTUDQ2PD | +            Opcode::VCVTUDQ2PS | +            Opcode::VCVTUSI2USD | +            Opcode::VCVTUSI2USS | +            Opcode::VCVTTPD2QQ | +            Opcode::VCVTPD2QQ | +      
      Opcode::VCVTTPD2UQQ | +            Opcode::VCVTPD2UQQ | +            Opcode::VCVTTPS2QQ | +            Opcode::VCVTPS2QQ | +            Opcode::VCVTTPS2UQQ | +            Opcode::VCVTPS2UQQ | +            Opcode::VCVTUQQ2PD | +            Opcode::VCVTUQQ2PS |              Opcode::VMOVDDUP |              Opcode::VPSHUFLW | +            Opcode::VPSHUFHW | +            Opcode::VBLENDMPD | +            Opcode::VBLENDMPS | +            Opcode::VPBLENDMD | +            Opcode::VPBLENDMQ |              Opcode::VBLENDPD |              Opcode::VBLENDPS |              Opcode::VBLENDVPD |              Opcode::VBLENDVPS | +            Opcode::VPBLENDMB | +            Opcode::VPBLENDMW |              Opcode::PBLENDVB |              Opcode::PBLENDW |              Opcode::BLENDPD | @@ -1795,6 +2203,7 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VBROADCASTI128 |              Opcode::VBROADCASTSD |              Opcode::VBROADCASTSS | +            Opcode::VPBROADCASTM |              Opcode::VEXTRACTF128 |              Opcode::VEXTRACTI128 |              Opcode::VEXTRACTPS | @@ -1803,10 +2212,48 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VGATHERDPS |              Opcode::VGATHERQPD |              Opcode::VGATHERQPS | +            Opcode::VGATHERPF0DPD | +            Opcode::VGATHERPF0DPS | +            Opcode::VGATHERPF0QPD | +            Opcode::VGATHERPF0QPS | +            Opcode::VGATHERPF1DPD | +            Opcode::VGATHERPF1DPS | +            Opcode::VGATHERPF1QPD | +            Opcode::VGATHERPF1QPS | +            Opcode::VSCATTERDD | +            Opcode::VSCATTERDQ | +            Opcode::VSCATTERQD | +            Opcode::VSCATTERQQ | +            Opcode::VPSCATTERDD | +            Opcode::VPSCATTERDQ | +            Opcode::VPSCATTERQD | +            Opcode::VPSCATTERQQ | +            Opcode::VSCATTERPF0DPD | +            Opcode::VSCATTERPF0DPS | +            Opcode::VSCATTERPF0QPD | +   
         Opcode::VSCATTERPF0QPS | +            Opcode::VSCATTERPF1DPD | +            Opcode::VSCATTERPF1DPS | +            Opcode::VSCATTERPF1QPD | +            Opcode::VSCATTERPF1QPS |              Opcode::VINSERTF128 |              Opcode::VINSERTI128 |              Opcode::VINSERTPS |              Opcode::INSERTPS | +            Opcode::VEXTRACTF32X4 | +            Opcode::VEXTRACTF64X2 | +            Opcode::VEXTRACTF64X4 | +            Opcode::VEXTRACTI32X4 | +            Opcode::VEXTRACTI64X2 | +            Opcode::VEXTRACTI64X4 | +            Opcode::VINSERTF32X4 | +            Opcode::VINSERTF64X2 | +            Opcode::VINSERTF64X4 | +            Opcode::VINSERTI64X2 | +            Opcode::VSHUFF32X4 | +            Opcode::VSHUFF64X2 | +            Opcode::VSHUFI32X4 | +            Opcode::VSHUFI64X2 |              Opcode::VMASKMOVDQU |              Opcode::VMASKMOVPD |              Opcode::VMASKMOVPS | @@ -1837,6 +2284,32 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VMOVUPS |              Opcode::VMOVSD |              Opcode::VMOVSS | +            Opcode::VMOVDQA32 | +            Opcode::VMOVDQA64 | +            Opcode::VMOVDQU32 | +            Opcode::VMOVDQU64 | +            Opcode::VPMOVM2B | +            Opcode::VPMOVM2W | +            Opcode::VPMOVB2M | +            Opcode::VPMOVW2M | +            Opcode::VPMOVSWB | +            Opcode::VPMOVUSWB | +            Opcode::VPMOVSQB | +            Opcode::VPMOVUSQB | +            Opcode::VPMOVSQW | +            Opcode::VPMOVUSQW | +            Opcode::VPMOVSQD | +            Opcode::VPMOVUSQD | +            Opcode::VPMOVSDB | +            Opcode::VPMOVUSDB | +            Opcode::VPMOVSDW | +            Opcode::VPMOVUSDW | +            Opcode::VPMOVM2D | +            Opcode::VPMOVM2Q | +            Opcode::VPMOVB2D | +            Opcode::VPMOVQ2M | +            Opcode::VMOVDQU8 | +            Opcode::VMOVDQU16 |              Opcode::VPBLENDD |              
Opcode::VPBLENDVB | @@ -1875,6 +2348,9 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::PMOVZXDQ |              Opcode::PMOVZXWD |              Opcode::PMOVZXWQ | +            Opcode::KUNPCKBW | +            Opcode::KUNPCKWD | +            Opcode::KUNPCKDQ |              Opcode::VUNPCKHPD |              Opcode::VUNPCKHPS |              Opcode::VUNPCKLPD | @@ -1890,9 +2366,12 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VSHUFPD |              Opcode::VSHUFPS |              Opcode::VPACKSSDW | +            Opcode::VPACKUSDW |              Opcode::PACKUSDW |              Opcode::VPACKSSWB |              Opcode::VPACKUSWB | +            Opcode::VALIGND | +            Opcode::VALIGNQ |              Opcode::VPALIGNR |              Opcode::PALIGNR |              Opcode::VPERM2F128 | @@ -1903,6 +2382,17 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VPERMPD |              Opcode::VPERMPS |              Opcode::VPERMQ | +            Opcode::VPERMI2D | +            Opcode::VPERMI2Q | +            Opcode::VPERMI2PD | +            Opcode::VPERMI2PS | +            Opcode::VPERMT2D | +            Opcode::VPERMT2Q | +            Opcode::VPERMT2PD | +            Opcode::VPERMT2PS | +            Opcode::VPERMI2B | +            Opcode::VPERMI2W | +            Opcode::VPERMW |              Opcode::VPEXTRB |              Opcode::VPEXTRD |              Opcode::VPEXTRQ | @@ -1921,11 +2411,34 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VPINSRW |              Opcode::VPMASKMOVD |              Opcode::VPMASKMOVQ | +            Opcode::VCOMPRESSPD | +            Opcode::VCOMPRESSPS | +            Opcode::VPCOMPRESSQ | +            Opcode::VPCOMPRESSD | +            Opcode::VEXPANDPD | +            Opcode::VEXPANDPS |              Opcode::VPSHUFB |              Opcode::VPSHUFD |              Opcode::VPHMINPOSUW |              
Opcode::PHMINPOSUW |              Opcode::VZEROUPPER | +            Opcode::VZEROALL | +            Opcode::VFIXUPIMMPD | +            Opcode::VFIXUPIMMPS | +            Opcode::VFIXUPIMMSD | +            Opcode::VFIXUPIMMSS | +            Opcode::VREDUCEPD | +            Opcode::VREDUCEPS | +            Opcode::VREDUCESD | +            Opcode::VREDUCESS | +            Opcode::VGETEXPPD | +            Opcode::VGETEXPPS | +            Opcode::VGETEXPSD | +            Opcode::VGETEXPSS | +            Opcode::VGETMANTPD | +            Opcode::VGETMANTPS | +            Opcode::VGETMANTSD | +            Opcode::VGETMANTSS |              Opcode::VLDDQU |              Opcode::BSWAP |              Opcode::CVTDQ2PD | @@ -1993,6 +2506,11 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::SHUFPD |              Opcode::SHUFPS |              Opcode::PMOVMSKB | +            Opcode::KMOVB | +            Opcode::KMOVW | +            Opcode::KMOVD | +            Opcode::KMOVQ | +            Opcode::BNDMOV |              Opcode::LDDQU |              Opcode::CMC |              Opcode::CLC | @@ -2109,6 +2627,32 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VCOMISS |              Opcode::VUCOMISD |              Opcode::VUCOMISS | +            Opcode::KORTESTB | +            Opcode::KTESTB | +            Opcode::KORTESTW | +            Opcode::KTESTW | +            Opcode::KORTESTD | +            Opcode::KTESTD | +            Opcode::KORTESTQ | +            Opcode::KTESTQ | +            Opcode::VPTESTNMD | +            Opcode::VPTESTNMQ | +            Opcode::VPTERLOGD | +            Opcode::VPTERLOGQ | +            Opcode::VPTESTMD | +            Opcode::VPTESTMQ | +            Opcode::VPTESTNMB | +            Opcode::VPTESTNMW | +            Opcode::VPTESTMB | +            Opcode::VPTESTMW | +            Opcode::VPCMPD | +            Opcode::VPCMPUD | +            Opcode::VPCMPQ | +            Opcode::VPCMPUQ 
| +            Opcode::VPCMPB | +            Opcode::VPCMPUB | +            Opcode::VPCMPW | +            Opcode::VPCMPUW |              Opcode::VCMPPD |              Opcode::VCMPPS |              Opcode::VCMPSD | @@ -2117,6 +2661,10 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VMAXPS |              Opcode::VMAXSD |              Opcode::VMAXSS | +            Opcode::VPMAXSQ | +            Opcode::VPMAXUQ | +            Opcode::VPMINSQ | +            Opcode::VPMINUQ |              Opcode::VMINPD |              Opcode::VMINPS |              Opcode::VMINSD | @@ -2129,6 +2677,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VPCMPGTD |              Opcode::VPCMPGTQ |              Opcode::VPCMPGTW | +            Opcode::VPCMPESTRI | +            Opcode::VPCMPESTRM |              Opcode::VPCMPISTRI |              Opcode::VPCMPISTRM |              Opcode::VPMAXSB | @@ -2137,9 +2687,22 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::VPMAXUB |              Opcode::VPMAXUW |              Opcode::VPMAXUD | +            Opcode::VPMINSB |              Opcode::VPMINSW |              Opcode::VPMINSD | +            Opcode::VPMINUB | +            Opcode::VPMINUW |              Opcode::VPMINUD | +            Opcode::VFPCLASSPD | +            Opcode::VFPCLASSPS | +            Opcode::VFPCLASSSD | +            Opcode::VFPCLASSSS | +            Opcode::VRANGEPD | +            Opcode::VRANGEPS | +            Opcode::VRANGESD | +            Opcode::VRANGESS | +            Opcode::VPCONFLICTD | +            Opcode::VPCONFLICTQ |              Opcode::VPTEST |              Opcode::VTESTPD |              Opcode::VTESTPS | @@ -2220,6 +2783,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::FXRSTOR |              Opcode::LDMXCSR |              Opcode::STMXCSR | +            Opcode::VLDMXCSR | +            Opcode::VSTMXCSR |              
Opcode::XSAVE |              Opcode::XSAVEC |              Opcode::XSAVES | @@ -2327,6 +2892,12 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::SENDUIPI |              Opcode::XSUSLDTRK |              Opcode::XRESLDTRK | +            Opcode::BNDMK | +            Opcode::BNDCL | +            Opcode::BNDCU | +            Opcode::BNDCN | +            Opcode::BNDLDX | +            Opcode::BNDSTX |              Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) }              Opcode::CRC32 | diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 57508e4..88cae67 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -1293,6 +1293,7 @@ pub enum Opcode {      VMOVDDUP,      VPSHUFLW, +    VPSHUFHW,      VHADDPS,      VHSUBPS,      VADDSUBPS, @@ -1465,6 +1466,7 @@ pub enum Opcode {      VPABSD,      VPABSW,      VPACKSSDW, +    VPACKUSDW,      VPACKSSWB,      VPACKUSWB,      VPADDB, @@ -1502,6 +1504,8 @@ pub enum Opcode {      VPCMPGTD,      VPCMPGTQ,      VPCMPGTW, +    VPCMPESTRI, +    VPCMPESTRM,      VPCMPISTRI,      VPCMPISTRM,      VPERM2F128, @@ -1523,7 +1527,7 @@ pub enum Opcode {      VPHADDD,      VPHADDSW,      VPHADDW, -    VPHADDUBSW, +    VPMADDUBSW,      VPHMINPOSUW,      VPHSUBD,      VPHSUBSW, @@ -1541,8 +1545,11 @@ pub enum Opcode {      VPMAXUB,      VPMAXUW,      VPMAXUD, +    VPMINSB,      VPMINSW,      VPMINSD, +    VPMINUB, +    VPMINUW,      VPMINUD,      VPMOVMSKB,      VPMOVSXBD, @@ -1631,6 +1638,9 @@ pub enum Opcode {      VXORPD,      VXORPS,      VZEROUPPER, +    VZEROALL, +    VLDMXCSR, +    VSTMXCSR,      PCLMULQDQ,      AESKEYGENASSIST, @@ -1960,6 +1970,287 @@ pub enum Opcode {      // TSXLDTRK      XSUSLDTRK,      XRESLDTRK, + +    // AVX512F +    VALIGND, +    VALIGNQ, +    VBLENDMPD, +    VBLENDMPS, +    VCOMPRESSPD, +    VCOMPRESSPS, +    VCVTPD2UDQ, +    VCVTTPD2UDQ, +    VCVTPS2UDQ, +    VCVTTPS2UDQ, +    VCVTQQ2PD, +    VCVTQQ2PS, +    VCVTSD2USI, +    VCVTTSD2USI, +    
VCVTSS2USI, +    VCVTTSS2USI, +    VCVTUDQ2PD, +    VCVTUDQ2PS, +    VCVTUSI2USD, +    VCVTUSI2USS, +    VEXPANDPD, +    VEXPANDPS, +    VEXTRACTF32X4, +    VEXTRACTF64X4, +    VEXTRACTI32X4, +    VEXTRACTI64X4, +    VFIXUPIMMPD, +    VFIXUPIMMPS, +    VFIXUPIMMSD, +    VFIXUPIMMSS, +    VGETEXPPD, +    VGETEXPPS, +    VGETEXPSD, +    VGETEXPSS, +    VGETMANTPD, +    VGETMANTPS, +    VGETMANTSD, +    VGETMANTSS, +    VINSERTF32X4, +    VINSERTF64X4, +    VMOVDQA32, +    VMOVDQA64, +    VMOVDQU32, +    VMOVDQU64, +    VPBLENDMD, +    VPBLENDMQ, +    VPCMPD, +    VPCMPUD, +    VPCMPQ, +    VPCMPUQ, +    VPCOMPRESSQ, +    VPCOMPRESSD, +    VPERMI2D, +    VPERMI2Q, +    VPERMI2PD, +    VPERMI2PS, +    VPERMT2D, +    VPERMT2Q, +    VPERMT2PD, +    VPERMT2PS, +    VPMAXSQ, +    VPMAXUQ, +    VPMINSQ, +    VPMINUQ, +    VPMOVSQB, +    VPMOVUSQB, +    VPMOVSQW, +    VPMOVUSQW, +    VPMOVSQD, +    VPMOVUSQD, +    VPMOVSDB, +    VPMOVUSDB, +    VPMOVSDW, +    VPMOVUSDW, +    VPROLD, +    VPROLQ, +    VPROLVD, +    VPROLVQ, +    VPRORD, +    VPRORQ, +    VPRORRD, +    VPRORRQ, +    VPSCATTERDD, +    VPSCATTERDQ, +    VPSCATTERQD, +    VPSCATTERQQ, +    VPSRAQ, +    VPSRAVQ, +    VPTESTNMD, +    VPTESTNMQ, +    VPTERLOGD, +    VPTERLOGQ, +    VPTESTMD, +    VPTESTMQ, +    VRCP14PD, +    VRCP14PS, +    VRCP14SD, +    VRCP14SS, +    VRNDSCALEPD, +    VRNDSCALEPS, +    VRNDCSALESD, +    VRNDSCALESS, +    VRSQRT14PD, +    VRSQRT14PS, +    VRSQRT14SD, +    VRSQRT14SS, +    VSCALEDPD, +    VSCALEDPS, +    VSCALEDSD, +    VSCALEDSS, +    VSCATTERDD, +    VSCATTERDQ, +    VSCATTERQD, +    VSCATTERQQ, +    VSHUFF32X4, +    VSHUFF64X2, +    VSHUFI32X4, +    VSHUFI64X2, + +    // AVX512DQ +    VCVTTPD2QQ, +    VCVTPD2QQ, +    VCVTTPD2UQQ, +    VCVTPD2UQQ, +    VCVTTPS2QQ, +    VCVTPS2QQ, +    VCVTTPS2UQQ, +    VCVTPS2UQQ, +    VCVTUQQ2PD, +    VCVTUQQ2PS, +    VEXTRACTF64X2, +    VEXTRACTI64X2, +    VFPCLASSPD, +    VFPCLASSPS, +    VFPCLASSSD, +    VFPCLASSSS, +    VINSERTF64X2, +    
VINSERTI64X2, +    VPMOVM2D, +    VPMOVM2Q, +    VPMOVB2D, +    VPMOVQ2M, +    VPMULLLQ, +    VRANGEPD, +    VRANGEPS, +    VRANGESD, +    VRANGESS, +    VREDUCEPD, +    VREDUCEPS, +    VREDUCESD, +    VREDUCESS, + +    // AVX512BW +    VDBPSADBW, +    VMOVDQU8, +    VMOVDQU16, +    VPBLENDMB, +    VPBLENDMW, +    VPCMPB, +    VPCMPUB, +    VPCMPW, +    VPCMPUW, +    VPERMW, +    VPERMI2B, +    VPERMI2W, +    VPMOVM2B, +    VPMOVM2W, +    VPMOVB2M, +    VPMOVW2M, +    VPMOVSWB, +    VPMOVUSWB, +    VPSLLVW, +    VPSRAVW, +    VPSRLVW, +    VPTESTNMB, +    VPTESTNMW, +    VPTESTMB, +    VPTESTMW, + +    // AVX512CD +    VPBROADCASTM, +    VPCONFLICTD, +    VPCONFLICTQ, +    VPLZCNTD, +    VPLZCNTQ, + +    KUNPCKBW, +    KUNPCKWD, +    KUNPCKDQ, + +    KADDB, +    KANDB, +    KANDNB, +    KMOVB, +    KNOTB, +    KORB, +    KORTESTB, +    KSHIFTLB, +    KSHIFTRB, +    KTESTB, +    KXNORB, +    KXORB, +    KADDW, +    KANDW, +    KANDNW, +    KMOVW, +    KNOTW, +    KORW, +    KORTESTW, +    KSHIFTLW, +    KSHIFTRW, +    KTESTW, +    KXNORW, +    KXORW, +    KADDD, +    KANDD, +    KANDND, +    KMOVD, +    KNOTD, +    KORD, +    KORTESTD, +    KSHIFTLD, +    KSHIFTRD, +    KTESTD, +    KXNORD, +    KXORD, +    KADDQ, +    KANDQ, +    KANDNQ, +    KMOVQ, +    KNOTQ, +    KORQ, +    KORTESTQ, +    KSHIFTLQ, +    KSHIFTRQ, +    KTESTQ, +    KXNORQ, +    KXORQ, + +    // AVX512ER +    VEXP2PD, +    VEXP2PS, +    VEXP2SD, +    VEXP2SS, +    VRCP28PD, +    VRCP28PS, +    VRCP28SD, +    VRCP28SS, +    VRSQRT28PD, +    VRSQRT28PS, +    VRSQRT28SD, +    VRSQRT28SS, + +    // AVX512PF +    VGATHERPF0DPD, +    VGATHERPF0DPS, +    VGATHERPF0QPD, +    VGATHERPF0QPS, +    VGATHERPF1DPD, +    VGATHERPF1DPS, +    VGATHERPF1QPD, +    VGATHERPF1QPS, +    VSCATTERPF0DPD, +    VSCATTERPF0DPS, +    VSCATTERPF0QPD, +    VSCATTERPF0QPS, +    VSCATTERPF1DPD, +    VSCATTERPF1DPS, +    VSCATTERPF1QPD, +    VSCATTERPF1QPS, + +    // MPX +    BNDMK, +    BNDCL, +    BNDCU, +    BNDCN, +    
BNDMOV, +    BNDLDX, +    BNDSTX,  }  #[derive(Debug)] @@ -2936,6 +3227,7 @@ impl InstDecoder {              // AVX...              Opcode::VMOVDDUP |              Opcode::VPSHUFLW | +            Opcode::VPSHUFHW |              Opcode::VHADDPS |              Opcode::VHSUBPS |              Opcode::VADDSUBPS | @@ -3099,6 +3391,7 @@ impl InstDecoder {              Opcode::VPABSD |              Opcode::VPABSW |              Opcode::VPACKSSDW | +            Opcode::VPACKUSDW |              Opcode::VPACKSSWB |              Opcode::VPACKUSWB |              Opcode::VPADDB | @@ -3136,6 +3429,8 @@ impl InstDecoder {              Opcode::VPCMPGTD |              Opcode::VPCMPGTQ |              Opcode::VPCMPGTW | +            Opcode::VPCMPESTRI | +            Opcode::VPCMPESTRM |              Opcode::VPCMPISTRI |              Opcode::VPCMPISTRM |              Opcode::VPERM2F128 | @@ -3157,7 +3452,7 @@ impl InstDecoder {              Opcode::VPHADDD |              Opcode::VPHADDSW |              Opcode::VPHADDW | -            Opcode::VPHADDUBSW | +            Opcode::VPMADDUBSW |              Opcode::VPHMINPOSUW |              Opcode::VPHSUBD |              Opcode::VPHSUBSW | @@ -3175,8 +3470,11 @@ impl InstDecoder {              Opcode::VPMAXUB |              Opcode::VPMAXUW |              Opcode::VPMAXUD | +            Opcode::VPMINSB |              Opcode::VPMINSW |              Opcode::VPMINSD | +            Opcode::VPMINUB | +            Opcode::VPMINUW |              Opcode::VPMINUD |              Opcode::VPMOVMSKB |              Opcode::VPMOVSXBD | @@ -3264,7 +3562,10 @@ impl InstDecoder {              Opcode::VUNPCKLPS |              Opcode::VXORPD |              Opcode::VXORPS | -            Opcode::VZEROUPPER => { +            Opcode::VZEROUPPER | +            Opcode::VZEROALL | +            Opcode::VLDMXCSR | +            Opcode::VSTMXCSR => {                  // TODO: check a table for these                  if !self.avx() {                      inst.opcode = 
Opcode::Invalid; diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index 9d51ba4..401c61f 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -39,23 +39,33 @@ enum VEXOperandCode {      VMOVSD_10,      VMOVSD_11,      VMOVSS_11, +    VMOVLPS_12, +    VMOVHPS_16,      E_G_xmm,      U_G_xmm,      M_G_xmm,      G_M_xmm,      G_U_xmm, +    Gd_U_xmm,      E_G_xmm_imm8, -    U_G_xmm_imm8, +    Ud_G_xmm_imm8, +    Ud_G_xmm, +    Ud_G_ymm,      E_G_ymm,      U_G_ymm,      M_G_ymm,      G_E_ymm,      G_M_ymm,      G_U_ymm, +    Gd_U_ymm,      E_V_G_ymm,      E_V_G_xmm,      E_xmm_G_ymm_imm8,      Ev_G_xmm_imm8, +    Eb_G_xmm_imm8, +    Ew_G_xmm_imm8, +    Ed_G_xmm_imm8, +    Eq_G_xmm_imm8,      G_Ex_V_xmm,      G_Ey_V_ymm,      G_E_xmm, @@ -68,12 +78,17 @@ enum VEXOperandCode {      G_V_ymm_E_xmm,      M_V_G_xmm,      M_V_G_ymm, +    G_V_xmm_Ed, +    G_V_xmm_Eq,      G_V_E_xmm,      G_V_E_xmm_imm8,      G_V_E_xmm_xmm4,      G_V_E_ymm,      G_V_E_ymm_imm8,      G_V_E_ymm_ymm4, +    G_V_xmm_Eb_imm8, +    G_V_xmm_Ed_imm8, +    G_V_xmm_Eq_imm8,      G_V_M_xmm,      G_V_M_ymm,      V_xmm_G_ymm_E_ymm_imm8, @@ -86,7 +101,12 @@ enum VEXOperandCode {      G_E_V,      G_V_E,      G_E_Ib, +    VCVT_Gd_Ed_xmm, +    VCVT_Gd_Eq_xmm, +    VCVT_Gq_Ed_xmm, +    VCVT_Gq_Eq_xmm,      BMI1_F3, +    MXCSR,  }  #[inline(never)] @@ -103,7 +123,6 @@ pub(crate) fn three_byte_vex<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &          _ => { unreachable!("p is two bits"); }      };      let m = vex_byte_one & 0b11111; -//    println!("m: {:05b}", m);      let m = match m {          0b00001 => VEXOpcodeMap::Map0F,          0b00010 => VEXOpcodeMap::Map0F38, @@ -151,6 +170,10 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst      match operand_code {          VEXOperandCode::VPS_71 => {              let modrm = read_modrm(bytes, length)?; +            if modrm & 0xc0 != 0xc0 { +                instruction.opcode = Opcode::Invalid; +              
  return Err(DecodeError::InvalidOperand); +            }              match (modrm >> 3) & 0b111 {                  0b010 => {                      instruction.opcode = Opcode::VPSRLW; @@ -177,7 +200,14 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst          }          VEXOperandCode::VPS_71_L => {              let modrm = read_modrm(bytes, length)?; +            if modrm & 0xc0 != 0xc0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            }              match (modrm >> 3) & 0b111 { +                0b001 => { +                    instruction.opcode = Opcode::VPSLLW; +                }                  0b010 => {                      instruction.opcode = Opcode::VPSRLW;                  } @@ -203,6 +233,10 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst          }          VEXOperandCode::VPS_72 => {              let modrm = read_modrm(bytes, length)?; +            if modrm & 0xc0 != 0xc0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            }              match (modrm >> 3) & 0b111 {                  0b010 => {                      instruction.opcode = Opcode::VPSRLD; @@ -229,6 +263,10 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst          }          VEXOperandCode::VPS_72_L => {              let modrm = read_modrm(bytes, length)?; +            if modrm & 0xc0 != 0xc0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            }              match (modrm >> 3) & 0b111 {                  0b010 => {                      instruction.opcode = Opcode::VPSRLD; @@ -255,6 +293,10 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst          }          VEXOperandCode::VPS_73 => {              let modrm = 
read_modrm(bytes, length)?; +            if modrm & 0xc0 != 0xc0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            }              match (modrm >> 3) & 0b111 {                  0b010 => {                      instruction.opcode = Opcode::VPSRLQ; @@ -284,6 +326,10 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst          }          VEXOperandCode::VPS_73_L => {              let modrm = read_modrm(bytes, length)?; +            if modrm & 0xc0 != 0xc0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            }              match (modrm >> 3) & 0b111 {                  0b000 |                  0b001 | @@ -331,6 +377,10 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst                      instruction.operand_count = 3;                  },                  other => { +                    if instruction.vex_reg.num != 0 { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOperand); +                    }                      instruction.operands[1] = other;                      instruction.operand_count = 2;                  } @@ -343,23 +393,171 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X);              let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; -            instruction.operands[1] = OperandSpec::RegRRR; +            instruction.operands[2] = OperandSpec::RegRRR;              match mem_oper {                  OperandSpec::RegMMM => { -                    instruction.operands[0] = OperandSpec::RegVex; -                    instruction.operands[2] = OperandSpec::RegMMM; +                    
instruction.operands[0] = OperandSpec::RegMMM; +                    instruction.operands[1] = OperandSpec::RegVex;                      instruction.operand_count = 3;                  },                  other => { +                    if instruction.vex_reg.num != 0 { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOperand); +                    }                      instruction.operands[0] = other; +                    instruction.operands[1] = instruction.operands[2];                      instruction.operand_count = 2;                  }              }              Ok(())          }, +        VEXOperandCode::VMOVLPS_12 => { +            let modrm = read_modrm(bytes, length)?; +            instruction.opcode = if modrm & 0xc0 == 0xc0 { +                Opcode::VMOVHLPS +            } else { +                Opcode::VMOVLPS +            }; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = read_E_xmm(bytes, instruction, modrm, length)?; +            instruction.operand_count = 3; +            Ok(()) +        } +        VEXOperandCode::VMOVHPS_16 => { +            let modrm = read_modrm(bytes, length)?; +            instruction.opcode = if modrm & 0xc0 == 0xc0 { +                Opcode::VMOVLHPS +            } else { +                Opcode::VMOVHPS +            }; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = read_E_xmm(bytes, instruction, modrm, length)?; +            instruction.operand_count = 
3; +            Ok(()) +        }          VEXOperandCode::Nothing => {              Ok(())          }, +        VEXOperandCode::Eb_G_xmm_imm8 => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            instruction.operands[0] = mem_oper; +            instruction.operands[1] = OperandSpec::RegRRR; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.mem_size = 1; +            instruction.operand_count = 3; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            Ok(()) +        }, +        VEXOperandCode::Ew_G_xmm_imm8 => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            instruction.operands[0] = mem_oper; +            instruction.operands[1] = OperandSpec::RegRRR; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.mem_size = 2; +            instruction.operand_count = 3; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            Ok(()) +        }, +        VEXOperandCode::Ed_G_xmm_imm8 => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +         
   } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            instruction.operands[0] = mem_oper; +            instruction.operands[1] = OperandSpec::RegRRR; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.mem_size = 4; +            instruction.operand_count = 3; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            Ok(()) +        }, +        VEXOperandCode::Eq_G_xmm_imm8 => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E(bytes, instruction, modrm, 8, length)?; +            instruction.operands[0] = mem_oper; +            instruction.operands[1] = OperandSpec::RegRRR; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.mem_size = 8; +            instruction.operand_count = 3; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            Ok(()) +        }, +        VEXOperandCode::G_V_xmm_Eb_imm8 => { +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            instruction.vex_reg.bank = RegisterBank::X; +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = 
mem_oper; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            instruction.operands[3] = OperandSpec::ImmU8; +            instruction.mem_size = 1; +            instruction.operand_count = 4; +            Ok(()) +        } +        VEXOperandCode::G_V_xmm_Ed_imm8 => { +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            instruction.vex_reg.bank = RegisterBank::X; +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = mem_oper; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            instruction.operands[3] = OperandSpec::ImmU8; +            instruction.mem_size = 4; +            instruction.operand_count = 4; +            Ok(()) +        } +        VEXOperandCode::G_V_xmm_Eq_imm8 => { +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            instruction.vex_reg.bank = RegisterBank::X; +            let mem_oper = read_E(bytes, instruction, modrm, 8, length)?; +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = mem_oper; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            instruction.operands[3] = OperandSpec::ImmU8; +            instruction.mem_size = 8; +            instruction.operand_count = 4; +            Ok(()) +        }          VEXOperandCode::Ev_G_xmm_imm8 => {              if instruction.vex_reg.num != 0 {                  instruction.opcode = Opcode::Invalid; @@ -432,11 +630,85 @@ fn 
read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              instruction.operand_count = 2;              Ok(())          } -        _op @ VEXOperandCode::E_G_xmm | -        _op @ VEXOperandCode::U_G_xmm | -        _op @ VEXOperandCode::M_G_xmm | -        _op @ VEXOperandCode::E_G_xmm_imm8 | -        _op @ VEXOperandCode::U_G_xmm_imm8 => { +        VEXOperandCode::VCVT_Gd_Ed_xmm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            if let OperandSpec::RegMMM = mem_oper { +                instruction.modrm_mmm.bank = RegisterBank::X; +            } else { +                instruction.mem_size = 4; +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        VEXOperandCode::VCVT_Gd_Eq_xmm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            if let OperandSpec::RegMMM = mem_oper { +                instruction.modrm_mmm.bank = RegisterBank::X; +            } else { +                instruction.mem_size = 8; +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            
instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        VEXOperandCode::VCVT_Gq_Ed_xmm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::Q); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            if let OperandSpec::RegMMM = mem_oper { +                instruction.modrm_mmm.bank = RegisterBank::X; +            } else { +                instruction.mem_size = 4; +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        VEXOperandCode::VCVT_Gq_Eq_xmm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::Q); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            if let OperandSpec::RegMMM = mem_oper { +                instruction.modrm_mmm.bank = RegisterBank::X; +            } else { +                instruction.mem_size = 8; +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        op @ VEXOperandCode::E_G_xmm | +        op @ VEXOperandCode::U_G_xmm | +        op @ VEXOperandCode::M_G_xmm => {              if 
instruction.vex_reg.num != 0 {                  instruction.opcode = Opcode::Invalid;                  return Err(DecodeError::InvalidOperand); @@ -445,11 +717,93 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X);              let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            match (op, mem_oper) { +                (VEXOperandCode::E_G_xmm, OperandSpec::RegMMM) => { +                    /* this is the only accepted operand */ +                } +                (VEXOperandCode::U_G_xmm, _) | +                (VEXOperandCode::M_G_xmm, OperandSpec::RegMMM) => { +                    return Err(DecodeError::InvalidOperand); +                } +                (VEXOperandCode::M_G_xmm, _) | // otherwise it's memory-constrained and a memory operand +                (_, _) => {                    // ... 
or unconstrained +                    /* and this is always accepted */ +                } +            }              instruction.operands[0] = mem_oper;              instruction.operands[1] = OperandSpec::RegRRR;              instruction.operand_count = 2;              Ok(())          } +        VEXOperandCode::Ud_G_xmm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            if mem_oper != OperandSpec::RegMMM { +                return Err(DecodeError::InvalidOperand); +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        VEXOperandCode::Ud_G_ymm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E_ymm(bytes, instruction, modrm, length)?; +            if mem_oper != OperandSpec::RegMMM { +                return Err(DecodeError::InvalidOperand); +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        VEXOperandCode::Ud_G_xmm_imm8 => { +            if instruction.vex_reg.num != 0 { +                
instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            if mem_oper != OperandSpec::RegMMM { +                return Err(DecodeError::InvalidOperand); +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.operand_count = 3; +            Ok(()) +        } +        VEXOperandCode::E_G_xmm_imm8 => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            instruction.operands[0] = mem_oper; +            instruction.operands[1] = OperandSpec::RegRRR; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.operand_count = 3; +            Ok(()) +        }          _op @ VEXOperandCode::E_xmm_G_ymm_imm8 => {              if instruction.vex_reg.num != 0 {                  instruction.opcode = Opcode::Invalid; @@ -467,16 +821,63 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              Ok(())          } -        _op @ VEXOperandCode::G_M_xmm | -        _op @ VEXOperandCode::G_U_xmm | -        
_op @ VEXOperandCode::G_E_xmm | -        _op @ VEXOperandCode::G_E_xmm_imm8 => { +        VEXOperandCode::Gd_U_xmm => {              if instruction.vex_reg.num != 0 {                  instruction.opcode = Opcode::Invalid;                  return Err(DecodeError::InvalidOperand);              }              let modrm = read_modrm(bytes, length)?;              instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            if mem_oper != OperandSpec::RegMMM { +                return Err(DecodeError::InvalidOperand); +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } +        VEXOperandCode::Gd_U_ymm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::D); +            let mem_oper = read_E_ymm(bytes, instruction, modrm, length)?; +            if mem_oper != OperandSpec::RegMMM { +                return Err(DecodeError::InvalidOperand); +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.operand_count = 2; +            Ok(()) +        } + +        op @ VEXOperandCode::G_M_xmm | +        op @ VEXOperandCode::G_U_xmm | +        op @ VEXOperandCode::G_E_xmm => { +            if instruction.vex_reg.num != 0 { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = 
read_modrm(bytes, length)?; +            match (op, modrm & 0xc0) { +                (VEXOperandCode::G_U_xmm, 0xc0) => { +                    /* this is the only accepted operand */ +                } +                (VEXOperandCode::G_U_xmm, _) | +                (VEXOperandCode::G_M_xmm, 0xc0) => { +                    return Err(DecodeError::InvalidOperand); +                } +                (VEXOperandCode::G_M_xmm, _) | // otherwise it's memory-constrained and a memory operand +                (_, _) => {                    // ... or unconstrained +                    /* and this is always accepted */ +                } +            } +            instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X);              let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?;              instruction.operands[0] = OperandSpec::RegRRR; @@ -541,14 +942,27 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              Ok(())          } -        _op @ VEXOperandCode::E_G_ymm | -        _op @ VEXOperandCode::U_G_ymm | -        _op @ VEXOperandCode::M_G_ymm => { +        op @ VEXOperandCode::E_G_ymm | +        op @ VEXOperandCode::U_G_ymm | +        op @ VEXOperandCode::M_G_ymm => {              if instruction.vex_reg.num != 0 {                  instruction.opcode = Opcode::Invalid;                  return Err(DecodeError::InvalidOperand);              }              let modrm = read_modrm(bytes, length)?; +            match (op, modrm & 0xc0) { +                (VEXOperandCode::U_G_ymm, 0xc0) => { +                    /* this is the only accepted operand */ +                } +                (VEXOperandCode::U_G_ymm, _) | +                (VEXOperandCode::M_G_ymm, 0xc0) => { +                    return Err(DecodeError::InvalidOperand); +                } +                (VEXOperandCode::M_G_ymm, _) | // otherwise it's memory-constrained and a memory 
operand +                (_, _) => {                    // ... or unconstrained +                    /* and this is always accepted */ +                } +            }              instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::Y);              let mem_oper = read_E_ymm(bytes, instruction, modrm, length)?; @@ -558,14 +972,27 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              Ok(())          } -        _op @ VEXOperandCode::G_M_ymm | -        _op @ VEXOperandCode::G_U_ymm | -        _op @ VEXOperandCode::G_E_ymm => { +        op @ VEXOperandCode::G_M_ymm | +        op @ VEXOperandCode::G_U_ymm | +        op @ VEXOperandCode::G_E_ymm => {              if instruction.vex_reg.num != 0 {                  instruction.opcode = Opcode::Invalid;                  return Err(DecodeError::InvalidOperand);              }              let modrm = read_modrm(bytes, length)?; +            match (op, modrm & 0xc0) { +                (VEXOperandCode::G_U_ymm, 0xc0) => { +                    /* this is the only accepted operand */ +                } +                (VEXOperandCode::G_U_ymm, _) | +                (VEXOperandCode::G_M_ymm, 0xc0) => { +                    return Err(DecodeError::InvalidOperand); +                } +                (VEXOperandCode::G_M_ymm, _) | // otherwise it's memory-constrained and a memory operand +                (_, _) => {                    // ... 
or unconstrained +                    /* and this is always accepted */ +                } +            }              instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::Y);              let mem_oper = read_E_ymm(bytes, instruction, modrm, length)?; @@ -614,12 +1041,48 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              instruction.operand_count = 3;              Ok(())          } -        _op @ VEXOperandCode::G_V_M_xmm | -        _op @ VEXOperandCode::G_V_E_xmm => { +        VEXOperandCode::G_V_M_xmm => {              let modrm = read_modrm(bytes, length)?;              instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X);              let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            if mem_oper == OperandSpec::RegMMM { +                instruction.opcode = Opcode::Invalid; +                return Err(DecodeError::InvalidOpcode); +            } +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = mem_oper; +            instruction.operand_count = 3; +            Ok(()) +        } +        VEXOperandCode::G_V_E_xmm => { +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = mem_oper; +            instruction.operand_count = 3; +            Ok(()) +        } +        VEXOperandCode::G_V_xmm_Ed => { +            let modrm = read_modrm(bytes, length)?; +            
instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = OperandSpec::RegVex; +            instruction.operands[2] = mem_oper; +            instruction.operand_count = 3; +            Ok(()) +        } +        VEXOperandCode::G_V_xmm_Eq => { +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E(bytes, instruction, modrm, 8, length)?;              instruction.operands[0] = OperandSpec::RegRRR;              instruction.operands[1] = OperandSpec::RegVex;              instruction.operands[2] = mem_oper; @@ -786,7 +1249,47 @@ fn read_vex_operands<T: Iterator<Item=u8>>(bytes: &mut T, instruction: &mut Inst              instruction.vex_reg.bank = bank;              Ok(())          } +        VEXOperandCode::MXCSR => { +            let modrm = read_modrm(bytes, length)?; +            instruction.opcode = match (modrm >> 3) & 7 { +                2 => { +                    Opcode::VLDMXCSR +                } +                3 => { +                    Opcode::VSTMXCSR +                } +                _ => { +                    instruction.opcode = Opcode::Invalid; +                    return Err(DecodeError::InvalidOpcode); +                } +            }; +            let mem_oper = read_E(bytes, instruction, modrm, 4, length)?; +            if let OperandSpec::RegMMM = mem_oper { +                return Err(DecodeError::InvalidOperand); +            } +            instruction.operands[0] = mem_oper; +            instruction.operand_count = 1; +            Ok(()) +        } +        VEXOperandCode::G_E_xmm_imm8 => { +            if 
instruction.vex_reg.num != 0 { +                return Err(DecodeError::InvalidOperand); +            } +            let modrm = read_modrm(bytes, length)?; +            instruction.modrm_rrr = +                RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::X); +            let mem_oper = read_E_xmm(bytes, instruction, modrm, length)?; +            instruction.operands[0] = OperandSpec::RegRRR; +            instruction.operands[1] = mem_oper; +            instruction.imm = read_imm_unsigned(bytes, 1, length)?; +            instruction.operands[2] = OperandSpec::ImmU8; +            instruction.operand_count = 3; +            Ok(()) +        }          VEXOperandCode::G_E_ymm_imm8 => { +            if instruction.vex_reg.num != 0 { +                return Err(DecodeError::InvalidOperand); +            }              let modrm = read_modrm(bytes, length)?;              instruction.modrm_rrr =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex().r(), RegisterBank::Y); @@ -877,9 +1380,12 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      match opc {                          0x10 => (Opcode::VMOVUPS, if L { VEXOperandCode::G_E_ymm } else { VEXOperandCode::G_E_xmm }),                          0x11 => (Opcode::VMOVUPS, if L { VEXOperandCode::E_G_ymm } else { VEXOperandCode::E_G_xmm }), -                        // ugh -//                        0x12 => (Opcode::VMOVHLPS, ..), -//                        0x12 => (Opcode::VMOVLPS, ..), +                        0x12 => (Opcode::Invalid, if L { +                            instruction.opcode = Opcode::Invalid; +                            return Err(DecodeError::InvalidOpcode); +                        } else { +                            VEXOperandCode::VMOVLPS_12 +                        }),                          0x13 => (Opcode::VMOVLPS, if L {                              instruction.opcode = 
Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode); @@ -896,9 +1402,12 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          } else {                              VEXOperandCode::G_V_E_xmm                          }), -                        // ugh -//                        0x16 => (Opcode::VMOVHPS, ..), -//                        0x16 => (Opcode::VMOVLHPS, ..), +                        0x16 => (Opcode::Invalid, if L { +                            instruction.opcode = Opcode::Invalid; +                            return Err(DecodeError::InvalidOpcode); +                        } else { +                            VEXOperandCode::VMOVHPS_16 +                        }),                          0x17 => (Opcode::VMOVHPS, if L {                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode); @@ -923,9 +1432,9 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          0x2e => (Opcode::VUCOMISS, VEXOperandCode::G_E_xmm),                          0x2f => (Opcode::VCOMISS, VEXOperandCode::G_E_xmm),                          0x50 => (Opcode::VMOVMSKPS, if L { -                            VEXOperandCode::U_G_ymm +                            VEXOperandCode::Ud_G_ymm                          } else { -                            VEXOperandCode::U_G_xmm +                            VEXOperandCode::Ud_G_xmm                          }),                          0x51 => (Opcode::VSQRTPS, if L {                              VEXOperandCode::G_E_ymm @@ -973,7 +1482,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              VEXOperandCode::G_V_E_xmm                          }),                          0x5A => (Opcode::VCVTPS2PD, if L { -                            VEXOperandCode::G_E_ymm +                            
VEXOperandCode::G_ymm_E_xmm                          } else {                              VEXOperandCode::G_E_xmm                          }), @@ -1003,6 +1512,11 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              VEXOperandCode::G_V_E_xmm                          }),                          0x77 => (Opcode::VZEROUPPER, VEXOperandCode::Nothing), +                        0xAE => (Opcode::Invalid, if L { +                            return Err(DecodeError::InvalidOpcode); +                        } else { +                            VEXOperandCode::MXCSR +                        }),                          0xC2 => (Opcode::VCMPPS, if L {                              VEXOperandCode::G_V_E_ymm_imm8                          } else { @@ -1055,12 +1569,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          } else {                              VEXOperandCode::G_V_E_xmm                          }), -                        0x16 => (Opcode::VMOVHPD, if L { -                            instruction.opcode = Opcode::Invalid; -                            return Err(DecodeError::InvalidOpcode); -                        } else { -                            VEXOperandCode::G_V_M_xmm -                        }), +                        0x16 => (Opcode::VMOVHPD, VEXOperandCode::G_V_M_xmm),                          0x17 => (Opcode::VMOVHPD, if L {                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode); @@ -1085,9 +1594,9 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          0x2e => (Opcode::VUCOMISD, VEXOperandCode::G_E_xmm),                          0x2f => (Opcode::VCOMISD, VEXOperandCode::G_E_xmm),                          0x50 => (Opcode::VMOVMSKPD, if L { -                            VEXOperandCode::G_U_ymm +                 
           VEXOperandCode::Gd_U_ymm                          } else { -                            VEXOperandCode::G_U_xmm +                            VEXOperandCode::Gd_U_xmm                          }),                          0x51 => (Opcode::VSQRTPD, if L {                              VEXOperandCode::G_E_ymm @@ -1310,9 +1819,9 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              VEXOperandCode::E_G_xmm                          }),                          0xC2 => (Opcode::VCMPPD, if L { -                            VEXOperandCode::G_V_E_ymm +                            VEXOperandCode::G_V_E_ymm_imm8                          } else { -                            VEXOperandCode::G_V_E_xmm +                            VEXOperandCode::G_V_E_xmm_imm8                          }),                          0xC4 => (Opcode::VPINSRW, if L {                              instruction.opcode = Opcode::Invalid; @@ -1324,12 +1833,12 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode);                          } else { -                            VEXOperandCode::U_G_xmm_imm8 +                            VEXOperandCode::Ud_G_xmm_imm8                          }),                          0xC6 => (Opcode::VSHUFPD, if L { -                            VEXOperandCode::G_V_E_ymm +                            VEXOperandCode::G_V_E_ymm_imm8                          } else { -                            VEXOperandCode::G_V_E_xmm +                            VEXOperandCode::G_V_E_xmm_imm8                          }),                          0xD0 => (Opcode::VADDSUBPD, if L {                              VEXOperandCode::G_V_E_ymm @@ -1337,17 +1846,17 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              
VEXOperandCode::G_V_E_xmm                          }),                          0xD1 => (Opcode::VPSRLW, if L { -                            VEXOperandCode::G_V_E_ymm +                            VEXOperandCode::G_V_ymm_E_xmm                          } else {                              VEXOperandCode::G_V_E_xmm                          }),                          0xD2 => (Opcode::VPSRLD, if L { -                            VEXOperandCode::G_V_E_ymm +                            VEXOperandCode::G_V_ymm_E_xmm                          } else {                              VEXOperandCode::G_V_E_xmm                          }),                          0xD3 => (Opcode::VPSRLQ, if L { -                            VEXOperandCode::G_V_E_ymm +                            VEXOperandCode::G_V_ymm_E_xmm                          } else {                              VEXOperandCode::G_V_E_xmm                          }), @@ -1368,9 +1877,9 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              VEXOperandCode::G_E_xmm                          }),                          0xD7 => (Opcode::VPMOVMSKB, if L { -                            VEXOperandCode::U_G_ymm +                            VEXOperandCode::Ud_G_ymm                          } else { -                            VEXOperandCode::U_G_xmm +                            VEXOperandCode::Ud_G_xmm                          }),                          0xD8 => (Opcode::VPSUBUSB, if L {                              VEXOperandCode::G_V_E_ymm @@ -1382,6 +1891,11 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          } else {                              VEXOperandCode::G_V_E_xmm                          }), +                        0xDA => (Opcode::VPMINSW, if L { +                            VEXOperandCode::G_V_E_ymm +                        } else { +                            VEXOperandCode::G_V_E_xmm +                 
       }),                          0xDB => (Opcode::VPAND, if L {                              VEXOperandCode::G_V_E_ymm                          } else { @@ -1521,7 +2035,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode);                          } else { -                            VEXOperandCode::G_E_xmm +                            VEXOperandCode::G_U_xmm                          }),                          0xF8 => (Opcode::VPSUBB, if L {                              VEXOperandCode::G_V_E_ymm @@ -1574,24 +2088,24 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              VEXOperandCode::G_E_xmm                          }),                          0x2a => (Opcode::VCVTSI2SD, if instruction.prefixes.vex().w() { -                            VEXOperandCode::G_V_E_xmm // 64-bit last operand +                            VEXOperandCode::G_V_xmm_Eq // 64-bit last operand                          } else { -                            VEXOperandCode::G_V_E_xmm // 32-bit last operand +                            VEXOperandCode::G_V_xmm_Ed // 32-bit last operand                          }),                          0x2c => (Opcode::VCVTTSD2SI, if instruction.prefixes.vex().w() { -                            VEXOperandCode::G_E_xmm // 64-bit +                            VEXOperandCode::VCVT_Gq_Eq_xmm // 64-bit                          } else { -                            VEXOperandCode::G_E_xmm // 32-bit +                            VEXOperandCode::VCVT_Gd_Ed_xmm // 32-bit                          }),                          0x2d => (Opcode::VCVTSD2SI, if instruction.prefixes.vex().w() { -                            VEXOperandCode::G_E_xmm // 64-bit +                            VEXOperandCode::VCVT_Gq_Eq_xmm // 64-bit                          } 
else { -                            VEXOperandCode::G_E_xmm // 32-bit +                            VEXOperandCode::VCVT_Gd_Ed_xmm // 32-bit                          }),                          0x51 => (Opcode::VSQRTSD, VEXOperandCode::G_V_E_xmm),                          0x58 => (Opcode::VADDSD, VEXOperandCode::G_V_E_xmm),                          0x59 => (Opcode::VMULSD, VEXOperandCode::G_V_E_xmm), -                        0x5a => (Opcode::CVTSD2SS, VEXOperandCode::G_V_E_xmm), +                        0x5a => (Opcode::VCVTSD2SS, VEXOperandCode::G_V_E_xmm),                          0x5c => (Opcode::VSUBSD, VEXOperandCode::G_V_E_xmm),                          0x5d => (Opcode::VMINSD, VEXOperandCode::G_V_E_xmm),                          0x5e => (Opcode::VDIVSD, VEXOperandCode::G_V_E_xmm), @@ -1625,7 +2139,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          0xf0 => (Opcode::VLDDQU, if L {                              VEXOperandCode::G_M_ymm                          } else { -                            VEXOperandCode::G_M_ymm +                            VEXOperandCode::G_M_xmm                          }),                          _ => {                              instruction.opcode = Opcode::Invalid; @@ -1640,19 +2154,19 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          0x12 => (Opcode::VMOVSLDUP, if L { VEXOperandCode::G_E_ymm } else { VEXOperandCode::G_E_xmm }),                          0x16 => (Opcode::VMOVSHDUP, if L { VEXOperandCode::G_E_ymm } else { VEXOperandCode::G_E_xmm }),                          0x2a => (Opcode::VCVTSI2SS, if instruction.prefixes.vex().w() { -                            VEXOperandCode::G_V_E_xmm // 64-bit last operand +                            VEXOperandCode::G_V_xmm_Eq                          } else { -                            VEXOperandCode::G_V_E_xmm // 32-bit last operand +                            
VEXOperandCode::G_V_xmm_Ed                          }),                          0x2c => (Opcode::VCVTTSS2SI, if instruction.prefixes.vex().w() { -                            VEXOperandCode::G_E_xmm // 64-bit +                            VEXOperandCode::VCVT_Gq_Eq_xmm                          } else { -                            VEXOperandCode::G_E_xmm // 32-bit +                            VEXOperandCode::VCVT_Gd_Ed_xmm                          }), -                        0x2d => (Opcode::VCVTSD2SI, if instruction.prefixes.vex().w() { -                            VEXOperandCode::G_E_xmm // 64-bit +                        0x2d => (Opcode::VCVTSS2SI, if instruction.prefixes.vex().w() { +                            VEXOperandCode::VCVT_Gq_Eq_xmm                          } else { -                            VEXOperandCode::G_E_xmm // 32-bit +                            VEXOperandCode::VCVT_Gd_Ed_xmm                          }),                          0x51 => (Opcode::VSQRTSS, VEXOperandCode::G_V_E_xmm),                          0x52 => (Opcode::VRSQRTSS, VEXOperandCode::G_V_E_xmm), @@ -1666,7 +2180,11 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          0x5e => (Opcode::VDIVSS, VEXOperandCode::G_V_E_xmm),                          0x5f => (Opcode::VMAXSS, VEXOperandCode::G_V_E_xmm),                          0x6f => (Opcode::VMOVDQU, if L { VEXOperandCode::G_E_ymm } else { VEXOperandCode::G_E_xmm }), -//                        0x70 => (Opcode::VMOVSHDUP, if L { VEXOperandCode::G_E_ymm_imm8 } else { VEXOperandCode::G_E_xmm_imm8 }), +                        0x70 => (Opcode::VPSHUFHW, if L { +                            VEXOperandCode::G_E_ymm_imm8 +                        } else { +                            VEXOperandCode::G_E_xmm_imm8 +                        }),                          0x7e => (Opcode::VMOVQ, if L { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); } else { 
VEXOperandCode::G_E_xmm }),                          0x7f => (Opcode::VMOVDQU, if L { VEXOperandCode::E_G_ymm } else { VEXOperandCode::E_G_xmm }),                          0xc2 => (Opcode::VCMPSS, VEXOperandCode::G_V_E_xmm_imm8), @@ -1704,7 +2222,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      } else {                          VEXOperandCode::G_V_E_xmm                      }), -                    0x04 => (Opcode::VPHADDUBSW, if L { +                    0x04 => (Opcode::VPMADDUBSW, if L {                          VEXOperandCode::G_V_E_ymm                      } else {                          VEXOperandCode::G_V_E_xmm @@ -1780,16 +2298,26 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      } else {                          VEXOperandCode::G_E_xmm                      }), -                    0x18 => (Opcode::VBROADCASTSS, if L { -                        VEXOperandCode::G_E_ymm +                    0x18 => if instruction.prefixes.vex().w() { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::G_E_xmm -                    }), -                    0x19 => (Opcode::VBROADCASTSD, if L { -                        VEXOperandCode::G_E_ymm +                        (Opcode::VBROADCASTSS, if L { +                            VEXOperandCode::G_E_ymm +                        } else { +                            VEXOperandCode::G_E_xmm +                        }) +                    }, +                    0x19 => if instruction.prefixes.vex().w() { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::G_E_xmm -                    }), +                        (Opcode::VBROADCASTSD, 
if L { +                            VEXOperandCode::G_E_ymm +                        } else { +                            VEXOperandCode::G_E_xmm +                        }) +                    }                      0x1A => (Opcode::VBROADCASTF128, if L {                          VEXOperandCode::G_M_ymm                      } else { @@ -1812,32 +2340,32 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          VEXOperandCode::G_E_xmm                      }),                      0x20 => (Opcode::VPMOVSXBW, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x21 => (Opcode::VPMOVSXBD, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x22 => (Opcode::VPMOVSXBQ, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x23 => (Opcode::VPMOVSXWD, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x24 => (Opcode::VPMOVSXWQ, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x25 => (Opcode::VPMOVSXDQ, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                
      } else {                          VEXOperandCode::G_E_xmm                      }), @@ -1856,6 +2384,11 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      } else {                          VEXOperandCode::G_M_xmm                      }), +                    0x2B => (Opcode::VPACKUSDW, if L { +                        VEXOperandCode::G_V_E_ymm +                    } else { +                        VEXOperandCode::G_V_E_xmm +                    }),                      0x2C => (Opcode::VMASKMOVPS, if L {                          VEXOperandCode::G_V_M_ymm                      } else { @@ -1877,35 +2410,34 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          VEXOperandCode::M_V_G_xmm                      }),                      0x30 => (Opcode::VPMOVZXBW, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x31 => (Opcode::VPMOVZXBD, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x32 => (Opcode::VPMOVZXBQ, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x33 => (Opcode::VPMOVZXWD, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x34 => (Opcode::VPMOVZXWQ, if L { -                        VEXOperandCode::G_E_ymm +                 
       VEXOperandCode::G_ymm_E_xmm                      } else {                          VEXOperandCode::G_E_xmm                      }),                      0x35 => (Opcode::VPMOVZXDQ, if L { -                        VEXOperandCode::G_E_ymm +                        VEXOperandCode::G_ymm_E_xmm                      } else { -                        instruction.opcode = Opcode::Invalid; -                        return Err(DecodeError::InvalidOpcode); +                        VEXOperandCode::G_E_xmm                      }),                      0x36 => (Opcode::VPERMD, if L {                          VEXOperandCode::G_V_E_ymm @@ -1918,11 +2450,21 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      } else {                          VEXOperandCode::G_V_E_xmm                      }), +                    0x38 => (Opcode::VPMINSB, if L { +                        VEXOperandCode::G_V_E_ymm +                    } else { +                        VEXOperandCode::G_V_E_xmm +                    }),                      0x39 => (Opcode::VPMINSD, if L {                          VEXOperandCode::G_V_E_ymm                      } else {                          VEXOperandCode::G_V_E_xmm                      }), +                    0x3A => (Opcode::VPMINUW, if L { +                        VEXOperandCode::G_V_E_ymm +                    } else { +                        VEXOperandCode::G_V_E_xmm +                    }),                      0x3B => (Opcode::VPMINUD, if L {                          VEXOperandCode::G_V_E_ymm                      } else { @@ -2443,26 +2985,22 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          VEXOperandCode::G_E_xmm                      }),                      0xDC => (Opcode::VAESENC, if L { -                        instruction.opcode = Opcode::Invalid; -                        return Err(DecodeError::InvalidOpcode); +                        
VEXOperandCode::G_V_E_ymm                      } else {                          VEXOperandCode::G_V_E_xmm                      }),                      0xDD => (Opcode::VAESENCLAST, if L { -                        instruction.opcode = Opcode::Invalid; -                        return Err(DecodeError::InvalidOpcode); +                        VEXOperandCode::G_V_E_ymm                      } else {                          VEXOperandCode::G_V_E_xmm                      }),                      0xDE => (Opcode::VAESDEC, if L { -                        instruction.opcode = Opcode::Invalid; -                        return Err(DecodeError::InvalidOpcode); +                        VEXOperandCode::G_V_E_ymm                      } else {                          VEXOperandCode::G_V_E_xmm                      }),                      0xDF => (Opcode::VAESDECLAST, if L { -                        instruction.opcode = Opcode::Invalid; -                        return Err(DecodeError::InvalidOpcode); +                        VEXOperandCode::G_V_E_ymm                      } else {                          VEXOperandCode::G_V_E_xmm                      }), @@ -2576,17 +3114,17 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          VEXOperandCode::G_V_E_xmm                      }),                      0x04 => (Opcode::VPERMILPS, if L { -                        VEXOperandCode::G_V_E_ymm +                        VEXOperandCode::G_E_ymm_imm8                      } else { -                        VEXOperandCode::G_V_E_xmm +                        VEXOperandCode::G_E_xmm_imm8                      }),                      0x05 => (Opcode::VPERMILPD, if L { -                        VEXOperandCode::G_V_E_ymm +                        VEXOperandCode::G_E_ymm_imm8                      } else { -                        VEXOperandCode::G_V_E_xmm +                        VEXOperandCode::G_E_xmm_imm8                      }),                     
 0x06 => (Opcode::VPERM2F128, if L { -                        VEXOperandCode::G_V_E_ymm +                        VEXOperandCode::G_V_E_ymm_imm8                      } else {                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode); @@ -2601,6 +3139,16 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      } else {                          VEXOperandCode::G_E_xmm_imm8                      }), +                    0x0A => (Opcode::VROUNDSS, if L { +                        VEXOperandCode::G_V_E_ymm_imm8 +                    } else { +                        VEXOperandCode::G_V_E_xmm_imm8 +                    }), +                    0x0B => (Opcode::VROUNDSD, if L { +                        VEXOperandCode::G_V_E_ymm_imm8 +                    } else { +                        VEXOperandCode::G_V_E_xmm_imm8 +                    }),                      0x0C => (Opcode::VBLENDPS, if L {                          VEXOperandCode::G_V_E_ymm_imm8                      } else { @@ -2625,20 +3173,20 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::Ev_G_xmm_imm8 +                        VEXOperandCode::Eb_G_xmm_imm8                      }),                      0x15 => (Opcode::VPEXTRW, if L || instruction.prefixes.vex().w() {                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::Ev_G_xmm_imm8 +                        VEXOperandCode::Ew_G_xmm_imm8                      }),                      0x16 => if instruction.prefixes.vex().w() {                          (Opcode::VPEXTRQ, if L {       
                       instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode);                          } else { -                            VEXOperandCode::G_E_ymm_imm8 +                            VEXOperandCode::Eq_G_xmm_imm8                          })                      } else {                          (Opcode::VPEXTRD, if L { @@ -2646,27 +3194,37 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              return Err(DecodeError::InvalidOpcode);                          } else {                              // varies on W -                            VEXOperandCode::Ev_G_xmm_imm8 +                            VEXOperandCode::Ed_G_xmm_imm8                          })                      },                      0x17 => (Opcode::VEXTRACTPS, if L {                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::G_E_ymm_imm8 +                        VEXOperandCode::Ed_G_xmm_imm8                      }), -                    0x18 => (Opcode::VINSERTF128, if L { -                        VEXOperandCode::G_V_E_ymm_imm8 -                    } else { +                    0x18 => if instruction.prefixes.vex().w() {                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode); -                    }), -                    0x19 => (Opcode::VEXTRACTF128, if L { -                        VEXOperandCode::E_xmm_G_ymm_imm8                      } else { +                        (Opcode::VINSERTF128, if L { +                            VEXOperandCode::G_V_E_ymm_imm8 +                        } else { +                            instruction.opcode = Opcode::Invalid; +                            return Err(DecodeError::InvalidOpcode); +                        }) +                  
  }, +                    0x19 => if instruction.prefixes.vex().w() {                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode); -                    }), +                    } else { +                        (Opcode::VEXTRACTF128, if L { +                            VEXOperandCode::E_xmm_G_ymm_imm8 +                        } else { +                            instruction.opcode = Opcode::Invalid; +                            return Err(DecodeError::InvalidOpcode); +                        }) +                    },                      0x1D => (Opcode::VCVTPS2PH, if L {                          VEXOperandCode::E_xmm_G_ymm_imm8                      } else { @@ -2676,7 +3234,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                          instruction.opcode = Opcode::Invalid;                          return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::G_V_E_xmm_imm8 +                        VEXOperandCode::G_V_xmm_Eb_imm8                      }),                      0x21 => (Opcode::VINSERTPS, if L {                          instruction.opcode = Opcode::Invalid; @@ -2689,14 +3247,14 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode);                          } else { -                            VEXOperandCode::G_V_E_xmm_imm8 +                            VEXOperandCode::G_V_xmm_Eq_imm8                          })                      } else {                          (Opcode::VPINSRD, if L {                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode);                          } else { -                            VEXOperandCode::G_V_E_xmm_imm8 +           
                 VEXOperandCode::G_V_xmm_Ed_imm8                          })                      },                      0x38 => (Opcode::VINSERTI128, if L { @@ -2749,10 +3307,27 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &                      } else {                          VEXOperandCode::G_V_E_xmm_xmm4                      }), -                    0x4C => (Opcode::VPBLENDVB, if L { -                        VEXOperandCode::G_V_E_ymm_ymm4 +                    0x4C => if instruction.prefixes.vex().w() { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOpcode);                      } else { -                        VEXOperandCode::G_V_E_xmm_xmm4 +                        (Opcode::VPBLENDVB, if L { +                            VEXOperandCode::G_V_E_ymm_ymm4 +                        } else { +                            VEXOperandCode::G_V_E_xmm_xmm4 +                        }) +                    }, +                    0x60 => (Opcode::VPCMPESTRM, if L { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOpcode); +                    } else { +                        VEXOperandCode::G_E_xmm_imm8 +                    }), +                    0x61 => (Opcode::VPCMPESTRI, if L { +                        instruction.opcode = Opcode::Invalid; +                        return Err(DecodeError::InvalidOpcode); +                    } else { +                        VEXOperandCode::G_E_xmm_imm8                      }),                      0x62 => (Opcode::VPCMPISTRM, if L {                          instruction.opcode = Opcode::Invalid; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 1895f6e..c6b895c 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -1401,6 +1401,7 @@ fn test_misc() {  fn evex() {      test_display(&[0x62, 0xf2, 0x7d, 0x48, 0x2a, 0x44, 0x40, 0x01], 
"vmovntdqa zmm0, [rax + rax*2 + 0x40]");      test_display(&[0x62, 0xf2, 0x7d, 0x08, 0x2a, 0x44, 0x40, 0x01], "vmovntdqa xmm0, [rax + rax*2 + 0x10]"); +    test_display(&[0x62, 0xf2, 0x7d, 0x1d, 0x66, 0x50, 0x01, 0x11], "vfpclassps");  }  #[test] @@ -1411,11 +1412,985 @@ fn test_vex() {          test_invalid_under(&InstDecoder::minimal(), bytes);      } +    fn test_avx2(bytes: &[u8], text: &'static str) { +        test_display_under(&InstDecoder::minimal().with_avx().with_avx2(), bytes, text); +        test_display_under(&InstDecoder::default(), bytes, text); +        test_invalid_under(&InstDecoder::minimal(), bytes); +    } + +    fn test_instr_vex_aesni(bytes: &[u8], text: &'static str) { +        test_display_under(&InstDecoder::minimal().with_avx().with_aesni(), bytes, text); +        test_display_under(&InstDecoder::default(), bytes, text); +        test_invalid_under(&InstDecoder::minimal(), bytes); +    } +      fn test_instr_invalid(bytes: &[u8]) {          test_invalid_under(&InstDecoder::minimal().with_avx(), bytes);          test_invalid_under(&InstDecoder::default(), bytes);      } +    // prefix 03 +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x04, 0b11_001_010, 0x77], "vpermilps xmm9, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x04, 0b11_001_010, 0x77], "vpermilps ymm9, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x05, 0b11_001_010, 0x77], "vpermilpd xmm9, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x05, 0b11_001_010, 0x77], "vpermilpd ymm9, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_001, 0x06, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x06, 0b11_001_010, 0x77], "vperm2f128 ymm9, ymm0, ymm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x0c, 0b11_001_010, 0x77], "vblendps xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x0c, 0b11_001_010, 0x77], "vblendps ymm9, ymm8, ymm10, 
0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x0d, 0b11_001_010, 0x77], "vblendpd xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x0d, 0b11_001_010, 0x77], "vblendpd ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x0e, 0b11_001_010, 0x77], "vpblendw xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x0e, 0b11_001_010, 0x77], "vpblendw ymm9, ymm8, ymm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x08, 0b11_001_010, 0x77], "vroundps xmm9, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x08, 0b11_001_010, 0x77], "vroundps ymm9, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x08, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x08, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x09, 0b11_001_010, 0x77], "vroundpd xmm9, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x09, 0b11_001_010, 0x77], "vroundpd ymm9, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x09, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x09, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x0a, 0b11_001_010, 0x77], "vroundss xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x0a, 0b11_001_010, 0x77], "vroundss ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x0b, 0b11_001_010, 0x77], "vroundsd xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x0b, 0b11_001_010, 0x77], "vroundsd ymm9, ymm8, ymm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00011, 0b1_0111_001, 0x0f, 0b11_001_010, 0x77], "vpalignr xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b1_0111_101, 0x0f, 0b11_001_010, 0x77], "vpalignr ymm9, ymm8, ymm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x14, 0b11_001_010, 
0x77], "vpextrb r10d, xmm9, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x14, 0b00_001_010, 0x77], "vpextrb [r10], xmm9, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x14, 0b00_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x14, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x15, 0b11_001_010, 0x77], "vpextrw r10d, xmm9, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x15, 0b00_001_010, 0x77], "vpextrw [r10], xmm9, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x15, 0b00_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x15, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x16, 0b11_001_010, 0x77], "vpextrd r10d, xmm9, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x16, 0b00_001_010, 0x77], "vpextrd [r10], xmm9, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x16, 0b00_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x16, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b1_1111_001, 0x16, 0b11_001_010, 0x77], "vpextrq r10, xmm9, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_001, 0x16, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b1_1111_001, 0x16, 0b00_001_010, 0x77], "vpextrq [r10], xmm9, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x17, 0b11_001_010, 0x77], "vextractps r10d, xmm9, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x17, 0b00_001_010, 0x77], "vextractps [r10], xmm9, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x17, 0b00_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x17, 0b00_001_010, 0x77]); + +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_001, 0x18, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x18, 0b11_001_010, 0x77], "vinsertf128 ymm9, ymm8, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_101, 0x18, 
0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x19, 0b11_001_010, 0x77], "vextractf128 xmm10, ymm9, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_001, 0x19, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b1_1111_101, 0x19, 0b11_001_010, 0x77]); + +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x20, 0b11_001_010, 0x77], "vpinsrb xmm9, xmm8, r10d, 0x77"); +    // TODO: byte ptr +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x20, 0b00_001_010, 0x77], "vpinsrb xmm9, xmm8, [r10], 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x20, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x21, 0b11_001_010, 0x77], "vinsertps xmm9, xmm8, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x21, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x22, 0b11_001_010, 0x77], "vpinsrd xmm9, xmm8, r10d, 0x77"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x22, 0b00_001_010, 0x77], "vpinsrd xmm9, xmm8, [r10], 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x22, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b1_0111_001, 0x22, 0b11_001_010, 0x77], "vpinsrq xmm9, xmm8, r10, 0x77"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00011, 0b1_0111_001, 0x22, 0b00_001_010, 0x77], "vpinsrq xmm9, xmm8, [r10], 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_101, 0x22, 0b00_001_010, 0x77]); + +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x40, 0b11_001_010, 0x77], "vdpps xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x40, 0b11_001_010, 0x77], "vdpps ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x41, 0b11_001_010, 0x77], "vdppd xmm9, xmm8, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x41, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x42, 0b11_001_010, 0x77], "vmpsadbw 
xmm9, xmm8, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00011, 0b0_0111_101, 0x42, 0b11_001_010, 0x77], "vmpsadbw ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x4c, 0b11_001_010, 0x77], "vpblendvb xmm9, xmm8, xmm10, xmm7"); +    test_avx2(&[0xc4, 0b000_00011, 0b0_0111_101, 0x4c, 0b11_001_010, 0x77], "vpblendvb ymm9, ymm8, ymm10, ymm7"); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_001, 0x4c, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_101, 0x4c, 0b11_001_010, 0x77]); + +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x60, 0b11_001_010, 0x77], "vpcmpestrm xmm9, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x60, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x60, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x60, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x61, 0b11_001_010, 0x77], "vpcmpestri xmm9, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x61, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x61, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x61, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x62, 0b11_001_010, 0x77], "vpcmpistrm xmm9, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x62, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x62, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x62, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00011, 0b0_1111_001, 0x63, 0b11_001_010, 0x77], "vpcmpistri xmm9, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b0_1111_101, 0x63, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_001, 0x63, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x63, 0b11_001_010, 0x77]); + +    test_instr_vex_aesni(&[0xc4, 
0b000_00011, 0b1_1111_001, 0xdf, 0b11_001_010, 0x77], "vaeskeygenassist xmm9, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_001, 0xdf, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00011, 0b1_0111_101, 0xdf, 0b11_001_010, 0x77]); + +    // prefix 02 +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x00, 0b11_001_010], "vpshufb xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x00, 0b11_001_010], "vpshufb ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x01, 0b11_001_010], "vphaddw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x01, 0b11_001_010], "vphaddw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x02, 0b11_001_010], "vphaddd xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x02, 0b11_001_010], "vphaddd ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x03, 0b11_001_010], "vphaddsw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x03, 0b11_001_010], "vphaddsw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x04, 0b11_001_010], "vpmaddubsw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x04, 0b11_001_010], "vpmaddubsw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x05, 0b11_001_010], "vphsubw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x05, 0b11_001_010], "vphsubw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x06, 0b11_001_010], "vphsubd xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x06, 0b11_001_010], "vphsubd ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x07, 0b11_001_010], "vphsubsw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x07, 0b11_001_010], "vphsubsw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x08, 0b11_001_010], "vpsignb xmm9, 
xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x08, 0b11_001_010], "vpsignb ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x09, 0b11_001_010], "vpsignw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x09, 0b11_001_010], "vpsignw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x0a, 0b11_001_010], "vpsignd xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x0a, 0b11_001_010], "vpsignd ymm9, ymm0, ymm10"); + +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x0b, 0b11_001_010], "vpmulhrsw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x0b, 0b11_001_010], "vpmulhrsw ymm9, ymm0, ymm10"); + +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x0c, 0b11_001_010], "vpermilps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_101, 0x0c, 0b11_001_010], "vpermilps ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x0d, 0b11_001_010], "vpermilpd xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_101, 0x0d, 0b11_001_010], "vpermilpd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x0e, 0b11_001_010], "vtestps xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x0e, 0b11_001_010], "vtestps ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x0f, 0b11_001_010], "vtestpd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x0f, 0b11_001_010], "vtestpd ymm9, ymm10"); + +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x17, 0b11_001_010], "vptest xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x17, 0b11_001_010], "vptest ymm9, ymm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x17, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x17, 0b11_001_010]); + +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x18, 0b00_001_010], "vbroadcastss xmm9, 
[r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x18, 0b00_001_010], "vbroadcastss ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00010, 0b1_1111_001, 0x18, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x18, 0b00_001_010]); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x19, 0b00_001_010], "vbroadcastsd ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x19, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0x19, 0b00_001_010]); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x1a, 0b00_001_010], "vbroadcastf128 ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0x1a, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_001, 0x1a, 0b00_001_010]); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x18, 0b11_001_010], "vbroadcastss xmm9, xmm10"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x18, 0b00_001_010], "vbroadcastss ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_001, 0x18, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0x18, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x19, 0b11_001_010], "vbroadcastsd ymm9, ymm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x19, 0b00_001_010], "vbroadcastsd ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00010, 0b1_1111_101, 0x19, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0x19, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_001, 0x19, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_001, 0x1a, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0x1a, 0b11_001_010]); + + +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x1c, 0b11_001_010], "vpabsb xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x1c, 0b11_001_010], "vpabsb ymm9, 
ymm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x1c, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x1d, 0b11_001_010], "vpabsw xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x1d, 0b11_001_010], "vpabsw ymm9, ymm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x1d, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x1e, 0b11_001_010], "vpabsd xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x1e, 0b11_001_010], "vpabsd ymm9, ymm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x1e, 0b11_001_010]); + +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x20, 0b11_001_010], "vpmovsxbw xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x20, 0b11_001_010], "vpmovsxbw ymm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x20, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x21, 0b11_001_010], "vpmovsxbd xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x21, 0b11_001_010], "vpmovsxbd ymm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x21, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x22, 0b11_001_010], "vpmovsxbq xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x22, 0b11_001_010], "vpmovsxbq ymm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x22, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x23, 0b11_001_010], "vpmovsxwd xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x23, 0b11_001_010], "vpmovsxwd ymm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x23, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x24, 0b11_001_010], "vpmovsxwq xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x24, 0b11_001_010], "vpmovsxwq ymm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x24, 0b11_001_010]); +    test_instr(&[0xc4, 
0b000_00010, 0b0_1111_001, 0x25, 0b11_001_010], "vpmovsxdq xmm9, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x25, 0b11_001_010], "vpmovsxdq ymm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x25, 0b11_001_010]); + +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x28, 0b11_001_010], "vpmuldq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x28, 0b11_001_010], "vpmuldq ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x29, 0b11_001_010], "vpcmpeqq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x29, 0b11_001_010], "vpcmpeqq ymm9, ymm8, ymm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x2a, 0b11_001_010]); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x2a, 0b00_001_010], "vmovntdqa xmm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x2a, 0b00_001_010]); +    // TODO: ymmword ptr +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x2a, 0b00_001_010], "vmovntdqa ymm9, [r10]"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x2b, 0b11_001_010], "vpackusdw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x2b, 0b11_001_010], "vpackusdw ymm9, ymm8, ymm10"); +    // TODO: ymmword ptr +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x2b, 0b00_001_010], "vpackusdw ymm9, ymm8, [r10]"); + +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x30, 0b11_001_010], "vpmovzxbw xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x30, 0b11_001_010], "vpmovzxbw ymm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x31, 0b11_001_010], "vpmovzxbd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x31, 0b11_001_010], "vpmovzxbd ymm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x32, 0b11_001_010], "vpmovzxbq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x32, 0b11_001_010], "vpmovzxbq ymm9, xmm10"); +    
test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x33, 0b11_001_010], "vpmovzxwd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x33, 0b11_001_010], "vpmovzxwd ymm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x34, 0b11_001_010], "vpmovzxwq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x34, 0b11_001_010], "vpmovzxwq ymm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x35, 0b11_001_010], "vpmovzxdq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_101, 0x35, 0b11_001_010], "vpmovzxdq ymm9, xmm10"); + +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x30, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x30, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x31, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x31, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x32, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x32, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x33, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x33, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x34, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x34, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x35, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x35, 0b11_001_010]); + +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x37, 0b11_001_010], "vpcmpgtq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x37, 0b11_001_010], "vpcmpgtq ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x38, 0b11_001_010], "vpminsb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x38, 0b11_001_010], "vpminsb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x39, 0b11_001_010], "vpminsd xmm9, xmm8, xmm10"); +  
  test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x39, 0b11_001_010], "vpminsd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x3a, 0b11_001_010], "vpminuw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x3a, 0b11_001_010], "vpminuw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x3b, 0b11_001_010], "vpminud xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x3b, 0b11_001_010], "vpminud ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x3c, 0b11_001_010], "vpmaxsb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x3c, 0b11_001_010], "vpmaxsb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x3d, 0b11_001_010], "vpmaxsd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x3d, 0b11_001_010], "vpmaxsd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x3e, 0b11_001_010], "vpmaxuw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x3e, 0b11_001_010], "vpmaxuw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x3f, 0b11_001_010], "vpmaxud xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x3f, 0b11_001_010], "vpmaxud ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_0111_001, 0x40, 0b11_001_010], "vpmulld xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_0111_101, 0x40, 0b11_001_010], "vpmulld ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00010, 0b0_1111_001, 0x41, 0b11_001_010], "vphminposuw xmm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_001, 0x41, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0x41, 0b11_001_010]); +// TODO: should something be at opcode 42 here? 
+//    test_instr(&[0xc4, 0b000_00010, 0b1_0111_001, 0x42, 0b11_001_010], "vphminposuw xmm"); +//    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0x41, 0b11_001_010]); + +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b0_1111_001, 0xdb, 0b11_001_010], "vaesimc xmm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00010, 0b0_0111_101, 0xdb, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00010, 0b1_0111_101, 0xdb, 0b11_001_010]); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_001, 0xdc, 0b11_001_010], "vaesenc xmm9, xmm8, xmm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_101, 0xdc, 0b11_001_010], "vaesenc ymm9, ymm8, ymm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_001, 0xdd, 0b11_001_010], "vaesenclast xmm9, xmm8, xmm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_101, 0xdd, 0b11_001_010], "vaesenclast ymm9, ymm8, ymm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_001, 0xde, 0b11_001_010], "vaesdec xmm9, xmm8, xmm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_101, 0xde, 0b11_001_010], "vaesdec ymm9, ymm8, ymm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_001, 0xdf, 0b11_001_010], "vaesdeclast xmm9, xmm8, xmm10"); +    test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b1_0111_101, 0xdf, 0b11_001_010], "vaesdeclast ymm9, ymm8, ymm10"); + +    // prefix 01 +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0x10, 0b00_001_010], "vmovsd xmm9, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0x10, 0b00_001_010], "vmovsd xmm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_011, 0x10, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_111, 0x10, 0b00_001_010]); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x10, 0b00_001_010], "vmovupd xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x10, 0b00_001_010], "vmovupd ymm9, [r10]"); +    
// TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0x11, 0b00_001_010], "vmovsd [r10], xmm9"); +    // TODO: ... also qword ptr? +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0x11, 0b00_001_010], "vmovsd [r10], xmm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_011, 0x11, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_111, 0x11, 0b00_001_010]); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x10, 0b00_001_010], "vmovupd xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x10, 0b00_001_010], "vmovupd ymm9, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x10, 0b00_001_010], "vmovss xmm9, [r10]"); +    // TODO: ... also dword ptr? +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x10, 0b00_001_010], "vmovss xmm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_010, 0x10, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_110, 0x10, 0b00_001_010]); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x10, 0b00_001_010], "vmovups xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x10, 0b00_001_010], "vmovups ymm9, [r10]"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x11, 0b11_001_010], "vmovsd xmm10, xmm8, xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x11, 0b11_001_010], "vmovsd xmm10, xmm8, xmm9"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x11, 0b00_001_010], "vmovss [r10], xmm9"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x11, 0b00_001_010], "vmovss [r10], xmm9"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x11, 0b00_001_010], "vmovups [r10], xmm9"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x11, 0b00_001_010], "vmovups [r10], ymm9"); + +    // TODO: qword ptr +    
test_instr(&[0xc4, 0b000_00001, 0b0_1111_011, 0x12, 0b00_001_010], "vmovddup xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_111, 0x12, 0b00_001_010], "vmovddup ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_011, 0x12, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_111, 0x12, 0b00_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x12, 0b11_001_010], "vmovhlps xmm9, xmm8, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x12, 0b00_001_010], "vmovlps xmm9, xmm8, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_100, 0x12, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_111, 0x12, 0b11_001_010]); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x12, 0b00_001_010], "vmovsldup xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x12, 0b00_001_010], "vmovsldup ymm9, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x12, 0b00_001_010], "vmovsldup xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_110, 0x12, 0b00_001_010], "vmovsldup ymm9, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x12, 0b00_001_010], "vmovlpd xmm9, xmm8, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_101, 0x12, 0b00_001_010]); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x13, 0b00_001_010], "vmovlpd [r10], xmm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_001, 0x13, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_101, 0x13, 0b00_001_010]); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_000, 0x14, 0b11_001_010], "vunpcklps xmm9, xmm8, xmm10"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_100, 0x14, 0b11_001_010], "vunpcklps ymm9, ymm8, ymm10"); +    // TODO: xmmword ptr 
+    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x14, 0b11_001_010], "vunpcklpd xmm9, xmm8, xmm10"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_101, 0x14, 0b11_001_010], "vunpcklpd ymm9, ymm8, ymm10"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_000, 0x15, 0b11_001_010], "vunpckhps xmm9, xmm8, xmm10"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_100, 0x15, 0b11_001_010], "vunpckhps ymm9, ymm8, ymm10"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x15, 0b11_001_010], "vunpckhpd xmm9, xmm8, xmm10"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_101, 0x15, 0b11_001_010], "vunpckhpd ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x16, 0b11_001_010], "vmovshdup xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x16, 0b11_001_010], "vmovshdup ymm9, ymm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_010, 0x16, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_110, 0x16, 0b11_001_010]); + +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x16, 0b00_001_010], "vmovhps xmm9, xmm8, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_100, 0x16, 0b00_001_010]); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x16, 0b00_001_010], "vmovhpd xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x16, 0b00_001_010], "vmovhpd xmm9, xmm8, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_001, 0x16, 0b11_001_010]); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x17, 0b00_001_010], "vmovhps [r10], xmm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_100, 0x17, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_000, 0x17, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_100, 0x17, 0b00_001_010]); +    // TODO: qword 
ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x17, 0b00_001_010], "vmovhpd [r10], xmm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_001, 0x17, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0x17, 0b00_001_010]); + +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_000, 0x28, 0b11_001_010], "vmovaps xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x28, 0b11_001_010], "vmovaps ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_000, 0x29, 0b11_001_010], "vmovaps xmm10, xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x29, 0b11_001_010], "vmovaps ymm10, ymm9"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x28, 0b11_001_010], "vmovapd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x28, 0b11_001_010], "vmovapd ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x29, 0b11_001_010], "vmovapd xmm10, xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x29, 0b11_001_010], "vmovapd ymm10, ymm9"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x2a, 0b11_001_010], "vcvtsi2ss xmm9, xmm0, r10d"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x2a, 0b00_001_010], "vcvtsi2ss xmm9, xmm0, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x2a, 0b11_001_010], "vcvtsi2ss xmm9, xmm0, r10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x2a, 0b00_001_010], "vcvtsi2ss xmm9, xmm0, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x2a, 0b11_001_010], "vcvtsi2ss xmm9, xmm0, r10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_011, 0x2a, 0b11_001_010], "vcvtsi2sd xmm9, xmm0, r10d"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_111, 0x2a, 0b11_001_010], "vcvtsi2sd xmm9, xmm0, r10d"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0x2a, 0b11_001_010], "vcvtsi2sd xmm9, xmm0, r10"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_011, 0x2a, 0b00_001_010], "vcvtsi2sd 
xmm9, xmm0, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0x2a, 0b00_001_010], "vcvtsi2sd xmm9, xmm0, [r10]"); +    test_instr(&[0xc5, 0b0_1111_011, 0x2a, 0b11_001_010], "vcvtsi2sd xmm9, xmm0, edx"); +    test_instr(&[0xc5, 0b0_1111_111, 0x2a, 0b11_001_010], "vcvtsi2sd xmm9, xmm0, edx"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_000, 0x2b, 0b00_001_010], "vmovntps [r10], xmm9"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x2b, 0b00_001_010], "vmovntps [r10], ymm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_000, 0x2b, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_100, 0x2b, 0b11_001_010]); + +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x2b, 0b00_001_010], "vmovntpd [r10], xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x2b, 0b00_001_010], "vmovntpd [r10], ymm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_001, 0x2b, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0x2b, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x2c, 0b11_001_010], "vcvttss2si r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_110, 0x2c, 0b11_001_010], "vcvttss2si r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x2c, 0b11_001_010], "vcvttss2si r9, xmm10"); +    test_instr(&[0xc5, 0b0_1111_010, 0x2c, 0b11_001_010], "vcvttss2si r9d, xmm2"); +    // TODO: dword ptr +    test_instr(&[0xc5, 0b0_1111_010, 0x2c, 0b00_001_010], "vcvttss2si r9d, [rdx]"); +    test_instr(&[0xc5, 0b0_1111_110, 0x2c, 0b11_001_010], "vcvttss2si r9d, xmm2"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_011, 0x2c, 0b11_001_010], "vcvttsd2si r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_111, 0x2c, 0b11_001_010], "vcvttsd2si r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0x2c, 0b11_001_010], "vcvttsd2si r9, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 
0b1_1111_111, 0x2c, 0b00_001_010], "vcvttsd2si r9, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_111, 0x2c, 0b00_001_010], "vcvttsd2si r9d, [r10]"); +    test_instr(&[0xc5, 0b0_1111_011, 0x2c, 0b11_001_010], "vcvttsd2si r9d, xmm2"); +    test_instr(&[0xc5, 0b0_1111_111, 0x2c, 0b11_001_010], "vcvttsd2si r9d, xmm2"); +    // TODO: qword ptr +    test_instr(&[0xc5, 0b0_1111_111, 0x2c, 0b00_001_010], "vcvttsd2si r9d, [rdx]"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x2d, 0b11_001_010], "vcvtss2si r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_110, 0x2d, 0b11_001_010], "vcvtss2si r9d, xmm10"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_110, 0x2d, 0b00_001_010], "vcvtss2si r9d, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x2d, 0b00_001_010], "vcvtss2si r9, [r10]"); +    test_instr(&[0xc5, 0b0_1111_010, 0x2d, 0b11_001_010], "vcvtss2si r9d, xmm2"); +    test_instr(&[0xc5, 0b0_1111_110, 0x2d, 0b11_001_010], "vcvtss2si r9d, xmm2"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_011, 0x2d, 0b11_001_010], "vcvtsd2si r9d, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_011, 0x2d, 0b00_001_010], "vcvtsd2si r9d, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_111, 0x2d, 0b11_001_010], "vcvtsd2si r9d, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_111, 0x2d, 0b00_001_010], "vcvtsd2si r9d, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0x2d, 0b11_001_010], "vcvtsd2si r9, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0x2d, 0b00_001_010], "vcvtsd2si r9, [r10]"); +    test_instr(&[0xc5, 0b0_1111_011, 0x2d, 0b11_001_010], "vcvtsd2si r9d, xmm2"); +    test_instr(&[0xc5, 0b0_1111_111, 0x2d, 0b11_001_010], "vcvtsd2si r9d, xmm2"); + +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x2e, 0b00_001_010], "vucomisd xmm9, 
[r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x2e, 0b00_001_010], "vucomisd xmm9, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x2e, 0b11_001_010], "vucomisd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x2e, 0b11_001_010], "vucomisd xmm9, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x2f, 0b00_001_010], "vcomisd xmm9, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x2f, 0b00_001_010], "vcomisd xmm9, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x2f, 0b11_001_010], "vcomisd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x2f, 0b11_001_010], "vcomisd xmm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_001, 0x2e, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_101, 0x2e, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_001, 0x2e, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_101, 0x2e, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_001, 0x2f, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_101, 0x2f, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_001, 0x2f, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_0111_101, 0x2f, 0b11_001_010]); + +    test_instr(&[0xc5, 0b0_1111_000, 0x2e, 0b11_001_010], "vucomiss xmm9, xmm2"); +    // TODO: dword ptr +    test_instr(&[0xc5, 0b0_1111_100, 0x2e, 0b00_001_010], "vucomiss xmm9, [rdx]"); +    test_instr(&[0xc5, 0b0_1111_000, 0x2f, 0b11_001_010], "vcomiss xmm9, xmm2"); +    // TODO: dword ptr +    test_instr(&[0xc5, 0b0_1111_100, 0x2f, 0b00_001_010], "vcomiss xmm9, [rdx]"); +    test_invalid(&[0xc5, 0b0_1111_111, 0x2f, 0b11_001_010]); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x50, 0b11_001_010], "vmovmskps r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x50, 0b11_001_010], "vmovmskps r9d, ymm10"); +   
 test_invalid(&[0xc4, 0b000_00001, 0b1_1111_000, 0x50, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_100, 0x50, 0b00_001_010]); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x50, 0b11_001_010], "vmovmskpd r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x50, 0b11_001_010], "vmovmskpd r9d, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x50, 0b11_001_010], "vmovmskpd r9d, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x50, 0b11_001_010], "vmovmskpd r9d, ymm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_001, 0x50, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_101, 0x50, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_001, 0x50, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_101, 0x50, 0b00_001_010]); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x51, 0b00_001_010], "vsqrtpd xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x51, 0b00_001_010], "vsqrtpd ymm9, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_011, 0x51, 0b00_001_010], "vsqrtsd xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_111, 0x51, 0b00_001_010], "vsqrtsd xmm9, xmm8, [r10]"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x51, 0b11_001_010], "vsqrtps xmm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_000, 0x51, 0b11_001_010]); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x51, 0b11_001_010], "vsqrtps ymm9, ymm10"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x51, 0b00_001_010], "vsqrtss xmm9, xmm0, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x51, 0b00_001_010], "vsqrtss xmm9, xmm0, [r10]"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x52, 0b11_001_010], "vrsqrtps xmm9, 
xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x52, 0b11_001_010], "vrsqrtps ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x52, 0b11_001_010], "vrsqrtss xmm9, xmm0, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x52, 0b11_001_010], "vrsqrtss xmm9, xmm0, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x53, 0b11_001_010], "vrcpps xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x53, 0b11_001_010], "vrcpps ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x53, 0b11_001_010], "vrcpss xmm9, xmm0, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x53, 0b11_001_010], "vrcpss xmm9, xmm0, xmm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x54, 0b11_001_010], "vandps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x54, 0b11_001_010], "vandps ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x55, 0b11_001_010], "vandnps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x55, 0b11_001_010], "vandnps ymm9, ymm8, ymm10"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x54, 0b00_001_010], "vandpd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x54, 0b00_001_010], "vandpd ymm9, ymm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x55, 0b00_001_010], "vandnpd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x55, 0b00_001_010], "vandnpd ymm9, ymm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x56, 0b00_001_010], "vorpd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x56, 0b00_001_010], "vorpd ymm9, ymm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x56, 0b00_001_010], "vorps xmm9, xmm8, 
[r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x56, 0b00_001_010], "vorps ymm9, ymm8, [r10]"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x57, 0b11_001_010], "vxorps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x57, 0b11_001_010], "vxorps ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x57, 0b11_001_010], "vxorpd xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x57, 0b11_001_010], "vxorpd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x58, 0b11_001_010], "vaddps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x58, 0b11_001_010], "vaddps ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_010, 0x58, 0b11_001_010], "vaddss xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_110, 0x58, 0b11_001_010], "vaddss xmm9, xmm8, xmm10"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_010, 0x58, 0b00_001_010], "vaddss xmm9, xmm8, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_110, 0x58, 0b00_001_010], "vaddss xmm9, xmm8, [r10]"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x58, 0b00_001_010], "vaddpd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x58, 0b00_001_010], "vaddpd ymm9, ymm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x58, 0b00_001_010], "vaddsd xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x58, 0b00_001_010], "vaddsd xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x59, 0b00_001_010], "vmulps xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x59, 0b00_001_010], "vmulps ymm9, ymm8, [r10]"); +    // TODO: xmmword ptr +    
test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x59, 0b00_001_010], "vmulpd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x59, 0b00_001_010], "vmulpd ymm9, ymm8, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_010, 0x59, 0b00_001_010], "vmulss xmm9, xmm8, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_110, 0x59, 0b00_001_010], "vmulss xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x59, 0b00_001_010], "vmulsd xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x59, 0b00_001_010], "vmulsd xmm9, xmm8, [r10]"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x5a, 0b11_001_010], "vcvtps2pd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x5a, 0b11_001_010], "vcvtps2pd ymm9, xmm10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x5a, 0b00_001_010], "vcvtps2pd xmm9, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x5a, 0b00_001_010], "vcvtps2pd ymm9, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x5a, 0b11_001_010], "vcvtpd2ps xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x5a, 0b11_001_010], "vcvtpd2ps xmm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0x5a, 0b11_001_010], "vcvtsd2ss xmm9, xmm0, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0x5a, 0b11_001_010], "vcvtsd2ss xmm9, xmm0, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x5a, 0b11_001_010], "vcvtss2sd xmm9, xmm0, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x5a, 0b11_001_010], "vcvtss2sd xmm9, xmm0, xmm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x5b, 0b11_001_010], "vcvtps2dq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x5b, 0b11_001_010], "vcvtps2dq ymm9, ymm10"); +    
test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x5b, 0b11_001_010], "vcvttps2dq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0x5b, 0b11_001_010], "vcvttps2dq ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x5b, 0b11_001_010], "vcvtdq2ps xmm9, xmm10"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_000, 0x5b, 0b00_001_010], "vcvtdq2ps xmm9, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x5b, 0b11_001_010], "vcvtdq2ps ymm9, ymm10"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_100, 0x5b, 0b00_001_010], "vcvtdq2ps ymm9, [r10]"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_000, 0x5c, 0b00_001_010], "vsubps xmm9, xmm0, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_100, 0x5c, 0b00_001_010], "vsubps ymm9, ymm0, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_010, 0x5c, 0b00_001_010], "vsubss xmm9, xmm8, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_110, 0x5c, 0b00_001_010], "vsubss xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x5c, 0b00_001_010], "vsubpd xmm9, xmm0, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x5c, 0b00_001_010], "vsubpd ymm9, ymm0, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_011, 0x5c, 0b00_001_010], "vsubsd xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_111, 0x5c, 0b00_001_010], "vsubsd xmm9, xmm8, [r10]"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x5d, 0b00_001_010], "vminps xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x5d, 0b00_001_010], "vminps ymm9, ymm8, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_010, 0x5d, 0b00_001_010], 
"vminss xmm9, xmm8, [r10]"); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_110, 0x5d, 0b00_001_010], "vminss xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x5d, 0b00_001_010], "vminpd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x5d, 0b00_001_010], "vminpd ymm9, ymm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x5d, 0b00_001_010], "vminsd xmm9, xmm8, [r10]"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x5d, 0b00_001_010], "vminsd xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x5e, 0b00_001_010], "vdivps xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x5e, 0b00_001_010], "vdivps xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x5e, 0b00_001_010], "vdivpd xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_010, 0x5e, 0b00_001_010], "vdivss xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x5e, 0b00_001_010], "vdivsd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x5e, 0b00_001_010], "vdivps ymm9, ymm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x5e, 0b00_001_010], "vdivpd ymm9, ymm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_110, 0x5e, 0b00_001_010], "vdivss xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x5e, 0b00_001_010], "vdivsd xmm9, xmm8, [r10]"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0x5f, 0b00_001_010], "vmaxps xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 
0b1_0111_001, 0x5f, 0b00_001_010], "vmaxpd xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_010, 0x5f, 0b00_001_010], "vmaxss xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x5f, 0b00_001_010], "vmaxsd xmm9, xmm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0x5f, 0b00_001_010], "vmaxps ymm9, ymm8, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x5f, 0b00_001_010], "vmaxpd ymm9, ymm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_110, 0x5f, 0b00_001_010], "vmaxss xmm9, xmm8, [r10]"); +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x5f, 0b00_001_010], "vmaxsd xmm9, xmm8, [r10]"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x60, 0b11_001_010], "vpunpcklbw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x60, 0b11_001_010], "vpunpcklbw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x61, 0b11_001_010], "vpunpcklwd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x61, 0b11_001_010], "vpunpcklwd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x62, 0b11_001_010], "vpunpckldq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x62, 0b11_001_010], "vpunpckldq ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x63, 0b11_001_010], "vpacksswb xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_101, 0x63, 0b11_001_010], "vpacksswb ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x64, 0b11_001_010], "vpcmpgtb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x64, 0b11_001_010], "vpcmpgtb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x65, 0b11_001_010], "vpcmpgtw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 
0b000_00001, 0b0_0111_101, 0x65, 0b11_001_010], "vpcmpgtw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x66, 0b11_001_010], "vpcmpgtd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x66, 0b11_001_010], "vpcmpgtd ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x67, 0b11_001_010], "vpackuswb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x67, 0b11_001_010], "vpackuswb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x68, 0b11_001_010], "vpunpckhbw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x68, 0b11_001_010], "vpunpckhbw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x69, 0b11_001_010], "vpunpckhwd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x69, 0b11_001_010], "vpunpckhwd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x6a, 0b11_001_010], "vpunpckhdq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x6a, 0b11_001_010], "vpunpckhdq ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0x6b, 0b11_001_010], "vpackssdw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0x6b, 0b11_001_010], "vpackssdw ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x6c, 0b11_001_010], "vpunpcklqdq xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x6c, 0b11_001_010], "vpunpcklqdq ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x6d, 0b11_001_010], "vpunpckhqdq xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x6d, 0b11_001_010], "vpunpckhqdq ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x6e, 0b11_001_010], "vmovq xmm9, r10"); +    // TODO: qword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x6e, 0b00_001_010], "vmovq xmm9, [r10]"); +    test_invalid(&[0xc4, 
0b000_00001, 0b1_1111_101, 0x6e, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x6f, 0b11_001_010], "vmovdqa xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x6f, 0b11_001_010], "vmovdqa ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x6f, 0b11_001_010], "vmovdqu xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_110, 0x6f, 0b11_001_010], "vmovdqu ymm9, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x70, 0b11_001_010, 0x77], "vpshufd xmm9, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x70, 0b11_001_010, 0x77], "vpshufd ymm9, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0x70, 0b11_001_010, 0x77], "vpshufhw xmm9, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_110, 0x70, 0b11_001_010, 0x77], "vpshufhw ymm9, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0x70, 0b11_001_010, 0x77], "vpshuflw xmm9, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_111, 0x70, 0b11_001_010, 0x77], "vpshuflw ymm9, ymm10, 0x77"); + + +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x71, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x71, 0b11_010_010, 0x77], "vpsrlw xmm0, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x71, 0b11_010_010, 0x77], "vpsrlw xmm8, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x71, 0b11_010_010, 0x77], "vpsrlw ymm0, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x71, 0b00_011_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x71, 0b11_100_010, 0x77], "vpsraw xmm0, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x71, 0b11_100_010, 0x77], "vpsraw ymm0, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x71, 0b11_101_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x71, 0b11_110_010, 0x77], "vpsllw xmm0, xmm10, 0x77"); +    test_avx2(&[0xc4, 
0b000_00001, 0b1_1111_101, 0x71, 0b11_110_010, 0x77], "vpsllw ymm0, ymm10, 0x77"); + +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b00_000_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b11_010_010, 0x77], "vpsrld xmm0, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x72, 0b11_010_010, 0x77], "vpsrld ymm0, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0x72, 0b11_011_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b11_100_010, 0x77], "vpsrad xmm0, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b00_100_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0x72, 0b11_100_010, 0x77], "vpsrad ymm0, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b00_101_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b11_110_010, 0x77], "vpslld xmm0, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b00_110_010, 0x77]); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x72, 0b11_110_010, 0x77], "vpslld ymm0, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x72, 0b00_111_010, 0x77]); + +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_000_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_010_010, 0x77], "vpsrlq xmm0, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_011_010, 0x77], "vpsrldq xmm0, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x73, 0b11_010_010, 0x77], "vpsrlq ymm0, ymm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x73, 0b11_011_010, 0x77], "vpsrldq ymm0, ymm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_100_010, 0x77]); +    test_invalid(&[0xc4, 
0b000_00001, 0b1_1111_001, 0x73, 0b11_101_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_110_010, 0x77], "vpsllq xmm0, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x73, 0b11_111_010, 0x77], "vpslldq xmm0, xmm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x73, 0b11_110_010, 0x77], "vpsllq ymm0, ymm10, 0x77"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0x73, 0b11_111_010, 0x77], "vpslldq ymm0, ymm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x74, 0b11_001_010], "vpcmpeqb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0x74, 0b11_001_010], "vpcmpeqb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x75, 0b11_001_010], "vpcmpeqw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0x75, 0b11_001_010], "vpcmpeqw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x76, 0b11_001_010], "vpcmpeqd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0x76, 0b11_001_010], "vpcmpeqd ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x7c, 0b11_001_010], "vhaddpd xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x7c, 0b11_001_010], "vhaddpd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x7c, 0b11_001_010], "vhaddps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x7c, 0b11_001_010], "vhaddps ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0x7d, 0b11_001_010], "vhsubpd xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0x7d, 0b11_001_010], "vhsubpd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0x7d, 0b11_001_010], "vhsubps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0x7d, 0b11_001_010], "vhsubps ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x7e, 0b11_001_010], "vmovd 
r10d, xmm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_101, 0x7e, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0x7e, 0b11_001_010], "vmovq r10, xmm9"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0x7e, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0x7f, 0b11_001_010], "vmovdqa xmm10, xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_101, 0x7f, 0b11_001_010], "vmovdqa ymm10, ymm9"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_010, 0x7f, 0b11_001_010], "vmovdqu xmm10, xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_110, 0x7f, 0b11_001_010], "vmovdqu ymm10, ymm9"); + +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_000, 0xae, 0b00_010_001], "vldmxcsr [r9]"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_100, 0xae, 0b00_010_001]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_000, 0xae, 0b11_010_001]); +    // TODO: dword ptr +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_000, 0xae, 0b00_011_001], "vstmxcsr [r9]"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_100, 0xae, 0b00_011_001]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_000, 0xae, 0b11_011_001]); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_000, 0xc2, 0b11_001_010, 0x77], "vcmpps xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_100, 0xc2, 0b11_001_010, 0x77], "vcmpps ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xc2, 0b11_001_010, 0x77], "vcmppd xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0xc2, 0b11_001_010, 0x77], "vcmppd ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0xc2, 0b11_001_010, 0x77], "vcmpsd xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0xc2, 0b11_001_010, 0x77], "vcmpsd xmm9, xmm8, xmm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xc4, 0b11_001_010, 0x77], "vpinsrw xmm9, xmm8, r10d, 0x77"); +    test_invalid(&[0xc4, 
0b000_00001, 0b1_1111_101, 0xc4, 0b11_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_001, 0xc5, 0b00_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0xc5, 0b11_001_010, 0x77], "vpextrw r9d, xmm10, 0x77"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0xc5, 0b00_001_010, 0x77]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0xc5, 0b11_001_010, 0x77]); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xc6, 0b11_001_010, 0x77], "vshufpd xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_101, 0xc6, 0b11_001_010, 0x77], "vshufpd ymm9, ymm8, ymm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_000, 0xc6, 0b11_001_010, 0x77], "vshufps xmm9, xmm8, xmm10, 0x77"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_100, 0xc6, 0b11_001_010, 0x77], "vshufps ymm9, ymm8, ymm10, 0x77"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd0, 0b11_001_010], "vaddsubpd xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd0, 0b11_001_010], "vaddsubpd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_011, 0xd0, 0b11_001_010], "vaddsubps xmm9, xmm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_111, 0xd0, 0b11_001_010], "vaddsubps ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd1, 0b11_001_010], "vpsrlw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd1, 0b11_001_010], "vpsrlw ymm9, ymm8, xmm10"); +    // TODO: xmmword ptr +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd1, 0b00_001_010], "vpsrlw ymm9, ymm8, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd2, 0b11_001_010], "vpsrld xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd2, 0b11_001_010], "vpsrld ymm9, ymm8, xmm10"); +    // TODO: xmmword ptr +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd2, 0b00_001_010], "vpsrld ymm9, ymm8, [r10]"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd3, 
0b11_001_010], "vpsrlq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd3, 0b11_001_010], "vpsrlq ymm9, ymm8, xmm10"); +    // TODO: xmmword ptr +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd3, 0b00_001_010], "vpsrlq ymm9, ymm8, [r10]"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd4, 0b11_001_010], "vpaddq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd4, 0b11_001_010], "vpaddq ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xd5, 0b11_001_010], "vpmullw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xd5, 0b11_001_010], "vpmullw ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_1111_001, 0xd7, 0b11_001_010], "vpmovmskb r9d, xmm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b0_1111_001, 0xd7, 0b00_001_010]); +    test_avx2(&[0xc4, 0b000_00001, 0b0_1111_101, 0xd7, 0b11_001_010], "vpmovmskb r9d, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd8, 0b11_001_010], "vpsubusb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd8, 0b11_001_010], "vpsubusb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xd9, 0b11_001_010], "vpsubusw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xd9, 0b11_001_010], "vpsubusw ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xda, 0b11_001_010], "vpminsw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xda, 0b11_001_010], "vpminsw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xdb, 0b11_001_010], "vpand xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xdb, 0b11_001_010], "vpand ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xdc, 0b11_001_010], "vpaddusb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xdc, 0b11_001_010], "vpaddusb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 
0b000_00001, 0b1_0111_001, 0xdd, 0b11_001_010], "vpaddusw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xdd, 0b11_001_010], "vpaddusw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xde, 0b11_001_010], "vpmaxub xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xde, 0b11_001_010], "vpmaxub ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xdf, 0b11_001_010], "vpandn xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xdf, 0b11_001_010], "vpandn ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe0, 0b11_001_010], "vpavgb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe0, 0b11_001_010], "vpavgb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe1, 0b11_001_010], "vpsraw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe1, 0b11_001_010], "vpsraw ymm9, ymm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe2, 0b11_001_010], "vpsrad xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe2, 0b11_001_010], "vpsrad ymm9, ymm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe3, 0b11_001_010], "vpavgw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe3, 0b11_001_010], "vpavgw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe4, 0b11_001_010], "vpmulhuw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe4, 0b11_001_010], "vpmulhuw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe5, 0b11_001_010], "vpmulhw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe5, 0b11_001_010], "vpmulhw ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0xe6, 0b11_001_010], "vcvttpd2dq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0xe6, 0b11_001_010], "vcvttpd2dq xmm9, ymm10"); 
+    test_instr(&[0xc4, 0b000_00001, 0b1_1111_010, 0xe6, 0b11_001_010], "vcvtdq2pd xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_110, 0xe6, 0b11_001_010], "vcvtdq2pd ymm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0xe6, 0b11_001_010], "vcvtpd2dq xmm9, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0xe6, 0b11_001_010], "vcvtpd2dq xmm9, ymm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0xe7, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0xe7, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0xe7, 0b00_001_010], "vmovntdq [r10], xmm9"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_101, 0xe7, 0b00_001_010], "vmovntdq [r10], ymm9"); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe8, 0b11_001_010], "vpsubsb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe8, 0b11_001_010], "vpsubsb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xe9, 0b11_001_010], "vpsubsw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xe9, 0b11_001_010], "vpsubsw ymm9, ymm8, ymm10"); + +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xea, 0b11_001_010], "vpminsw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xea, 0b11_001_010], "vpminsw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xeb, 0b11_001_010], "vpor xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xeb, 0b11_001_010], "vpor ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xec, 0b11_001_010], "vpaddsb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xec, 0b11_001_010], "vpaddsb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xed, 0b11_001_010], "vpaddsw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xed, 0b11_001_010], "vpaddsw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 
0b0_0111_001, 0xee, 0b11_001_010], "vpmaxsw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xee, 0b11_001_010], "vpmaxsw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b0_0111_001, 0xef, 0b11_001_010], "vpxor xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b0_0111_101, 0xef, 0b11_001_010], "vpxor ymm9, ymm8, ymm10"); + +    // TODO: xmmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_011, 0xf0, 0b00_001_010], "vlddqu xmm9, [r10]"); +    // TODO: ymmword ptr +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_111, 0xf0, 0b00_001_010], "vlddqu ymm9, [r10]"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_011, 0xf0, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_011, 0xf0, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_0111_111, 0xf0, 0b11_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_111, 0xf0, 0b11_001_010]); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xf1, 0b11_001_010], "vpsllw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xf1, 0b11_001_010], "vpsllw ymm9, ymm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xf2, 0b11_001_010], "vpslld xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xf2, 0b11_001_010], "vpslld ymm9, ymm8, xmm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xf3, 0b11_001_010], "vpsllq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xf3, 0b11_001_010], "vpsllq ymm9, ymm8, xmm10"); + + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xf4, 0b11_001_010], "vpmuludq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xf4, 0b11_001_010], "vpmuludq ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0xf5, 0b11_001_010], "vpmaddwd xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0xf5, 0b11_001_010], "vpmaddwd ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0xf6, 0b11_001_010], 
"vpsadbw xmm9, xmm0, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_1111_101, 0xf6, 0b11_001_010], "vpsadbw ymm9, ymm0, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_1111_001, 0xf7, 0b11_001_010], "vmaskmovdqu xmm9, xmm10"); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_001, 0xf7, 0b00_001_010]); +    test_invalid(&[0xc4, 0b000_00001, 0b1_1111_101, 0xf7, 0b11_001_010]); + +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xf8, 0b11_001_010], "vpsubb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xf8, 0b11_001_010], "vpsubb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xf9, 0b11_001_010], "vpsubw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xf9, 0b11_001_010], "vpsubw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xfa, 0b11_001_010], "vpsubd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xfa, 0b11_001_010], "vpsubd ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xfb, 0b11_001_010], "vpsubq xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xfb, 0b11_001_010], "vpsubq ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xfc, 0b11_001_010], "vpaddb xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xfc, 0b11_001_010], "vpaddb ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xfd, 0b11_001_010], "vpaddw xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xfd, 0b11_001_010], "vpaddw ymm9, ymm8, ymm10"); +    test_instr(&[0xc4, 0b000_00001, 0b1_0111_001, 0xfe, 0b11_001_010], "vpaddd xmm9, xmm8, xmm10"); +    test_avx2(&[0xc4, 0b000_00001, 0b1_0111_101, 0xfe, 0b11_001_010], "vpaddd ymm9, ymm8, ymm10"); +      test_instr(&[0xc5, 0xf8, 0x10, 0x00], "vmovups xmm0, [rax]");      test_instr(&[0xc5, 0xf8, 0x10, 0x01], "vmovups xmm0, [rcx]");      test_instr(&[0xc5, 0x78, 0x10, 0x0f], "vmovups xmm9, [rdi]"); @@ 
-1449,10 +2424,10 @@ fn test_vex() {      test_instr(&[0xc4, 0x02, 0x75, 0x03, 0x0f], "vphaddsw ymm9, ymm1, [r15]");      test_instr(&[0xc4, 0x02, 0x71, 0x03, 0xcd], "vphaddsw xmm9, xmm1, xmm13");      test_instr(&[0xc4, 0x02, 0x75, 0x03, 0xcd], "vphaddsw ymm9, ymm1, ymm13"); -    test_instr(&[0xc4, 0x02, 0x71, 0x04, 0x0f], "vphaddubsw xmm9, xmm1, [r15]"); -    test_instr(&[0xc4, 0x02, 0x75, 0x04, 0x0f], "vphaddubsw ymm9, ymm1, [r15]"); -    test_instr(&[0xc4, 0x02, 0x71, 0x04, 0xcd], "vphaddubsw xmm9, xmm1, xmm13"); -    test_instr(&[0xc4, 0x02, 0x75, 0x04, 0xcd], "vphaddubsw ymm9, ymm1, ymm13"); +    test_instr(&[0xc4, 0x02, 0x71, 0x04, 0x0f], "vpmaddubsw xmm9, xmm1, [r15]"); +    test_instr(&[0xc4, 0x02, 0x75, 0x04, 0x0f], "vpmaddubsw ymm9, ymm1, [r15]"); +    test_instr(&[0xc4, 0x02, 0x71, 0x04, 0xcd], "vpmaddubsw xmm9, xmm1, xmm13"); +    test_instr(&[0xc4, 0x02, 0x75, 0x04, 0xcd], "vpmaddubsw ymm9, ymm1, ymm13");      test_instr(&[0xc4, 0x02, 0x71, 0x05, 0x0f], "vphsubw xmm9, xmm1, [r15]");      test_instr(&[0xc4, 0x02, 0x75, 0x05, 0x0f], "vphsubw ymm9, ymm1, [r15]");      test_instr(&[0xc4, 0x02, 0x71, 0x05, 0xcd], "vphsubw xmm9, xmm1, xmm13"); @@ -1506,7 +2481,7 @@ fn test_vex() {      test_instr(&[0xc4, 0x02, 0x09, 0x9d, 0xcd], "vfnmadd132ss xmm9, xmm14, xmm13");      test_instr(&[0xc4, 0x02, 0x89, 0x9d, 0xcd], "vfnmadd132sd xmm9, xmm14, xmm13");  // ... 
-    test_instr(&[0xc4, 0xe3, 0x79, 0x14, 0xd0, 0x0a], "vpextrb rax, xmm2, 0xa"); +    test_instr(&[0xc4, 0xe3, 0x79, 0x14, 0xd0, 0x0a], "vpextrb eax, xmm2, 0xa");      test_instr(&[0xc4, 0xe3, 0x79, 0x14, 0x10, 0x0a], "vpextrb [rax], xmm2, 0xa");      test_instr_invalid(&[0xc4, 0xe3, 0xf9, 0x14, 0x00, 0xd0]);      test_instr_invalid(&[0xc4, 0xe3, 0xf9, 0x14, 0x00, 0x0a]); @@ -1543,13 +2518,12 @@ fn test_vex() {      test_instr(&[0xc5, 0xf1, 0xc4, 0xd8, 0x78], "vpinsrw xmm3, xmm1, eax, 0x78");      test_instr(&[0xc5, 0xf1, 0xc4, 0x18, 0x78], "vpinsrw xmm3, xmm1, [rax], 0x78"); -    // uh oh, i think these sizes are backwards... -    test_instr(&[0xc5, 0xe0, 0x54, 0x03], "vandpd xmm0, xmm3, [rbx]"); -    test_instr(&[0xc5, 0xe1, 0x54, 0x03], "vandps xmm0, xmm3, [rbx]"); -    test_instr(&[0xc5, 0xe0, 0x55, 0x03], "vandnpd xmm0, xmm3, [rbx]"); -    test_instr(&[0xc5, 0xe1, 0x55, 0x03], "vandnps xmm0, xmm3, [rbx]"); -    test_instr(&[0xc5, 0xe0, 0x56, 0x03], "vorpd xmm0, xmm3, [rbx]"); -    test_instr(&[0xc5, 0xe1, 0x56, 0x03], "vorps xmm0, xmm3, [rbx]"); +    test_instr(&[0xc5, 0xe0, 0x54, 0x03], "vandps xmm0, xmm3, [rbx]"); +    test_instr(&[0xc5, 0xe1, 0x54, 0x03], "vandpd xmm0, xmm3, [rbx]"); +    test_instr(&[0xc5, 0xe0, 0x55, 0x03], "vandnps xmm0, xmm3, [rbx]"); +    test_instr(&[0xc5, 0xe1, 0x55, 0x03], "vandnpd xmm0, xmm3, [rbx]"); +    test_instr(&[0xc5, 0xe0, 0x56, 0x03], "vorps xmm0, xmm3, [rbx]"); +    test_instr(&[0xc5, 0xe1, 0x56, 0x03], "vorpd xmm0, xmm3, [rbx]");      test_instr(&[0xc4, 0xa2, 0x15, 0x3e, 0x14, 0xb9], "vpmaxuw ymm2, ymm13, [rcx + r15 * 4]");  } | 
