4 files changed, 1764 insertions, 71 deletions
diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs
index e51d2dc..f258bad 100644
--- a/src/protected_mode/display.rs
+++ b/src/protected_mode/display.rs
@@ -5,7 +5,7 @@ use core::fmt;
 use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors};
 use yaxpeax_arch::display::*;
 
-use crate::protected_mode::{RegSpec, Opcode, Operand, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec};
+use crate::protected_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec};
 
 impl fmt::Display for InstDecoder {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -166,6 +166,44 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Operand {
             &Operand::Register(ref spec) => {
                 f.write_str(regspec_label(spec))
             }
+            &Operand::RegisterMaskMerge(ref spec, ref mask, merge_mode) => {
+                f.write_str(regspec_label(spec))?;
+                if mask.num != 0 {
+                    f.write_str("{")?;
+                    f.write_str(regspec_label(mask))?;
+                    f.write_str("}")?;
+                }
+                if let MergeMode::Zero = merge_mode {
+                    f.write_str("{z}")?;
+                }
+                Ok(())
+            }
+            &Operand::RegisterMaskMergeSae(ref spec, ref mask, merge_mode, sae_mode) => {
+                f.write_str(regspec_label(spec))?;
+                if mask.num != 0 {
+                    f.write_str("{")?;
+                    f.write_str(regspec_label(mask))?;
+                    f.write_str("}")?;
+                }
+                if let MergeMode::Zero = merge_mode {
+                    f.write_str("{z}")?;
+                }
+                f.write_str(sae_mode.label())?;
+                Ok(())
+            }
+            &Operand::RegisterMaskMergeSaeNoround(ref spec, ref mask, merge_mode) => {
+                f.write_str(regspec_label(spec))?;
+                if mask.num != 0 {
+                    f.write_str("{")?;
+                    f.write_str(regspec_label(mask))?;
+                    f.write_str("}")?;
+                }
+                if let MergeMode::Zero = merge_mode {
+                    f.write_str("{z}")?;
+                }
+                f.write_str("{sae}")?;
+                Ok(())
+            }
             &Operand::DisplacementU16(imm) => {
                 write!(f, "[{}]", colors.address(u16_hex(imm)))
             }
@@ -227,6 +265,69 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Operand {
                 format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
                 write!(f, "]")
             },
+            &Operand::RegDispMasked(ref spec, disp, ref mask_reg) => {
+                write!(f, "[{} ", regspec_label(spec))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                write!(f, "]")?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            },
+            &Operand::RegDerefMasked(ref spec, ref mask_reg) => {
+                f.write_str("[")?;
+                f.write_str(regspec_label(spec))?;
+                f.write_str("]")?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            },
+            &Operand::RegScaleMasked(ref spec, scale, ref mask_reg) => {
+                write!(f, "[{} * {}]",
+                    regspec_label(spec),
+                    colors.number(scale)
+                )?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            },
+            &Operand::RegScaleDispMasked(ref spec, scale, disp, ref mask_reg) => {
+                write!(f, "[{} * {} ",
+                    regspec_label(spec),
+                    colors.number(scale),
+                )?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                write!(f, "]")?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            },
+            &Operand::RegIndexBaseMasked(ref base, ref index, ref mask_reg) => {
+                f.write_str("[")?;
+                f.write_str(regspec_label(base))?;
+                f.write_str(" + ")?;
+                f.write_str(regspec_label(index))?;
+                f.write_str("]")?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            }
+            &Operand::RegIndexBaseDispMasked(ref base, ref index, disp, ref mask_reg) => {
+                write!(f, "[{} + {} ",
+                    regspec_label(base),
+                    regspec_label(index),
+                )?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                write!(f, "]")?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            },
+            &Operand::RegIndexBaseScaleMasked(ref base, ref index, scale, ref mask_reg) => {
+                write!(f, "[{} + {} * {}]",
+                    regspec_label(base),
+                    regspec_label(index),
+                    colors.number(scale)
+                )?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            }
+            &Operand::RegIndexBaseScaleDispMasked(ref base, ref index, scale, disp, ref mask_reg) => {
+                write!(f, "[{} + {} * {} ",
+                    regspec_label(base),
+                    regspec_label(index),
+                    colors.number(scale),
+                )?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                write!(f, "]")?;
+                write!(f, "{{{}}}", regspec_label(mask_reg))
+            },
             &Operand::Nothing => { Ok(()) },
         }
     }
@@ -655,6 +756,7 @@ const MNEMONICS: &[&'static str] = &[
     "xorpd",
     "vmovddup",
     "vpshuflw",
+    "vpshufhw",
     "vhaddps",
     "vhsubps",
     "vaddsubps",
@@ -826,6 +928,7 @@ const MNEMONICS: &[&'static str] = &[
     "vpabsd",
     "vpabsw",
     "vpackssdw",
+    "vpackusdw",
     "vpacksswb",
     "vpackuswb",
     "vpaddb",
@@ -837,12 +940,12 @@ const MNEMONICS: &[&'static str] = &[
     "vpaddusw",
     "vpaddw",
     "vpalignr",
-    "vandps",
     "vandpd",
-    "vorps",
+    "vandps",
     "vorpd",
-    "vandnps",
+    "vorps",
     "vandnpd",
+    "vandnps",
     "vpand",
     "vpandn",
     "vpavgb",
@@ -863,6 +966,8 @@ const MNEMONICS: &[&'static str] = &[
     "vpcmpgtd",
     "vpcmpgtq",
     "vpcmpgtw",
+    "vpcmpestri",
+    "vpcmpestrm",
     "vpcmpistri",
     "vpcmpistrm",
     "vperm2f128",
@@ -884,7 +989,7 @@ const MNEMONICS: &[&'static str] = &[
     "vphaddd",
     "vphaddsw",
     "vphaddw",
-    "vphaddubsw",
+    "vpmaddubsw",
     "vphminposuw",
     "vphsubd",
     "vphsubsw",
@@ -902,8 +1007,11 @@ const MNEMONICS: &[&'static str] = &[
     "vpmaxub",
     "vpmaxuw",
     "vpmaxud",
+    "vpminsb",
     "vpminsw",
     "vpminsd",
+    "vpminub",
+    "vpminuw",
     "vpminud",
     "vpmovmskb",
     "vpmovsxbd",
@@ -922,6 +1030,7 @@ const MNEMONICS: &[&'static str] = &[
     "vpmulhrsw",
     "vpmulhuw",
     "vpmulhw",
+    "vpmullq",
     "vpmulld",
     "vpmullw",
     "vpmuludq",
@@ -992,6 +1101,9 @@ const MNEMONICS: &[&'static str] = &[
     "vxorpd",
     "vxorps",
     "vzeroupper",
+    "vzeroall",
+    "vldmxcsr",
+    "vstmxcsr",
     "pclmulqdq",
     "aeskeygenassist",
     "aesimc",
@@ -1219,6 +1331,7 @@ const MNEMONICS: &[&'static str] = &[
     "jrcxz",
     "pusha",
     "popa",
+    "bound",
     "arpl",
     "aas",
     "aaa",
@@ -1315,6 +1428,378 @@ const MNEMONICS: &[&'static str] = &[
     // TSXLDTRK
     "xsusldtrk",
     "xresldtrk",
+
+    // AVX512F
+    "valignd",
+    "valignq",
+    "vblendmpd",
+    "vblendmps",
+    "vcompresspd",
+    "vcompressps",
+    "vcvtpd2udq",
+    "vcvttpd2udq",
+    "vcvtps2udq",
+    "vcvttps2udq",
+    "vcvtqq2pd",
+    "vcvtqq2ps",
+    "vcvtsd2usi",
+    "vcvttsd2usi",
+    "vcvtss2usi",
+    "vcvttss2usi",
+    "vcvtudq2pd",
+    "vcvtudq2ps",
+    "vcvtusi2usd",
+    "vcvtusi2uss",
+    "vexpandpd",
+    "vexpandps",
+    "vextractf32x4",
+    "vextractf64x4",
+    "vextracti32x4",
+    "vextracti64x4",
+    "vfixupimmpd",
+    "vfixupimmps",
+    "vfixupimmsd",
+    "vfixupimmss",
+    "vgetexppd",
+    "vgetexpps",
+    "vgetexpsd",
+    "vgetexpss",
+    "vgetmantpd",
+    "vgetmantps",
+    "vgetmantsd",
+    "vgetmantss",
+    "vinsertf32x4",
+    "vinsertf64x4",
+    "vinserti64x4",
+    "vmovdqa32",
+    "vmovdqa64",
+    "vmovdqu32",
+    "vmovdqu64",
+    "vpblendmd",
+    "vpblendmq",
+    "vpcmpd",
+    "vpcmpud",
+    "vpcmpq",
+    "vpcmpuq",
+    "vpcompressq",
+    "vpcompressd",
+    "vpermi2d",
+    "vpermi2q",
+    "vpermi2pd",
+    "vpermi2ps",
+    "vpermt2d",
+    "vpermt2q",
+    "vpermt2pd",
+    "vpermt2ps",
+    "vpmaxsq",
+    "vpmaxuq",
+    "vpminsq",
+    "vpminuq",
+    "vpmovsqb",
+    "vpmovusqb",
+    "vpmovsqw",
+    "vpmovusqw",
+    "vpmovsqd",
+    "vpmovusqd",
+    "vpmovsdb",
+    "vpmovusdb",
+    "vpmovsdw",
+    "vpmovusdw",
+    "vprold",
+    "vprolq",
+    "vprolvd",
+    "vprolvq",
+    "vprord",
+    "vprorq",
+    "vprorrd",
+    "vprorrq",
+    "vpscatterdd",
+    "vpscatterdq",
+    "vpscatterqd",
+    "vpscatterqq",
+    "vpsraq",
+    "vpsravq",
+    "vptestnmd",
+    "vptestnmq",
+    "vpternlogd",
+    "vpternlogq",
+    "vptestmd",
+    "vptestmq",
+    "vrcp14pd",
+    "vrcp14ps",
+    "vrcp14sd",
+    "vrcp14ss",
+    "vrndscalepd",
+    "vrndscaleps",
+    "vrndscalesd",
+    "vrndscaless",
+    "vrsqrt14pd",
+    "vrsqrt14ps",
+    "vrsqrt14sd",
+    "vrsqrt14ss",
+    "vscaledpd",
+    "vscaledps",
+    "vscaledsd",
+    "vscaledss",
+    "vscatterdd",
+    "vscatterdq",
+    "vscatterqd",
+    "vscatterqq",
+    "vshuff32x4",
+    "vshuff64x2",
+    "vshufi32x4",
+    "vshufi64x2",
+
+    // AVX512DQ
+    "vcvttpd2qq",
+    "vcvtpd2qq",
+    "vcvttpd2uqq",
+    "vcvtpd2uqq",
+    "vcvttps2qq",
+    "vcvtps2qq",
+    "vcvttps2uqq",
+    "vcvtps2uqq",
+    "vcvtuqq2pd",
+    "vcvtuqq2ps",
+    "vextractf64x2",
+    "vextracti64x2",
+    "vfpclasspd",
+    "vfpclassps",
+    "vfpclasssd",
+    "vfpclassss",
+    "vinsertf64x2",
+    "vinserti64x2",
+    "vpmovm2d",
+    "vpmovm2q",
+    "vpmovb2d",
+    "vpmovq2m",
+    "vrangepd",
+    "vrangeps",
+    "vrangesd",
+    "vrangess",
+    "vreducepd",
+    "vreduceps",
+    "vreducesd",
+    "vreducess",
+
+    // AVX512BW
+    "vdbpsadbw",
+    "vmovdqu8",
+    "vmovdqu16",
+    "vpblendmb",
+    "vpblendmw",
+    "vpcmpb",
+    "vpcmpub",
+    "vpcmpw",
+    "vpcmpuw",
+    "vpermw",
+    "vpermi2b",
+    "vpermi2w",
+    "vpmovm2b",
+    "vpmovm2w",
+    "vpmovb2m",
+    "vpmovw2m",
+    "vpmovswb",
+    "vpmovuswb",
+    "vpsllvw",
+    "vpsravw",
+    "vpsrlvw",
+    "vptestnmb",
+    "vptestnmw",
+    "vptestmb",
+    "vptestmw",
+
+    // AVX512CD
+    "vpbroadcastm",
+    "vpconflictd",
+    "vpconflictq",
+    "vplzcntd",
+    "vplzcntq",
+    // k-prefixed mask mnemonics: kunpck* first, then the b/w/d/q-suffixed groups. NOTE(review): presumably not AVX512CD-specific despite the heading above - confirm.
+    "kunpckbw",
+    "kunpckwd",
+    "kunpckdq",
+
+    "kaddb",
+    "kandb",
+    "kandnb",
+    "kmovb",
+    "knotb",
+    "korb",
+    "kortestb",
+    "kshiftlb",
+    "kshiftrb",
+    "ktestb",
+    "kxnorb",
+    "kxorb",
+    "kaddw",
+    "kandw",
+    "kandnw",
+    "kmovw",
+    "knotw",
+    "korw",
+    "kortestw",
+    "kshiftlw",
+    "kshiftrw",
+    "ktestw",
+    "kxnorw",
+    "kxorw",
+    "kaddd",
+    "kandd",
+    "kandnd",
+    "kmovd",
+    "knotd",
+    "kord",
+    "kortestd",
+    "kshiftld",
+    "kshiftrd",
+    "ktestd",
+    "kxnord",
+    "kxord",
+    "kaddq",
+    "kandq",
+    "kandnq",
+    "kmovq",
+    "knotq",
+    "korq",
+    "kortestq",
+    "kshiftlq",
+    "kshiftrq",
+    "ktestq",
+    "kxnorq",
+    "kxorq",
+
+    // AVX512ER
+    "vexp2pd",
+    "vexp2ps",
+    "vexp2sd",
+    "vexp2ss",
+    "vrcp28pd",
+    "vrcp28ps",
+    "vrcp28sd",
+    "vrcp28ss",
+    "vrsqrt28pd",
+    "vrsqrt28ps",
+    "vrsqrt28sd",
+    "vrsqrt28ss",
+
+    // AVX512PF
+    "vgatherpf0dpd",
+    "vgatherpf0dps",
+    "vgatherpf0qpd",
+    "vgatherpf0qps",
+    "vgatherpf1dpd",
+    "vgatherpf1dps",
+    "vgatherpf1qpd",
+    "vgatherpf1qps",
+    "vscatterpf0dpd",
+    "vscatterpf0dps",
+    "vscatterpf0qpd",
+    "vscatterpf0qps",
+    "vscatterpf1dpd",
+    "vscatterpf1dps",
+    "vscatterpf1qpd",
+    "vscatterpf1qps",
+
+    // MPX
+    "bndmk",
+    "bndcl",
+    "bndcu",
+    "bndcn",
+    "bndmov",
+    "bndldx",
+    "bndstx",
+
+
+    // Further v-prefixed mnemonics not filed under an extension heading. NOTE(review): entry order presumably has to mirror the Opcode enum - confirm before regrouping.
+    "vgf2p8affineqb",
+    "vgf2p8affineinvqb",
+    "vpshrdq",
+    "vpshrdd",
+    "vpshrdw",
+    "vpshldq",
+    "vpshldd",
+    "vpshldw",
+    "vbroadcastf32x8",
+    "vbroadcastf64x4",
+    "vbroadcastf32x4",
+    "vbroadcastf64x2",
+    "vbroadcastf32x2",
+    "vbroadcasti32x8",
+    "vbroadcasti64x4",
+    "vbroadcasti32x4",
+    "vbroadcasti64x2",
+    "vbroadcasti32x2",
+    "vextracti32x8",
+    "vextractf32x8",
+    "vinserti32x8",
+    "vinsertf32x8",
+    "vinserti32x4",
+    "v4fnmaddss",
+    "v4fnmaddps",
+    "vcvtneps2bf16",
+    "v4fmaddss",
+    "v4fmaddps",
+    "vcvtne2ps2bf16",
+    "vp2intersectd",
+    "vp2intersectq",
+    "vp4dpwssds",
+    "vp4dpwssd",
+    "vpdpwssds",
+    "vpdpwssd",
+    "vpdpbusds",
+    "vdpbf16ps",
+    "vpbroadcastmw2d",
+    "vpbroadcastmb2q",
+    "vpmovd2m",
+    "vpmovqd",
+    "vpmovwb",
+    "vpmovdb",
+    "vpmovdw",
+    "vpmovqb",
+    "vpmovqw",
+    "vgf2p8mulb",
+    "vpmadd52huq",
+    "vpmadd52luq",
+    "vpshufbitqmb",
+    "vpermb",
+    "vpexpandd",
+    "vpexpandq",
+    "vpabsq",
+    "vprorvd",
+    "vprorvq",
+    "vpmultishiftqb",
+    "vpermt2b",
+    "vpermt2w",
+    "vpshrdvq",
+    "vpshrdvd",
+    "vpshrdvw",
+    "vpshldvq",
+    "vpshldvd",
+    "vpshldvw",
+    "vpcompressb",
+    "vpcompressw",
+    "vpexpandb",
+    "vpexpandw",
+    "vpopcntd",
+    "vpopcntq",
+    "vpopcntb",
+    "vpopcntw",
+    "vscalefss",
+    "vscalefsd",
+    "vscalefps",
+    "vscalefpd",
+    "vpdpbusd",
+    "vcvtusi2sd",
+    "vcvtusi2ss",
+    "vpxord",
+    "vpxorq",
+    "vpord",
+    "vporq",
+    "vpandnd",
+    "vpandnq",
+    "vpandd",
+    "vpandq",
 ];
 
 impl Opcode {
@@ -1328,6 +1813,95 @@ impl Opcode {
 impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
     fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result {
         match self {
+            Opcode::VGF2P8AFFINEQB |
+            Opcode::VGF2P8AFFINEINVQB |
+            Opcode::VPSHRDQ |
+            Opcode::VPSHRDD |
+            Opcode::VPSHRDW |
+            Opcode::VPSHLDQ |
+            Opcode::VPSHLDD |
+            Opcode::VPSHLDW |
+            Opcode::VBROADCASTF32X8 |
+            Opcode::VBROADCASTF64X4 |
+            Opcode::VBROADCASTF32X4 |
+            Opcode::VBROADCASTF64X2 |
+            Opcode::VBROADCASTF32X2 |
+            Opcode::VBROADCASTI32X8 |
+            Opcode::VBROADCASTI64X4 |
+            Opcode::VBROADCASTI32X4 |
+            Opcode::VBROADCASTI64X2 |
+            Opcode::VBROADCASTI32X2 |
+            Opcode::VEXTRACTI32X8 |
+            Opcode::VEXTRACTF32X8 |
+            Opcode::VINSERTI32X8 |
+            Opcode::VINSERTF32X8 |
+            Opcode::VINSERTI32X4 |
+            Opcode::V4FNMADDSS |
+            Opcode::V4FNMADDPS |
+            Opcode::VCVTNEPS2BF16 |
+            Opcode::V4FMADDSS |
+            Opcode::V4FMADDPS |
+            Opcode::VCVTNE2PS2BF16 |
+            Opcode::VP2INTERSECTD |
+            Opcode::VP2INTERSECTQ |
+            Opcode::VP4DPWSSDS |
+            Opcode::VP4DPWSSD |
+            Opcode::VPDPWSSDS |
+            Opcode::VPDPWSSD |
+            Opcode::VPDPBUSDS |
+            Opcode::VDPBF16PS |
+            Opcode::VPBROADCASTMW2D |
+            Opcode::VPBROADCASTMB2Q |
+            Opcode::VPMOVD2M |
+            Opcode::VPMOVQD |
+            Opcode::VPMOVWB |
+            Opcode::VPMOVDB |
+            Opcode::VPMOVDW |
+            Opcode::VPMOVQB |
+            Opcode::VPMOVQW |
+            Opcode::VGF2P8MULB |
+            Opcode::VPMADD52HUQ |
+            Opcode::VPMADD52LUQ |
+            Opcode::VPSHUFBITQMB |
+            Opcode::VPERMB |
+            Opcode::VPEXPANDD |
+            Opcode::VPEXPANDQ |
+            Opcode::VPABSQ |
+            Opcode::VPRORVD |
+            Opcode::VPRORVQ |
+            Opcode::VPMULTISHIFTQB |
+            Opcode::VPERMT2B |
+            Opcode::VPERMT2W |
+            Opcode::VPSHRDVQ |
+            Opcode::VPSHRDVD |
+            Opcode::VPSHRDVW |
+            Opcode::VPSHLDVQ |
+            Opcode::VPSHLDVD |
+            Opcode::VPSHLDVW |
+            Opcode::VPCOMPRESSB |
+            Opcode::VPCOMPRESSW |
+            Opcode::VPEXPANDB |
+            Opcode::VPEXPANDW |
+            Opcode::VPOPCNTD |
+            Opcode::VPOPCNTQ |
+            Opcode::VPOPCNTB |
+            Opcode::VPOPCNTW |
+            Opcode::VSCALEFSS |
+            Opcode::VSCALEFSD |
+            Opcode::VSCALEFPS |
+            Opcode::VSCALEFPD |
+            Opcode::VPDPBUSD |
+            Opcode::VCVTUSI2SD |
+            Opcode::VCVTUSI2SS |
+            Opcode::VPXORD |
+            Opcode::VPXORQ |
+            Opcode::VPORD |
+            Opcode::VPORQ |
+            Opcode::VPANDND |
+            Opcode::VPANDNQ |
+            Opcode::VPANDD |
+            Opcode::VPANDQ |
+
             Opcode::VHADDPS |
             Opcode::VHSUBPS |
             Opcode::VADDSUBPS |
@@ -1434,6 +2008,7 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPMULHRSW |
             Opcode::VPMULHUW |
             Opcode::VPMULHW |
+            Opcode::VPMULLQ |
             Opcode::VPMULLD |
             Opcode::VPMULLW |
             Opcode::VPMULUDQ |
@@ -1451,6 +2026,34 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPSUBW |
             Opcode::VROUNDPD |
             Opcode::VROUNDPS |
+            Opcode::VEXP2PD |
+            Opcode::VEXP2PS |
+            Opcode::VEXP2SD |
+            Opcode::VEXP2SS |
+            Opcode::VRCP28PD |
+            Opcode::VRCP28PS |
+            Opcode::VRCP28SD |
+            Opcode::VRCP28SS |
+            Opcode::VRCP14PD |
+            Opcode::VRCP14PS |
+            Opcode::VRCP14SD |
+            Opcode::VRCP14SS |
+            Opcode::VRNDSCALEPD |
+            Opcode::VRNDSCALEPS |
+            Opcode::VRNDSCALESD |
+            Opcode::VRNDSCALESS |
+            Opcode::VRSQRT14PD |
+            Opcode::VRSQRT14PS |
+            Opcode::VRSQRT14SD |
+            Opcode::VRSQRT14SS |
+            Opcode::VSCALEDPD |
+            Opcode::VSCALEDPS |
+            Opcode::VSCALEDSD |
+            Opcode::VSCALEDSS |
+            Opcode::VRSQRT28PD |
+            Opcode::VRSQRT28PS |
+            Opcode::VRSQRT28SD |
+            Opcode::VRSQRT28SS |
             Opcode::VRSQRTPS |
             Opcode::VSQRTPD |
             Opcode::VSQRTPS |
@@ -1470,13 +2073,14 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VSQRTSS |
             Opcode::VPSADBW |
             Opcode::VMPSADBW |
+            Opcode::VDBPSADBW |
             Opcode::VPHADDD |
             Opcode::VPHADDSW |
             Opcode::VPHADDW |
             Opcode::VPHSUBD |
             Opcode::VPHSUBSW |
             Opcode::VPHSUBW |
-            Opcode::VPHADDUBSW |
+            Opcode::VPMADDUBSW |
             Opcode::VPMADDWD |
             Opcode::VDPPD |
             Opcode::VDPPS |
@@ -1499,6 +2103,19 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPSLLVD |
             Opcode::VPSLLVQ |
             Opcode::VPSLLW |
+            Opcode::VPROLD |
+            Opcode::VPROLQ |
+            Opcode::VPROLVD |
+            Opcode::VPROLVQ |
+            Opcode::VPRORD |
+            Opcode::VPRORQ |
+            Opcode::VPRORRD |
+            Opcode::VPRORRQ |
+            Opcode::VPSLLVW |
+            Opcode::VPSRAQ |
+            Opcode::VPSRAVQ |
+            Opcode::VPSRAVW |
+            Opcode::VPSRLVW |
             Opcode::VPSRAD |
             Opcode::VPSRAVD |
             Opcode::VPSRAW |
@@ -1584,6 +2201,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::SUB |
             Opcode::POPCNT |
             Opcode::LZCNT |
+            Opcode::VPLZCNTD |
+            Opcode::VPLZCNTQ |
             Opcode::BT |
             Opcode::BTS |
             Opcode::BTR |
@@ -1702,12 +2321,48 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::FXTRACT |
             Opcode::FYL2X |
             Opcode::FYL2XP1 |
-            Opcode::AAS |
             Opcode::AAA |
+            Opcode::AAS |
             Opcode::DAS |
             Opcode::DAA |
             Opcode::ADX |
             Opcode::AMX |
+            Opcode::KADDB |
+            Opcode::KANDB |
+            Opcode::KANDNB |
+            Opcode::KNOTB |
+            Opcode::KORB |
+            Opcode::KSHIFTLB |
+            Opcode::KSHIFTRB |
+            Opcode::KXNORB |
+            Opcode::KXORB |
+            Opcode::KADDW |
+            Opcode::KANDW |
+            Opcode::KANDNW |
+            Opcode::KNOTW |
+            Opcode::KORW |
+            Opcode::KSHIFTLW |
+            Opcode::KSHIFTRW |
+            Opcode::KXNORW |
+            Opcode::KXORW |
+            Opcode::KADDD |
+            Opcode::KANDD |
+            Opcode::KANDND |
+            Opcode::KNOTD |
+            Opcode::KORD |
+            Opcode::KSHIFTLD |
+            Opcode::KSHIFTRD |
+            Opcode::KXNORD |
+            Opcode::KXORD |
+            Opcode::KADDQ |
+            Opcode::KANDQ |
+            Opcode::KANDNQ |
+            Opcode::KNOTQ |
+            Opcode::KORQ |
+            Opcode::KSHIFTLQ |
+            Opcode::KSHIFTRQ |
+            Opcode::KXNORQ |
+            Opcode::KXORQ |
             Opcode::IMUL => { write!(out, "{}", colors.arithmetic_op(self)) }
             Opcode::POPF |
             Opcode::PUSHF |
@@ -1786,12 +2441,43 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VCVTSS2SI |
             Opcode::VCVTTSD2SI |
             Opcode::VCVTTSS2SI |
+            Opcode::VCVTPD2UDQ |
+            Opcode::VCVTTPD2UDQ |
+            Opcode::VCVTPS2UDQ |
+            Opcode::VCVTTPS2UDQ |
+            Opcode::VCVTQQ2PD |
+            Opcode::VCVTQQ2PS |
+            Opcode::VCVTSD2USI |
+            Opcode::VCVTTSD2USI |
+            Opcode::VCVTSS2USI |
+            Opcode::VCVTTSS2USI |
+            Opcode::VCVTUDQ2PD |
+            Opcode::VCVTUDQ2PS |
+            Opcode::VCVTUSI2USD |
+            Opcode::VCVTUSI2USS |
+            Opcode::VCVTTPD2QQ |
+            Opcode::VCVTPD2QQ |
+            Opcode::VCVTTPD2UQQ |
+            Opcode::VCVTPD2UQQ |
+            Opcode::VCVTTPS2QQ |
+            Opcode::VCVTPS2QQ |
+            Opcode::VCVTTPS2UQQ |
+            Opcode::VCVTPS2UQQ |
+            Opcode::VCVTUQQ2PD |
+            Opcode::VCVTUQQ2PS |
             Opcode::VMOVDDUP |
             Opcode::VPSHUFLW |
+            Opcode::VPSHUFHW |
+            Opcode::VBLENDMPD |
+            Opcode::VBLENDMPS |
+            Opcode::VPBLENDMD |
+            Opcode::VPBLENDMQ |
             Opcode::VBLENDPD |
             Opcode::VBLENDPS |
             Opcode::VBLENDVPD |
             Opcode::VBLENDVPS |
+            Opcode::VPBLENDMB |
+            Opcode::VPBLENDMW |
             Opcode::PBLENDVB |
             Opcode::PBLENDW |
             Opcode::BLENDPD |
@@ -1803,6 +2489,7 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VBROADCASTI128 |
             Opcode::VBROADCASTSD |
             Opcode::VBROADCASTSS |
+            Opcode::VPBROADCASTM |
             Opcode::VEXTRACTF128 |
             Opcode::VEXTRACTI128 |
             Opcode::VEXTRACTPS |
@@ -1811,10 +2498,49 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VGATHERDPS |
             Opcode::VGATHERQPD |
             Opcode::VGATHERQPS |
+            Opcode::VGATHERPF0DPD |
+            Opcode::VGATHERPF0DPS |
+            Opcode::VGATHERPF0QPD |
+            Opcode::VGATHERPF0QPS |
+            Opcode::VGATHERPF1DPD |
+            Opcode::VGATHERPF1DPS |
+            Opcode::VGATHERPF1QPD |
+            Opcode::VGATHERPF1QPS |
+            Opcode::VSCATTERDD |
+            Opcode::VSCATTERDQ |
+            Opcode::VSCATTERQD |
+            Opcode::VSCATTERQQ |
+            Opcode::VPSCATTERDD |
+            Opcode::VPSCATTERDQ |
+            Opcode::VPSCATTERQD |
+            Opcode::VPSCATTERQQ |
+            Opcode::VSCATTERPF0DPD |
+            Opcode::VSCATTERPF0DPS |
+            Opcode::VSCATTERPF0QPD |
+            Opcode::VSCATTERPF0QPS |
+            Opcode::VSCATTERPF1DPD |
+            Opcode::VSCATTERPF1DPS |
+            Opcode::VSCATTERPF1QPD |
+            Opcode::VSCATTERPF1QPS |
             Opcode::VINSERTF128 |
             Opcode::VINSERTI128 |
             Opcode::VINSERTPS |
             Opcode::INSERTPS |
+            Opcode::VEXTRACTF32X4 |
+            Opcode::VEXTRACTF64X2 |
+            Opcode::VEXTRACTF64X4 |
+            Opcode::VEXTRACTI32X4 |
+            Opcode::VEXTRACTI64X2 |
+            Opcode::VEXTRACTI64X4 |
+            Opcode::VINSERTF32X4 |
+            Opcode::VINSERTF64X2 |
+            Opcode::VINSERTF64X4 |
+            Opcode::VINSERTI64X2 |
+            Opcode::VINSERTI64X4 |
+            Opcode::VSHUFF32X4 |
+            Opcode::VSHUFF64X2 |
+            Opcode::VSHUFI32X4 |
+            Opcode::VSHUFI64X2 |
             Opcode::VMASKMOVDQU |
             Opcode::VMASKMOVPD |
             Opcode::VMASKMOVPS |
@@ -1845,6 +2571,32 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VMOVUPS |
             Opcode::VMOVSD |
             Opcode::VMOVSS |
+            Opcode::VMOVDQA32 |
+            Opcode::VMOVDQA64 |
+            Opcode::VMOVDQU32 |
+            Opcode::VMOVDQU64 |
+            Opcode::VPMOVM2B |
+            Opcode::VPMOVM2W |
+            Opcode::VPMOVB2M |
+            Opcode::VPMOVW2M |
+            Opcode::VPMOVSWB |
+            Opcode::VPMOVUSWB |
+            Opcode::VPMOVSQB |
+            Opcode::VPMOVUSQB |
+            Opcode::VPMOVSQW |
+            Opcode::VPMOVUSQW |
+            Opcode::VPMOVSQD |
+            Opcode::VPMOVUSQD |
+            Opcode::VPMOVSDB |
+            Opcode::VPMOVUSDB |
+            Opcode::VPMOVSDW |
+            Opcode::VPMOVUSDW |
+            Opcode::VPMOVM2D |
+            Opcode::VPMOVM2Q |
+            Opcode::VPMOVB2D |
+            Opcode::VPMOVQ2M |
+            Opcode::VMOVDQU8 |
+            Opcode::VMOVDQU16 |
 
             Opcode::VPBLENDD |
             Opcode::VPBLENDVB |
@@ -1883,6 +2635,9 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::PMOVZXDQ |
             Opcode::PMOVZXWD |
             Opcode::PMOVZXWQ |
+            Opcode::KUNPCKBW |
+            Opcode::KUNPCKWD |
+            Opcode::KUNPCKDQ |
             Opcode::VUNPCKHPD |
             Opcode::VUNPCKHPS |
             Opcode::VUNPCKLPD |
@@ -1898,9 +2653,12 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VSHUFPD |
             Opcode::VSHUFPS |
             Opcode::VPACKSSDW |
+            Opcode::VPACKUSDW |
             Opcode::PACKUSDW |
             Opcode::VPACKSSWB |
             Opcode::VPACKUSWB |
+            Opcode::VALIGND |
+            Opcode::VALIGNQ |
             Opcode::VPALIGNR |
             Opcode::PALIGNR |
             Opcode::VPERM2F128 |
@@ -1911,6 +2669,17 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPERMPD |
             Opcode::VPERMPS |
             Opcode::VPERMQ |
+            Opcode::VPERMI2D |
+            Opcode::VPERMI2Q |
+            Opcode::VPERMI2PD |
+            Opcode::VPERMI2PS |
+            Opcode::VPERMT2D |
+            Opcode::VPERMT2Q |
+            Opcode::VPERMT2PD |
+            Opcode::VPERMT2PS |
+            Opcode::VPERMI2B |
+            Opcode::VPERMI2W |
+            Opcode::VPERMW |
             Opcode::VPEXTRB |
             Opcode::VPEXTRD |
             Opcode::VPEXTRQ |
@@ -1929,11 +2698,34 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPINSRW |
             Opcode::VPMASKMOVD |
             Opcode::VPMASKMOVQ |
+            Opcode::VCOMPRESSPD |
+            Opcode::VCOMPRESSPS |
+            Opcode::VPCOMPRESSQ |
+            Opcode::VPCOMPRESSD |
+            Opcode::VEXPANDPD |
+            Opcode::VEXPANDPS |
             Opcode::VPSHUFB |
             Opcode::VPSHUFD |
             Opcode::VPHMINPOSUW |
             Opcode::PHMINPOSUW |
             Opcode::VZEROUPPER |
+            Opcode::VZEROALL |
+            Opcode::VFIXUPIMMPD |
+            Opcode::VFIXUPIMMPS |
+            Opcode::VFIXUPIMMSD |
+            Opcode::VFIXUPIMMSS |
+            Opcode::VREDUCEPD |
+            Opcode::VREDUCEPS |
+            Opcode::VREDUCESD |
+            Opcode::VREDUCESS |
+            Opcode::VGETEXPPD |
+            Opcode::VGETEXPPS |
+            Opcode::VGETEXPSD |
+            Opcode::VGETEXPSS |
+            Opcode::VGETMANTPD |
+            Opcode::VGETMANTPS |
+            Opcode::VGETMANTSD |
+            Opcode::VGETMANTSS |
             Opcode::VLDDQU |
             Opcode::BSWAP |
             Opcode::CVTDQ2PD |
@@ -2001,6 +2793,11 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::SHUFPD |
             Opcode::SHUFPS |
             Opcode::PMOVMSKB |
+            Opcode::KMOVB |
+            Opcode::KMOVW |
+            Opcode::KMOVD |
+            Opcode::KMOVQ |
+            Opcode::BNDMOV |
             Opcode::LDDQU |
             Opcode::CMC |
             Opcode::CLC |
@@ -2117,6 +2914,32 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VCOMISS |
             Opcode::VUCOMISD |
             Opcode::VUCOMISS |
+            Opcode::KORTESTB |
+            Opcode::KTESTB |
+            Opcode::KORTESTW |
+            Opcode::KTESTW |
+            Opcode::KORTESTD |
+            Opcode::KTESTD |
+            Opcode::KORTESTQ |
+            Opcode::KTESTQ |
+            Opcode::VPTESTNMD |
+            Opcode::VPTESTNMQ |
+            Opcode::VPTERNLOGD |
+            Opcode::VPTERNLOGQ |
+            Opcode::VPTESTMD |
+            Opcode::VPTESTMQ |
+            Opcode::VPTESTNMB |
+            Opcode::VPTESTNMW |
+            Opcode::VPTESTMB |
+            Opcode::VPTESTMW |
+            Opcode::VPCMPD |
+            Opcode::VPCMPUD |
+            Opcode::VPCMPQ |
+            Opcode::VPCMPUQ |
+            Opcode::VPCMPB |
+            Opcode::VPCMPUB |
+            Opcode::VPCMPW |
+            Opcode::VPCMPUW |
             Opcode::VCMPPD |
             Opcode::VCMPPS |
             Opcode::VCMPSD |
@@ -2125,6 +2948,10 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VMAXPS |
             Opcode::VMAXSD |
             Opcode::VMAXSS |
+            Opcode::VPMAXSQ |
+            Opcode::VPMAXUQ |
+            Opcode::VPMINSQ |
+            Opcode::VPMINUQ |
             Opcode::VMINPD |
             Opcode::VMINPS |
             Opcode::VMINSD |
@@ -2137,6 +2964,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPCMPGTD |
             Opcode::VPCMPGTQ |
             Opcode::VPCMPGTW |
+            Opcode::VPCMPESTRI |
+            Opcode::VPCMPESTRM |
             Opcode::VPCMPISTRI |
             Opcode::VPCMPISTRM |
             Opcode::VPMAXSB |
@@ -2145,9 +2974,22 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::VPMAXUB |
             Opcode::VPMAXUW |
             Opcode::VPMAXUD |
+            Opcode::VPMINSB |
             Opcode::VPMINSW |
             Opcode::VPMINSD |
+            Opcode::VPMINUB |
+            Opcode::VPMINUW |
             Opcode::VPMINUD |
+            Opcode::VFPCLASSPD |
+            Opcode::VFPCLASSPS |
+            Opcode::VFPCLASSSD |
+            Opcode::VFPCLASSSS |
+            Opcode::VRANGEPD |
+            Opcode::VRANGEPS |
+            Opcode::VRANGESD |
+            Opcode::VRANGESS |
+            Opcode::VPCONFLICTD |
+            Opcode::VPCONFLICTQ |
             Opcode::VPTEST |
             Opcode::VTESTPD |
             Opcode::VTESTPS |
@@ -2228,6 +3070,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::FXRSTOR |
             Opcode::LDMXCSR |
             Opcode::STMXCSR |
+            Opcode::VLDMXCSR |
+            Opcode::VSTMXCSR |
             Opcode::XSAVE |
             Opcode::XSAVEC |
             Opcode::XSAVES |
@@ -2337,7 +3181,14 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {
             Opcode::SENDUIPI |
             Opcode::XSUSLDTRK |
             Opcode::XRESLDTRK |
+            Opcode::BOUND |
             Opcode::ARPL |
+            Opcode::BNDMK |
+            Opcode::BNDCL |
+            Opcode::BNDCU |
+            Opcode::BNDCN |
+            Opcode::BNDLDX |
+            Opcode::BNDSTX |
             Opcode::LAR => { write!(out, "{}", colors.platform_op(self)) }
 
             Opcode::CRC32 |
@@ -2462,6 +3313,17 @@ impl Instruction {
     }
 }
 
+const MEM_SIZE_STRINGS: [&'static str; 64] = [ // size labels for memory operands; the Intel-syntax printer reads entry `mem_size - 1`
+    "byte", "word", "BUG", "dword", "BUG", "BUG", "BUG", "qword", // mem_size 1..=8
+    "far", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", // mem_size 9..=16
+    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", // mem_size 17..=24
+    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ymmword", // mem_size 25..=32
+    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", // mem_size 33..=40
+    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", // mem_size 41..=48
+    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", // mem_size 49..=56
+    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ptr", "zmmword", // mem_size 57..=64
+]; // NOTE(review): "BUG" presumably marks sizes no operand should report, and a `mem_size` of 0 has no entry at all - confirm
+
 fn contextualize_intel<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result {
     if instr.prefixes.lock() {
         write!(out, "lock ")?;
@@ -2469,11 +3331,8 @@ fn contextualize_intel<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors:
 
     if instr.prefixes.rep_any() {
         if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) {
-            // only a few of you actually use the prefix...
             if instr.prefixes.rep() {
                 write!(out, "rep ")?;
-            } else if instr.prefixes.repz() {
-                write!(out, "repz ")?;
             } else if instr.prefixes.repnz() {
                 write!(out, "repnz ")?;
             }
@@ -2489,53 +3348,19 @@ fn contextualize_intel<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors:
     if instr.operand_count > 0 {
         out.write_str(" ")?;
 
+        let x = Operand::from_spec(instr, instr.operands[0]);
+        if x.is_memory() {
+            out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?;
+            out.write_str(" ")?;
+        }
+
         if let Some(prefix) = instr.segment_override_for_op(0) {
             write!(out, "{}:", prefix)?;
         }
-
-        let x = Operand::from_spec(instr, instr.operands[0]);
         x.colorize(colors, out)?;
 
         for i in 1..instr.operand_count {
             match instr.opcode {
-                Opcode::MOVSX_b |
-                Opcode::MOVZX_b => {
-                    match &instr.operands[i as usize] {
-                        &OperandSpec::Nothing => {
-                            return Ok(());
-                        },
-                        &OperandSpec::RegMMM => {
-                            out.write_str(", ")?;
-                        }
-                        _ => {
-                            out.write_str(", byte ")?;
-                            if let Some(prefix) = instr.segment_override_for_op(i) {
-                                write!(out, "{}:", prefix)?;
-                            }
-                        }
-                    }
-                    let x = Operand::from_spec(instr, instr.operands[i as usize]);
-                    x.colorize(colors, out)?
-                },
-                Opcode::MOVSX_w |
-                Opcode::MOVZX_w => {
-                    match &instr.operands[i as usize] {
-                        &OperandSpec::Nothing => {
-                            return Ok(());
-                        },
-                        &OperandSpec::RegMMM => {
-                            out.write_str(", ")?;
-                        }
-                        _ => {
-                            out.write_str(", word ")?;
-                            if let Some(prefix) = instr.segment_override_for_op(i) {
-                                write!(out, "{}:", prefix)?;
-                            }
-                        }
-                    }
-                    let x = Operand::from_spec(instr, instr.operands[i as usize]);
-                    x.colorize(colors, out)?
-                },
                 _ => {
                     match &instr.operands[i as usize] {
                         &OperandSpec::Nothing => {
@@ -2543,11 +3368,57 @@ fn contextualize_intel<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors:
                         },
                         _ => {
                             out.write_str(", ")?;
+                            let x = Operand::from_spec(instr, instr.operands[i as usize]);
+                            if x.is_memory() {
+                                out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?;
+                                out.write_str(" ")?;
+                            }
                             if let Some(prefix) = instr.segment_override_for_op(i) {
                                 write!(out, "{}:", prefix)?;
                             }
-                            let x = Operand::from_spec(instr, instr.operands[i as usize]);
-                            x.colorize(colors, out)?
+                            x.colorize(colors, out)?;
+                            if let Some(evex) = instr.prefixes.evex() {
+                                if evex.broadcast() && x.is_memory() {
+                                    let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ {
+                                        if instr.opcode == Opcode::VFPCLASSPS || instr.opcode ==  Opcode::VCVTNEPS2BF16 {
+                                            if evex.vex().l() {
+                                                8
+                                            } else if evex.lp() {
+                                                16
+                                            } else {
+                                                4
+                                            }
+                                        } else if instr.opcode == Opcode::VFPCLASSPD {
+                                            if evex.vex().l() {
+                                                4
+                                            } else if evex.lp() {
+                                                8
+                                            } else {
+                                                2
+                                            }
+                                        } else {
+                                            // vcvtpd2ps is "cool": in broadcast mode, it can read a
+                                            // double-precision float (qword), resize to single-precision,
+                                            // then broadcast that to the whole destination register. this
+                                            // means we need to show `xmm, qword [addr]{1to4}` if vector
+                                            // size is 256. likewise, scale of 8 for the same truncation
+                                            // reason if vector size is 512.
+                                            // vcvtudq2pd is the same story.
+                                            // vfpclassp{s,d} is a mystery to me.
+                                            if evex.vex().l() {
+                                                4
+                                            } else if evex.lp() {
+                                                8
+                                            } else {
+                                                2
+                                            }
+                                        }
+                                    } else {
+                                        Operand::from_spec(instr, instr.operands[i as usize - 1]).width() / instr.mem_size
+                                    };
+                                    write!(out, "{{1to{}}}", scale)?;
+                                }
+                            }
                         }
                     }
                 }
@@ -2578,8 +3449,6 @@ fn contextualize_c<T: fmt::Write, Y: YaxColors>(instr: &Instruction, _colors: &Y
             // only a few of you actually use the prefix...
             if instr.prefixes.rep() {
                 out.write_str("rep ")?;
-            } else if instr.prefixes.repz() {
-                out.write_str("repz ")?;
             } else if instr.prefixes.repnz() {
                 out.write_str("repnz ")?;
             } // TODO: other rep kinds?
@@ -2804,8 +3673,6 @@ impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::St
             // only a few of you actually use the prefix...
             if self.prefixes.rep() {
                 write!(out, "rep ")?;
-            } else if self.prefixes.repz() {
-                write!(out, "repz ")?;
             } else if self.prefixes.repnz() {
                 write!(out, "repnz ")?;
             }
@@ -2831,7 +3698,8 @@ impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::St
                 x.colorize(colors, out)?;
             }
         };
-        for i in 1..4 {
+        for i in 1..self.operand_count {
+            let i = i as usize;
             match self.opcode {
                 Opcode::MOVSX_b |
                 Opcode::MOVZX_b => {
diff --git a/src/protected_mode/evex.rs b/src/protected_mode/evex.rs
new file mode 100644
index 0000000..9d2a093
--- /dev/null
+++ b/src/protected_mode/evex.rs
@@ -0,0 +1,17 @@
+// use crate::long_mode::{OperandSpec, DecodeError, RegSpec, RegisterBank, Instruction, Opcode};
+use crate::protected_mode::{DecodeError, RegSpec, RegisterBank, Instruction, Opcode};
+use crate::protected_mode::{read_modrm, read_E_vex, read_imm_unsigned};
+
+const DEFAULT_EVEX_REGISTER_SIZE: RegisterBank = RegisterBank::D;
+const DEFAULT_EVEX_REGISTER_WIDTH: u8 = 4;
+
+fn isa_has_qwords() -> bool {
+    false
+}
+
+fn apply_disp_scale(inst: &mut Instruction) { // multiplies `inst.disp` by `inst.mem_size`, in place
+    inst.disp = inst.disp.wrapping_mul(inst.mem_size as u32); // `disp` may hold a negative value as two's complement: wrap instead of overflowing
+}
+
+include!("../shared/generated_evex.in");
+include!("../shared/evex.in");
diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs
index bbcb9cb..dec7c86 100644
--- a/src/protected_mode/mod.rs
+++ b/src/protected_mode/mod.rs
@@ -1,4 +1,5 @@
 mod vex;
+mod evex;
 #[cfg(feature = "fmt")]
 mod display;
 pub mod uarch;
@@ -131,6 +132,19 @@ impl RegSpec {
         }
     }
 
+    /// construct a `RegSpec` for mask reg `num` (`k0` through `k7`); panics if `num` is 8 or more
+    #[inline]
+    pub fn mask(num: u8) -> RegSpec {
+        if num >= 8 {
+            panic!("invalid x86 mask reg {}", num);
+        }
+
+        RegSpec {
+            num,
+            bank: RegisterBank::K
+        }
+    }
+
     /// construct a `RegSpec` for dword reg `num`
     #[inline]
     pub fn d(num: u8) -> RegSpec {
@@ -378,6 +392,9 @@ pub enum Operand {
     ImmediateU32(u32),
     ImmediateI32(i32),
     Register(RegSpec),
+    RegisterMaskMerge(RegSpec, RegSpec, MergeMode),
+    RegisterMaskMergeSae(RegSpec, RegSpec, MergeMode, SaeMode),
+    RegisterMaskMergeSaeNoround(RegSpec, RegSpec, MergeMode),
     DisplacementU16(u16),
     DisplacementU32(u32),
     RegDeref(RegSpec),
@@ -388,10 +405,32 @@ pub enum Operand {
     RegScaleDisp(RegSpec, u8, i32),
     RegIndexBaseScale(RegSpec, RegSpec, u8),
     RegIndexBaseScaleDisp(RegSpec, RegSpec, u8, i32),
+    RegDerefMasked(RegSpec, RegSpec),
+    RegDispMasked(RegSpec, i32, RegSpec),
+    RegScaleMasked(RegSpec, u8, RegSpec),
+    RegIndexBaseMasked(RegSpec, RegSpec, RegSpec),
+    RegIndexBaseDispMasked(RegSpec, RegSpec, i32, RegSpec),
+    RegScaleDispMasked(RegSpec, u8, i32, RegSpec),
+    RegIndexBaseScaleMasked(RegSpec, RegSpec, u8, RegSpec),
+    RegIndexBaseScaleDispMasked(RegSpec, RegSpec, u8, i32, RegSpec),
     Nothing,
 }
 
 impl OperandSpec {
+    fn masked(self) -> Self { // the mask-aware counterpart of this spec, where one exists
+        match self {
+            OperandSpec::RegRRR => OperandSpec::RegRRR_maskmerge,
+            OperandSpec::RegMMM => OperandSpec::RegMMM_maskmerge,
+            OperandSpec::RegVex => OperandSpec::RegVex_maskmerge,
+            OperandSpec::Deref => OperandSpec::Deref_mask,
+            OperandSpec::RegDisp => OperandSpec::RegDisp_mask,
+            OperandSpec::RegScale => OperandSpec::RegScale_mask,
+            OperandSpec::RegScaleDisp => OperandSpec::RegScaleDisp_mask,
+            OperandSpec::RegIndexBaseScale => OperandSpec::RegIndexBaseScale_mask,
+            OperandSpec::RegIndexBaseScaleDisp => OperandSpec::RegIndexBaseScaleDisp_mask,
+            o => o, // no masked form: returned as-is. NOTE(review): this includes `RegIndexBaseDisp` - confirm intended
+        }
+    }
     pub fn is_memory(&self) -> bool {
         match self {
             OperandSpec::DispU16 |
@@ -405,7 +444,13 @@ impl OperandSpec {
             OperandSpec::RegIndexBaseDisp |
             OperandSpec::RegScaleDisp |
             OperandSpec::RegIndexBaseScale |
-            OperandSpec::RegIndexBaseScaleDisp => {
+            OperandSpec::RegIndexBaseScaleDisp |
+            OperandSpec::Deref_mask |
+            OperandSpec::RegDisp_mask |
+            OperandSpec::RegScale_mask |
+            OperandSpec::RegScaleDisp_mask |
+            OperandSpec::RegIndexBaseScale_mask |
+            OperandSpec::RegIndexBaseScaleDisp_mask => {
                 true
             },
             OperandSpec::ImmI8 |
@@ -414,8 +459,14 @@ impl OperandSpec {
             OperandSpec::ImmU8 |
             OperandSpec::ImmU16 |
             OperandSpec::RegRRR |
+            OperandSpec::RegRRR_maskmerge |
+            OperandSpec::RegRRR_maskmerge_sae |
+            OperandSpec::RegRRR_maskmerge_sae_noround |
             OperandSpec::RegMMM |
+            OperandSpec::RegMMM_maskmerge |
+            OperandSpec::RegMMM_maskmerge_sae_noround |
             OperandSpec::RegVex |
+            OperandSpec::RegVex_maskmerge |
             OperandSpec::Reg4 |
             OperandSpec::ImmInDispField |
             OperandSpec::Nothing => {
@@ -424,6 +475,54 @@ impl OperandSpec {
         }
     }
 }
+/// whether an EVEX-encoded register operand is merged or zeroed; `From<bool>` maps `true` to `Zero`.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum MergeMode {
+    Merge,
+    Zero,
+}
+impl From<bool> for MergeMode {
+    fn from(b: bool) -> Self {
+        match b {
+            true => MergeMode::Zero,
+            false => MergeMode::Merge,
+        }
+    }
+}
+/// rounding mode shown on EVEX operands whose exceptions are suppressed (see `label`).
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum SaeMode {
+    RoundNearest,
+    RoundDown,
+    RoundUp,
+    RoundZero,
+}
+// every `SaeMode`, ordered so that `SaeMode::from` can select one by a two-bit index.
+const SAE_MODES: [SaeMode; 4] = [
+    SaeMode::RoundNearest,
+    SaeMode::RoundDown,
+    SaeMode::RoundUp,
+    SaeMode::RoundZero,
+];
+impl SaeMode {
+    /// the braced suffix naming this mode, for example `{rne-sae}`.
+    pub fn label(&self) -> &'static str {
+        match *self {
+            SaeMode::RoundNearest => "{rne-sae}",
+            SaeMode::RoundDown => "{rd-sae}",
+            SaeMode::RoundUp => "{ru-sae}",
+            SaeMode::RoundZero => "{rz-sae}",
+        }
+    }
+
+    /// pick a mode from two flags: `l` contributes bit 0 and `lp` contributes bit 1 of the
+    /// index into `SAE_MODES`.
+    fn from(l: bool, lp: bool) -> Self {
+        let low_bit: usize = if l { 1 } else { 0 };
+        let high_bit: usize = if lp { 2 } else { 0 };
+        SAE_MODES[low_bit | high_bit]
+    }
+}
 impl Operand {
     fn from_spec(inst: &Instruction, spec: OperandSpec) -> Operand {
         match spec {
@@ -434,13 +533,56 @@ impl Operand {
             OperandSpec::RegRRR => {
                 Operand::Register(inst.modrm_rrr)
             }
+            OperandSpec::RegRRR_maskmerge => {
+                Operand::RegisterMaskMerge(
+                    inst.modrm_rrr,
+                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()),
+                    MergeMode::from(inst.prefixes.evex_unchecked().merge()),
+                )
+            }
+            OperandSpec::RegRRR_maskmerge_sae => {
+                Operand::RegisterMaskMergeSae(
+                    inst.modrm_rrr,
+                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()),
+                    MergeMode::from(inst.prefixes.evex_unchecked().merge()),
+                    SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()),
+                )
+            }
+            OperandSpec::RegRRR_maskmerge_sae_noround => {
+                Operand::RegisterMaskMergeSaeNoround(
+                    inst.modrm_rrr,
+                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()),
+                    MergeMode::from(inst.prefixes.evex_unchecked().merge()),
+                )
+            }
             // the register in modrm_mmm (eg modrm mod bits were 11)
             OperandSpec::RegMMM => {
                 Operand::Register(inst.modrm_mmm)
             }
+            OperandSpec::RegMMM_maskmerge => {
+                Operand::RegisterMaskMerge(
+                    inst.modrm_mmm,
+                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()),
+                    MergeMode::from(inst.prefixes.evex_unchecked().merge()),
+                )
+            }
+            OperandSpec::RegMMM_maskmerge_sae_noround => {
+                Operand::RegisterMaskMergeSaeNoround(
+                    inst.modrm_mmm,
+                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()),
+                    MergeMode::from(inst.prefixes.evex_unchecked().merge()),
+                )
+            }
             OperandSpec::RegVex => {
                 Operand::Register(inst.vex_reg)
             }
+            OperandSpec::RegVex_maskmerge => {
+                Operand::RegisterMaskMerge(
+                    inst.vex_reg,
+                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()),
+                    MergeMode::from(inst.prefixes.evex_unchecked().merge()),
+                )
+            }
             OperandSpec::Reg4 => {
                 Operand::Register(RegSpec { num: inst.imm as u8, bank: inst.vex_reg.bank })
             }
@@ -482,6 +624,48 @@ impl Operand {
             OperandSpec::RegIndexBaseScaleDisp => {
                 Operand::RegIndexBaseScaleDisp(inst.modrm_mmm, inst.sib_index, inst.scale, inst.disp as i32)
             }
+            OperandSpec::Deref_mask => {
+                if inst.prefixes.evex_unchecked().mask_reg() != 0 {
+                    Operand::RegDerefMasked(inst.modrm_mmm, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()))
+                } else {
+                    Operand::RegDeref(inst.modrm_mmm)
+                }
+            }
+            OperandSpec::RegDisp_mask => {
+                if inst.prefixes.evex_unchecked().mask_reg() != 0 {
+                    Operand::RegDispMasked(inst.modrm_mmm, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()))
+                } else {
+                    Operand::RegDisp(inst.modrm_mmm, inst.disp as i32)
+                }
+            }
+            OperandSpec::RegScale_mask => {
+                if inst.prefixes.evex_unchecked().mask_reg() != 0 {
+                    Operand::RegScaleMasked(inst.sib_index, inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()))
+                } else {
+                    Operand::RegScale(inst.sib_index, inst.scale)
+                }
+            }
+            OperandSpec::RegScaleDisp_mask => {
+                if inst.prefixes.evex_unchecked().mask_reg() != 0 {
+                    Operand::RegScaleDispMasked(inst.sib_index, inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()))
+                } else {
+                    Operand::RegScaleDisp(inst.sib_index, inst.scale, inst.disp as i32)
+                }
+            }
+            OperandSpec::RegIndexBaseScale_mask => {
+                if inst.prefixes.evex_unchecked().mask_reg() != 0 {
+                    Operand::RegIndexBaseScaleMasked(inst.modrm_mmm, inst.sib_index, inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()))
+                } else {
+                    Operand::RegIndexBaseScale(inst.modrm_mmm, inst.sib_index, inst.scale)
+                }
+            }
+            OperandSpec::RegIndexBaseScaleDisp_mask => {
+                if inst.prefixes.evex_unchecked().mask_reg() != 0 {
+                    Operand::RegIndexBaseScaleDispMasked(inst.modrm_mmm, inst.sib_index, inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()))
+                } else {
+                    Operand::RegIndexBaseScaleDisp(inst.modrm_mmm, inst.sib_index, inst.scale, inst.disp as i32)
+                }
+            }
         }
     }
     pub fn is_memory(&self) -> bool {
@@ -495,7 +679,15 @@ impl Operand {
             Operand::RegIndexBaseDisp(_, _, _) |
             Operand::RegScaleDisp(_, _, _) |
             Operand::RegIndexBaseScale(_, _, _) |
-            Operand::RegIndexBaseScaleDisp(_, _, _, _) => {
+            Operand::RegIndexBaseScaleDisp(_, _, _, _) |
+            Operand::RegDerefMasked(_, _) |
+            Operand::RegDispMasked(_, _, _) |
+            Operand::RegScaleMasked(_, _, _) |
+            Operand::RegIndexBaseMasked(_, _, _) |
+            Operand::RegIndexBaseDispMasked(_, _, _, _) |
+            Operand::RegScaleDispMasked(_, _, _, _) |
+            Operand::RegIndexBaseScaleMasked(_, _, _, _) |
+            Operand::RegIndexBaseScaleDispMasked(_, _, _, _, _) => {
                 true
             },
             Operand::ImmediateI8(_) |
@@ -505,6 +697,9 @@ impl Operand {
             Operand::ImmediateU32(_) |
             Operand::ImmediateI32(_) |
             Operand::Register(_) |
+            Operand::RegisterMaskMerge(_, _, _) |
+            Operand::RegisterMaskMergeSae(_, _, _, _) |
+            Operand::RegisterMaskMergeSaeNoround(_, _, _) |
             Operand::Nothing => {
                 false
             }
@@ -523,6 +718,9 @@ impl Operand {
             Operand::Register(reg) => {
                 reg.width()
             }
+            Operand::RegisterMaskMerge(reg, _, _) => {
+                reg.width()
+            }
             Operand::ImmediateI8(_) |
             Operand::ImmediateU8(_) => {
                 1
@@ -1186,6 +1384,7 @@ pub enum Opcode {
 
     VMOVDDUP,
     VPSHUFLW,
+    VPSHUFHW,
     VHADDPS,
     VHSUBPS,
     VADDSUBPS,
@@ -1358,6 +1557,7 @@ pub enum Opcode {
     VPABSD,
     VPABSW,
     VPACKSSDW,
+    VPACKUSDW,
     VPACKSSWB,
     VPACKUSWB,
     VPADDB,
@@ -1395,6 +1595,8 @@ pub enum Opcode {
     VPCMPGTD,
     VPCMPGTQ,
     VPCMPGTW,
+    VPCMPESTRI,
+    VPCMPESTRM,
     VPCMPISTRI,
     VPCMPISTRM,
     VPERM2F128,
@@ -1416,7 +1618,7 @@ pub enum Opcode {
     VPHADDD,
     VPHADDSW,
     VPHADDW,
-    VPHADDUBSW,
+    VPMADDUBSW,
     VPHMINPOSUW,
     VPHSUBD,
     VPHSUBSW,
@@ -1434,8 +1636,11 @@ pub enum Opcode {
     VPMAXUB,
     VPMAXUW,
     VPMAXUD,
+    VPMINSB,
     VPMINSW,
     VPMINSD,
+    VPMINUB,
+    VPMINUW,
     VPMINUD,
     VPMOVMSKB,
     VPMOVSXBD,
@@ -1454,6 +1659,7 @@ pub enum Opcode {
     VPMULHRSW,
     VPMULHUW,
     VPMULHW,
+    VPMULLQ,
     VPMULLD,
     VPMULLW,
     VPMULUDQ,
@@ -1524,6 +1730,9 @@ pub enum Opcode {
     VXORPD,
     VXORPS,
     VZEROUPPER,
+    VZEROALL,
+    VLDMXCSR,
+    VSTMXCSR,
 
     PCLMULQDQ,
     AESKEYGENASSIST,
@@ -1763,6 +1972,7 @@ pub enum Opcode {
 
     PUSHA,
     POPA,
+    BOUND,
     ARPL,
     AAS,
     AAA,
@@ -1863,6 +2073,376 @@ pub enum Opcode {
     // TSXLDTRK
     XSUSLDTRK,
     XRESLDTRK,
+
+    // AVX512F
+    VALIGND,
+    VALIGNQ,
+    VBLENDMPD,
+    VBLENDMPS,
+    VCOMPRESSPD,
+    VCOMPRESSPS,
+    VCVTPD2UDQ,
+    VCVTTPD2UDQ,
+    VCVTPS2UDQ,
+    VCVTTPS2UDQ,
+    VCVTQQ2PD,
+    VCVTQQ2PS,
+    VCVTSD2USI,
+    VCVTTSD2USI,
+    VCVTSS2USI,
+    VCVTTSS2USI,
+    VCVTUDQ2PD,
+    VCVTUDQ2PS,
+    VCVTUSI2USD,
+    VCVTUSI2USS,
+    VEXPANDPD,
+    VEXPANDPS,
+    VEXTRACTF32X4,
+    VEXTRACTF64X4,
+    VEXTRACTI32X4,
+    VEXTRACTI64X4,
+    VFIXUPIMMPD,
+    VFIXUPIMMPS,
+    VFIXUPIMMSD,
+    VFIXUPIMMSS,
+    VGETEXPPD,
+    VGETEXPPS,
+    VGETEXPSD,
+    VGETEXPSS,
+    VGETMANTPD,
+    VGETMANTPS,
+    VGETMANTSD,
+    VGETMANTSS,
+    VINSERTF32X4,
+    VINSERTF64X4,
+    VINSERTI64X4,
+    VMOVDQA32,
+    VMOVDQA64,
+    VMOVDQU32,
+    VMOVDQU64,
+    VPBLENDMD,
+    VPBLENDMQ,
+    VPCMPD,
+    VPCMPUD,
+    VPCMPQ,
+    VPCMPUQ,
+    VPCOMPRESSQ,
+    VPCOMPRESSD,
+    VPERMI2D,
+    VPERMI2Q,
+    VPERMI2PD,
+    VPERMI2PS,
+    VPERMT2D,
+    VPERMT2Q,
+    VPERMT2PD,
+    VPERMT2PS,
+    VPMAXSQ,
+    VPMAXUQ,
+    VPMINSQ,
+    VPMINUQ,
+    VPMOVSQB,
+    VPMOVUSQB,
+    VPMOVSQW,
+    VPMOVUSQW,
+    VPMOVSQD,
+    VPMOVUSQD,
+    VPMOVSDB,
+    VPMOVUSDB,
+    VPMOVSDW,
+    VPMOVUSDW,
+    VPROLD,
+    VPROLQ,
+    VPROLVD,
+    VPROLVQ,
+    VPRORD,
+    VPRORQ,
+    VPRORRD,
+    VPRORRQ,
+    VPSCATTERDD,
+    VPSCATTERDQ,
+    VPSCATTERQD,
+    VPSCATTERQQ,
+    VPSRAQ,
+    VPSRAVQ,
+    VPTESTNMD,
+    VPTESTNMQ,
+    VPTERNLOGD,
+    VPTERNLOGQ,
+    VPTESTMD,
+    VPTESTMQ,
+    VRCP14PD,
+    VRCP14PS,
+    VRCP14SD,
+    VRCP14SS,
+    VRNDSCALEPD,
+    VRNDSCALEPS,
+    VRNDSCALESD,
+    VRNDSCALESS,
+    VRSQRT14PD,
+    VRSQRT14PS,
+    VRSQRT14SD,
+    VRSQRT14SS,
+    VSCALEDPD,
+    VSCALEDPS,
+    VSCALEDSD,
+    VSCALEDSS,
+    VSCATTERDD,
+    VSCATTERDQ,
+    VSCATTERQD,
+    VSCATTERQQ,
+    VSHUFF32X4,
+    VSHUFF64X2,
+    VSHUFI32X4,
+    VSHUFI64X2,
+
+    // AVX512DQ
+    VCVTTPD2QQ,
+    VCVTPD2QQ,
+    VCVTTPD2UQQ,
+    VCVTPD2UQQ,
+    VCVTTPS2QQ,
+    VCVTPS2QQ,
+    VCVTTPS2UQQ,
+    VCVTPS2UQQ,
+    VCVTUQQ2PD,
+    VCVTUQQ2PS,
+    VEXTRACTF64X2,
+    VEXTRACTI64X2,
+    VFPCLASSPD,
+    VFPCLASSPS,
+    VFPCLASSSD,
+    VFPCLASSSS,
+    VINSERTF64X2,
+    VINSERTI64X2,
+    VPMOVM2D,
+    VPMOVM2Q,
+    VPMOVB2D,
+    VPMOVQ2M,
+    VRANGEPD,
+    VRANGEPS,
+    VRANGESD,
+    VRANGESS,
+    VREDUCEPD,
+    VREDUCEPS,
+    VREDUCESD,
+    VREDUCESS,
+
+    // AVX512BW
+    VDBPSADBW,
+    VMOVDQU8,
+    VMOVDQU16,
+    VPBLENDMB,
+    VPBLENDMW,
+    VPCMPB,
+    VPCMPUB,
+    VPCMPW,
+    VPCMPUW,
+    VPERMW,
+    VPERMI2B,
+    VPERMI2W,
+    VPMOVM2B,
+    VPMOVM2W,
+    VPMOVB2M,
+    VPMOVW2M,
+    VPMOVSWB,
+    VPMOVUSWB,
+    VPSLLVW,
+    VPSRAVW,
+    VPSRLVW,
+    VPTESTNMB,
+    VPTESTNMW,
+    VPTESTMB,
+    VPTESTMW,
+
+    // AVX512CD
+    VPBROADCASTM,
+    VPCONFLICTD,
+    VPCONFLICTQ,
+    VPLZCNTD,
+    VPLZCNTQ,
+
+    KUNPCKBW,
+    KUNPCKWD,
+    KUNPCKDQ,
+
+    KADDB,
+    KANDB,
+    KANDNB,
+    KMOVB,
+    KNOTB,
+    KORB,
+    KORTESTB,
+    KSHIFTLB,
+    KSHIFTRB,
+    KTESTB,
+    KXNORB,
+    KXORB,
+    KADDW,
+    KANDW,
+    KANDNW,
+    KMOVW,
+    KNOTW,
+    KORW,
+    KORTESTW,
+    KSHIFTLW,
+    KSHIFTRW,
+    KTESTW,
+    KXNORW,
+    KXORW,
+    KADDD,
+    KANDD,
+    KANDND,
+    KMOVD,
+    KNOTD,
+    KORD,
+    KORTESTD,
+    KSHIFTLD,
+    KSHIFTRD,
+    KTESTD,
+    KXNORD,
+    KXORD,
+    KADDQ,
+    KANDQ,
+    KANDNQ,
+    KMOVQ,
+    KNOTQ,
+    KORQ,
+    KORTESTQ,
+    KSHIFTLQ,
+    KSHIFTRQ,
+    KTESTQ,
+    KXNORQ,
+    KXORQ,
+
+    // AVX512ER
+    VEXP2PD,
+    VEXP2PS,
+    VEXP2SD,
+    VEXP2SS,
+    VRCP28PD,
+    VRCP28PS,
+    VRCP28SD,
+    VRCP28SS,
+    VRSQRT28PD,
+    VRSQRT28PS,
+    VRSQRT28SD,
+    VRSQRT28SS,
+
+    // AVX512PF
+    VGATHERPF0DPD,
+    VGATHERPF0DPS,
+    VGATHERPF0QPD,
+    VGATHERPF0QPS,
+    VGATHERPF1DPD,
+    VGATHERPF1DPS,
+    VGATHERPF1QPD,
+    VGATHERPF1QPS,
+    VSCATTERPF0DPD,
+    VSCATTERPF0DPS,
+    VSCATTERPF0QPD,
+    VSCATTERPF0QPS,
+    VSCATTERPF1DPD,
+    VSCATTERPF1DPS,
+    VSCATTERPF1QPD,
+    VSCATTERPF1QPS,
+
+    // MPX
+    BNDMK,
+    BNDCL,
+    BNDCU,
+    BNDCN,
+    BNDMOV,
+    BNDLDX,
+    BNDSTX,
+
+    VGF2P8AFFINEQB,
+    VGF2P8AFFINEINVQB,
+    VPSHRDQ,
+    VPSHRDD,
+    VPSHRDW,
+    VPSHLDQ,
+    VPSHLDD,
+    VPSHLDW,
+    VBROADCASTF32X8,
+    VBROADCASTF64X4,
+    VBROADCASTF32X4,
+    VBROADCASTF64X2,
+    VBROADCASTF32X2,
+    VBROADCASTI32X8,
+    VBROADCASTI64X4,
+    VBROADCASTI32X4,
+    VBROADCASTI64X2,
+    VBROADCASTI32X2,
+    VEXTRACTI32X8,
+    VEXTRACTF32X8,
+    VINSERTI32X8,
+    VINSERTF32X8,
+    VINSERTI32X4,
+    V4FNMADDSS,
+    V4FNMADDPS,
+    VCVTNEPS2BF16,
+    V4FMADDSS,
+    V4FMADDPS,
+    VCVTNE2PS2BF16,
+    VP2INTERSECTD,
+    VP2INTERSECTQ,
+    VP4DPWSSDS,
+    VP4DPWSSD,
+    VPDPWSSDS,
+    VPDPWSSD,
+    VPDPBUSDS,
+    VDPBF16PS,
+    VPBROADCASTMW2D,
+    VPBROADCASTMB2Q,
+    VPMOVD2M,
+    VPMOVQD,
+    VPMOVWB,
+    VPMOVDB,
+    VPMOVDW,
+    VPMOVQB,
+    VPMOVQW,
+    VGF2P8MULB,
+    VPMADD52HUQ,
+    VPMADD52LUQ,
+    VPSHUFBITQMB,
+    VPERMB,
+    VPEXPANDD,
+    VPEXPANDQ,
+    VPABSQ,
+    VPRORVD,
+    VPRORVQ,
+    VPMULTISHIFTQB,
+    VPERMT2B,
+    VPERMT2W,
+    VPSHRDVQ,
+    VPSHRDVD,
+    VPSHRDVW,
+    VPSHLDVQ,
+    VPSHLDVD,
+    VPSHLDVW,
+    VPCOMPRESSB,
+    VPCOMPRESSW,
+    VPEXPANDB,
+    VPEXPANDW,
+    VPOPCNTD,
+    VPOPCNTQ,
+    VPOPCNTB,
+    VPOPCNTW,
+    VSCALEFSS,
+    VSCALEFSD,
+    VSCALEFPS,
+    VSCALEFPD,
+    VPDPBUSD,
+    VCVTUSI2SD,
+    VCVTUSI2SS,
+    VPXORD,
+    VPXORQ,
+    VPORD,
+    VPORQ,
+    VPANDND,
+    VPANDNQ,
+    VPANDD,
+    VPANDQ,
 }
 
 #[derive(Debug)]
@@ -1889,7 +2469,7 @@ impl yaxpeax_arch::Instruction for Instruction {
     }
 }
 
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
 #[non_exhaustive]
 pub enum DecodeError {
     ExhaustedInput,
@@ -1912,10 +2492,24 @@ enum OperandSpec {
     Nothing,
     // the register in modrm_rrr
     RegRRR,
+    // the register in modrm_rrr and is EVEX-encoded (may have a mask register, is merged or
+    // zeroed)
+    RegRRR_maskmerge,
+    // the register in modrm_rrr and is EVEX-encoded (may have a mask register, is merged or
+    // zeroed). additionally, this instruction has exceptions suppressed with a potentially
+    // custom rounding mode.
+    RegRRR_maskmerge_sae,
+    // the register in modrm_rrr and is EVEX-encoded (may have a mask register, is merged or
+    // zeroed). additionally, this instruction has exceptions suppressed.
+    RegRRR_maskmerge_sae_noround,
     // the register in modrm_mmm (eg modrm mod bits were 11)
     RegMMM,
+    // same as `RegRRR_maskmerge`, but the register is in modrm's `mmm` bits; it may be masked.
+    RegMMM_maskmerge,
+    RegMMM_maskmerge_sae_noround,
     // the register selected by vex-vvvv bits
     RegVex,
+    RegVex_maskmerge,
     // the register selected by a handful of avx2 vex-coded instructions,
     // stuffed in imm4.
     Reg4,
@@ -1940,7 +2534,13 @@ enum OperandSpec {
     RegIndexBaseDisp,
     RegScaleDisp,
     RegIndexBaseScale,
-    RegIndexBaseScaleDisp
+    RegIndexBaseScaleDisp,
+    Deref_mask,
+    RegDisp_mask,
+    RegScale_mask,
+    RegScaleDisp_mask,
+    RegIndexBaseScale_mask,
+    RegIndexBaseScaleDisp_mask,
 }
 
 // the Hash, Eq, and PartialEq impls here are possibly misleading.
@@ -2460,6 +3060,47 @@ impl InstDecoder {
         self
     }
 
+    pub fn avx512(&self) -> bool { // reports whether every flag bit collected below is set
+        let avx512_mask =
+            (1 << 19) |
+            (1 << 20) |
+            (1 << 23) |
+            (1 << 27) |
+            (1 << 28) |
+            (1 << 29) |
+            (1 << 31) |
+            (1 << 32) |
+            (1 << 34) |
+            (1 << 35) |
+            (1 << 40) |
+            (1 << 41) |
+            (1 << 42) |
+            (1 << 43);
+
+        (self.flags & avx512_mask) == avx512_mask // all-of test: a partially-set mask yields false
+    }
+
+    pub fn with_avx512(mut self) -> Self { // builder-style: takes the decoder by value and returns it
+        let avx512_mask =
+            (1 << 19) |
+            (1 << 20) |
+            (1 << 23) |
+            (1 << 27) |
+            (1 << 28) |
+            (1 << 29) |
+            (1 << 31) |
+            (1 << 32) |
+            (1 << 34) |
+            (1 << 35) |
+            (1 << 40) |
+            (1 << 41) |
+            (1 << 42) |
+            (1 << 43);
+
+        self.flags |= avx512_mask; // sets exactly the bits that `avx512()` tests; other flags are untouched
+        self
+    }
+
     pub fn cx8(&self) -> bool {
         self.flags & (1 << 44) != 0
     }
@@ -2659,6 +3300,13 @@ impl InstDecoder {
     /// Optionally reject or reinterpret instruction according to the decoder's
     /// declared extensions.
     fn revise_instruction(&self, inst: &mut Instruction) -> Result<(), DecodeError> {
+        if inst.prefixes.evex().is_some() {
+            if !self.avx512() {
+                return Err(DecodeError::InvalidOpcode);
+            } else {
+                return Ok(());
+            }
+        }
         match inst.opcode {
             Opcode::TZCNT => {
                 if !self.bmi1() {
@@ -2835,6 +3483,7 @@ impl InstDecoder {
             // AVX...
             Opcode::VMOVDDUP |
             Opcode::VPSHUFLW |
+            Opcode::VPSHUFHW |
             Opcode::VHADDPS |
             Opcode::VHSUBPS |
             Opcode::VADDSUBPS |
@@ -2998,6 +3647,7 @@ impl InstDecoder {
             Opcode::VPABSD |
             Opcode::VPABSW |
             Opcode::VPACKSSDW |
+            Opcode::VPACKUSDW |
             Opcode::VPACKSSWB |
             Opcode::VPACKUSWB |
             Opcode::VPADDB |
@@ -3056,7 +3706,7 @@ impl InstDecoder {
             Opcode::VPHADDD |
             Opcode::VPHADDSW |
             Opcode::VPHADDW |
-            Opcode::VPHADDUBSW |
+            Opcode::VPMADDUBSW |
             Opcode::VPHMINPOSUW |
             Opcode::VPHSUBD |
             Opcode::VPHSUBSW |
@@ -3094,6 +3744,7 @@ impl InstDecoder {
             Opcode::VPMULHRSW |
             Opcode::VPMULHUW |
             Opcode::VPMULHW |
+            Opcode::VPMULLQ |
             Opcode::VPMULLD |
             Opcode::VPMULLW |
             Opcode::VPMULUDQ |
@@ -3525,15 +4176,51 @@ impl Instruction {
     }
 }
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct EvexData {
+    // bit layout, msb to lsb: present (bit 7), Rp (6), Lp (5), z (4), b (3), aaa (2..=0)
+    bits: u8,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub struct Prefixes {
     bits: u8,
     vex: PrefixVex,
     segment: Segment,
-    _pad: u8,
+    evex_data: EvexData,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct PrefixEvex {
+    vex: PrefixVex,
+    evex_data: EvexData,
 }
 
-#[derive(Debug, Copy, Clone)]
+impl PrefixEvex { // read-only accessors over the `vex` and `evex_data` halves of the prefix
+    fn present(&self) -> bool { // delegates to `EvexData::present`
+        self.evex_data.present()
+    }
+    fn vex(&self) -> &PrefixVex { // borrows the `PrefixVex` half of this prefix
+        &self.vex
+    }
+    fn mask_reg(&self) -> u8 { // value of the `aaa` field
+        self.evex_data.aaa()
+    }
+    fn broadcast(&self) -> bool { // state of the `b` bit
+        self.evex_data.b()
+    }
+    fn merge(&self) -> bool { // NOTE(review): this is the raw `z` bit; EVEX z=1 usually means zeroing, so confirm callers read `true` as zero, not merge
+        self.evex_data.z()
+    }
+    fn lp(&self) -> bool { // state of the `Lp` bit
+        self.evex_data.lp()
+    }
+    fn rp(&self) -> bool { // state of the `Rp` bit
+        self.evex_data.rp()
+    }
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub struct PrefixVex {
     bits: u8,
 }
@@ -3552,6 +4239,8 @@ impl PrefixVex {
     fn l(&self) -> bool { (self.bits & 0x10) == 0x10 }
     #[inline]
     fn present(&self) -> bool { (self.bits & 0x80) == 0x80 }
+    #[inline]
+    fn compressed_disp(&self) -> bool { (self.bits & 0x20) == 0x20 }
 }
 
 #[allow(dead_code)]
@@ -3561,7 +4250,7 @@ impl Prefixes {
             bits: bits,
             vex: PrefixVex { bits: 0 },
             segment: Segment::DS,
-            _pad: 0,
+            evex_data: EvexData { bits: 0 },
         }
     }
     fn vex_from(&mut self, bits: u8) {
@@ -3623,6 +4312,26 @@ impl Prefixes {
     fn set_ss(&mut self) { self.segment = Segment::SS }
     #[inline]
     fn vex(&self) -> PrefixVex { PrefixVex { bits: self.vex.bits } }
+    #[inline]
+    fn evex_unchecked(&self) -> PrefixEvex { PrefixEvex { vex: PrefixVex { bits: self.vex.bits }, evex_data: self.evex_data } } // copies out vex + evex state without testing `present()`
+    #[inline]
+    fn evex(&self) -> Option<PrefixEvex> { // checked form of `evex_unchecked`
+        let evex = self.evex_unchecked();
+        if evex.present() { // the present bit is set unconditionally by `EvexData::from`
+            Some(evex)
+        } else {
+            None
+        }
+    }
+
+    #[inline]
+    fn apply_compressed_disp(&mut self, state: bool) { // sets or clears the flag read by `PrefixVex::compressed_disp()`
+        if state {
+            self.vex.bits |= 0x20; // 0x20 is the bit `compressed_disp()` tests
+        } else {
+            self.vex.bits &= 0xdf; // 0xdf == !0x20: clears only that bit
+        }
+    }
 
     #[inline]
     fn vex_from_c5(&mut self, bits: u8) {
@@ -3640,8 +4349,65 @@ impl Prefixes {
         let rxb = (high >> 5) ^ 0x07;
         let wrxb = rxb | w >> 4;
         let l = (low & 0x04) << 2;
-        let synthetic_rex = wrxb | l | 0x80;
-        self.vex = PrefixVex { bits: synthetic_rex };
+        let synthetic_vex = wrxb | l | 0x80;
+        self.vex = PrefixVex { bits: synthetic_vex };
+    }
+
+    #[inline]
+    fn evex_from(&mut self, b1: u8, b2: u8, b3: u8) { // b1..b3: the three prefix payload bytes, in order
+        let w = b2 & 0x80; // top bit of b2; shifted down to bit 3 below
+        let rxb = ((b1 >> 5) & 0b111) ^ 0b111; // `rxb` is provided in inverted form
+        let wrxb = rxb | (w >> 4);
+        let l = (b3 & 0x20) >> 1; // b3 bit 5 moved to bit 4 (0x10), the bit `PrefixVex::l()` tests
+        let synthetic_vex = wrxb | l | 0x80; // 0x80 is the bit `PrefixVex::present()` tests
+        self.vex_from(synthetic_vex);
+
+        // R' is provided in inverted form
+        let rp = ((b1 & 0x10) >> 4) ^ 1;
+        let lp = (b3 & 0x40) >> 6;
+        let aaa = b3 & 0b111;
+        let z = (b3 & 0x80) >> 7;
+        let b = (b3 & 0x10) >> 4;
+        self.evex_data.from(rp, lp, z, b, aaa); // each of rp/lp/z/b is 0 or 1 here; aaa is 0..=7
+    }
+}
+
+impl EvexData {
+    fn from(&mut self, rp: u8, lp: u8, z: u8, b: u8, aaa: u8) { // packs the fields into `self.bits`; arguments are not masked here
+        let mut bits = 0;
+        bits |= aaa; // low three bits, read back by `aaa()` via `& 0b111`
+        bits |= b << 3;
+        bits |= z << 4;
+        bits |= lp << 5;
+        bits |= rp << 6;
+        bits |= 0x80; // always set: this is the bit `present()` tests
+        self.bits = bits; // overwrites any previous contents
+    }
+}
+
+impl EvexData { // accessors for the fields packed into `bits` by `from`
+    pub(crate) fn present(&self) -> bool {
+        self.bits & 0b1000_0000 != 0 // bit 7
+    }
+
+    pub(crate) fn aaa(&self) -> u8 {
+        self.bits & 0b111 // bits 0..=2, returned as a number rather than a flag
+    }
+
+    pub(crate) fn b(&self) -> bool {
+        (self.bits & 0b0000_1000) != 0 // bit 3
+    }
+
+    pub(crate) fn z(&self) -> bool {
+        (self.bits & 0b0001_0000) != 0 // bit 4
+    }
+
+    pub(crate) fn lp(&self) -> bool {
+        (self.bits & 0b0010_0000) != 0 // bit 5
+    }
+
+    pub(crate) fn rp(&self) -> bool {
+        (self.bits & 0b0100_0000) != 0 // bit 6
     }
 }
 
@@ -4179,6 +4945,7 @@ enum OperandCode {
     CS = OperandCodeBuilder::new().special_case(104).bits(),
     SS = OperandCodeBuilder::new().special_case(105).bits(),
     DS = OperandCodeBuilder::new().special_case(106).bits(),
+    ModRM_0x62 = OperandCodeBuilder::new().special_case(107).bits(),
 }
 
 const LOCKABLE_INSTRUCTIONS: &[Opcode] = &[
@@ -4345,7 +5112,7 @@ const OPCODES: [OpcodeRecord; 256] = [
 // 0x60
     OpcodeRecord(Interpretation::Instruction(Opcode::PUSHA), OperandCode::Nothing),
     OpcodeRecord(Interpretation::Instruction(Opcode::POPA), OperandCode::Nothing),
-    OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing),
+    OpcodeRecord(Interpretation::Instruction(Opcode::BOUND), OperandCode::ModRM_0x62),
     OpcodeRecord(Interpretation::Instruction(Opcode::ARPL), OperandCode::Ew_Gw),
     OpcodeRecord(Interpretation::Prefix, OperandCode::Nothing),
     OpcodeRecord(Interpretation::Prefix, OperandCode::Nothing),
@@ -4567,6 +5334,18 @@ pub(self) fn read_E_ymm<T: Iterator<Item=u8>>(bytes_iter: &mut T, instr: &mut In
         read_M(bytes_iter, instr, modrm, length)
     }
 }
+#[allow(non_snake_case)]
+pub(self) fn read_E_vex<T: Iterator<Item=u8>>(bytes_iter: &mut T, instr: &mut Instruction, modrm: u8, length: &mut u8, bank: RegisterBank) -> Result<OperandSpec, DecodeError> {
+    if modrm >= 0b11000000 { // both mod bits set: the operand is a register, read from `bank`
+        read_modrm_reg(instr, modrm, bank)
+    } else { // any other mod value: defer to `read_M`, propagating its error via `?`
+        let res = read_M(bytes_iter, instr, modrm, length)?;
+        if (modrm & 0b01_000_000) == 0b01_000_000 { // bit 6 set while mod != 11, i.e. mod == 01 (presumably the 8-bit displacement form; confirm)
+            instr.prefixes.apply_compressed_disp(true); // only ever sets the flag; it is never cleared on this path
+        }
+        Ok(res)
+    }
+}
 
 #[allow(non_snake_case)]
 fn read_modrm_reg(instr: &mut Instruction, modrm: u8, reg_bank: RegisterBank) -> Result<OperandSpec, DecodeError> {
@@ -8679,6 +9458,35 @@ fn unlikely_operands<T: Iterator<Item=u8>>(decoder: &InstDecoder, mut bytes_iter
             instruction.operands[1] = instruction.operands[0];
             instruction.operands[0] = temp;
         }
+        OperandCode::ModRM_0x62 => {
+            let modrm = read_modrm(&mut bytes_iter, length)?;
+
+            if modrm < 0xc0 {
+                instruction.modrm_rrr =
+                    RegSpec { bank: RegisterBank::D, num: (modrm >> 3) & 7 };
+                if instruction.prefixes.operand_size() {
+                    instruction.modrm_rrr.bank = RegisterBank::W;
+                    instruction.mem_size = 4;
+                } else {
+                    instruction.mem_size = 8;
+                }
+
+                instruction.operands[0] = OperandSpec::RegRRR;
+                instruction.operands[1] = read_M(&mut bytes_iter, instruction, modrm, length)?;
+                instruction.operand_count = 2;
+            } else {
+                let prefixes = &instruction.prefixes;
+                if prefixes.lock() || prefixes.operand_size() || prefixes.rep_any() {
+                    return Err(DecodeError::InvalidPrefixes);
+                } else {
+                    evex::read_evex(&mut bytes_iter, instruction, *length, Some(modrm))?;
+                    // there's an unavoidable `instruction.length = *length;` after
+                    // `unlikely_operands`. the current length is correct, so store it back to
+                    // length to make the reassignment store a correct length.
+                    *length = instruction.length;
+                }
+            }
+        }
         _ => {
             // TODO: this should be unreachable - safe to panic now?
             // can't simply delete this arm because the non-unlikely operands are handled outside
diff --git a/src/protected_mode/vex.rs b/src/protected_mode/vex.rs
index ce40e6e..5c66654 100644
--- a/src/protected_mode/vex.rs
+++ b/src/protected_mode/vex.rs
@@ -1685,7 +1685,7 @@ fn read_vex_instruction<T: Iterator<Item=u8>>(opcode_map: VEXOpcodeMap, bytes: &
                     } else {
                         VEXOperandCode::G_V_E_xmm
                     }),
-                    0x04 => (Opcode::VPHADDUBSW, if L {
+                    0x04 => (Opcode::VPMADDUBSW, if L {
                         VEXOperandCode::G_V_E_ymm
                     } else {
                         VEXOperandCode::G_V_E_xmm