mod regspec;
mod operand;

use std::fmt::Write;

use yaxpeax_arch::{AddressBase, Decoder, LengthedInstruction};
use yaxpeax_x86::long_mode::{DecodeError, InstDecoder, Opcode};

fn test_invalid(data: &[u8]) {
    test_invalid_under(&InstDecoder::default(), data);
}

fn test_invalid_under(decoder: &InstDecoder, data: &[u8]) {
    if let Ok(inst) = decoder.decode(data.into_iter().cloned()) {
        assert_eq!(inst.opcode, Opcode::Invalid, "decoded {:?} from {:02x?} under decoder {}", inst.opcode, data, decoder);
    } else {
        // this is fine
    }
}

fn test_display(data: &[u8], expected: &'static str) {
    test_display_under(&InstDecoder::default(), data, expected);
}

fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str) {
    let mut hex = String::new();
    for b in data {
        write!(hex, "{:02x}", b).unwrap();
    }
    match decoder.decode(data.into_iter().map(|x| *x)) {
        Ok(instr) => {
            let text = format!("{}", instr);
            assert!(
                text == expected,
                "display error for {}:\n  decoded: {:?} under decoder {}\n displayed: {}\n expected: {}\n",
                hex,
                instr,
                decoder,
                text,
                expected
            );
            // while we're at it, test that the instruction is as long, and no longer, than its
            // input
            assert_eq!((0u64.wrapping_offset(instr.len()).to_linear()) as usize, data.len(), "instruction length is incorrect, wanted instruction {}", expected);
        },
        Err(e) => {
            assert!(false, "decode error ({}) for {} under decoder {}:\n  expected: {}\n", e, hex, decoder, expected);
        }
    }
}

#[test]
fn test_modrm_decode() {
    // just modrm
    test_display(&[0x33, 0x08], "xor ecx, [rax]");
    test_display(&[0x33, 0x20], "xor esp, [rax]");
    test_display(&[0x33, 0x05, 0x78, 0x56, 0x34, 0x12], "xor eax, [rip + 0x12345678]");
    test_display(&[0x33, 0x41, 0x23], "xor eax, [rcx + 0x23]");
    test_display(&[0x33, 0x81, 0x23, 0x01, 0x65, 0x43], "xor eax, [rcx + 0x43650123]");
    test_display(&[0x33, 0xc1], "xor eax, ecx");

    // modrm + rex.w
    test_display(&[0x48, 0x33, 0x08], "xor rcx, [rax]");
    test_display(&[0x48, 0x33, 0x20], "xor rsp, [rax]");
    test_display(&[0x48, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12], "xor rax, [rip + 0x12345678]");
    test_display(&[0x48, 0x33, 0x41, 0x23], "xor rax, [rcx + 0x23]");
    test_display(&[0x48, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43], "xor rax, [rcx + 0x43650123]");
    test_display(&[0x48, 0x33, 0xc1], "xor rax, rcx");

    // modrm + rex.r
    test_display(&[0x44, 0x33, 0x08], "xor r9d, [rax]");
    test_display(&[0x44, 0x33, 0x20], "xor r12d, [rax]");
    test_display(&[0x44, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12], "xor r8d, [rip + 0x12345678]");
    test_display(&[0x44, 0x33, 0x41, 0x23], "xor r8d, [rcx + 0x23]");
    test_display(&[0x44, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43], "xor r8d, [rcx + 0x43650123]");
    test_display(&[0x44, 0x33, 0xc1], "xor r8d, ecx");

    // modrm + rex.rb
    test_display(&[0x45, 0x33, 0x08], "xor r9d, [r8]");
    test_display(&[0x45, 0x33, 0x20], "xor r12d, [r8]");
    test_display(&[0x45, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12], "xor r8d, [rip + 0x12345678]");
    test_display(&[0x45, 0x33, 0x41, 0x23], "xor r8d, [r9 + 0x23]");
    test_display(&[0x45, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43], "xor r8d, [r9 + 0x43650123]");
    test_display(&[0x45, 0x33, 0xc1], "xor r8d, r9d");

    // sib
    test_display(&[0x33, 0x04, 0x0a], "xor eax, [rdx + rcx * 1]");
    test_display(&[0x33, 0x04, 0x4a], "xor eax, [rdx + rcx * 2]");
    test_display(&[0x33, 0x04, 0x8a], "xor eax, [rdx + rcx * 4]");
    test_display(&[0x33, 0x04, 0xca], "xor eax, [rdx + rcx * 8]");
    test_display(&[0x33, 0x04, 0x20], "xor eax, [rax]");
    test_display(&[0x33, 0x04, 0x60], "xor eax, [rax]");
    test_display(&[0x33, 0x04, 0xa0], "xor eax, [rax]");
    test_display(&[0x33, 0x04, 0xe0], "xor eax, [rax]");
    test_display(&[0x42, 0x33, 0x04, 0x20], "xor eax, [rax + r12 * 1]");
    test_display(&[0x42, 0x33, 0x04, 0x60], "xor eax, [rax + r12 * 2]");
    test_display(&[0x42, 0x33, 0x04, 0xa0], "xor eax, [rax + r12 * 4]");
    test_display(&[0x42, 0x33, 0x04, 0xe0], "xor eax, [rax + r12 * 8]");
    test_display(&[0x43, 0x33, 0x04, 0x20], "xor eax, [r8 + r12 * 1]");
    test_display(&[0x43, 0x33, 0x04, 0x60], "xor eax, [r8 + r12 * 2]");
    test_display(&[0x43, 0x33, 0x04, 0xa0], "xor eax, [r8 + r12 * 4]");
    test_display(&[0x43, 0x33, 0x04, 0xe0], "xor eax, [r8 + r12 * 8]");
    test_display(&[0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44], "xor eax, [0x44332211]");
    test_display(&[0x41, 0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44], "xor eax, [0x44332211]");

    test_display(&[0x33, 0x44, 0x65, 0x11], "xor eax, [rbp + 0x11]");
    test_display(&[0x41, 0x33, 0x44, 0x65, 0x11], "xor eax, [r13 + 0x11]");
    test_display(&[0x33, 0x84, 0xa5, 0x11, 0x22, 0x33, 0x44], "xor eax, [rbp + 0x44332211]");
    test_display(&[0x41, 0x33, 0x84, 0xa5, 0x11, 0x22, 0x33, 0x44], "xor eax, [r13 + 0x44332211]");
    test_display(&[0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44], "xor eax, [0x44332211]");
    test_display(&[0x41, 0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44], "xor eax, [0x44332211]");
}

#[test]
fn test_mmx() {
    test_display(&[0x4f, 0x0f, 0x7e, 0xcf], "movd r15, mm1");
    test_display(&[0x41, 0x0f, 0x7e, 0xcf], "movd r15d, mm1");
    test_display(&[0x4f, 0x0f, 0x7f, 0xcf], "movq mm7, mm1");
    test_display(&[0x4f, 0x0f, 0x7f, 0x0f], "movq [r15], mm1");
    test_display(&[0x0f, 0xc4, 0xc0, 0x14], "pinsrw mm0, eax, 0x14");
    test_display(&[0x4f, 0x0f, 0xc4, 0xc0, 0x14], "pinsrw mm0, r8d, 0x14");
    test_display(&[0x4f, 0x0f, 0xc4, 0x00, 0x14], "pinsrw mm0, [r8], 0x14");
    test_display(&[0x4f, 0x0f, 0xd1, 0xcf], "psrlw mm1, mm7");
    test_display(&[0x4f, 0x0f, 0xd1, 0x00], "psrlw mm0, [r8]");
    test_invalid(&[0x4f, 0x0f, 0xd7, 0x00]);
    test_display(&[0x4f, 0x0f, 0xd7, 0xcf], "pmovmskb r9d, mm7");
    test_display(&[0x0f, 0x3a, 0x0f, 0xc1, 0x23], "palignr mm0, mm1, 0x23");
}

#[test]
fn test_cvt() {
    test_display(&[0x0f, 0x2c, 0xcf], "cvttps2pi mm1, xmm7");
    test_display(&[0x48, 0x0f, 0x2c, 0xcf], "cvttps2pi mm1, xmm7");
    test_display(&[0x4f, 0x0f, 0x2c, 0xcf], "cvttps2pi mm1, xmm15");
    test_display(&[0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2ps xmm9, mm7");
    test_display(&[0x4f, 0x0f, 0x2a, 0x00], "cvtpi2ps xmm8, [r8]");
    test_display(&[0x4f, 0x66, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm1, mm7");
    test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm9, mm7");
    test_display(&[0x4f, 0xf3, 0x0f, 0x2a, 0xcf], "cvtsi2ss xmm1, edi");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2a, 0xcf], "cvtsi2ss xmm9, r15");
    test_display(&[0x4f, 0xf2, 0x0f, 0x2a, 0xcf], "cvtsi2sd xmm1, edi");
    test_display(&[0xf2, 0x4f, 0x0f, 0x2a, 0xcf], "cvtsi2sd xmm9, r15");
    test_display(&[0x4f, 0xf2, 0x0f, 0x2a, 0x00], "cvtsi2sd xmm0, [rax]");
    test_display(&[0xf2, 0x4f, 0x0f, 0x2a, 0x00], "cvtsi2sd xmm8, [r8]");
    test_display(&[0x4f, 0xf3, 0x0f, 0x2a, 0x00], "cvtsi2ss xmm0, [rax]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2a, 0x00], "cvtsi2ss xmm8, [r8]");
    test_display(&[0x4f, 0x66, 0x0f, 0x2a, 0x00], "cvtpi2pd xmm0, [rax]");
    test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0x00], "cvtpi2pd xmm8, [r8]");
}

#[test]
fn test_aesni() {
    fn test_instr(bytes: &[u8], text: &'static str) {
        test_display_under(&InstDecoder::minimal().with_aesni(), bytes, text);
        test_display_under(&InstDecoder::default(), bytes, text);
        test_invalid_under(&InstDecoder::minimal(), bytes);
    }

    test_instr(&[0x66, 0x0f, 0x38, 0xdb, 0x0f], "aesimc xmm1, [rdi]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x38, 0xdb, 0xcf], "aesimc xmm9, xmm15");

    test_instr(&[0x66, 0x0f, 0x38, 0xdc, 0x0f], "aesenc xmm1, [rdi]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x38, 0xdc, 0xcf], "aesenc xmm9, xmm15");

    test_instr(&[0x66, 0x0f, 0x38, 0xdd, 0x0f], "aesenclast xmm1, [rdi]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x38, 0xdd, 0xcf], "aesenclast xmm9, xmm15");

    test_instr(&[0x66, 0x0f, 0x38, 0xde, 0x0f], "aesdec xmm1, [rdi]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x38, 0xde, 0xcf], "aesdec xmm9, xmm15");

    test_instr(&[0x66, 0x0f, 0x38, 0xdf, 0x0f], "aesdeclast xmm1, [rdi]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x38, 0xdf, 0xcf], "aesdeclast xmm9, xmm15");

    test_instr(&[0x66, 0x0f, 0x3a, 0xdf, 0x0f, 0xaa], "aeskeygenassist xmm1, [rdi], 0xaa");
    test_instr(&[0x66, 0x4f, 0x0f, 0x3a, 0xdf, 0xcf, 0xaa], "aeskeygenassist xmm9, xmm15, 0xaa");
}

#[test]
fn test_sse2() {
    fn test_instr(bytes: &[u8], text: &'static str) {
        // sse and sse2 are part of amd64, so x86_64, meaning even the minimal decoder must support
        // them.
        test_display_under(&InstDecoder::minimal(), bytes, text);
    }

    test_instr(&[0xf2, 0x0f, 0x10, 0x0c, 0xc7], "movsd xmm1, [rdi + rax * 8]");
    test_instr(&[0xf2, 0x0f, 0x11, 0x0c, 0xc7], "movsd [rdi + rax * 8], xmm1");
    test_instr(&[0x66, 0x0f, 0x11, 0x0c, 0xc7], "movupd [rdi + rax * 8], xmm1");
    test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0xc3], "movhlps xmm8, xmm11"); // reg-reg form is movhlps
    test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0x03], "movlpd xmm8, [r11]"); // reg-mem is movlpd
    test_instr(&[0x66, 0x4f, 0x0f, 0x13, 0x03], "movlpd [r11], xmm8");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x13, 0xc3]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x14, 0x03], "unpcklpd xmm8, [r11]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x14, 0xc3], "unpcklpd xmm8, xmm11");
    test_instr(&[0x66, 0x4f, 0x0f, 0x15, 0x03], "unpckhpd xmm8, [r11]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x15, 0xc3], "unpckhpd xmm8, xmm11");
    test_instr(&[0x66, 0x4f, 0x0f, 0x16, 0x03], "movhpd xmm8, [r11]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x16, 0xc3], "movlhps xmm8, xmm11");
    test_instr(&[0x66, 0x4f, 0x0f, 0x17, 0x03], "movhpd [r11], xmm8");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x17, 0xc3]);

    test_instr(&[0x66, 0x4f, 0x0f, 0x28, 0xd0], "movapd xmm10, xmm8");
    test_instr(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]");

    test_instr(&[0x66, 0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm9, mm7");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2a, 0x0f], "cvtpi2pd xmm9, [r15]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x2a, 0xcf], "cvtsi2sd xmm9, r15");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x2a, 0x0f], "cvtsi2sd xmm9, [r15]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2b, 0x0f], "movntpd [r15], xmm9");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2c, 0xcf], "cvttpd2pi mm1, xmm15");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2c, 0x0f], "cvttpd2pi mm1, [r15]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x2c, 0xcf], "cvttsd2si xmm9, xmm15");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x2c, 0x0f], "cvttsd2si xmm9, [r15]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2d, 0xcf], "cvtpd2pi mm1, xmm15");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2d, 0x0f], "cvtpd2pi mm1, [r15]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x2d, 0xcf], "cvtsd2si xmm9, xmm15");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x2d, 0x0f], "cvtsd2si xmm9, [r15]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2e, 0xcf], "ucomisd xmm9, xmm15");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2e, 0x0f], "ucomisd xmm9, [r15]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2f, 0xcf], "comisd xmm9, xmm15");
    test_instr(&[0x66, 0x4f, 0x0f, 0x2f, 0x0f], "comisd xmm9, [r15]");

    /*
     * .... 660f38
     * .... 660f7f
     */

    test_invalid(&[0x66, 0x4f, 0x0f, 0x50, 0x01]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x50, 0xc1], "movmskpd r8d, xmm9");
    test_instr(&[0x66, 0x4f, 0x0f, 0x51, 0x01], "sqrtpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x51, 0x01], "sqrtsd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]"); // note: NOT "rsqrtpd" - no such instruction exists, so fall back to just 0f52 parse.
    test_instr(&[0x66, 0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]"); // note: NOT "rcppd" - no such instruction exists, so fall back to just 0f53 parse.
    test_instr(&[0x66, 0x4f, 0x0f, 0x54, 0x01], "andpd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x55, 0x01], "andnpd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x56, 0x01], "orpd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x57, 0x01], "xorpd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x58, 0x01], "addpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x58, 0x01], "addsd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x59, 0x01], "mulpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x59, 0x01], "mulsd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x5a, 0x01], "cvtpd2ps xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x5a, 0x01], "cvtsd2ss xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x5b, 0x01], "cvtps2dq xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x5c, 0x01], "subpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x5c, 0x01], "subsd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x5d, 0x01], "minpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x5d, 0x01], "minsd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x5e, 0x01], "divpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x5e, 0x01], "divsd xmm8, [r9]");
    test_instr(&[0x66, 0x4f, 0x0f, 0x5f, 0x01], "maxpd xmm8, [r9]");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x5f, 0x01], "maxsd xmm8, [r9]");
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x60, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpcklbw xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x61, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpcklwd xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x62, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpckldq xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x63, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "packsswb xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x64, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "pcmpgtb xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x65, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "pcmpgtw xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x66, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "pcmpgtd xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x67, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "packuswb xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x68, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpckhbw xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x69, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpckhwd xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x6a, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpckhdq xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x6b, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "packssdw xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x6c, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpcklqdq xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x6d, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "punpckhqdq xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    // TODO: this needs to be clear that the operand is `dword`
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "movq xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "movdqa xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );

    test_instr(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax");
    test_instr(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e");
    test_instr(&[0xf2, 0x0f, 0x70, 0xc0, 0x4e], "pshuflw xmm0, xmm0, 0x4e");
    test_instr(&[0xf3, 0x0f, 0x70, 0xc0, 0x4e], "pshufhw xmm0, xmm0, 0x4e");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x10, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x20, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x30, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x10, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x20, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x30, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x10, 0x8f]);
    test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x18, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f");
    test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f");
    test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x30, 0x8f]);
    test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x38, 0x8f]);
    test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f");
    test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f");
    test_instr(&[0x66, 0x0f, 0x7e, 0xc1], "movd ecx, xmm0");
    test_instr(&[0x66, 0x48, 0x0f, 0x7e, 0xc1], "movq rcx, xmm0");
    test_instr(&[0x66, 0x48, 0x0f, 0x7e, 0x01], "movd [rcx], xmm0");
    test_instr(&[0x66, 0x4f, 0x0f, 0x7e, 0xc1], "movq r9, xmm8");
    test_instr(
        &[0x66, 0x4f, 0x0f, 0x7f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "movdqa [r12 + r11 * 4 - 0x334455cc], xmm11"
    );

    test_instr(&[0x66, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm0, xmm3, 0x8");
    test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm8, xmm11, 0x8");
    test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmppd xmm8, [r11], 0x8");
    test_instr(&[0xf2, 0x0f, 0xc2, 0xc3, 0x08], "cmpsd xmm0, xmm3, 0x8");
    test_instr(&[0xf2, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmpsd xmm8, xmm11, 0x8");
    test_instr(&[0xf2, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmpsd xmm8, [r11], 0x8");

    test_instr(&[0x66, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm0, ebx, 0x8");
    test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm8, r11d, 0x8");

    test_instr(&[0x66, 0x0f, 0xc4, 0x03, 0x08], "pinsrw xmm0, [rbx], 0x8");
    test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0x03, 0x08], "pinsrw xmm8, [r11], 0x8");

//    test_instr(&[0x66, 0x0f, 0xc5, 0xc3, 0x08], "pextrw eax, xmm3, 0x8");
//    test_instr(&[0x66, 0x4f, 0x0f, 0xc5, 0xc3, 0x08], "pextrw r8d, xmm11, 0x8");
//    test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x03, 0x08]);
//    test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x40, 0x08]);
//    test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x80, 0x08]);

    test_instr(&[0x66, 0x4f, 0x0f, 0xc6, 0x03, 0x08], "shufpd xmm8, [r11], 0x8");
    test_instr(&[0x66, 0x0f, 0xc6, 0x03, 0x08], "shufpd xmm0, [rbx], 0x8");
    test_instr(&[0x66, 0x0f, 0xc6, 0xc3, 0x08], "shufpd xmm0, xmm3, 0x8");
    test_instr(&[0x66, 0x0f, 0xd1, 0xc1], "psrlw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd1, 0x01], "psrlw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xd2, 0xc1], "psrld xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd2, 0x01], "psrld xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xd3, 0xc1], "psrlq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd3, 0x01], "psrlq xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xd4, 0xc1], "paddq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd4, 0x01], "paddq xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xd5, 0xc1], "pmullw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd5, 0x01], "pmullw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xd6, 0xc1], "movq xmm1, xmm0");
    test_instr(&[0x66, 0x0f, 0xd6, 0x01], "movq [rcx], xmm0");
    test_invalid(&[0xf3, 0x4f, 0x0f, 0xd6, 0x03]);
    test_instr(&[0xf3, 0x4f, 0x0f, 0xd6, 0xc3], "movq2dq xmm8, mm3");
    test_instr(&[0xf2, 0x4f, 0x0f, 0xd6, 0xc3], "movdq2q mm0, xmm11");
    test_instr(&[0x66, 0x0f, 0xd7, 0xc1], "pmovmskb eax, xmm1");
    test_instr(&[0x66, 0x4f, 0x0f, 0xd7, 0xc1], "pmovmskb r8d, xmm9");
    test_invalid(&[0x66, 0x0f, 0xd7, 0x01]);
    test_instr(&[0x66, 0x0f, 0xd8, 0xc1], "psubusb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd8, 0x01], "psubusb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xd9, 0xc1], "psubusw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xd9, 0x01], "psubusw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xda, 0xc1], "pminub xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xda, 0x01], "pminub xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xdb, 0xc1], "pand xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xdb, 0x01], "pand xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xdc, 0xc1], "paddusb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xdc, 0x01], "paddusb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xdd, 0xc1], "paddusw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xdd, 0x01], "paddusw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xde, 0xc1], "pmaxub xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xde, 0x01], "pmaxub xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xdf, 0xc1], "pandn xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xdf, 0x01], "pandn xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe0, 0xc1], "pavgb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe0, 0x01], "pavgb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe1, 0xc1], "psraw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe1, 0x01], "psraw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe2, 0xc1], "psrad xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe2, 0x01], "psrad xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe3, 0xc1], "pavgw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe3, 0x01], "pavgw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe4, 0xc1], "pmulhuw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe4, 0x01], "pmulhuw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe5, 0xc1], "pmulhw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe5, 0x01], "pmulhw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe6, 0xc1], "cvttpd2dq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe6, 0x01], "cvttpd2dq xmm0, [rcx]");
    test_invalid(&[0x66, 0x0f, 0xe7, 0xc1]);
    test_instr(&[0x66, 0x0f, 0xe7, 0x01], "movntdq [rcx], xmm0");
    test_instr(&[0x66, 0x0f, 0xe8, 0xc1], "psubsb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe8, 0x01], "psubsb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xe9, 0xc1], "psubsw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xe9, 0x01], "psubsw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xea, 0xc1], "pminsw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xea, 0x01], "pminsw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xeb, 0xc3], "por xmm0, xmm3");
    test_instr(&[0x66, 0x0f, 0xeb, 0xc4], "por xmm0, xmm4");
    test_instr(&[0x66, 0x0f, 0xeb, 0xd3], "por xmm2, xmm3");
    test_instr(&[0x66, 0x0f, 0xeb, 0x12], "por xmm2, [rdx]");
    test_instr(&[0x66, 0x0f, 0xeb, 0xc1], "por xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xeb, 0x01], "por xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xec, 0xc1], "paddsb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xec, 0x01], "paddsb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xed, 0xc1], "paddsw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xed, 0x01], "paddsw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xee, 0xc1], "pmaxsw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xee, 0x01], "pmaxsw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xef, 0xc1], "pxor xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xef, 0x01], "pxor xmm0, [rcx]");
    test_invalid(&[0x66, 0x0f, 0xf0, 0xc1]);
    test_invalid(&[0x66, 0x0f, 0xf0, 0x01]);
    test_instr(&[0x66, 0x0f, 0xf1, 0xc1], "psllw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf1, 0x01], "psllw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf2, 0xc1], "pslld xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf2, 0x01], "pslld xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf3, 0xc1], "psllq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf3, 0x01], "psllq xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf4, 0xc1], "pmuludq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf4, 0x01], "pmuludq xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf5, 0xc1], "pmaddwd xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf5, 0x01], "pmaddwd xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf6, 0xc1], "psadbw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf6, 0x01], "psadbw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf7, 0xc1], "maskmovdqu xmm0, xmm1");
    test_invalid(&[0x66, 0x0f, 0xf7, 0x01]);
    test_instr(&[0x66, 0x0f, 0xf8, 0xc1], "psubb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf8, 0x01], "psubb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xf9, 0xc1], "psubw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xf9, 0x01], "psubw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xfa, 0xc1], "psubd xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xfa, 0x01], "psubd xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xfb, 0xc1], "psubq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xfb, 0x01], "psubq xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xfc, 0xc1], "paddb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xfc, 0x01], "paddb xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xfd, 0xc1], "paddw xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xfd, 0x01], "paddw xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xfe, 0xc1], "paddd xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xfe, 0x01], "paddd xmm0, [rcx]");
    test_instr(&[0x66, 0x0f, 0xff, 0xc1], "paddq xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0xff, 0x01], "paddq xmm0, [rcx]");

    test_instr(&[0x66, 0x0f, 0x74, 0xc1], "pcmpeqb xmm0, xmm1");
    test_instr(&[0x66, 0x0f, 0x74, 0x12], "pcmpeqb xmm2, [rdx]");
    test_instr(&[0x66, 0x0f, 0xf8, 0xc8], "psubb xmm1, xmm0");
    test_instr(&[0x66, 0x0f, 0xf8, 0xd0], "psubb xmm2, xmm0");
    test_instr(&[0x66, 0x0f, 0xf8, 0x12], "psubb xmm2, [rdx]");
}

#[test]
fn test_sse3() {
    fn test_instr(bytes: &[u8], text: &'static str) {
        test_display_under(&InstDecoder::minimal().with_sse3(), bytes, text);
        test_invalid_under(&InstDecoder::minimal(), bytes);
        // avx doesn't imply older instructions are necessarily valid
        test_invalid_under(&InstDecoder::minimal().with_avx(), bytes);
        // sse4 doesn't imply older instructions are necessarily valid
        test_invalid_under(&InstDecoder::minimal().with_sse4_1(), bytes);
        test_invalid_under(&InstDecoder::minimal().with_sse4_2(), bytes);
    }

    fn test_instr_invalid(bytes: &[u8]) {
        test_invalid_under(&InstDecoder::minimal().with_sse3(), bytes);
        test_invalid_under(&InstDecoder::default(), bytes);
    }
    test_instr(&[0xf2, 0x0f, 0xf0, 0x0f], "lddqu xmm1, [rdi]");
    test_instr_invalid(&[0xf2, 0x0f, 0xf0, 0xcf]);
    test_instr(&[0xf2, 0x0f, 0xd0, 0x0f], "addsubps xmm1, [rdi]");
    test_instr(&[0xf2, 0x0f, 0xd0, 0xcf], "addsubps xmm1, xmm7");
    test_instr(&[0xf2, 0x4f, 0x0f, 0xd0, 0xcf], "addsubps xmm9, xmm15");
    test_instr(&[0x66, 0x0f, 0xd0, 0x0f], "addsubpd xmm1, [rdi]");
    test_instr(&[0x66, 0x0f, 0xd0, 0xcf], "addsubpd xmm1, xmm7");
    test_instr(&[0x66, 0x4f, 0x0f, 0xd0, 0xcf], "addsubpd xmm9, xmm15");

    test_instr(&[0xf2, 0x0f, 0x7c, 0x0f], "haddps xmm1, [rdi]");
    test_instr(&[0xf2, 0x0f, 0x7c, 0xcf], "haddps xmm1, xmm7");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x7c, 0xcf], "haddps xmm9, xmm15");
    test_instr(&[0x66, 0x0f, 0x7c, 0x0f], "haddpd xmm1, [rdi]");
    test_instr(&[0x66, 0x0f, 0x7c, 0xcf], "haddpd xmm1, xmm7");
    test_instr(&[0x66, 0x4f, 0x0f, 0x7c, 0xcf], "haddpd xmm9, xmm15");

    test_instr(&[0xf2, 0x0f, 0x7d, 0x0f], "hsubps xmm1, [rdi]");
    test_instr(&[0xf2, 0x0f, 0x7d, 0xcf], "hsubps xmm1, xmm7");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x7d, 0xcf], "hsubps xmm9, xmm15");
    test_instr(&[0x66, 0x0f, 0x7d, 0x0f], "hsubpd xmm1, [rdi]");
    test_instr(&[0x66, 0x0f, 0x7d, 0xcf], "hsubpd xmm1, xmm7");
    test_instr(&[0x66, 0x4f, 0x0f, 0x7d, 0xcf], "hsubpd xmm9, xmm15");

    test_instr(&[0xf3, 0x0f, 0x12, 0x0f], "movsldup xmm1, [rdi]");
    test_instr(&[0xf3, 0x0f, 0x12, 0xcf], "movsldup xmm1, xmm7");
    test_instr(&[0xf3, 0x4f, 0x0f, 0x12, 0xcf], "movsldup xmm9, xmm15");
    test_instr(&[0xf3, 0x0f, 0x16, 0x0f], "movshdup xmm1, [rdi]");
    test_instr(&[0xf3, 0x0f, 0x16, 0xcf], "movshdup xmm1, xmm7");
    test_instr(&[0xf3, 0x4f, 0x0f, 0x16, 0xcf], "movshdup xmm9, xmm15");

    test_instr(&[0xf2, 0x0f, 0x12, 0x0f], "movddup xmm1, [rdi]");
    test_instr(&[0xf2, 0x0f, 0x12, 0xcf], "movddup xmm1, xmm7");
    test_instr(&[0xf2, 0x4f, 0x0f, 0x12, 0xcf], "movddup xmm9, xmm15");

    test_instr(&[0x66, 0x0f, 0x01, 0xc8], "monitor");
    test_instr(&[0xf2, 0x0f, 0x01, 0xc8], "monitor");
    test_instr(&[0xf3, 0x0f, 0x01, 0xc8], "monitor");

    test_instr(&[0x66, 0x0f, 0x01, 0xc9], "mwait");
    test_instr(&[0xf2, 0x0f, 0x01, 0xc9], "mwait");
    test_instr(&[0xf3, 0x0f, 0x01, 0xc9], "mwait");
}

#[test]
fn test_0f01() {
    // drawn heavily from "Table A-6.  Opcode Extensions for One- and Two-byte Opcodes by Group
    // Number"
    test_display(&[0x0f, 0x01, 0x38], "invlpg [rax]");
    test_display(&[0x0f, 0x01, 0x3f], "invlpg [rdi]");
    test_display(&[0x0f, 0x01, 0x40, 0xff], "sgdt [rax - 0x1]");
    test_display(&[0x0f, 0x01, 0x41, 0xff], "sgdt [rcx - 0x1]");
    test_display(&[0x0f, 0x01, 0x49, 0xff], "sidt [rcx - 0x1]");
    test_display(&[0x0f, 0x01, 0x51, 0xff], "lgdt [rcx - 0x1]");
    test_display(&[0x0f, 0x01, 0x59, 0xff], "lidt [rcx - 0x1]");
    test_display(&[0x0f, 0x01, 0x61, 0xff], "smsw [rcx - 0x1]");
    test_display(&[0x0f, 0x01, 0xc0], "enclv");
    test_display(&[0x0f, 0x01, 0xc1], "vmcall");
    test_display(&[0x0f, 0x01, 0xc2], "vmlaunch");
    test_display(&[0x0f, 0x01, 0xc3], "vmresume");
    test_display(&[0x0f, 0x01, 0xc4], "vmxoff");
    test_invalid(&[0x0f, 0x01, 0xc5]);
    test_invalid(&[0x0f, 0x01, 0xc6]);
    test_invalid(&[0x0f, 0x01, 0xc7]);
    test_display(&[0x0f, 0x01, 0xc8], "monitor");
    test_display(&[0x0f, 0x01, 0xc9], "mwait");
    test_display(&[0x0f, 0x01, 0xca], "clac");
    test_display(&[0x0f, 0x01, 0xcb], "stac");
    test_display(&[0x0f, 0x01, 0xcf], "encls");
    test_display(&[0x0f, 0x01, 0xd0], "xgetbv");
    test_display(&[0x0f, 0x01, 0xd1], "xsetbv");
    test_invalid(&[0x0f, 0x01, 0xd2]);
    test_invalid(&[0x0f, 0x01, 0xd3]);
    test_display(&[0x0f, 0x01, 0xd4], "vmfunc");
    test_display(&[0x0f, 0x01, 0xd5], "xend");
    test_display(&[0x0f, 0x01, 0xd6], "xtest");
    test_display(&[0x0f, 0x01, 0xd7], "enclu");
    test_display(&[0x0f, 0x01, 0xd8], "vmrun rax");
    test_display(&[0x0f, 0x01, 0xd9], "vmmcall");
    test_display(&[0x0f, 0x01, 0xda], "vmload rax");
    test_display(&[0x0f, 0x01, 0xdb], "vmsave rax");
    test_display(&[0x0f, 0x01, 0xdc], "stgi");
    test_display(&[0x0f, 0x01, 0xdd], "clgi");
    test_display(&[0x0f, 0x01, 0xde], "skinit eax");
    test_display(&[0x0f, 0x01, 0xdf], "invlpga rax, ecx");
    test_display(&[0x0f, 0x01, 0xee], "rdpkru");
    test_display(&[0x0f, 0x01, 0xef], "wrpkru");
    test_display(&[0x0f, 0x01, 0xf8], "swapgs");
    test_display(&[0x0f, 0x01, 0xf9], "rdtscp");
}

#[test]
fn test_0fae() {
    let intel = InstDecoder::minimal().with_intel_quirks();
    let amd = InstDecoder::minimal().with_amd_quirks();
    let default = InstDecoder::default();
    let minimal = InstDecoder::minimal();
    // drawn heavily from "Table A-6.  Opcode Extensions for One- and Two-byte Opcodes by Group
    // Number"
    test_display(&[0x0f, 0xae, 0x04, 0x4f], "fxsave [rdi + rcx * 2]");
    test_display(&[0x0f, 0xae, 0x0c, 0x4f], "fxrstor [rdi + rcx * 2]");
    test_display(&[0x0f, 0xae, 0x14, 0x4f], "ldmxcsr [rdi + rcx * 2]");
    test_display(&[0x0f, 0xae, 0x1c, 0x4f], "stmxcsr [rdi + rcx * 2]");
    test_display(&[0x0f, 0xae, 0x24, 0x4f], "xsave [rdi + rcx * 2]");
    test_display(&[0x0f, 0xc7, 0x5c, 0x24, 0x40], "xrstors [rsp + 0x40]");
    test_display(&[0x0f, 0xc7, 0x64, 0x24, 0x40], "xsavec [rsp + 0x40]");
    test_display(&[0x0f, 0xc7, 0x6c, 0x24, 0x40], "xsaves [rsp + 0x40]");
    test_display(&[0x4f, 0x0f, 0xc7, 0x5c, 0x24, 0x40], "xrstors64 [r12 + r12 * 1 + 0x40]");
    test_display(&[0x4f, 0x0f, 0xc7, 0x64, 0x24, 0x40], "xsavec64 [r12 + r12 * 1 + 0x40]");
    test_display(&[0x4f, 0x0f, 0xc7, 0x6c, 0x24, 0x40], "xsaves64 [r12 + r12 * 1 + 0x40]");
    test_display(&[0x0f, 0xc7, 0x74, 0x24, 0x40], "vmptrld [rsp + 0x40]");
    test_display(&[0x0f, 0xc7, 0x7c, 0x24, 0x40], "vmptrst [rsp + 0x40]");
    test_display(&[0x0f, 0xae, 0x2c, 0x4f], "xrstor [rdi + rcx * 2]");
    test_display(&[0x0f, 0xae, 0x34, 0x4f], "xsaveopt [rdi + rcx * 2]");
    test_display(&[0x0f, 0xae, 0x3c, 0x4f], "clflush [rdi + rcx * 2]");

    for (modrm, text) in &[(0xe8u8, "lfence"), (0xf0u8, "mfence"), (0xf8u8, "sfence")] {
        test_display_under(&intel, &[0x0f, 0xae, *modrm], text);
        test_display_under(&amd, &[0x0f, 0xae, *modrm], text);
        test_display_under(&default, &[0x0f, 0xae, *modrm], text);
        test_display_under(&minimal, &[0x0f, 0xae, *modrm], text);
        // it turns out intel and amd accept m != 0 for {l,m,s}fence:
        // from intel:
        // ```
        // Specification of the instruction's opcode above indicates a ModR/M byte of F0. For this
        // instruction, the processor ignores the r/m field of the ModR/M byte. Thus, MFENCE is encoded
        // by any opcode of the form 0F AE Fx, where x is in the range 0-7.
        // ```
        // whereas amd does not discuss the r/m field at all. at least as of zen, amd also accepts
        // these encodings.
        for m in 1u8..8u8 {
            test_display_under(&intel, &[0x0f, 0xae, modrm | m], text);
            test_display_under(&amd, &[0x0f, 0xae, modrm | m], text);
            test_display_under(&default, &[0x0f, 0xae, modrm | m], text);
            test_invalid_under(&minimal, &[0x0f, 0xae, modrm | m]);
        }
    }
}

#[test]
fn test_system() {
    test_display(&[0x66, 0x4f, 0x0f, 0xb2, 0x00], "lss r8, [r8]");
    test_display(&[0x67, 0x4f, 0x0f, 0xb2, 0x00], "lss r8, [r8d]");
    test_display(&[0x4f, 0x0f, 0xb2, 0x00], "lss r8, [r8]");
    test_display(&[0x45, 0x0f, 0x22, 0xc8], "mov cr9, r8");
    test_display(&[0x45, 0x0f, 0x20, 0xc8], "mov r8, cr9");
    test_display(&[0x40, 0x0f, 0x22, 0xc8], "mov cr1, rax");
    test_display(&[0x0f, 0x22, 0xc8], "mov cr1, rax");
    test_display(&[0x44, 0x0f, 0x22, 0xcf], "mov cr9, rdi");
    test_display(&[0x0f, 0x22, 0xcf], "mov cr1, rdi");
    test_display(&[0x0f, 0x20, 0xc8], "mov rax, cr1");

    test_display(&[0x45, 0x0f, 0x23, 0xc8], "mov dr9, r8");
    test_display(&[0x45, 0x0f, 0x21, 0xc8], "mov r8, dr9");
    test_display(&[0x40, 0x0f, 0x23, 0xc8], "mov dr1, rax");
    test_display(&[0x0f, 0x23, 0xc8], "mov dr1, rax");
    test_display(&[0x0f, 0x21, 0xc8], "mov rax, dr1");
}

#[test]
fn test_arithmetic() {
    test_display(&[0x81, 0xec, 0x10, 0x03, 0x00, 0x00], "sub esp, 0x310");
    test_display(&[0x0f, 0xaf, 0xc2], "imul eax, edx");
    test_display(&[0x4b, 0x69, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65], "imul rax, [r11 + 0x6f], 0x656c706d");
    test_display(&[0x66, 0x0f, 0xaf, 0xd1], "imul dx, cx");
    test_display(&[0x4b, 0x6b, 0x43, 0x6f, 0x6d], "imul al, [r11 + 0x6f], 0x6d");
    test_display(&[0x4f, 0x4e, 0x00, 0xcc], "add spl, r9b");
}

#[test]
#[allow(non_snake_case)]
fn test_E_decode() {
    test_display(&[0xff, 0x75, 0xb8], "push [rbp - 0x48]");
    test_display(&[0xff, 0x75, 0x08], "push [rbp + 0x8]");
}

#[test]
fn test_sse() {
    test_display(&[0xf3, 0x0f, 0x10, 0x0c, 0xc7], "movss xmm1, [rdi + rax * 8]");
    test_display(&[0xf3, 0x0f, 0x11, 0x0c, 0xc7], "movss [rdi + rax * 8], xmm1");
    test_display(&[0x4f, 0x0f, 0x28, 0x00], "movaps xmm8, [r8]");
    test_display(&[0x4f, 0x0f, 0x29, 0x00], "movaps [r8], xmm8");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2a, 0xc1], "cvtsi2ss xmm8, r9");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2a, 0x01], "cvtsi2ss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x2b, 0x00], "movntps [r8], xmm8");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2c, 0xc1], "cvttss2si r8, xmm9");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2c, 0x01], "cvttss2si r8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2d, 0xc1], "cvtss2si r8, xmm9");
    test_display(&[0xf3, 0x4f, 0x0f, 0x2d, 0x01], "cvtss2si r8, [r9]");
    test_display(&[0x4f, 0x0f, 0x2e, 0x00], "ucomiss xmm8, [r8]");
    test_display(&[0x4f, 0x0f, 0x2f, 0x00], "comiss xmm8, [r8]");
    test_display(&[0x0f, 0x28, 0xd0], "movaps xmm2, xmm0");
    test_display(&[0x66, 0x0f, 0x28, 0xd0], "movapd xmm2, xmm0");
    test_display(&[0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [rax]");
    test_display(&[0x4f, 0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [rax]");
    test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]");
    test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]");
    test_display(&[0x67, 0x4f, 0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [eax]");
    test_display(&[0x67, 0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8d]");
    test_display(&[0x66, 0x0f, 0x29, 0x00], "movapd [rax], xmm0");
    test_invalid(&[0x4f, 0x0f, 0x50, 0x00]);
    test_display(&[0x4f, 0x0f, 0x50, 0xc1], "movmskps r8d, xmm9");
    test_display(&[0x4f, 0x0f, 0x51, 0x01], "sqrtps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x51, 0x01], "sqrtss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x52, 0x01], "rsqrtss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x53, 0x01], "rcpss xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x53, 0xc1], "rcpss xmm8, xmm9");
    test_display(&[0x4f, 0x0f, 0x54, 0x01], "andps xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x55, 0x01], "andnps xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x56, 0x01], "orps xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x57, 0x01], "xorps xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x58, 0x01], "addps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x58, 0x01], "addss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x59, 0x01], "mulps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x59, 0x01], "mulss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x5a, 0x01], "cvtps2pd xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x5a, 0x01], "cvtss2sd xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x5b, 0x01], "cvtdq2ps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x5b, 0x01], "cvttps2dq xmm8, [r9]");
    test_display(&[0x67, 0x4f, 0x0f, 0x5b, 0x01], "cvtdq2ps xmm8, [r9d]");
    test_display(&[0x4f, 0x0f, 0x5c, 0x01], "subps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x5c, 0x01], "subss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x5d, 0x01], "minps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x5d, 0x01], "minss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x5e, 0x01], "divps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x5e, 0x01], "divss xmm8, [r9]");
    test_display(&[0x4f, 0x0f, 0x5f, 0x01], "maxps xmm8, [r9]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x5f, 0x01], "maxss xmm8, [r9]");

    test_display(&[0x0f, 0x70, 0x00, 0x7f], "pshufw mm0, [rax], 0x7f");
    test_display(&[0x4f, 0x0f, 0x70, 0x00, 0x7f], "pshufw mm0, [r8], 0x7f");

    test_display(&[0x66, 0x0f, 0xef, 0xc0], "pxor xmm0, xmm0");
    test_display(&[0x66, 0x4f, 0x0f, 0xef, 0xc0], "pxor xmm8, xmm8");
    test_display(&[0xf2, 0x0f, 0x10, 0x0c, 0xc6], "movsd xmm1, [rsi + rax * 8]");
    test_display(&[0xf3, 0x0f, 0x10, 0x04, 0x86], "movss xmm0, [rsi + rax * 4]");
    test_display(&[0xf2, 0x0f, 0x59, 0xc8], "mulsd xmm1, xmm0");
    test_display(&[0xf3, 0x0f, 0x59, 0xc8], "mulss xmm1, xmm0");
    test_display(&[0xf2, 0x4f, 0x0f, 0x59, 0xc8], "mulsd xmm9, xmm8");

    test_display(
        &[0xf3, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "movdqu xmm11, [r12 + r11 * 4 - 0x334455cc]"
    );
    test_display(&[0xf3, 0x0f, 0x70, 0xc0, 0x4e], "pshufhw xmm0, xmm0, 0x4e");
    test_display(&[0xf3, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm1");
    test_display(&[0xf3, 0x4f, 0x0f, 0x7e, 0xc1], "movd r9, mm0"); // use of rex.w demotes to movd r/mm
    test_display(&[0xf3, 0x40, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm1");
    test_display(&[0xf3, 0x41, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm9");
    test_display(&[0xf3, 0x42, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm1");
    test_display(&[0xf3, 0x44, 0x0f, 0x7e, 0xc1], "movq xmm8, xmm1");
    test_display(&[0xf3, 0x48, 0x0f, 0x7e, 0xc1], "movd rcx, mm0");
    test_display(
        &[0xf3, 0x4f, 0x0f, 0x7f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc],
        "movdqu [r12 + r11 * 4 - 0x334455cc], xmm11"
    );

    test_display(&[0xf3, 0x0f, 0xc2, 0xc3, 0x08], "cmpss xmm0, xmm3, 0x8");
    test_display(&[0xf3, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmpss xmm8, xmm11, 0x8");
    test_display(&[0xf3, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmpss xmm8, [r11], 0x8");
}

// SETLE, SETNG, ...

#[test]
fn test_mov() {
    // test_display(&[0xa1, 0x93, 0x62, 0xc4, 0x00, 0x12, 0x34, 0x12, 0x34], "mov eax, [0x3412341200c46293]");
    // RCT.exe 32bit version, TODO: FIX
    test_display(&[0xa1, 0x93, 0x62, 0xc4, 0x00], "mov eax, [0xc46293]");
    test_display(&[0xba, 0x01, 0x00, 0x00, 0x00], "mov edx, 0x1");
    test_display(&[0x48, 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, 0x00], "mov [rsp], 0x0");
    test_display(&[0x48, 0x89, 0x44, 0x24, 0x08], "mov [rsp + 0x8], rax");
    test_display(&[0x48, 0x89, 0x43, 0x18], "mov [rbx + 0x18], rax");
    test_display(&[0x48, 0xc7, 0x43, 0x10, 0x00, 0x00, 0x00, 0x00], "mov [rbx + 0x10], 0x0");
    test_display(&[0x49, 0x89, 0x4e, 0x08], "mov [r14 + 0x8], rcx");
    test_display(&[0x48, 0x8b, 0x32], "mov rsi, [rdx]");
    test_display(&[0x4d, 0x8b, 0x4c, 0x10, 0xf8], "mov r9, [r8 + rdx * 1 - 0x8]");
    test_display(&[0x49, 0x89, 0x46, 0x10], "mov [r14 + 0x10], rax");
    test_display(&[0x4d, 0x0f, 0x43, 0xec], "cmovnb r13, r12");
    test_display(&[0x0f, 0xb6, 0x06], "movzx eax, byte [rsi]");
    test_display(&[0x0f, 0xb7, 0x06], "movzx eax, word [rsi]");
    test_display(&[0x89, 0x55, 0x94], "mov [rbp - 0x6c], edx");
    test_display(&[0x65, 0x4c, 0x89, 0x04, 0x25, 0xa8, 0x01, 0x00, 0x00], "mov gs:[0x1a8], r8");
    test_display(&[0x0f, 0xbe, 0x83, 0xb4, 0x00, 0x00, 0x00], "movsx eax, byte [rbx + 0xb4]");
    test_display(&[0x46, 0x63, 0xc1], "movsxd r8, ecx");
    test_display(&[0x48, 0x63, 0x04, 0xba], "movsxd rax, [rdx + rdi * 4]");
    test_display(&[0xf3, 0x0f, 0x6f, 0x07], "movdqu xmm0, [rdi]");
    test_display(&[0xf3, 0x0f, 0x7f, 0x45, 0x00], "movdqu [rbp], xmm0");
}

#[test]
fn test_stack() {
    test_display(&[0x66, 0x41, 0x50], "push r8w");
}

#[test]
fn test_prefixes() {
    test_display(&[0x66, 0x41, 0x31, 0xc0], "xor r8w, ax");
    test_display(&[0x66, 0x41, 0x32, 0xc0], "xor al, r8b");
    test_display(&[0x40, 0x32, 0xc5], "xor al, bpl");

    // test that WAIT doesn't blow up, at least...
    assert_eq!(InstDecoder::default().decode([0x9b, 0xf8].iter().cloned()).err(), Some(DecodeError::IncompleteDecoder));
}

#[test]
fn test_control_flow() {
    test_display(&[0x73, 0x31], "jnb 0x31");
    test_display(&[0x72, 0x5a], "jb 0x5a");
    test_display(&[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00], "jna 0x18b");
    test_display(&[0x74, 0x47], "jz 0x47");
    test_display(&[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00], "call [rip + 0x24727e]");
    test_display(&[0xff, 0x24, 0xcd, 0x70, 0xa0, 0xbc, 0x01], "jmp [rcx * 8 + 0x1bca070]");
    test_display(&[0xff, 0xe0], "jmp rax");
    test_display(&[0x66, 0xff, 0xe0], "jmp rax");
    test_display(&[0x67, 0xff, 0xe0], "jmp rax");
    test_invalid(&[0xff, 0xd8]);
    test_display(&[0xff, 0x18], "callf [rax]");
    test_display(&[0xc3], "ret");
}

#[test]
fn test_test_cmp() {
    test_display(&[0xf6, 0x05, 0x2c, 0x9b, 0xff, 0xff, 0x01], "test [rip - 0x64d4], 0x1");
    test_display(&[0x48, 0x3d, 0x01, 0xf0, 0xff, 0xff], "cmp rax, -0xfff");
    test_display(&[0x3d, 0x01, 0xf0, 0xff, 0xff], "cmp eax, -0xfff");
    test_display(&[0x48, 0x83, 0xf8, 0xff], "cmp rax, -0x1");
    test_display(&[0x48, 0x39, 0xc6], "cmp rsi, rax");
}

#[test]
fn test_push_pop() {
    test_display(&[0x5b], "pop rbx");
    test_display(&[0x41, 0x5e], "pop r14");
    test_display(&[0x68, 0x7f, 0x63, 0xc4, 0x00], "push 0xc4637f");
}

#[test]
fn test_bmi1() {
    let bmi1 = InstDecoder::minimal().with_bmi1();
    let no_bmi1 = InstDecoder::minimal();
    test_display_under(&bmi1, &[0x41, 0x0f, 0xbc, 0xd3], "tzcnt edx, r11d");
    test_display_under(&no_bmi1, &[0x41, 0x0f, 0xbc, 0xd3], "bsf edx, r11d");

    // just 0f38
    test_display_under(&bmi1, &[0xc4, 0xc2, 0x60, 0xf2, 0x01], "andn eax, ebx, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0xe0, 0xf2, 0x01], "andn rax, rbx, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0x78, 0xf3, 0x09], "blsr eax, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0xf8, 0xf3, 0x09], "blsr rax, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0x78, 0xf3, 0x11], "blsmsk eax, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0xf8, 0xf3, 0x11], "blsmsk rax, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0x78, 0xf3, 0x19], "blsi eax, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0xf8, 0xf3, 0x19], "blsi rax, [r9]");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0x60, 0xf7, 0x01], "bextr eax, [r9], ebx");
    test_display_under(&bmi1, &[0xc4, 0xc2, 0xe0, 0xf7, 0x01], "bextr rax, [r9], rbx");
}

#[test]
fn test_bmi2() {
    let bmi2 = InstDecoder::minimal().with_bmi2();
    // f2 0f3a
    test_display_under(&bmi2, &[0xc4, 0xc3, 0x7b, 0xf0, 0x01, 0x05], "rorx eax, [r9], 0x5");
    test_display_under(&bmi2, &[0xc4, 0xc3, 0xfb, 0xf0, 0x01, 0x05], "rorx rax, [r9], 0x5");

    // f2 0f38 map
    test_display_under(&bmi2, &[0xc4, 0xe2, 0x63, 0xf5, 0x07], "pdep eax, ebx, [rdi]");
    test_display_under(&bmi2, &[0xc4, 0xe2, 0xe3, 0xf5, 0x07], "pdep rax, rbx, [rdi]");
    test_display_under(&bmi2, &[0xc4, 0xe2, 0x63, 0xf6, 0x07], "mulx eax, ebx, [rdi]");
    test_display_under(&bmi2, &[0xc4, 0xe2, 0xe3, 0xf6, 0x07], "mulx rax, rbx, [rdi]");
    test_display_under(&bmi2, &[0xc4, 0xc2, 0x63, 0xf7, 0x01], "shrx eax, [r9], ebx");
    test_display_under(&bmi2, &[0xc4, 0xc2, 0xe3, 0xf7, 0x01], "shrx rax, [r9], rbx");

    // f3 0f38 map
    test_display_under(&bmi2, &[0xc4, 0xe2, 0x62, 0xf5, 0x07], "pext eax, ebx, [rdi]");
    test_display_under(&bmi2, &[0xc4, 0xe2, 0xe2, 0xf5, 0x07], "pext rax, rbx, [rdi]");
    test_display_under(&bmi2, &[0xc4, 0xc2, 0x62, 0xf7, 0x01], "sarx eax, [r9], ebx");
    test_display_under(&bmi2, &[0xc4, 0xc2, 0xe2, 0xf7, 0x01], "sarx rax, [r9], rbx");

    // just 0f38
    test_display_under(&bmi2, &[0xc4, 0xe2, 0x60, 0xf5, 0x07], "bzhi eax, [rdi], ebx");
    test_display_under(&bmi2, &[0xc4, 0xe2, 0xe0, 0xf5, 0x07], "bzhi rax, [rdi], rbx");

    // 66 0f38
    test_display_under(&bmi2, &[0xc4, 0xc2, 0x61, 0xf7, 0x01], "shlx eax, [r9], ebx");
    test_display_under(&bmi2, &[0xc4, 0xc2, 0xe1, 0xf7, 0x01], "shlx rax, [r9], rbx");
}

#[test]
fn test_popcnt() {
    let popcnt = InstDecoder::minimal().with_popcnt();
    let intel_popcnt = InstDecoder::minimal().with_intel_quirks().with_sse4_2();
    let no_popcnt = InstDecoder::minimal();
    test_display_under(&popcnt, &[0xf3, 0x0f, 0xb8, 0xc1], "popcnt eax, ecx");
    test_display_under(&intel_popcnt, &[0xf3, 0x0f, 0xb8, 0xc1], "popcnt eax, ecx");
    test_display_under(&popcnt, &[0xf3, 0x4f, 0x0f, 0xb8, 0xc1], "popcnt r8, r9");
    test_display_under(&intel_popcnt, &[0xf3, 0x4f, 0x0f, 0xb8, 0xc1], "popcnt r8, r9");
    test_invalid_under(&no_popcnt, &[0xf3, 0x4f, 0x0f, 0xb8, 0xc1]);
}

#[test]
fn test_bitwise() {
    test_display_under(&InstDecoder::minimal(), &[0x41, 0x0f, 0xbc, 0xd3], "bsf edx, r11d");
    test_display(&[0x48, 0x0f, 0xa3, 0xd0], "bt rax, rdx");
    test_display(&[0x48, 0x0f, 0xab, 0xd0], "bts rax, rdx");
}

#[test]
fn test_misc() {
    // TODO
//    test_display(&[0xf2, 0x0f, 0x38, 0xf0, 0xc1], "crc32 eax, cl");
//    test_display(&[0xf2, 0x0f, 0x38, 0xf1, 0xc1], "crc32 eax, ecx");
    test_display(&[0xfe, 0x00], "inc [rax]"); // TODO: inc byte [rax]
    test_display(&[0xfe, 0x08], "dec [rax]"); // TODO: dec byte [rax]
    test_display(&[0xff, 0x00], "inc [rax]"); // TODO: inc dword [rax]
    test_display(&[0x48, 0xff, 0x00], "inc [rax]"); // TODO: inc qword [rax]
    test_display(&[0xe4, 0x99], "in al, 0x99");
    test_display(&[0xe5, 0x99], "in eax, 0x99");
    test_display(&[0x67, 0xe5, 0x99], "in eax, 0x99");
    test_display(&[0x4f, 0xe5, 0x99], "in eax, 0x99");
    test_display(&[0xe6, 0x99], "out 0x99, al");
    test_display(&[0x4f, 0xe7, 0x99], "out 0x99, eax");
    test_display(&[0xec], "in al, dx");
    test_display(&[0xed], "in eax, dx");
    test_display(&[0xee], "out dx, al");
    test_display(&[0xef], "out dx, eax");
    test_display(&[0xcd, 0x00], "int 0x0");
    test_display(&[0xcd, 0xff], "int 0xff");
    test_display(&[0x9c], "pushf");
    test_display(&[0x48, 0x98], "cdqe");
    test_display(&[0x98], "cwde");
    test_display(&[0x66, 0x99], "cwd");
    test_display(&[0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00], "nop cs:[rax + rax * 1]");
    test_display(&[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00], "nop [rax + rax * 1]");
    test_display(&[0x48, 0x8d, 0xa4, 0xc7, 0x20, 0x00, 0x00, 0x12], "lea rsp, [rdi + rax * 8 + 0x12000020]");
    test_display(&[0x33, 0xc0], "xor eax, eax");
    test_display(&[0x48, 0x8d, 0x53, 0x08], "lea rdx, [rbx + 0x8]");
    test_display(&[0x31, 0xc9], "xor ecx, ecx");
    test_display(&[0x48, 0x29, 0xc8], "sub rax, rcx");
    test_display(&[0x48, 0x03, 0x0b], "add rcx, [rbx]");
    test_display(&[0x48, 0x8d, 0x0c, 0x12], "lea rcx, [rdx + rdx * 1]");
    test_display(&[0xf6, 0xc2, 0x18], "test dl, 0x18");
    test_display(&[0xf3, 0x48, 0xab], "rep stos es:[rdi], rax");
    test_display(&[0xf3, 0x48, 0xa5], "rep movs es:[rdi], ds:[rsi]");
    test_display(&[0xf3, 0x45, 0x0f, 0xbc, 0xd7], "tzcnt r10d, r15d");

    test_display(&[0xf3, 0x0f, 0xae, 0xc4], "rdfsbase esp");
    test_display(&[0xf3, 0x4f, 0x0f, 0xae, 0xc4], "rdfsbase r12");
    test_display(&[0xf3, 0x0f, 0xae, 0xcc], "rdgsbase esp");
    test_display(&[0xf3, 0x4f, 0x0f, 0xae, 0xcc], "rdgsbase r12");
    test_display(&[0xf3, 0x0f, 0xae, 0xd4], "wrfsbase esp");
    test_display(&[0xf3, 0x4f, 0x0f, 0xae, 0xd4], "wrfsbase r12");
    test_display(&[0xf3, 0x0f, 0xae, 0xdc], "wrgsbase esp");
    test_display(&[0xf3, 0x4f, 0x0f, 0xae, 0xdc], "wrgsbase r12");
    test_display(&[0x66, 0x0f, 0xae, 0x3f], "clflushopt [rdi]"); // or clflush without 66
    test_invalid(&[0x66, 0x0f, 0xae, 0xff]);
    test_display(&[0x66, 0x0f, 0xae, 0x37], "clwb [rdi]");
    test_invalid(&[0x66, 0x0f, 0xae, 0xf7]);
}

#[test]
#[ignore]
// TODO also not supported at all
fn evex() {
    test_display(&[0x62, 0xf2, 0x7d, 0x48, 0x2a, 0x44, 0x40, 0x01], "vmovntdqa zmm0, [rax + rax*2 + 0x40]");
    test_display(&[0x62, 0xf2, 0x7d, 0x08, 0x2a, 0x44, 0x40, 0x01], "vmovntdqa xmm0, [rax + rax*2 + 0x10]");
}

#[test]
fn test_vex() {
    fn test_instr(bytes: &[u8], text: &'static str) {
        test_display_under(&InstDecoder::minimal().with_avx(), bytes, text);
        test_display_under(&InstDecoder::default(), bytes, text);
        test_invalid_under(&InstDecoder::minimal(), bytes);
    }

    fn test_instr_invalid(bytes: &[u8]) {
        test_invalid_under(&InstDecoder::minimal().with_avx(), bytes);
        test_invalid_under(&InstDecoder::default(), bytes);
    }

    test_instr(&[0xc5, 0xf8, 0x10, 0x00], "vmovups xmm0, [rax]");
    test_instr(&[0xc5, 0xf8, 0x10, 0x01], "vmovups xmm0, [rcx]");
    test_instr(&[0xc5, 0x78, 0x10, 0x0f], "vmovups xmm9, [rdi]");
    test_instr(&[0xc5, 0xf8, 0x10, 0xcf], "vmovups xmm1, xmm7");
    test_instr(&[0xc5, 0xf9, 0x10, 0x0f], "vmovupd xmm1, [rdi]");
    test_instr(&[0xc5, 0xfa, 0x7e, 0x10], "vmovq xmm2, [rax]");
    test_instr(&[0xc5, 0xfc, 0x10, 0x0f], "vmovups ymm1, [rdi]");
    test_instr(&[0xc5, 0xfd, 0x10, 0x0f], "vmovupd ymm1, [rdi]");
    test_instr(&[0xc5, 0xfe, 0x10, 0x0f], "vmovss xmm1, [rdi]");
    test_instr(&[0xc5, 0xff, 0x10, 0xcf], "vmovsd xmm1, xmm0, xmm7");
    test_instr(&[0xc5, 0xff, 0x10, 0x01], "vmovsd xmm0, [rcx]");
    test_instr(&[0xc5, 0xf9, 0x6e, 0xc6], "vmovd xmm0, esi");
    test_instr(&[0xc5, 0xf9, 0x6e, 0x13], "vmovd xmm2, [rbx]");
    test_instr(&[0xc5, 0xf9, 0x7e, 0xc6], "vmovd esi, xmm0");
    test_instr(&[0xc5, 0xf9, 0x7e, 0x13], "vmovd [rbx], xmm2");
    test_instr_invalid(&[0x4f, 0xc5, 0xf8, 0x10, 0x00]);
    test_instr_invalid(&[0xf0, 0xc5, 0xf8, 0x10, 0x00]);
    test_instr(&[0xc4, 0x02, 0x71, 0x00, 0x0f], "vpshufb xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x00, 0x0f], "vpshufb ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x00, 0xcd], "vpshufb xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x00, 0xcd], "vpshufb ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x01, 0x0f], "vphaddw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x01, 0x0f], "vphaddw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x01, 0xcd], "vphaddw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x01, 0xcd], "vphaddw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x02, 0x0f], "vphaddd xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x02, 0x0f], "vphaddd ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x02, 0xcd], "vphaddd xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x02, 0xcd], "vphaddd ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x03, 0x0f], "vphaddsw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x03, 0x0f], "vphaddsw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x03, 0xcd], "vphaddsw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x03, 0xcd], "vphaddsw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x04, 0x0f], "vphaddubsw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x04, 0x0f], "vphaddubsw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x04, 0xcd], "vphaddubsw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x04, 0xcd], "vphaddubsw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x05, 0x0f], "vphsubw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x05, 0x0f], "vphsubw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x05, 0xcd], "vphsubw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x05, 0xcd], "vphsubw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x06, 0x0f], "vphsubd xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x06, 0x0f], "vphsubd ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x06, 0xcd], "vphsubd xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x06, 0xcd], "vphsubd ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x07, 0x0f], "vphsubsw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x07, 0x0f], "vphsubsw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x07, 0xcd], "vphsubsw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x07, 0xcd], "vphsubsw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x08, 0x0f], "vpsignb xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x08, 0x0f], "vpsignb ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x08, 0xcd], "vpsignb xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x08, 0xcd], "vpsignb ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x09, 0x0f], "vpsignw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x09, 0x0f], "vpsignw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x09, 0xcd], "vpsignw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x09, 0xcd], "vpsignw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x0a, 0x0f], "vpsignd xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x0a, 0x0f], "vpsignd ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x0a, 0xcd], "vpsignd xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x0a, 0xcd], "vpsignd ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x0b, 0x0f], "vpmulhrsw xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x0b, 0x0f], "vpmulhrsw ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x0b, 0xcd], "vpmulhrsw xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x0b, 0xcd], "vpmulhrsw ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x0c, 0x0f], "vpermilps xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x0c, 0x0f], "vpermilps ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x0c, 0xcd], "vpermilps xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x0c, 0xcd], "vpermilps ymm9, ymm1, ymm13");
    test_instr(&[0xc4, 0x02, 0x71, 0x0d, 0x0f], "vpermilpd xmm9, xmm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x75, 0x0d, 0x0f], "vpermilpd ymm9, ymm1, [r15]");
    test_instr(&[0xc4, 0x02, 0x71, 0x0d, 0xcd], "vpermilpd xmm9, xmm1, xmm13");
    test_instr(&[0xc4, 0x02, 0x75, 0x0d, 0xcd], "vpermilpd ymm9, ymm1, ymm13");
    test_instr_invalid(&[0xc4, 0x02, 0x71, 0x0e, 0x00]);
    test_instr(&[0xc4, 0x02, 0x79, 0x0e, 0x0f], "vtestps xmm9, [r15]");
    test_instr(&[0xc4, 0x02, 0x7d, 0x0e, 0x0f], "vtestps ymm9, [r15]");
    test_instr(&[0xc4, 0x02, 0x79, 0x0e, 0xcd], "vtestps xmm9, xmm13");
    test_instr(&[0xc4, 0x02, 0x7d, 0x0e, 0xcd], "vtestps ymm9, ymm13");
    test_instr_invalid(&[0xc4, 0x02, 0x71, 0x0f, 0x00]);
    test_instr(&[0xc4, 0x02, 0x79, 0x0f, 0x0f], "vtestpd xmm9, [r15]");
    test_instr(&[0xc4, 0x02, 0x7d, 0x0f, 0x0f], "vtestpd ymm9, [r15]");
    test_instr(&[0xc4, 0x02, 0x79, 0x0f, 0xcd], "vtestpd xmm9, xmm13");
    test_instr(&[0xc4, 0x02, 0x7d, 0x0f, 0xcd], "vtestpd ymm9, ymm13");
    test_instr(&[0xc4, 0xe2, 0x65, 0x90, 0x04, 0x51], "vpgatherdd ymm0, [rcx + ymm2 * 2], ymm3");
    test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, [rcx + ymm2 * 2], ymm3");
    test_instr(&[0xc4, 0xe2, 0x65, 0x91, 0x04, 0x51], "vpgatherqd ymm0, [rcx + ymm2 * 2], ymm3");
    test_instr(&[0xc4, 0xe2, 0xe5, 0x91, 0x04, 0x51], "vpgatherqq ymm0, [rcx + ymm2 * 2], ymm3");
    test_instr(&[0xc4, 0x02, 0x09, 0x9d, 0xcd], "vfnmadd132ss xmm9, xmm14, xmm13");
    test_instr(&[0xc4, 0x02, 0x89, 0x9d, 0xcd], "vfnmadd132sd xmm9, xmm14, xmm13");
// ...
    test_instr(&[0xc4, 0xe3, 0x79, 0x14, 0xd0, 0x0a], "vpextrb rax, xmm2, 0xa");
    test_instr(&[0xc4, 0xe3, 0x79, 0x14, 0x10, 0x0a], "vpextrb [rax], xmm2, 0xa");
    test_instr_invalid(&[0xc4, 0xe3, 0xf9, 0x14, 0x00, 0xd0]);
    test_instr_invalid(&[0xc4, 0xe3, 0xf9, 0x14, 0x00, 0x0a]);
}

#[test]
fn strange_prefixing() {
    test_display(&[0x45, 0x66, 0x0f, 0x21, 0xc8], "mov rax, dr1");
    test_display(&[0x45, 0xf2, 0x0f, 0x21, 0xc8], "mov rax, dr1");
    test_display(&[0x45, 0xf3, 0x0f, 0x21, 0xc8], "mov rax, dr1");
}

#[test]
fn prefixed_0f() {
    test_display(&[0x0f, 0x02, 0xc0], "lar eax, ax");
    test_display(&[0x48, 0x0f, 0x02, 0xc0], "lar rax, ax");
    test_display(&[0x0f, 0x03, 0xc0], "lsl eax, eax");
    test_display(&[0x48, 0x0f, 0x03, 0xc0], "lsl rax, rax");
    test_display(&[0x0f, 0x05], "syscall");
    test_display(&[0x48, 0x0f, 0x05], "syscall");
    test_display(&[0x66, 0x0f, 0x05], "syscall");
    test_display(&[0x0f, 0x06], "clts");
    test_display(&[0xf2, 0x0f, 0x06], "clts");
    test_display(&[0x0f, 0x07], "sysret");
    test_display(&[0xf2, 0x0f, 0x07], "sysret");
    test_display(&[0x0f, 0x12, 0x0f], "movlps xmm1, [rdi]");
    test_display(&[0x0f, 0x12, 0xcf], "movhlps xmm1, xmm7");
    test_display(&[0x0f, 0x16, 0x0f], "movhps xmm1, [rdi]");
    test_display(&[0x0f, 0x16, 0xcf], "movlhps xmm1, xmm7");
    test_display(&[0x0f, 0x12, 0xc0], "movhlps xmm0, xmm0");
    test_invalid(&[0x0f, 0x13, 0xc0]);
    test_display(&[0x0f, 0x13, 0x00], "movlps [rax], xmm0");
    test_display(&[0x0f, 0x14, 0x08], "unpcklps xmm1, [rax]");
    test_display(&[0x0f, 0x15, 0x08], "unpckhps xmm1, [rax]");
    test_display(&[0x0f, 0x16, 0x0f], "movhps xmm1, [rdi]");
    test_display(&[0x0f, 0x16, 0xc0], "movlhps xmm0, xmm0");
    test_invalid(&[0x0f, 0x17, 0xc0]);
    test_display(&[0x0f, 0x17, 0x00], "movhps [rax], xmm0");
    test_invalid(&[0x0f, 0x18, 0xc0]);
    test_display(&[0x0f, 0x18, 0x00], "prefetchnta [rax]");
    test_display(&[0x0f, 0x18, 0x08], "prefetch0 [rax]");
    test_display(&[0x0f, 0x18, 0x10], "prefetch1 [rax]");
    test_display(&[0x0f, 0x18, 0x18], "prefetch2 [rax]");
    test_display(&[0x0f, 0x18, 0x20], "nop [rax]");
    test_display(&[0x4f, 0x0f, 0x18, 0x20], "nop [r8]");
    test_display(&[0x0f, 0x19, 0x20], "nop [rax]");
    test_display(&[0x0f, 0x1a, 0x20], "nop [rax]");
    test_display(&[0x0f, 0x1b, 0x20], "nop [rax]");
    test_display(&[0x0f, 0x1c, 0x20], "nop [rax]");
    test_display(&[0x0f, 0x1d, 0x20], "nop [rax]");
    test_display(&[0x0f, 0x1e, 0x20], "nop [rax]");
    test_display(&[0x0f, 0x1f, 0x20], "nop [rax]");
    test_display(&[0x45, 0x0f, 0x20, 0xc8], "mov r8, cr9");
    test_display(&[0x0f, 0x20, 0xc8], "mov rax, cr1");
    test_display(&[0x45, 0x0f, 0x21, 0xc8], "mov r8, dr9");
    test_display(&[0x0f, 0x21, 0xc8], "mov rax, dr1");
    test_display(&[0x45, 0x0f, 0x22, 0xc8], "mov cr9, r8");
    test_display(&[0x40, 0x0f, 0x22, 0xc8], "mov cr1, rax");
    test_display(&[0x0f, 0x22, 0xc8], "mov cr1, rax");
    test_display(&[0x44, 0x0f, 0x22, 0xcf], "mov cr9, rdi");
    test_display(&[0x0f, 0x22, 0xcf], "mov cr1, rdi");
    test_display(&[0x45, 0x0f, 0x23, 0xc8], "mov dr9, r8");
    test_display(&[0x40, 0x0f, 0x23, 0xc8], "mov dr1, rax");
    test_display(&[0x0f, 0x23, 0xc8], "mov dr1, rax");
    test_display(&[0x44, 0x0f, 0x23, 0xcf], "mov dr9, rdi");
    test_display(&[0x0f, 0x23, 0xcf], "mov dr1, rdi");
    test_display(&[0x0f, 0x30], "wrmsr");
    test_display(&[0x0f, 0x31], "rdtsc");
    test_display(&[0x0f, 0x32], "rdmsr");
    test_display(&[0x0f, 0x33], "rdpmc");
    test_display(&[0x0f, 0x34], "sysenter");
    test_display(&[0x0f, 0x35], "sysexit");
    test_invalid(&[0x0f, 0x36]);
    test_display(&[0x0f, 0x37], "getsec");
    test_display(&[0x0f, 0x60, 0x00], "punpcklbw mm0, [rax]");
    test_display(&[0x0f, 0x60, 0xc2], "punpcklbw mm0, mm2");
    test_display(&[0x0f, 0x61, 0x00], "punpcklwd mm0, [rax]");
    test_display(&[0x0f, 0x61, 0xc2], "punpcklwd mm0, mm2");
    test_display(&[0x0f, 0x62, 0x00], "punpckldq mm0, [rax]");
    test_display(&[0x0f, 0x62, 0xc2], "punpckldq mm0, mm2");
    test_display(&[0x0f, 0x63, 0x00], "packsswb mm0, [rax]");
    test_display(&[0x0f, 0x63, 0xc2], "packsswb mm0, mm2");
    test_display(&[0x0f, 0x64, 0x00], "pcmpgtb mm0, [rax]");
    test_display(&[0x0f, 0x64, 0xc2], "pcmpgtb mm0, mm2");
    test_display(&[0x0f, 0x65, 0x00], "pcmpgtw mm0, [rax]");
    test_display(&[0x0f, 0x65, 0xc2], "pcmpgtw mm0, mm2");
    test_display(&[0x0f, 0x66, 0x00], "pcmpgtd mm0, [rax]");
    test_display(&[0x0f, 0x66, 0xc2], "pcmpgtd mm0, mm2");
    test_display(&[0x0f, 0x67, 0x00], "packuswb mm0, [rax]");
    test_display(&[0x0f, 0x67, 0xc2], "packuswb mm0, mm2");
    test_display(&[0x0f, 0x68, 0x00], "punpckhbw mm0, [rax]");
    test_display(&[0x0f, 0x68, 0xc2], "punpckhbw mm0, mm2");
    test_display(&[0x0f, 0x69, 0x00], "punpckhwd mm0, [rax]");
    test_display(&[0x0f, 0x69, 0xc2], "punpckhwd mm0, mm2");
    test_display(&[0x0f, 0x6a, 0x00], "punpckhdq mm0, [rax]");
    test_display(&[0x0f, 0x6a, 0xc2], "punpckhdq mm0, mm2");
    test_display(&[0x0f, 0x6b, 0x00], "packssdw mm0, [rax]");
    test_display(&[0x0f, 0x6b, 0xc2], "packssdw mm0, mm2");
    test_invalid(&[0x0f, 0x6c]);
    test_invalid(&[0x0f, 0x6d]);
    test_display(&[0x0f, 0x6e, 0x00], "movd mm0, [rax]");
    test_display(&[0x0f, 0x6e, 0xc2], "movd mm0, edx");
    test_display(&[0x0f, 0x6f, 0x00], "movq mm0, [rax]");
    test_display(&[0x0f, 0x6f, 0xc2], "movq mm0, mm2");
    test_display(&[0x0f, 0x70, 0x00, 0x7f], "pshufw mm0, [rax], 0x7f");
    test_display(&[0x4f, 0x0f, 0x70, 0x00, 0x7f], "pshufw mm0, [r8], 0x7f");
    test_invalid(&[0x0f, 0x71, 0x00, 0x7f]);
    test_invalid(&[0x0f, 0x71, 0xc0, 0x7f]);
    test_display(&[0x0f, 0x71, 0xd0, 0x7f], "psrlw mm0, 0x7f");
    test_display(&[0x0f, 0x71, 0xe0, 0x7f], "psraw mm0, 0x7f");
    test_display(&[0x0f, 0x71, 0xf0, 0x7f], "psllw mm0, 0x7f");
    test_invalid(&[0x0f, 0x72, 0x00, 0x7f]);
    test_invalid(&[0x0f, 0x72, 0xc0, 0x7f]);
    test_display(&[0x0f, 0x72, 0xd0, 0x7f], "psrld mm0, 0x7f");
    test_display(&[0x0f, 0x72, 0xe0, 0x7f], "psrad mm0, 0x7f");
    test_display(&[0x0f, 0x72, 0xf0, 0x7f], "pslld mm0, 0x7f");
    test_invalid(&[0x0f, 0x73, 0x00, 0x7f]);
    test_invalid(&[0x0f, 0x73, 0xc0, 0x7f]);
    test_display(&[0x0f, 0x73, 0xd0, 0x7f], "psrlq mm0, 0x7f");
    test_invalid(&[0x0f, 0x73, 0xe0, 0x7f]);
    test_display(&[0x0f, 0x73, 0xf0, 0x7f], "psllq mm0, 0x7f");
    test_display(&[0x0f, 0xa0], "push fs");
    test_display(&[0x0f, 0xa1], "pop fs");
    test_display(&[0x0f, 0xa2], "cpuid");
    test_display(&[0x0f, 0xa4, 0xc0, 0x11], "shld eax, eax, 0x11");
    test_display(&[0x66, 0x0f, 0xa4, 0xcf, 0x11], "shld di, cx, 0x11");
    test_display(&[0x66, 0x45, 0x0f, 0xa4, 0xcf, 0x11], "shld r15w, r9w, 0x11");
    test_display(&[0x0f, 0xa5, 0xc0], "shld eax, eax, cl");
    test_display(&[0x0f, 0xa5, 0xc9], "shld ecx, ecx, cl");
}

#[test]
fn prefixed_660f() {
    test_display(&[0x66, 0x0f, 0x10, 0xc0], "movupd xmm0, xmm0");
    test_display(&[0x66, 0x48, 0x0f, 0x10, 0xc0], "movupd xmm0, xmm0");
    test_display(&[0x66, 0x4a, 0x0f, 0x10, 0xc0], "movupd xmm0, xmm0");
    test_display(&[0x66, 0x4b, 0x0f, 0x10, 0xc0], "movupd xmm0, xmm8");
    test_display(&[0x66, 0x4c, 0x0f, 0x10, 0xc0], "movupd xmm8, xmm0");
    test_display(&[0x66, 0x4d, 0x0f, 0x10, 0xc0], "movupd xmm8, xmm8");
    test_display(&[0xf2, 0x66, 0x66, 0x4d, 0x0f, 0x10, 0xc0], "movupd xmm8, xmm8");
}

#[test]
fn prefixed_f20f() {
    test_display(&[0xf2, 0x0f, 0x16, 0xcf], "movlhps xmm1, xmm7");
    test_display(&[0xf2, 0x4d, 0x0f, 0x16, 0xcf], "movlhps xmm9, xmm15");
    test_display(&[0x40, 0x66, 0xf2, 0x66, 0x4d, 0x0f, 0x16, 0xcf], "movlhps xmm9, xmm15");
}

#[test]
fn prefixed_f30f() {
    test_display(&[0xf3, 0x0f, 0x16, 0xcf], "movshdup xmm1, xmm7");
    test_display(&[0xf3, 0x4d, 0x0f, 0x16, 0xcf], "movshdup xmm9, xmm15");
}

#[test]
fn test_adx() {
    test_display(&[0x66, 0x0f, 0x38, 0xf6, 0xc1], "adcx eax, ecx");
    test_display(&[0x66, 0x0f, 0x38, 0xf6, 0x01], "adcx eax, [rcx]");
    test_display(&[0x66, 0x4f, 0x0f, 0x38, 0xf6, 0x01], "adcx r8, [r9]");
    test_display(&[0xf3, 0x0f, 0x38, 0xf6, 0xc1], "adox eax, ecx");
    test_display(&[0xf3, 0x0f, 0x38, 0xf6, 0x01], "adox eax, [rcx]");
    test_display(&[0xf3, 0x4f, 0x0f, 0x38, 0xf6, 0x01], "adox r8, [r9]");
}

#[test]
fn test_prefetchw() {
    test_display(&[0x0f, 0x0d, 0x08], "prefetchw [rax]");
}

#[test]
fn test_lzcnt() {
    test_display(&[0x66, 0xf3, 0x0f, 0xbd, 0xc1], "lzcnt ax, cx");
    test_display(&[0xf3, 0x0f, 0xbd, 0xc1], "lzcnt eax, ecx");
    test_display(&[0xf3, 0x48, 0x0f, 0xbd, 0xc1], "lzcnt rax, rcx");
}

#[test]
fn test_svm() {
    test_display(&[0x0f, 0x01, 0xdf], "invlpga rax, ecx");
    test_display(&[0x0f, 0x01, 0xde], "skinit eax");
    test_display(&[0x0f, 0x01, 0xdd], "clgi");
    test_display(&[0x0f, 0x01, 0xdc], "stgi");
    test_display(&[0x0f, 0x01, 0xdb], "vmsave rax");
    test_display(&[0x0f, 0x01, 0xda], "vmload rax");
    test_display(&[0x0f, 0x01, 0xd9], "vmmcall");
    test_display(&[0x0f, 0x01, 0xd8], "vmrun rax");
}

#[test]
fn test_movbe() {
    test_display(&[0x0f, 0x38, 0xf0, 0x06], "movbe eax, [rsi]");
    test_display(&[0x4f, 0x0f, 0x38, 0xf0, 0x06], "movbe r8, [r14]");
}

#[test]
fn test_tsx() {
    test_display(&[0xc6, 0xf8, 0x10], "xabort 0x10");
    test_display(&[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56], "xbegin $+0x56341210");
    test_display(&[0x66, 0xc7, 0xf8, 0x10, 0x12], "xbegin $+0x1210");
    test_display(&[0x0f, 0x01, 0xd5], "xend");
    test_display(&[0x0f, 0x01, 0xd6], "xtest");
}

#[test]
fn test_rand() {
    test_display(&[0x0f, 0xc7, 0xfd], "rdseed ebp");
    test_display(&[0x66, 0x0f, 0xc7, 0xfd], "rdseed bp");
    test_display(&[0x48, 0x0f, 0xc7, 0xfd], "rdseed rbp");
    test_display(&[0x0f, 0xc7, 0xf5], "rdrand ebp");
    test_display(&[0x66, 0x0f, 0xc7, 0xf5], "rdrand bp");
    test_display(&[0x48, 0x0f, 0xc7, 0xf5], "rdrand rbp");
}

#[test]
fn test_sha() {
    test_display(&[0x0f, 0x3a, 0xcc, 0x12, 0x40], "sha1rnds4 xmm2, [rdx], 0x40");
    test_display(&[0x0f, 0x38, 0xc8, 0x12], "sha1nexte xmm2, [rdx]");
    test_display(&[0x0f, 0x38, 0xc9, 0x12], "sha1msg1 xmm2, [rdx]");
    test_display(&[0x0f, 0x38, 0xca, 0x12], "sha1msg2 xmm2, [rdx]");
    test_display(&[0x0f, 0x38, 0xcb, 0x12], "sha256rnds2 xmm2, [rdx]");
    test_display(&[0x0f, 0x38, 0xcc, 0x12], "sha256msg1 xmm2, [rdx]");
    test_display(&[0x0f, 0x38, 0xcd, 0x12], "sha256msg2 xmm2, [rdx]");
}

#[test]
fn test_vmx() {
    test_display(&[0x0f, 0xc7, 0x3f], "vmptrst [rdi]");
    test_display(&[0x0f, 0xc7, 0x37], "vmptrld [rdi]");
    test_display(&[0xf3, 0x0f, 0xc7, 0xf7], "rdrand edi");
    test_display(&[0xf3, 0x0f, 0xc7, 0x37], "vmxon [rdi]");
    test_display(&[0x66, 0x0f, 0xc7, 0xf7], "rdrand di");
    test_display(&[0x66, 0x0f, 0xc7, 0x37], "vmclear [rdi]");

    // this is actually vmx
    // test_invalid(&[0x66, 0x0f, 0xc7, 0x03]);
    test_display(&[0x66, 0x4f, 0x0f, 0xc7, 0x33], "vmclear [r11]");
    test_display(&[0x66, 0x0f, 0xc7, 0x33], "vmclear [rbx]");
    test_display(&[0xf3, 0x4f, 0x0f, 0xc7, 0x33], "vmxon [r11]");
    test_display(&[0xf3, 0x0f, 0xc7, 0x33], "vmxon [rbx]");
}

#[test]
fn test_rdpid() {
    test_display(&[0xf3, 0x0f, 0xc7, 0xfd], "rdpid ebp");
}

#[test]
fn test_cmpxchg8b() {
    test_display(&[0x0f, 0xc7, 0x0f], "cmpxchg8b [rdi]");
    test_display(&[0x4f, 0x0f, 0xc7, 0x0f], "cmpxchg16b [r15]");
}