From 33c520341b373ac18e7924eb9227615ac65c2618 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 22 Feb 2020 00:51:30 -0800 Subject: support 660f sse2 instructions this isn't quite all of sse2, but gets close. the f20f opcode map still needs some touching up. also fix `G_E_xmm_Ib` not respecting rex.r for the rrr operand --- src/long_mode/display.rs | 58 ++++++++ src/long_mode/mod.rs | 284 ++++++++++++++++++++++++------------ test/test.rs | 367 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 532 insertions(+), 177 deletions(-) diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index be7ab0d..d45a98a 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -285,7 +285,9 @@ impl fmt::Display for Opcode { &Opcode::CMPSS => write!(f, "cmpss"), &Opcode::CMPSD => write!(f, "cmpsd"), &Opcode::UNPCKLPS => write!(f, "unpcklps"), + &Opcode::UNPCKLPD => write!(f, "unpcklpd"), &Opcode::UNPCKHPS => write!(f, "unpckhps"), + &Opcode::UNPCKHPD => write!(f, "unpckhpd"), &Opcode::MOVUPS => write!(f, "movups"), &Opcode::MOVQ2DQ => write!(f, "movq2dq"), &Opcode::MOVDQ2Q => write!(f, "movdq2q"), @@ -499,40 +501,61 @@ impl fmt::Display for Opcode { &Opcode::SETLE => write!(f, "setle"), &Opcode::SETG => write!(f, "setg"), &Opcode::ADDPS => write!(f, "addps"), + &Opcode::ADDPD => write!(f, "addpd"), &Opcode::ANDNPS => write!(f, "andnps"), + &Opcode::ANDNPD => write!(f, "andnpd"), &Opcode::ANDPS => write!(f, "andps"), + &Opcode::ANDPD => write!(f, "andpd"), &Opcode::BSWAP => write!(f, "bswap"), &Opcode::CMPPS => write!(f, "cmpps"), + &Opcode::CMPPD => write!(f, "cmppd"), + &Opcode::COMISD => write!(f, "comisd"), &Opcode::COMISS => write!(f, "comiss"), &Opcode::CVTDQ2PS => write!(f, "cvtdq2ps"), + &Opcode::CVTPS2DQ => write!(f, "cvtps2dq"), &Opcode::CVTPI2PS => write!(f, "cvtpi2ps"), &Opcode::CVTPI2PD => write!(f, "cvtpi2pd"), &Opcode::CVTPS2PD => write!(f, "cvtps2pd"), + &Opcode::CVTPD2PS => write!(f, "cvtpd2ps"), &Opcode::CVTPS2PI => write!(f, "cvtps2pi"), + &Opcode::CVTPD2PI => write!(f, "cvtpd2pi"), &Opcode::CVTTPS2PI => write!(f, "cvttps2pi"), + &Opcode::CVTTPD2PI => write!(f, "cvttpd2pi"), + &Opcode::CVTTPD2DQ => write!(f, "cvttpd2dq"), &Opcode::DIVPS => write!(f, "divps"), + &Opcode::DIVPD => write!(f, "divpd"), &Opcode::EMMS => write!(f, "emms"), &Opcode::GETSEC => write!(f, "getsec"), &Opcode::LFS => write!(f, "lfs"), &Opcode::LGS => write!(f, "lgs"), &Opcode::LSS => write!(f, "lss"), &Opcode::MASKMOVQ => write!(f, "maskmovq"), + &Opcode::MASKMOVDQU => write!(f, "maskmovdqu"), &Opcode::MAXPS => write!(f, "maxps"), + &Opcode::MAXPD => write!(f, "maxpd"), &Opcode::MINPS => write!(f, "minps"), + &Opcode::MINPD => write!(f, "minpd"), &Opcode::MOVAPS => write!(f, "movaps"), &Opcode::MOVAPD => write!(f, "movapd"), &Opcode::MOVD => write!(f, "movd"), &Opcode::MOVLPS => write!(f, "movlps"), + &Opcode::MOVLPD => write!(f, "movlpd"), &Opcode::MOVLHPS => write!(f, "movlhps"), &Opcode::MOVHPS => write!(f, "movhps"), + &Opcode::MOVHPD => write!(f, "movhpd"), &Opcode::MOVHLPS => write!(f, "movhlps"), &Opcode::MOVUPD => write!(f, "movupd"), &Opcode::MOVMSKPS => write!(f, "movmskps"), + &Opcode::MOVMSKPD => write!(f, "movmskpd"), &Opcode::MOVNTI => write!(f, "movnti"), &Opcode::MOVNTPS => write!(f, "movntps"), + &Opcode::MOVNTPD => write!(f, "movntpd"), &Opcode::MOVNTQ => write!(f, "movntq"), + &Opcode::MOVNTDQ => write!(f, "movntdq"), &Opcode::MULPS => write!(f, "mulps"), + &Opcode::MULPD => write!(f, "mulpd"), &Opcode::ORPS => write!(f, "orps"), + &Opcode::ORPD => write!(f, "orpd"), &Opcode::PACKSSDW => write!(f, "packssdw"), &Opcode::PACKSSWB => write!(f, "packsswb"), &Opcode::PACKUSWB => write!(f, "packuswb"), @@ -601,17 +624,22 @@ impl fmt::Display for Opcode { &Opcode::RSM => write!(f, "rsm"), &Opcode::RSQRTPS => write!(f, "rsqrtps"), &Opcode::SHLD => write!(f, "shld"), + &Opcode::SHUFPD => write!(f, "shufpd"), &Opcode::SHUFPS => write!(f, "shufps"), &Opcode::SLHD => write!(f, "slhd"), &Opcode::SQRTPS => write!(f, "sqrtps"), + &Opcode::SQRTPD => write!(f, "sqrtpd"), &Opcode::SUBPS => write!(f, "subps"), + &Opcode::SUBPD => write!(f, "subpd"), &Opcode::SYSENTER => write!(f, "sysenter"), &Opcode::SYSEXIT => write!(f, "sysexit"), + &Opcode::UCOMISD => write!(f, "ucomisd"), &Opcode::UCOMISS => write!(f, "ucomiss"), &Opcode::UD2E => write!(f, "ud2e"), &Opcode::VMREAD => write!(f, "vmread"), &Opcode::VMWRITE => write!(f, "vmwrite"), &Opcode::XORPS => write!(f, "xorps"), + &Opcode::XORPD => write!(f, "xorpd"), &Opcode::CBW => write!(f, "cbw"), &Opcode::CWDE => write!(f, "cwde"), &Opcode::CDQE => write!(f, "cdqe"), @@ -623,6 +651,7 @@ impl fmt::Display for Opcode { &Opcode::BLSI => write!(f, "blsi"), &Opcode::BLSMSK => write!(f, "blsmsk"), &Opcode::BLSR => write!(f, "blsr"), + &Opcode::VMCLEAR => write!(f, "vmclear"), &Opcode::VMCALL => write!(f, "vmcall"), &Opcode::VMLAUNCH => write!(f, "vmlaunch"), &Opcode::VMRESUME => write!(f, "vmresume"), @@ -1291,12 +1320,19 @@ impl > Colorize> Colorize { write!(out, "{}", colors.arithmetic_op(self)) } Opcode::POPF | @@ -1553,33 +1593,44 @@ impl > Colorize> Colorize> Colorize> Colorize { write!(out, "{}", colors.comparison_op(self)) } Opcode::WRMSR | @@ -1797,6 +1854,7 @@ impl > Colorize>(bytes_iter: &mut T, length: &mut u8) -> Result<(OpcodeRecord, u8), DecodeError> { @@ -5138,6 +5177,15 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, instruction.modrm_mmm.num &= 0b111; instruction.operand_count = 2; }, + OperandCode::G_U_xmm => { + instruction.operands[1] = mem_oper; + instruction.modrm_rrr.bank = RegisterBank::X; + if mem_oper != OperandSpec::RegMMM { + return Err(DecodeError::InvalidOperand); + } + instruction.modrm_mmm.bank = RegisterBank::X; + instruction.operand_count = 2; + }, op @ OperandCode::G_M_xmm | op @ OperandCode::G_E_xmm => { instruction.modrm_rrr.bank = RegisterBank::X; @@ -5158,7 +5206,8 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, let modrm = read_modrm(&mut bytes_iter, length)?; instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; - instruction.modrm_rrr = RegSpec { bank: RegisterBank::X, num: (modrm >> 3) & 7 }; + instruction.modrm_rrr = + RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X); instruction.operands[0] = OperandSpec::RegRRR; instruction.imm = read_num(&mut bytes_iter, 1)? as u8 as u64; @@ -5379,6 +5428,36 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u64; instruction.operands[1] = OperandSpec::ImmU8; }, + OperandCode::ModRM_0x660f12 => { + // If this is reg-reg, interpret the instruction as 66-prefixed (no-op here) + // `movhlps`. If this is reg-mem, it's a `movlpd`. + let modrm = read_modrm(&mut bytes_iter, length)?; + if modrm & 0xc0 == 0xc0 { + instruction.opcode = Opcode::MOVHLPS; + } else { + instruction.opcode = Opcode::MOVLPD; + } + instruction.modrm_rrr = + RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X); + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; + instruction.operand_count = 2; + } + OperandCode::ModRM_0x660f16 => { + // If this is reg-reg, interpret the instruction as 66-prefixed (no-op here) + // `movlhps`. If this is reg-mem, it's a `movhpd`. + let modrm = read_modrm(&mut bytes_iter, length)?; + if modrm & 0xc0 == 0xc0 { + instruction.opcode = Opcode::MOVLHPS; + } else { + instruction.opcode = Opcode::MOVHPD; + } + instruction.modrm_rrr = + RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X); + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; + instruction.operand_count = 2; + } OperandCode::ModRM_0x660f38 => { let op = bytes_iter.next().ok_or(DecodeError::ExhaustedInput).map(|b| { *length += 1; b })?; match op { @@ -5517,6 +5596,24 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.imm = read_imm_signed(&mut bytes_iter, 1, length)? as u64; instruction.operands[1] = OperandSpec::ImmU8; }, + OperandCode::ModRM_0x660fc7 => { + let modrm = read_modrm(&mut bytes_iter, length)?; + + let r = (modrm >> 3) & 7; + match r { + 6 => { + instruction.opcode = Opcode::VMCLEAR; + instruction.operands[0] = read_E(&mut bytes_iter, instruction, modrm, 1 /* doesn't matter, something using this width is invalid */, length)?; + if instruction.operands[0] == OperandSpec::RegMMM { + return Err(DecodeError::InvalidOperand); + } + instruction.operand_count = 1; + } + _ => { + return Err(DecodeError::InvalidOpcode); + } + } + }, OperandCode::G_mm_Edq => { instruction.operands[1] = mem_oper; instruction.modrm_rrr.bank = RegisterBank::MM; @@ -5581,6 +5678,17 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } }, + OperandCode::G_xmm_Ed_Ib => { + instruction.operands[1] = mem_oper; + instruction.operands[2] = OperandSpec::ImmU8; + instruction.imm = + read_num(&mut bytes_iter, 1)? as u64; + *length += 1; + instruction.modrm_rrr.bank = RegisterBank::X; + if mem_oper == OperandSpec::RegMMM { + instruction.modrm_mmm.bank = RegisterBank::D; + } + }, OperandCode::G_xmm_Eq => { instruction.operands[1] = mem_oper; instruction.modrm_rrr.bank = RegisterBank::X; diff --git a/test/test.rs b/test/test.rs index 1bac590..e13f587 100644 --- a/test/test.rs +++ b/test/test.rs @@ -114,6 +114,284 @@ fn test_aesni() { } #[test] +fn test_sse2() { + fn test_instr(bytes: &[u8], text: &'static str) { + // sse and sse2 are part of amd64, so x86_64, meaning even the minimal decoder must support + // them. + test_display_under(&InstDecoder::minimal(), bytes, text); + } + + fn test_instr_invalid(bytes: &[u8]) { + test_invalid_under(&InstDecoder::minimal(), bytes); + test_invalid_under(&InstDecoder::default(), bytes); + } + + test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0xc3], "movhlps xmm8, xmm11"); // reg-reg form is movhlps + test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0x03], "movlpd xmm8, [r11]"); // reg-mem is movlpd + test_instr(&[0x66, 0x4f, 0x0f, 0x13, 0x03], "movlpd [r11], xmm8"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x13, 0xc3]); + test_instr(&[0x66, 0x4f, 0x0f, 0x14, 0x03], "unpcklpd xmm8, [r11]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x14, 0xc3], "unpcklpd xmm8, xmm11"); + test_instr(&[0x66, 0x4f, 0x0f, 0x15, 0x03], "unpckhpd xmm8, [r11]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x15, 0xc3], "unpckhpd xmm8, xmm11"); + test_instr(&[0x66, 0x4f, 0x0f, 0x16, 0x03], "movhpd xmm8, [r11]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x16, 0xc3], "movlhps xmm8, xmm11"); + test_instr(&[0x66, 0x4f, 0x0f, 0x17, 0x03], "movhpd [r11], xmm8"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x17, 0xc3]); + + test_display(&[0x66, 0x4f, 0x0f, 0x28, 0xd0], "movapd xmm10, xmm8"); + test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); + + test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm9, mm7"); + test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0x0f], "cvtpi2pd xmm9, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2b, 0x0f], "movntpd [r15], xmm9"); + test_display(&[0x66, 0x4f, 0x0f, 0x2c, 0xcf], "cvttpd2pi mm1, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2c, 0x0f], "cvttpd2pi mm1, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2d, 0xcf], "cvtpd2pi mm1, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2d, 0x0f], "cvtpd2pi mm1, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2e, 0xcf], "ucomisd xmm9, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2e, 0x0f], "ucomisd xmm9, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2f, 0xcf], "comisd xmm9, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2f, 0x0f], "comisd xmm9, [r15]"); + + /* + * .... 660f38 + * .... 660f7f + */ + + test_invalid(&[0x66, 0x4f, 0x0f, 0x50, 0x01]); + test_display(&[0x66, 0x4f, 0x0f, 0x50, 0xc1], "movmskpd r8d, xmm9"); + test_display(&[0x66, 0x4f, 0x0f, 0x51, 0x01], "sqrtpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]"); // note: NOT "rsqrtpd" - no such instruction exists, so fall back to just 0f52 parse. + test_display(&[0x66, 0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]"); // note: NOT "rcppd" - no such instruction exists, so fall back to just 0f53 parse. + test_display(&[0x66, 0x4f, 0x0f, 0x54, 0x01], "andpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x55, 0x01], "andnpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x56, 0x01], "orpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x57, 0x01], "xorpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x58, 0x01], "addpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x59, 0x01], "mulpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5a, 0x01], "cvtpd2ps xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5b, 0x01], "cvtps2dq xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5c, 0x01], "subpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5d, 0x01], "minpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5e, 0x01], "divpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5f, 0x01], "maxpd xmm8, [r9]"); + test_display( + &[0x66, 0x4f, 0x0f, 0x60, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpcklbw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x61, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpcklwd xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x62, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckldq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x63, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "packsswb xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x64, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "pcmpgtb xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x65, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "pcmpgtw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x66, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "pcmpgtd xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x67, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "packuswb xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x68, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhbw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x69, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhwd xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6a, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhdq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6b, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "packssdw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6c, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpcklqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6d, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + // TODO: this needs to be clear that the operand is `dword` + test_display( + &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movdqa xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + + test_display(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax"); + test_display(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x10, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x20, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x30, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x10, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x20, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x30, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x10, 0x8f]); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x18, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f"); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x30, 0x8f]); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x38, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f"); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f"); + + test_instr(&[0x66, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm0, xmm3, 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm8, xmm11, 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmppd xmm8, [r11], 0x8"); + + test_instr(&[0x66, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm0, ebx, 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm8, r11d, 0x8"); + + test_instr(&[0x66, 0x0f, 0xc4, 0x03, 0x08], "pinsrw xmm0, [rbx], 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0x03, 0x08], "pinsrw xmm8, [r11], 0x8"); + +// test_instr(&[0x66, 0x0f, 0xc5, 0xc3, 0x08], "pextrw eax, xmm3, 0x8"); +// test_instr(&[0x66, 0x4f, 0x0f, 0xc5, 0xc3, 0x08], "pextrw r8d, xmm11, 0x8"); +// test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x03, 0x08]); +// test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x40, 0x08]); +// test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x80, 0x08]); + + test_instr(&[0x66, 0x4f, 0x0f, 0xc6, 0x03, 0x08], "shufpd xmm8, [r11], 0x8"); + test_instr(&[0x66, 0x0f, 0xc6, 0x03, 0x08], "shufpd xmm0, [rbx], 0x8"); + test_instr(&[0x66, 0x0f, 0xc6, 0xc3, 0x08], "shufpd xmm0, xmm3, 0x8"); + test_instr(&[0x66, 0x0f, 0xd1, 0xc1], "psrlw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd1, 0x01], "psrlw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd2, 0xc1], "psrld xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd2, 0x01], "psrld xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd3, 0xc1], "psrlq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd3, 0x01], "psrlq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd4, 0xc1], "paddq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd4, 0x01], "paddq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd5, 0xc1], "pmullw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd5, 0x01], "pmullw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd6, 0xc1], "movq xmm1, xmm0"); + test_instr(&[0x66, 0x0f, 0xd6, 0x01], "movq [rcx], xmm0"); + test_instr(&[0x66, 0x0f, 0xd7, 0xc1], "pmovmskb eax, xmm1"); + test_instr(&[0x66, 0x4f, 0x0f, 0xd7, 0xc1], "pmovmskb r8d, xmm9"); + test_invalid(&[0x66, 0x0f, 0xd7, 0x01]); + test_instr(&[0x66, 0x0f, 0xd8, 0xc1], "psubusb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd8, 0x01], "psubusb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd9, 0xc1], "psubusw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd9, 0x01], "psubusw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xda, 0xc1], "pminub xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xda, 0x01], "pminub xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdb, 0xc1], "pand xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdb, 0x01], "pand xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdc, 0xc1], "paddusb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdc, 0x01], "paddusb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdd, 0xc1], "paddusw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdd, 0x01], "paddusw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xde, 0xc1], "pmaxub xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xde, 0x01], "pmaxub xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdf, 0xc1], "pandn xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdf, 0x01], "pandn xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe0, 0xc1], "pavgb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe0, 0x01], "pavgb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe1, 0xc1], "psraw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe1, 0x01], "psraw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe2, 0xc1], "psrad xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe2, 0x01], "psrad xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe3, 0xc1], "pavgw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe3, 0x01], "pavgw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe4, 0xc1], "pmulhuw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe4, 0x01], "pmulhuw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe5, 0xc1], "pmulhw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe5, 0x01], "pmulhw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe6, 0xc1], "cvttpd2dq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe6, 0x01], "cvttpd2dq xmm0, [rcx]"); + test_invalid(&[0x66, 0x0f, 0xe7, 0xc1]); + test_instr(&[0x66, 0x0f, 0xe7, 0x01], "movntdq [rcx], xmm0"); + test_instr(&[0x66, 0x0f, 0xe8, 0xc1], "psubsb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe8, 0x01], "psubsb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe9, 0xc1], "psubsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe9, 0x01], "psubsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xea, 0xc1], "pminsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xea, 0x01], "pminsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xeb, 0xc3], "por xmm0, xmm3"); + test_instr(&[0x66, 0x0f, 0xeb, 0xc4], "por xmm0, xmm4"); + test_instr(&[0x66, 0x0f, 0xeb, 0xd3], "por xmm2, xmm3"); + test_instr(&[0x66, 0x0f, 0xeb, 0x12], "por xmm2, [rdx]"); + test_instr(&[0x66, 0x0f, 0xeb, 0xc1], "por xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xeb, 0x01], "por xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xec, 0xc1], "paddsb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xec, 0x01], "paddsb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xed, 0xc1], "paddsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xed, 0x01], "paddsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xee, 0xc1], "pmaxsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xee, 0x01], "pmaxsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xef, 0xc1], "pxor xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xef, 0x01], "pxor xmm0, [rcx]"); + test_invalid(&[0x66, 0x0f, 0xf0, 0xc1]); + test_invalid(&[0x66, 0x0f, 0xf0, 0x01]); + test_instr(&[0x66, 0x0f, 0xf1, 0xc1], "psllw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf1, 0x01], "psllw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf2, 0xc1], "pslld xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf2, 0x01], "pslld xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf3, 0xc1], "psllq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf3, 0x01], "psllq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf4, 0xc1], "pmuludq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf4, 0x01], "pmuludq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf5, 0xc1], "pmaddwd xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf5, 0x01], "pmaddwd xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf6, 0xc1], "psadbw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf6, 0x01], "psadbw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf7, 0xc1], "maskmovdqu xmm0, xmm1"); + test_invalid(&[0x66, 0x0f, 0xf7, 0x01]); + test_instr(&[0x66, 0x0f, 0xf8, 0xc1], "psubb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf8, 0x01], "psubb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf9, 0xc1], "psubw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf9, 0x01], "psubw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfa, 0xc1], "psubd xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfa, 0x01], "psubd xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfb, 0xc1], "psubq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfb, 0x01], "psubq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfc, 0xc1], "paddb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfc, 0x01], "paddb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfd, 0xc1], "paddw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfd, 0x01], "paddw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfe, 0xc1], "paddd xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfe, 0x01], "paddd xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xff, 0xc1], "paddq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xff, 0x01], "paddq xmm0, [rcx]"); + + test_instr(&[0x66, 0x0f, 0x74, 0xc1], "pcmpeqb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0x74, 0x12], "pcmpeqb xmm2, [rdx]"); + test_instr(&[0x66, 0x0f, 0xf8, 0xc8], "psubb xmm1, xmm0"); + test_instr(&[0x66, 0x0f, 0xf8, 0xd0], "psubb xmm2, xmm0"); + test_instr(&[0x66, 0x0f, 0xf8, 0x12], "psubb xmm2, [rdx]"); +} + +#[test] fn test_sse3() { fn test_instr(bytes: &[u8], text: &'static str) { test_display_under(&InstDecoder::minimal().with_sse3(), bytes, text); @@ -298,94 +576,6 @@ fn test_E_decode() { #[test] fn test_sse() { - test_display( - &[0x66, 0x4f, 0x0f, 0x60, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpcklbw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x61, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpcklwd xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x62, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckldq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x63, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "packsswb xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x64, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "pcmpgtb xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x65, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "pcmpgtw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x66, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "pcmpgtd xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x67, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "packuswb xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x68, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhbw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x69, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhwd xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6a, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhdq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6b, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "packssdw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6c, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpcklqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6d, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - // this needs to be clear that the operand is `dword` - test_display( - &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "movq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "movdqa xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - - test_display(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax"); - test_display(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x10, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x20, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x30, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x10, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x20, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x30, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x10, 0x8f]); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x18, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f"); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x30, 0x8f]); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x38, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f"); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f"); test_display(&[0x4f, 0x0f, 0x28, 0x00], "movaps xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x29, 0x00], "movaps [r8], xmm8"); test_display(&[0x4f, 0x0f, 0x2b, 0x00], "movntps [r8], xmm8"); @@ -408,7 +598,6 @@ fn test_sse() { test_display(&[0x4f, 0x0f, 0x59, 0x00], "mulps xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x5a, 0x00], "cvtps2pd xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8]"); test_display(&[0x67, 0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8d]"); test_display(&[0x4f, 0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [rax]"); test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); -- cgit v1.1