From d1787896dacc4821ebf399508472b3985ab2a232 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 22 Feb 2020 15:12:37 -0800 Subject: more sse/sse2 support largely f20f/f30f opcode map items --- src/long_mode/mod.rs | 84 ++++++++++++++++--- test/test.rs | 229 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 229 insertions(+), 84 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 522eff5..79e5b39 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -2831,6 +2831,7 @@ pub enum OperandCode { AL_DX = 0x105, DX_AX = 0x106, DX_AL = 0x107, + MOVQ_f30f = 0x108, G_xmm_Ed_Ib = 0x1ef, // mirror G_xmm_Edq, but also read an immediate Zv_R0 = 0x40, Zv_R1 = 0x41, @@ -2884,6 +2885,7 @@ pub enum OperandCode { Gv_Ev_Iv = 0xc5, // gap: 0xc6 Gd_U_xmm = 0xc7, + Gv_E_xmm = 0x1c7, M_G_xmm = 0xc9, ModRM_0x0f12 = 0xcb, ModRM_0x0f16 = 0xce, @@ -2917,8 +2919,11 @@ pub enum OperandCode { G_mm_E_xmm = 0xe5, G_E_mm = 0xe7, Edq_G_mm = 0xe9, + Edq_G_xmm = 0x1e9, E_G_mm = 0xeb, G_xmm_E_mm = 0xed, + G_xmm_U_mm = 0x1ed, + U_mm_G_xmm = 0x2ed, G_xmm_Edq = 0xef, G_U_mm = 0xf1, Ev_Gv_Ib = 0xf3, @@ -3086,8 +3091,8 @@ const OPCODE_660F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::HADDPD), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::HSUBPD), OperandCode::G_E_xmm), - OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), + OpcodeRecord(Interpretation::Instruction(Opcode::MOVD), OperandCode::Edq_G_xmm), + OpcodeRecord(Interpretation::Instruction(Opcode::MOVDQA), OperandCode::E_G_xmm), // 0x80 OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3461,7 +3466,7 @@ const OPCODE_F20F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::MOVDQ2Q), OperandCode::Unsupported), + OpcodeRecord(Interpretation::Instruction(Opcode::MOVDQ2Q), OperandCode::U_mm_G_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3561,8 +3566,8 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::CVTSI2SS), OperandCode::G_xmm_Edq), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::CVTTSS2SI), OperandCode::G_E_xmm), - OpcodeRecord(Interpretation::Instruction(Opcode::CVTSS2SI), OperandCode::G_E_xmm), + OpcodeRecord(Interpretation::Instruction(Opcode::CVTTSS2SI), OperandCode::Gv_E_xmm), + OpcodeRecord(Interpretation::Instruction(Opcode::CVTSS2SI), OperandCode::Gv_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), // 0x30 @@ -3603,7 +3608,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::SQRTSS), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::RSQRTSS), OperandCode::G_E_xmm), - OpcodeRecord(Interpretation::Instruction(Opcode::RCPSS), OperandCode::Nothing), + OpcodeRecord(Interpretation::Instruction(Opcode::RCPSS), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3611,7 +3616,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::ADDSS), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::MULSS), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::CVTSS2SD), OperandCode::G_E_xmm), - OpcodeRecord(Interpretation::Instruction(Opcode::CVTTPS2DQ), OperandCode::Nothing), + OpcodeRecord(Interpretation::Instruction(Opcode::CVTTPS2DQ), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::SUBSS), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::MINSS), OperandCode::G_E_xmm), OpcodeRecord(Interpretation::Instruction(Opcode::DIVSS), OperandCode::G_E_xmm), @@ -3648,7 +3653,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::MOVQ), OperandCode::G_E_xmm), + OpcodeRecord(Interpretation::Instruction(Opcode::MOVQ), OperandCode::MOVQ_f30f), OpcodeRecord(Interpretation::Instruction(Opcode::MOVDQU), OperandCode::E_G_xmm), // 0x80 OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -3742,7 +3747,7 @@ const OPCODE_F30F_MAP: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), - OpcodeRecord(Interpretation::Instruction(Opcode::MOVQ2DQ), OperandCode::Unsupported), + OpcodeRecord(Interpretation::Instruction(Opcode::MOVQ2DQ), OperandCode::G_xmm_U_mm), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::Nothing), @@ -5344,6 +5349,30 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, } fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter: T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec, length: &mut u8) -> Result<(), DecodeError> { match operand_code { + OperandCode::MOVQ_f30f => { + // if rex.w is set, the f3 prefix no longer applies and this becomes an 0f7e movq, + // rather than f30f7e movq. + // + // the difference here is that 0f7e movq has reversed operand order from f30f7e movq, + // in addition to the selected register banks being different. + // + // anyway, there are two operands, and the primary concern here is "what are they?". + instruction.operand_count = 2; + let modrm = read_modrm(&mut bytes_iter, length)?; + instruction.modrm_rrr = + RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.rex().r(), RegisterBank::X); + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = read_E_xmm(&mut bytes_iter, instruction, modrm, length)?; + if instruction.prefixes.rex().w() { + let op = instruction.operands[0]; + instruction.operands[0] = instruction.operands[1]; + instruction.operands[1] = op; + instruction.modrm_mmm.bank = RegisterBank::Q; + instruction.modrm_rrr.bank = RegisterBank::MM; + instruction.modrm_rrr.num &= 0b111; + instruction.opcode = Opcode::MOVD; + } + } OperandCode::ModRM_0x0f71 => { instruction.operand_count = 2; @@ -5648,6 +5677,20 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } } + OperandCode::Edq_G_xmm => { + instruction.operands[1] = instruction.operands[0]; + instruction.operands[0] = mem_oper; + instruction.modrm_rrr.bank = RegisterBank::X; + if mem_oper == OperandSpec::RegMMM { + if instruction.prefixes.rex().w() { + instruction.modrm_mmm.bank = RegisterBank::Q; + // movd/q is so weird + instruction.opcode = Opcode::MOVQ; + } else { + instruction.modrm_mmm.bank = RegisterBank::D; + } + } + } OperandCode::E_G_mm => { instruction.operands[1] = instruction.operands[0]; instruction.operands[0] = mem_oper; @@ -5704,14 +5747,29 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.modrm_mmm.bank = RegisterBank::X; } }, - OperandCode::G_xmm_E_mm => { + op @ OperandCode::G_xmm_U_mm | + op @ OperandCode::G_xmm_E_mm => { instruction.operands[1] = mem_oper; instruction.modrm_rrr.bank = RegisterBank::X; if mem_oper == OperandSpec::RegMMM { instruction.modrm_mmm.bank = RegisterBank::MM; instruction.modrm_mmm.num &= 0b111; + } else { + if op == OperandCode::G_xmm_U_mm { + return Err(DecodeError::InvalidOperand); + } } }, + OperandCode::U_mm_G_xmm => { + instruction.operands[1] = mem_oper; + instruction.modrm_mmm.bank = RegisterBank::X; + if mem_oper == OperandSpec::RegMMM { + instruction.modrm_rrr.bank = RegisterBank::MM; + instruction.modrm_rrr.num &= 0b111; + } else { + return Err(DecodeError::InvalidOperand); + } + } // sure hope these aren't backwards huh OperandCode::AL_Xb => { instruction.operands[0] = OperandSpec::AL; @@ -5798,6 +5856,12 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.modrm_rrr.bank = RegisterBank::D; instruction.modrm_mmm.bank = RegisterBank::X; } + OperandCode::Gv_E_xmm => { + instruction.operands[1] = mem_oper; + if instruction.operands[1] == OperandSpec::RegMMM { + instruction.modrm_mmm.bank = RegisterBank::X; + } + } OperandCode::M_G_xmm => { instruction.operands[1] = instruction.operands[0]; instruction.operands[0] = mem_oper; diff --git a/test/test.rs b/test/test.rs index 4722267..fe6a898 100644 --- a/test/test.rs +++ b/test/test.rs @@ -56,6 +56,7 @@ fn test_mmx() { test_display(&[0x4f, 0x0f, 0x7e, 0xcf], "movd r15, mm1"); test_display(&[0x41, 0x0f, 0x7e, 0xcf], "movd r15d, mm1"); test_display(&[0x4f, 0x0f, 0x7f, 0xcf], "movq mm7, mm1"); + test_display(&[0x4f, 0x0f, 0x7f, 0x0f], "movq [r15], mm1"); test_display(&[0x0f, 0xc4, 0xc0, 0x14], "pinsrw mm0, eax, 0x14"); test_display(&[0x4f, 0x0f, 0xc4, 0xc0, 0x14], "pinsrw mm0, r8d, 0x14"); test_display(&[0x4f, 0x0f, 0xc4, 0x00, 0x14], "pinsrw mm0, [r8], 0x14"); @@ -126,6 +127,9 @@ fn test_sse2() { test_invalid_under(&InstDecoder::default(), bytes); } + test_instr(&[0xf2, 0x0f, 0x10, 0x0c, 0xc7], "movsd xmm1, [rdi + rax * 8]"); + test_instr(&[0xf2, 0x0f, 0x11, 0x0c, 0xc7], "movsd [rdi + rax * 8], xmm1"); + test_instr(&[0x66, 0x0f, 0x11, 0x0c, 0xc7], "movupd [rdi + rax * 8], xmm1"); test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0xc3], "movhlps xmm8, xmm11"); // reg-reg form is movhlps test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0x03], "movlpd xmm8, [r11]"); // reg-mem is movlpd test_instr(&[0x66, 0x4f, 0x0f, 0x13, 0x03], "movlpd [r11], xmm8"); @@ -139,20 +143,26 @@ fn test_sse2() { test_instr(&[0x66, 0x4f, 0x0f, 0x17, 0x03], "movhpd [r11], xmm8"); test_invalid(&[0x66, 0x4f, 0x0f, 0x17, 0xc3]); - test_display(&[0x66, 0x4f, 0x0f, 0x28, 0xd0], "movapd xmm10, xmm8"); - test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); - - test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm9, mm7"); - test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0x0f], "cvtpi2pd xmm9, [r15]"); - test_display(&[0x66, 0x4f, 0x0f, 0x2b, 0x0f], "movntpd [r15], xmm9"); - test_display(&[0x66, 0x4f, 0x0f, 0x2c, 0xcf], "cvttpd2pi mm1, xmm15"); - test_display(&[0x66, 0x4f, 0x0f, 0x2c, 0x0f], "cvttpd2pi mm1, [r15]"); - test_display(&[0x66, 0x4f, 0x0f, 0x2d, 0xcf], "cvtpd2pi mm1, xmm15"); - test_display(&[0x66, 0x4f, 0x0f, 0x2d, 0x0f], "cvtpd2pi mm1, [r15]"); - test_display(&[0x66, 0x4f, 0x0f, 0x2e, 0xcf], "ucomisd xmm9, xmm15"); - test_display(&[0x66, 0x4f, 0x0f, 0x2e, 0x0f], "ucomisd xmm9, [r15]"); - test_display(&[0x66, 0x4f, 0x0f, 0x2f, 0xcf], "comisd xmm9, xmm15"); - test_display(&[0x66, 0x4f, 0x0f, 0x2f, 0x0f], "comisd xmm9, [r15]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x28, 0xd0], "movapd xmm10, xmm8"); + test_instr(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); + + test_instr(&[0x66, 0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm9, mm7"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2a, 0x0f], "cvtpi2pd xmm9, [r15]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x2a, 0xcf], "cvtsi2sd xmm9, r15"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x2a, 0x0f], "cvtsi2sd xmm9, [r15]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2b, 0x0f], "movntpd [r15], xmm9"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2c, 0xcf], "cvttpd2pi mm1, xmm15"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2c, 0x0f], "cvttpd2pi mm1, [r15]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x2c, 0xcf], "cvttsd2si xmm9, xmm15"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x2c, 0x0f], "cvttsd2si xmm9, [r15]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2d, 0xcf], "cvtpd2pi mm1, xmm15"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2d, 0x0f], "cvtpd2pi mm1, [r15]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x2d, 0xcf], "cvtsd2si xmm9, xmm15"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x2d, 0x0f], "cvtsd2si xmm9, [r15]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2e, 0xcf], "ucomisd xmm9, xmm15"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2e, 0x0f], "ucomisd xmm9, [r15]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2f, 0xcf], "comisd xmm9, xmm15"); + test_instr(&[0x66, 0x4f, 0x0f, 0x2f, 0x0f], "comisd xmm9, [r15]"); /* * .... 660f38 @@ -160,114 +170,135 @@ fn test_sse2() { */ test_invalid(&[0x66, 0x4f, 0x0f, 0x50, 0x01]); - test_display(&[0x66, 0x4f, 0x0f, 0x50, 0xc1], "movmskpd r8d, xmm9"); - test_display(&[0x66, 0x4f, 0x0f, 0x51, 0x01], "sqrtpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]"); // note: NOT "rsqrtpd" - no such instruction exists, so fall back to just 0f52 parse. - test_display(&[0x66, 0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]"); // note: NOT "rcppd" - no such instruction exists, so fall back to just 0f53 parse. - test_display(&[0x66, 0x4f, 0x0f, 0x54, 0x01], "andpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x55, 0x01], "andnpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x56, 0x01], "orpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x57, 0x01], "xorpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x58, 0x01], "addpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x59, 0x01], "mulpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5a, 0x01], "cvtpd2ps xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5b, 0x01], "cvtps2dq xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5c, 0x01], "subpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5d, 0x01], "minpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5e, 0x01], "divpd xmm8, [r9]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5f, 0x01], "maxpd xmm8, [r9]"); - test_display( + test_instr(&[0x66, 0x4f, 0x0f, 0x50, 0xc1], "movmskpd r8d, xmm9"); + test_instr(&[0x66, 0x4f, 0x0f, 0x51, 0x01], "sqrtpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x51, 0x01], "sqrtsd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]"); // note: NOT "rsqrtpd" - no such instruction exists, so fall back to just 0f52 parse. + test_instr(&[0x66, 0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]"); // note: NOT "rcppd" - no such instruction exists, so fall back to just 0f53 parse. + test_instr(&[0x66, 0x4f, 0x0f, 0x54, 0x01], "andpd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x55, 0x01], "andnpd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x56, 0x01], "orpd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x57, 0x01], "xorpd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x58, 0x01], "addpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x58, 0x01], "addsd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x59, 0x01], "mulpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x59, 0x01], "mulsd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x5a, 0x01], "cvtpd2ps xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x5a, 0x01], "cvtsd2ss xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x5b, 0x01], "cvtps2dq xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x5c, 0x01], "subpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x5c, 0x01], "subsd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x5d, 0x01], "minpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x5d, 0x01], "minsd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x5e, 0x01], "divpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x5e, 0x01], "divsd xmm8, [r9]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x5f, 0x01], "maxpd xmm8, [r9]"); + test_instr(&[0xf2, 0x4f, 0x0f, 0x5f, 0x01], "maxsd xmm8, [r9]"); + test_instr( &[0x66, 0x4f, 0x0f, 0x60, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpcklbw xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x61, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpcklwd xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x62, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpckldq xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x63, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "packsswb xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x64, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "pcmpgtb xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x65, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "pcmpgtw xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x66, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "pcmpgtd xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x67, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "packuswb xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x68, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpckhbw xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x69, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpckhwd xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x6a, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpckhdq xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x6b, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "packssdw xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x6c, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpcklqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x6d, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "punpckhqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" ); // TODO: this needs to be clear that the operand is `dword` - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "movq xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display( + test_instr( &[0x66, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], "movdqa xmm11, [r12 + r11 * 4 - 0x334455cc]" ); - test_display(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax"); - test_display(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e"); + test_instr(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax"); + test_instr(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e"); + test_instr(&[0xf2, 0x0f, 0x70, 0xc0, 0x4e], "pshuflw xmm0, xmm0, 0x4e"); + test_instr(&[0xf3, 0x0f, 0x70, 0xc0, 0x4e], "pshufhw xmm0, xmm0, 0x4e"); test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x10, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x20, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x30, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x10, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x20, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x30, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x10, 0x8f]); test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x18, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f"); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f"); test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x30, 0x8f]); test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x38, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f"); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f"); + test_instr(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f"); + test_instr(&[0x66, 0x0f, 0x7e, 0xc1], "movd ecx, xmm0"); + test_instr(&[0x66, 0x48, 0x0f, 0x7e, 0xc1], "movq rcx, xmm0"); + test_instr(&[0x66, 0x48, 0x0f, 0x7e, 0x01], "movd [rcx], xmm0"); + test_instr(&[0x66, 0x4f, 0x0f, 0x7e, 0xc1], "movq r9, xmm8"); + test_instr( + &[0x66, 0x4f, 0x0f, 0x7f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movdqa [r12 + r11 * 4 - 0x334455cc], xmm11" + ); test_instr(&[0x66, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm0, xmm3, 0x8"); test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm8, xmm11, 0x8"); test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmppd xmm8, [r11], 0x8"); + test_instr(&[0xf2, 0x0f, 0xc2, 0xc3, 0x08], "cmpsd xmm0, xmm3, 0x8"); + test_instr(&[0xf2, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmpsd xmm8, xmm11, 0x8"); + test_instr(&[0xf2, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmpsd xmm8, [r11], 0x8"); test_instr(&[0x66, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm0, ebx, 0x8"); test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm8, r11d, 0x8"); @@ -296,6 +327,9 @@ fn test_sse2() { test_instr(&[0x66, 0x0f, 0xd5, 0x01], "pmullw xmm0, [rcx]"); test_instr(&[0x66, 0x0f, 0xd6, 0xc1], "movq xmm1, xmm0"); test_instr(&[0x66, 0x0f, 0xd6, 0x01], "movq [rcx], xmm0"); + test_invalid(&[0xf3, 0x4f, 0x0f, 0xd6, 0x03]); + test_instr(&[0xf3, 0x4f, 0x0f, 0xd6, 0xc3], "movq2dq xmm8, mm3"); + test_instr(&[0xf2, 0x4f, 0x0f, 0xd6, 0xc3], "movdq2q mm0, xmm11"); test_instr(&[0x66, 0x0f, 0xd7, 0xc1], "pmovmskb eax, xmm1"); test_instr(&[0x66, 0x4f, 0x0f, 0xd7, 0xc1], "pmovmskb r8d, xmm9"); test_invalid(&[0x66, 0x0f, 0xd7, 0x01]); @@ -576,35 +610,62 @@ fn test_E_decode() { #[test] fn test_sse() { + test_display(&[0xf3, 0x0f, 0x10, 0x0c, 0xc7], "movss xmm1, [rdi + rax * 8]"); + test_display(&[0xf3, 0x0f, 0x11, 0x0c, 0xc7], "movss [rdi + rax * 8], xmm1"); test_display(&[0x4f, 0x0f, 0x28, 0x00], "movaps xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x29, 0x00], "movaps [r8], xmm8"); + test_display(&[0xf3, 0x4f, 0x0f, 0x2a, 0xc1], "cvtsi2ss xmm8, r9"); + test_display(&[0xf3, 0x4f, 0x0f, 0x2a, 0x01], "cvtsi2ss xmm8, [r9]"); test_display(&[0x4f, 0x0f, 0x2b, 0x00], "movntps [r8], xmm8"); + test_display(&[0xf3, 0x4f, 0x0f, 0x2c, 0xc1], "cvttss2si r8, xmm9"); + test_display(&[0xf3, 0x4f, 0x0f, 0x2c, 0x01], "cvttss2si r8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x2d, 0xc1], "cvtss2si r8, xmm9"); + test_display(&[0xf3, 0x4f, 0x0f, 0x2d, 0x01], "cvtss2si r8, [r9]"); test_display(&[0x4f, 0x0f, 0x2e, 0x00], "ucomiss xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x2f, 0x00], "comiss xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x50, 0xc0], "movmskps r8d, xmm8"); test_display(&[0x0f, 0x28, 0xd0], "movaps xmm2, xmm0"); test_display(&[0x66, 0x0f, 0x28, 0xd0], "movapd xmm2, xmm0"); test_display(&[0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [rax]"); - test_invalid(&[0x4f, 0x0f, 0x50, 0x00]); - test_display(&[0x4f, 0x0f, 0x50, 0xc0], "movmskps r8d, xmm8"); - test_display(&[0x4f, 0x0f, 0x51, 0x00], "sqrtps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x52, 0x00], "rsqrtps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x53, 0x00], "rcpps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x54, 0x00], "andps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x55, 0x00], "andnps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x56, 0x00], "orps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x57, 0x00], "xorps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x58, 0x00], "addps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x59, 0x00], "mulps xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x5a, 0x00], "cvtps2pd xmm8, [r8]"); - test_display(&[0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8]"); - test_display(&[0x67, 0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8d]"); test_display(&[0x4f, 0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [rax]"); test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); test_display(&[0x67, 0x4f, 0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [eax]"); test_display(&[0x67, 0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8d]"); test_display(&[0x66, 0x0f, 0x29, 0x00], "movapd [rax], xmm0"); + test_invalid(&[0x4f, 0x0f, 0x50, 0x00]); + test_display(&[0x4f, 0x0f, 0x50, 0xc1], "movmskps r8d, xmm9"); + test_display(&[0x4f, 0x0f, 0x51, 0x01], "sqrtps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x51, 0x01], "sqrtss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x52, 0x01], "rsqrtss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x53, 0x01], "rcpss xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x53, 0xc1], "rcpss xmm8, xmm9"); + test_display(&[0x4f, 0x0f, 0x54, 0x01], "andps xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x55, 0x01], "andnps xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x56, 0x01], "orps xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x57, 0x01], "xorps xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x58, 0x01], "addps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x58, 0x01], "addss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x59, 0x01], "mulps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x59, 0x01], "mulss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x5a, 0x01], "cvtps2pd xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x5a, 0x01], "cvtss2sd xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x5b, 0x01], "cvtdq2ps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x5b, 0x01], "cvttps2dq xmm8, [r9]"); + test_display(&[0x67, 0x4f, 0x0f, 0x5b, 0x01], "cvtdq2ps xmm8, [r9d]"); + test_display(&[0x4f, 0x0f, 0x5c, 0x01], "subps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x5c, 0x01], "subss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x5d, 0x01], "minps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x5d, 0x01], "minss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x5e, 0x01], "divps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x5e, 0x01], "divss xmm8, [r9]"); + test_display(&[0x4f, 0x0f, 0x5f, 0x01], "maxps xmm8, [r9]"); + test_display(&[0xf3, 0x4f, 0x0f, 0x5f, 0x01], "maxss xmm8, [r9]"); + + test_display(&[0x0f, 0x70, 0x00, 0x7f], "pshufw mm0, [rax], 0x7f"); + test_display(&[0x4f, 0x0f, 0x70, 0x00, 0x7f], "pshufw mm0, [r8], 0x7f"); + test_display(&[0x66, 0x0f, 0xef, 0xc0], "pxor xmm0, xmm0"); test_display(&[0x66, 0x4f, 0x0f, 0xef, 0xc0], "pxor xmm8, xmm8"); test_display(&[0xf2, 0x0f, 0x10, 0x0c, 0xc6], "movsd xmm1, [rsi + rax * 8]"); @@ -612,7 +673,27 @@ fn test_sse() { test_display(&[0xf2, 0x0f, 0x59, 0xc8], "mulsd xmm1, xmm0"); test_display(&[0xf3, 0x0f, 0x59, 0xc8], "mulss xmm1, xmm0"); test_display(&[0xf2, 0x4f, 0x0f, 0x59, 0xc8], "mulsd xmm9, xmm8"); - test_display(&[0xf2, 0x0f, 0x11, 0x0c, 0xc7], "movsd [rdi + rax * 8], xmm1"); + + test_display( + &[0xf3, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movdqu xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display(&[0xf3, 0x0f, 0x70, 0xc0, 0x4e], "pshufhw xmm0, xmm0, 0x4e"); + test_display(&[0xf3, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm1"); + test_display(&[0xf3, 0x4f, 0x0f, 0x7e, 0xc1], "movd r9, mm0"); // use of rex.w demotes to movd r/mm + test_display(&[0xf3, 0x40, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm1"); + test_display(&[0xf3, 0x41, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm9"); + test_display(&[0xf3, 0x42, 0x0f, 0x7e, 0xc1], "movq xmm0, xmm1"); + test_display(&[0xf3, 0x44, 0x0f, 0x7e, 0xc1], "movq xmm8, xmm1"); + test_display(&[0xf3, 0x48, 0x0f, 0x7e, 0xc1], "movd rcx, mm0"); + test_display( + &[0xf3, 0x4f, 0x0f, 0x7f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movdqu [r12 + r11 * 4 - 0x334455cc], xmm11" + ); + + test_display(&[0xf3, 0x0f, 0xc2, 0xc3, 0x08], "cmpss xmm0, xmm3, 0x8"); + test_display(&[0xf3, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmpss xmm8, xmm11, 0x8"); + test_display(&[0xf3, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmpss xmm8, [r11], 0x8"); } // SETLE, SETNG, ... -- cgit v1.1