From 33c520341b373ac18e7924eb9227615ac65c2618 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 22 Feb 2020 00:51:30 -0800 Subject: support 660f sse2 instructions this isn't quite all of sse2, but gets close. the f20f opcode map still needs some touching up. also fix `G_E_xmm_Ib` not respecting rex.r for the rrr operand --- test/test.rs | 367 ++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 278 insertions(+), 89 deletions(-) (limited to 'test') diff --git a/test/test.rs b/test/test.rs index 1bac590..e13f587 100644 --- a/test/test.rs +++ b/test/test.rs @@ -114,6 +114,284 @@ fn test_aesni() { } #[test] +fn test_sse2() { + fn test_instr(bytes: &[u8], text: &'static str) { + // sse and sse2 are part of amd64, so x86_64, meaning even the minimal decoder must support + // them. + test_display_under(&InstDecoder::minimal(), bytes, text); + } + + fn test_instr_invalid(bytes: &[u8]) { + test_invalid_under(&InstDecoder::minimal(), bytes); + test_invalid_under(&InstDecoder::default(), bytes); + } + + test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0xc3], "movhlps xmm8, xmm11"); // reg-reg form is movhlps + test_instr(&[0x66, 0x4f, 0x0f, 0x12, 0x03], "movlpd xmm8, [r11]"); // reg-mem is movlpd + test_instr(&[0x66, 0x4f, 0x0f, 0x13, 0x03], "movlpd [r11], xmm8"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x13, 0xc3]); + test_instr(&[0x66, 0x4f, 0x0f, 0x14, 0x03], "unpcklpd xmm8, [r11]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x14, 0xc3], "unpcklpd xmm8, xmm11"); + test_instr(&[0x66, 0x4f, 0x0f, 0x15, 0x03], "unpckhpd xmm8, [r11]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x15, 0xc3], "unpckhpd xmm8, xmm11"); + test_instr(&[0x66, 0x4f, 0x0f, 0x16, 0x03], "movhpd xmm8, [r11]"); + test_instr(&[0x66, 0x4f, 0x0f, 0x16, 0xc3], "movlhps xmm8, xmm11"); + test_instr(&[0x66, 0x4f, 0x0f, 0x17, 0x03], "movhpd [r11], xmm8"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x17, 0xc3]); + + test_display(&[0x66, 0x4f, 0x0f, 0x28, 0xd0], "movapd xmm10, xmm8"); + test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); + + test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0xcf], "cvtpi2pd xmm9, mm7"); + test_display(&[0x66, 0x4f, 0x0f, 0x2a, 0x0f], "cvtpi2pd xmm9, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2b, 0x0f], "movntpd [r15], xmm9"); + test_display(&[0x66, 0x4f, 0x0f, 0x2c, 0xcf], "cvttpd2pi mm1, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2c, 0x0f], "cvttpd2pi mm1, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2d, 0xcf], "cvtpd2pi mm1, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2d, 0x0f], "cvtpd2pi mm1, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2e, 0xcf], "ucomisd xmm9, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2e, 0x0f], "ucomisd xmm9, [r15]"); + test_display(&[0x66, 0x4f, 0x0f, 0x2f, 0xcf], "comisd xmm9, xmm15"); + test_display(&[0x66, 0x4f, 0x0f, 0x2f, 0x0f], "comisd xmm9, [r15]"); + + /* + * .... 660f38 + * .... 660f7f + */ + + test_invalid(&[0x66, 0x4f, 0x0f, 0x50, 0x01]); + test_display(&[0x66, 0x4f, 0x0f, 0x50, 0xc1], "movmskpd r8d, xmm9"); + test_display(&[0x66, 0x4f, 0x0f, 0x51, 0x01], "sqrtpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x52, 0x01], "rsqrtps xmm8, [r9]"); // note: NOT "rsqrtpd" - no such instruction exists, so fall back to just 0f52 parse. + test_display(&[0x66, 0x4f, 0x0f, 0x53, 0x01], "rcpps xmm8, [r9]"); // note: NOT "rcppd" - no such instruction exists, so fall back to just 0f53 parse. + test_display(&[0x66, 0x4f, 0x0f, 0x54, 0x01], "andpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x55, 0x01], "andnpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x56, 0x01], "orpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x57, 0x01], "xorpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x58, 0x01], "addpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x59, 0x01], "mulpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5a, 0x01], "cvtpd2ps xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5b, 0x01], "cvtps2dq xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5c, 0x01], "subpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5d, 0x01], "minpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5e, 0x01], "divpd xmm8, [r9]"); + test_display(&[0x66, 0x4f, 0x0f, 0x5f, 0x01], "maxpd xmm8, [r9]"); + test_display( + &[0x66, 0x4f, 0x0f, 0x60, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpcklbw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x61, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpcklwd xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x62, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckldq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x63, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "packsswb xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x64, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "pcmpgtb xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x65, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "pcmpgtw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x66, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "pcmpgtd xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x67, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "packuswb xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x68, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhbw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x69, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhwd xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6a, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhdq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6b, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "packssdw xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6c, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpcklqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6d, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "punpckhqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + // TODO: this needs to be clear that the operand is `dword` + test_display( + &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movq xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + test_display( + &[0x66, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], + "movdqa xmm11, [r12 + r11 * 4 - 0x334455cc]" + ); + + test_display(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax"); + test_display(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x10, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x20, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x30, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x10, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x20, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x30, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x10, 0x8f]); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x18, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f"); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f"); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x30, 0x8f]); + test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x38, 0x8f]); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f"); + test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f"); + + test_instr(&[0x66, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm0, xmm3, 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0xc3, 0x08], "cmppd xmm8, xmm11, 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc2, 0x03, 0x08], "cmppd xmm8, [r11], 0x8"); + + test_instr(&[0x66, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm0, ebx, 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0xc3, 0x08], "pinsrw xmm8, r11d, 0x8"); + + test_instr(&[0x66, 0x0f, 0xc4, 0x03, 0x08], "pinsrw xmm0, [rbx], 0x8"); + test_instr(&[0x66, 0x4f, 0x0f, 0xc4, 0x03, 0x08], "pinsrw xmm8, [r11], 0x8"); + +// test_instr(&[0x66, 0x0f, 0xc5, 0xc3, 0x08], "pextrw eax, xmm3, 0x8"); +// test_instr(&[0x66, 0x4f, 0x0f, 0xc5, 0xc3, 0x08], "pextrw r8d, xmm11, 0x8"); +// test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x03, 0x08]); +// test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x40, 0x08]); +// test_instr_invalid(&[0x66, 0x0f, 0xc5, 0x80, 0x08]); + + test_instr(&[0x66, 0x4f, 0x0f, 0xc6, 0x03, 0x08], "shufpd xmm8, [r11], 0x8"); + test_instr(&[0x66, 0x0f, 0xc6, 0x03, 0x08], "shufpd xmm0, [rbx], 0x8"); + test_instr(&[0x66, 0x0f, 0xc6, 0xc3, 0x08], "shufpd xmm0, xmm3, 0x8"); + test_instr(&[0x66, 0x0f, 0xd1, 0xc1], "psrlw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd1, 0x01], "psrlw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd2, 0xc1], "psrld xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd2, 0x01], "psrld xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd3, 0xc1], "psrlq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd3, 0x01], "psrlq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd4, 0xc1], "paddq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd4, 0x01], "paddq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd5, 0xc1], "pmullw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd5, 0x01], "pmullw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd6, 0xc1], "movq xmm1, xmm0"); + test_instr(&[0x66, 0x0f, 0xd6, 0x01], "movq [rcx], xmm0"); + test_instr(&[0x66, 0x0f, 0xd7, 0xc1], "pmovmskb eax, xmm1"); + test_instr(&[0x66, 0x4f, 0x0f, 0xd7, 0xc1], "pmovmskb r8d, xmm9"); + test_invalid(&[0x66, 0x0f, 0xd7, 0x01]); + test_instr(&[0x66, 0x0f, 0xd8, 0xc1], "psubusb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd8, 0x01], "psubusb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xd9, 0xc1], "psubusw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xd9, 0x01], "psubusw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xda, 0xc1], "pminub xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xda, 0x01], "pminub xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdb, 0xc1], "pand xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdb, 0x01], "pand xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdc, 0xc1], "paddusb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdc, 0x01], "paddusb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdd, 0xc1], "paddusw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdd, 0x01], "paddusw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xde, 0xc1], "pmaxub xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xde, 0x01], "pmaxub xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xdf, 0xc1], "pandn xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xdf, 0x01], "pandn xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe0, 0xc1], "pavgb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe0, 0x01], "pavgb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe1, 0xc1], "psraw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe1, 0x01], "psraw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe2, 0xc1], "psrad xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe2, 0x01], "psrad xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe3, 0xc1], "pavgw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe3, 0x01], "pavgw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe4, 0xc1], "pmulhuw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe4, 0x01], "pmulhuw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe5, 0xc1], "pmulhw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe5, 0x01], "pmulhw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe6, 0xc1], "cvttpd2dq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe6, 0x01], "cvttpd2dq xmm0, [rcx]"); + test_invalid(&[0x66, 0x0f, 0xe7, 0xc1]); + test_instr(&[0x66, 0x0f, 0xe7, 0x01], "movntdq [rcx], xmm0"); + test_instr(&[0x66, 0x0f, 0xe8, 0xc1], "psubsb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe8, 0x01], "psubsb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xe9, 0xc1], "psubsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xe9, 0x01], "psubsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xea, 0xc1], "pminsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xea, 0x01], "pminsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xeb, 0xc3], "por xmm0, xmm3"); + test_instr(&[0x66, 0x0f, 0xeb, 0xc4], "por xmm0, xmm4"); + test_instr(&[0x66, 0x0f, 0xeb, 0xd3], "por xmm2, xmm3"); + test_instr(&[0x66, 0x0f, 0xeb, 0x12], "por xmm2, [rdx]"); + test_instr(&[0x66, 0x0f, 0xeb, 0xc1], "por xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xeb, 0x01], "por xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xec, 0xc1], "paddsb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xec, 0x01], "paddsb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xed, 0xc1], "paddsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xed, 0x01], "paddsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xee, 0xc1], "pmaxsw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xee, 0x01], "pmaxsw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xef, 0xc1], "pxor xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xef, 0x01], "pxor xmm0, [rcx]"); + test_invalid(&[0x66, 0x0f, 0xf0, 0xc1]); + test_invalid(&[0x66, 0x0f, 0xf0, 0x01]); + test_instr(&[0x66, 0x0f, 0xf1, 0xc1], "psllw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf1, 0x01], "psllw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf2, 0xc1], "pslld xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf2, 0x01], "pslld xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf3, 0xc1], "psllq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf3, 0x01], "psllq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf4, 0xc1], "pmuludq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf4, 0x01], "pmuludq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf5, 0xc1], "pmaddwd xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf5, 0x01], "pmaddwd xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf6, 0xc1], "psadbw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf6, 0x01], "psadbw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf7, 0xc1], "maskmovdqu xmm0, xmm1"); + test_invalid(&[0x66, 0x0f, 0xf7, 0x01]); + test_instr(&[0x66, 0x0f, 0xf8, 0xc1], "psubb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf8, 0x01], "psubb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xf9, 0xc1], "psubw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xf9, 0x01], "psubw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfa, 0xc1], "psubd xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfa, 0x01], "psubd xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfb, 0xc1], "psubq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfb, 0x01], "psubq xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfc, 0xc1], "paddb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfc, 0x01], "paddb xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfd, 0xc1], "paddw xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfd, 0x01], "paddw xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xfe, 0xc1], "paddd xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xfe, 0x01], "paddd xmm0, [rcx]"); + test_instr(&[0x66, 0x0f, 0xff, 0xc1], "paddq xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0xff, 0x01], "paddq xmm0, [rcx]"); + + test_instr(&[0x66, 0x0f, 0x74, 0xc1], "pcmpeqb xmm0, xmm1"); + test_instr(&[0x66, 0x0f, 0x74, 0x12], "pcmpeqb xmm2, [rdx]"); + test_instr(&[0x66, 0x0f, 0xf8, 0xc8], "psubb xmm1, xmm0"); + test_instr(&[0x66, 0x0f, 0xf8, 0xd0], "psubb xmm2, xmm0"); + test_instr(&[0x66, 0x0f, 0xf8, 0x12], "psubb xmm2, [rdx]"); +} + +#[test] fn test_sse3() { fn test_instr(bytes: &[u8], text: &'static str) { test_display_under(&InstDecoder::minimal().with_sse3(), bytes, text); @@ -298,94 +576,6 @@ fn test_E_decode() { #[test] fn test_sse() { - test_display( - &[0x66, 0x4f, 0x0f, 0x60, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpcklbw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x61, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpcklwd xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x62, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckldq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x63, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "packsswb xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x64, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "pcmpgtb xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x65, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "pcmpgtw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x66, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "pcmpgtd xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x67, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "packuswb xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x68, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhbw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x69, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhwd xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6a, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhdq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6b, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "packssdw xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6c, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpcklqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6d, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "punpckhqdq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - // this needs to be clear that the operand is `dword` - test_display( - &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "movq xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - test_display( - &[0x66, 0x4f, 0x0f, 0x6f, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc], - "movdqa xmm11, [r12 + r11 * 4 - 0x334455cc]" - ); - - test_display(&[0x66, 0x48, 0x0f, 0x6e, 0xc0], "movq xmm0, rax"); - test_display(&[0x66, 0x0f, 0x70, 0xc0, 0x4e], "pshufd xmm0, xmm0, 0x4e"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x10, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xd0, 0x8f], "psrlw xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x20, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xe0, 0x8f], "psraw xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x71, 0x30, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x71, 0xf0, 0x8f], "psllw xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x10, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xd0, 0x8f], "psrld xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x20, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xe0, 0x8f], "psrad xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x72, 0x30, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x72, 0xf0, 0x8f], "pslld xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x10, 0x8f]); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x18, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd0, 0x8f], "psrlq xmm0, 0x8f"); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xd8, 0x8f], "psrldq xmm0, 0x8f"); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x30, 0x8f]); - test_invalid(&[0x66, 0x4f, 0x0f, 0x73, 0x38, 0x8f]); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf0, 0x8f], "psllq xmm0, 0x8f"); - test_display(&[0x66, 0x4f, 0x0f, 0x73, 0xf8, 0x8f], "pslldq xmm0, 0x8f"); test_display(&[0x4f, 0x0f, 0x28, 0x00], "movaps xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x29, 0x00], "movaps [r8], xmm8"); test_display(&[0x4f, 0x0f, 0x2b, 0x00], "movntps [r8], xmm8"); @@ -408,7 +598,6 @@ fn test_sse() { test_display(&[0x4f, 0x0f, 0x59, 0x00], "mulps xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x5a, 0x00], "cvtps2pd xmm8, [r8]"); test_display(&[0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8]"); - test_display(&[0x66, 0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8]"); test_display(&[0x67, 0x4f, 0x0f, 0x5b, 0x00], "cvtdq2ps xmm8, [r8d]"); test_display(&[0x4f, 0x66, 0x0f, 0x28, 0x00], "movapd xmm0, [rax]"); test_display(&[0x66, 0x4f, 0x0f, 0x28, 0x00], "movapd xmm8, [r8]"); -- cgit v1.1