diff options
| author | iximeow <me@iximeow.net> | 2026-07-05 01:31:55 +0000 |
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2026-07-05 01:31:55 +0000 |
| commit | a4e667b20eef547bfd010b8b112710120f64a0b8 (patch) | |
| tree | d8f605f6cbf7f1b2d83fc4d935815c2236dda447 /test/protected_mode | |
| parent | 97dbde69221127d2552cb4fc442b90a2c0ff2a95 (diff) | |
shove all the masm input/output patching into the masm tools bits..
Diffstat (limited to 'test/protected_mode')
| -rw-r--r-- | test/protected_mode/mod.rs | 336 |
1 files changed, 12 insertions, 324 deletions
diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs index 2b1cc70..0c7b8a5 100644 --- a/test/protected_mode/mod.rs +++ b/test/protected_mode/mod.rs @@ -333,276 +333,18 @@ fn check_decodes(decoder: &InstDecoder, decode_ok: bool, bytes: &[u8], disasm: & let vex_prefixed = bytes[0] == 0xc4 || bytes[0] == 0xc5; // match against some testcases that are known to be wrong by MASM/dumpbin. - let external_masm_ish = match bytes { - &[0xf1] => "int 1".to_string(), // dumpbin does not know how to decode f1... - &[0xe5, 0x99] => "in eax, 99h".to_string(), // this is a MASM/dumpbin bug. see notes on testcase. - &[0xe7, 0x99] => "out 99h, eax".to_string(), // this is a MASM/dumpbin bug. see notes on testcase. - // dumpbin prints the instruction as if it was encoded in 32-bit form regardless of object file, so overrule it. - &[0xf3, 0x0f, 0xc7, 0xfd] => "rdpid ebp".to_string(), - &[0x0f, 0x18, 0xc0] => "nop eax".to_string(), // dumpbin would love to call this "prefetchnta eax" ??? - &[0x0f, 0x18, 0xcc] => "nop esp".to_string(), // dumpbin would love to call this "prefetchnta esp" ??? - &[0x0f, 0x18, 0x20] => "nop zmmword ptr [eax]".to_string(), // getting around dumpbin knowing about prefetchrst2.. - &[0x0f, 0x19, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f19.. - &[0x0f, 0x1a, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin wants to call this bndldx, yax doesn't do MPX yet - &[0x0f, 0x1b, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin wants to call this bndstx, yax doesn't do MPX yet - &[0x0f, 0x1c, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f1c.. - &[0x0f, 0x1d, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f1d.. - &[0x0f, 0x1e, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f1e.. - &[0xf2, 0x66, 0x66, 0x0f, 0x10, 0xc0] => "movsd xmm0, xmm0".to_string(), // dumpbin does not love the prefixes - &[0xf3, 0x0f, 0x1e, 0xfc] => "nop".to_string(), // dumpbin does not tolerate this at all, redirect into a boring nop. - &[0x0f, 0x43, 0xec] => "cmovnb ebp, esp".to_string(), // dumpbin writes it "cmovae" instead of yax's cmovnb. - &[0x2e, 0x36, 0x0f, 0x18, 0xe7] => "nop edi".to_string(), // dumpbin reports a mildly-confused prefetchrst2 rdi (even in 32-bit mode!) - &[0x0f, 0xbe, 0x83, 0xb4, 0x00, 0x00, 0x00] => { - "movsx eax, byte ptr [ebx + 0B4h]".to_string() // dumpbin uses %016 formatting, masm happily accepts shorter. - }, - &[0x62, 0xd2, 0x7e, 0x28, 0x3a, 0xca] => { - "vpbroadcastmw2d ymm1, k2".to_string() // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328. - }, - &[0x62, 0xd2, 0x7e, 0x08, 0x28, 0xc2] => { - "vpmovm2b xmm0, k2".to_string() // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328. - }, - &[0x0f, 0x0d, 0x00] => { - // dumpbin interprets this as the 3DNow!-style PREFETCH instruction, but we're definitely not 3dnow.. - "nop zmmword ptr [eax]".to_string() - } - &[0xc4, 0x03, 0x3d, 0x0a, 0xca, 0x77] => { - // dumpbin can't deal with this instruction.. - "vroundss xmm9, xmm8, xmm10, 77h".to_string() - } - &[0xc4, 0x03, 0x3d, 0x0b, 0xca, 0x77] => { - // dumpbin can't deal with this instruction.. - "vroundsd xmm9, xmm8, xmm10, 77h".to_string() - } - &[0x66, 0x0f, 0xd6, 0x01] => { - // dumpbin really wants to use mmword here, but i really don't. - "movq qword ptr [ecx], xmm0".to_string() - } - // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0. - &[0x66, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(), - &[0xf2, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(), - &[0xf3, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(), - &[0x66, 0x0f, 0xff, 0x01] => "ud0 eax, dword ptr [ecx]".to_string(), - &[0x0f, 0xff, 0x6b, 0xac] => "ud0 ebp, dword ptr [ebx - 54h]".to_string(), - // dumpbin does not tolerate the pointless prefixes. - &[0x36, 0x36, 0x2e, 0x0f, 0x38, 0xf9, 0x55, 0x3e] => "movdiri dword ptr cs:[ebp + 3Eh], edx".to_string(), - // dumpbin does not tolerate the pointless prefixes. - &[0x36, 0x26, 0x66, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => "movdir64b ebp, zmmword ptr es:[ebp + 729080Bh]".to_string(), - // dumpbin does not tolerate the pointless prefixes. - &[0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08] => "movdir64b bp, zmmword ptr es:[di + 80Bh]".to_string(), - // and again - &[0xf2, 0xf2, 0x2e, 0x36, 0x0f, 0x38, 0xf8, 0x83, 0x09, 0x1c, 0x9d, 0x3f] => "enqcmd eax, zmmword ptr ss:[ebx + 3F9D1C09h]".to_string(), - // and again. - &[0x3e, 0x64, 0xf3, 0x64, 0x0f, 0x38, 0xf8, 0x72, 0x54] => "enqcmds esi, zmmword ptr fs:[edx + 54h]".to_string(), - // prefixes confuse dumpbin again - &[0x66, 0xf3, 0x0f, 0x01, 0xe8] => "setssbsy".to_string(), - // prefixes confuse dumpbin again - &[0x66, 0xf3, 0x0f, 0x01, 0xea] => "saveprevssp".to_string(), - // prefixes confuse dumpbin again - &[0xf3, 0x66, 0x0f, 0x01, 0xe8] => "setssbsy".to_string(), // TODO: yax does not support `serialize` (yet) - // prefixes confuse dumpbin again - &[0xf3, 0x66, 0x0f, 0x01, 0xea] => "saveprevssp".to_string(), - // prefixes confuse dumpbin again - &[0xf3, 0x66, 0x0f, 0x01, 0x29] => "rstorssp qword ptr [ecx]".to_string(), - // dumpbin writes the repne, but it doesn't do anything.. - &[0xf2, 0x0f, 0x21, 0xc8] => "mov eax, dr1".to_string(), - // dumpbin writes the rep, but it doesn't do anything.. - &[0xf3, 0x0f, 0x21, 0xc8] => "mov eax, dr1".to_string(), - // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no! - &[0xf2, 0x0f, 0xc0, 0xcc] => "xadd ah, cl".to_string(), - // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't. - &[0xf3, 0x0f, 0xc0, 0xcc] => "xadd ah, cl".to_string(), - // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no! - &[0xf2, 0x0f, 0xc1, 0xcc] => "xadd esp, ecx".to_string(), - // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't. - &[0xf3, 0x0f, 0xc1, 0xcc] => "xadd esp, ecx".to_string(), - // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no! - &[0xf2, 0x0f, 0xc7, 0x0f] => "cmpxchg8b qword ptr [edi]".to_string(), - // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't. - &[0xf3, 0x0f, 0xc7, 0x0f] => "cmpxchg8b qword ptr [edi]".to_string(), - // prefixes again.. - &[0x66, 0x36, 0x0f, 0x3a, 0xce, 0x8c, 0x56, 0x9e, 0x82, 0xd1, 0xbe, 0xad] => "gf2p8affineqb xmm1, xmmword ptr ss:[esi + edx * 2 - 412E7D62h], 0ADh".to_string(), - &[0x3e, 0x64, 0x64, 0x66, 0x0f, 0x3a, 0xcf, 0xba, 0x13, 0x23, 0x04, 0xba, 0x6b] => "gf2p8affineinvqb xmm7, xmmword ptr fs:[edx - 45FBDCEDh], 6Bh".to_string(), - &[0xf3, 0x64, 0x2e, 0x65, 0x0f, 0x38, 0xdc, 0xe8] => "loadiwkey xmm5, xmm0".to_string(), - // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either. - &[0x66, 0x0f, 0x38, 0x80, 0x01] => "invept eax, xmmword ptr [ecx]".to_string(), - // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either. - &[0x66, 0x0f, 0x38, 0x81, 0x01] => "invvpid eax, xmmword ptr [ecx]".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - // (and we print jnb instead of jae) - &[0x73, 0x31] => "jnb $+33h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x72, 0x5a] => "jb $+5Ch".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x72, 0xf0] => "jb $-0Eh".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe8, 0x01, 0x00, 0x00, 0x00] => "call $+6".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe8, 0x80, 0x00, 0x00, 0x00] => "call near ptr $+85h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe8, 0xff, 0xff, 0xff, 0xff] => "call $+4".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe9, 0x01, 0x00, 0x00, 0x00] => "jmp $+6".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. there's also the near ptr nonsense.. - &[0xe9, 0x80, 0x00, 0x00, 0x00] => "jmp near ptr $+85h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe9, 0xff, 0xff, 0xff, 0xff] => "jmp $+4".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00] => "jna $+191h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x0f, 0x85, 0x3b, 0x25, 0x00, 0x00] => "jnz $+2541h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x74, 0x47] => "jz $+49h".to_string(), - // dumpbin prints a ds: since this is an absolute address.. - &[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00] => "call dword ptr [0024727Eh]".to_string(), - // dumpbin uses a really wide displacement .. for laughs.. - &[0xff, 0x24, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => "jmp dword ptr [ecx * 8 + 1BCA070h]".to_string(), - // dumpbin uses a really wide displacement .. for laughs.. - &[0xff, 0x14, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => "call dword ptr [ecx * 8 + 1BCA070h]".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe0, 0x12] => "loopnz $+14h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe1, 0x12] => "loopz $+14h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe2, 0x12] => "loop $+14h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe3, 0x12] => "jecxz $+14h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0xe3, 0xf0] => "jecxz $-0Eh".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x67, 0xe3, 0x12] => "jcxz $+15h".to_string(), - // dumpbin uses absolute branch destinations, but yax uses relative. - &[0x67, 0xe3, 0xf0] => "jcxz $-0Dh".to_string(), - // dumpbin dislikes prefixes. - &[0x66, 0xf2, 0x0f, 0x79, 0xcf] => "insertq xmm1, xmm7".to_string(), - &[0xf6, 0x05, 0x2c, 0x9b, 0xff, 0xff, 0x01] => "test byte ptr [0FFFF9B2Ch], 1".to_string(), - // yax uses wider immediates - &[0x3d, 0x01, 0xf0, 0xff, 0xff] => "cmp eax, 0FFFFF001h".to_string(), - // dumpbin gets the size wrong - &[0x62, 0xf2, 0xfd, 0x0f, 0x8a, 0x62, 0xf2] => "vcompresspd xmmword ptr [edx - 70h]{k7}, xmm4".to_string(), - // TODO: yax doesn't know about rdssp{d,q}? - &[0xf3, 0x0f, 0x1e, 0x0f] => "nop".to_string(), - // yax won't mention the pointless repne prefix - &[0xf2, 0x0f, 0x06] => "clts".to_string(), - // yax won't mention the pointless repne prefix - &[0xf2, 0x0f, 0x07] => "sysret".to_string(), - // dumpbin spells this mmword - &[0x0f, 0x6f, 0x00] => "movq mm0, qword ptr [eax]".to_string(), - &[0x66, 0x2e, 0xf2, 0xf0, 0x0f, 0xbb, 0x13] => "xacquire lock btc word ptr cs:[ebx], dx".to_string(), - // dumpbin prints with more.. flourish - &[0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00] => "nop word ptr cs:[eax + eax]".to_string(), - // disp is wider from dumpbin - &[0x0f, 0xfc, 0xaf, 0x40, 0x38, 0x25, 0xbf] => "paddb mm5, mmword ptr [edi - 40DAC7C0h]".to_string(), - &[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56] => "xbegin $+56341216h".to_string(), - &[0x66, 0xc7, 0xf8, 0x10, 0x12] => "xbegin $+1215h".to_string(), - &[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c] => "pi2fw mm6, qword ptr ss:[eax - 5]".to_string(), // more prefix confusion.. - // prefixes confuse dumpbin, and dumpbin says "qword" where we use mmword. masm accepts either - &[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4] => "pfmax mm4, mmword ptr cs:[ebx + edx * 4 - 6Dh]".to_string(), - // dumpbin shows this as a non-rip-rel offset :( - &[0x0f, 0xe5, 0x3d, 0xaa, 0xbb, 0xcc, 0x77] => "pmulhw mm7, qword ptr [77CCBBAAh]".to_string(), - // dumpbin confused about prefixes once again - &[0x66, 0x3e, 0x26, 0x2e, 0x2e, 0x0f, 0x38, 0x2a, 0x2b] => "movntdqa xmm5, xmmword ptr cs:[ebx]".to_string(), - // prefixes.. cs: isn't real in 64-bit mode - &[0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c] => "blendpd xmm7, xmmword ptr cs:[bx + si + 2FF0h], 7Ch".to_string(), - // prefixes confuse dumpbin - &[0x66, 0x66, 0x64, 0x3e, 0x0f, 0x38, 0x23, 0x9d, 0x69, 0x0f, 0xa8, 0x2d] => "pmovsxwd xmm3, qword ptr [ebp + 2DA80F69h]".to_string(), - // prefixes confuse dumpbin - &[0x2e, 0x66, 0x26, 0x64, 0x0f, 0x3a, 0x21, 0x0b, 0xb1] => "insertps xmm1, dword ptr fs:[ebx], 0B1h".to_string(), - // prefixes confuse dumpbin - &[0x66, 0x26, 0x0f, 0x3a, 0x42, 0x96, 0x74, 0x29, 0x96, 0xf9, 0x6a] => "mpsadbw xmm2, xmmword ptr es:[esi - 669D68Ch], 6Ah".to_string(), - // prefixes confuse dumpbin - &[0x67, 0x26, 0x66, 0x65, 0x0f, 0x38, 0x3f, 0x9d, 0xcc, 0x03] => "pmaxud xmm3, xmmword ptr gs:[di + 3CCh]".to_string(), - // prefixes confuse dumpbin - &[0x67, 0x66, 0x65, 0x3e, 0x0f, 0x6d, 0xd1] => "punpckhqdq xmm2, xmm1".to_string(), - // prefixes confuse dumpbin - &[0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f] => "lddqu xmm4, xmmword ptr es:[bx + si + 5F1Bh]".to_string(), - // prefixes confuse dumpbin - &[0x2e, 0x3e, 0x66, 0x3e, 0x0f, 0x3a, 0x41, 0x30, 0x48] => "dppd xmm6, xmmword ptr [eax], 48h".to_string(), - // again prefixes confuse dumpbin - &[0x65, 0x66, 0x66, 0x64, 0x0f, 0x38, 0xdb, 0x0f] => "aesimc xmm1, xmmword ptr fs:[edi]".to_string(), - // dumpbin prints the order backwards =| - &[0x65, 0xf0, 0x87, 0x0f] => "lock xchg dword ptr gs:[edi], ecx".to_string(), - // dumpbin knows about "fstpnce" as "fstp1", but masm does not. - // since this is an undocumented instruction anyway, decode it ourselves.. - &[0xd9, 0xdb] => "fstpnce st(3), st(0)".to_string(), - // dumpbin calls this "fcom2", but it's just an undocumented fcom alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xdc, 0xd3] => "fcom st(3)".to_string(), - // dumpbin calls this "fcomp3", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xdc, 0xdb] => "fcomp st(3)".to_string(), - // dumpbin calls this "fxch4", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xdd, 0xcb] => "fxch st(3)".to_string(), - // dumpbin calls this "fcomp5", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xde, 0xd3] => "fcomp st(3)".to_string(), - // dumpbin calls this "fxch7", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xdf, 0xcb] => "fxch st(3)".to_string(), - // dumpbin calls this "fstp8", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xdf, 0xd3] => "fstp st(3)".to_string(), - // dumpbin calls this "fstp9", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right. - &[0xdf, 0xdb] => "fstp st(3)".to_string(), - &[0xf2, 0x0f, 0xbc, 0xd3] => "bsf edx, ebx".to_string(), - // mov abs in 32-bit mode gets a ds: prefix even though that's the default. masm does not need this prefix, so we round-trip fine without it. - &[0xa0, 0x93, 0x62, 0xc4, 0x00] => "mov al, byte ptr [00C46293h]".to_string(), - &[0x67, 0xa0, 0x93, 0x62] => "mov al, byte ptr [00006293h]".to_string(), - &[0xa1, 0x93, 0x62, 0xc4, 0x00] => "mov eax, dword ptr [00C46293h]".to_string(), - &[0x67, 0xa1, 0x93, 0x62] => "mov eax, dword ptr [00006293h]".to_string(), - &[0xa2, 0x93, 0x62, 0xc4, 0x00] => "mov byte ptr [00C46293h], al".to_string(), - &[0x67, 0xa2, 0x93, 0x62] => "mov byte ptr [00006293h], al".to_string(), - &[0xa3, 0x93, 0x62, 0xc4, 0x00] => "mov dword ptr [00C46293h], eax".to_string(), - &[0x67, 0xa3, 0x93, 0x62] => "mov dword ptr [00006293h], eax".to_string(), - &[0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => "xor eax, dword ptr [12345678h]".to_string(), - &[0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(), - &[0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(), - &[0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => "xor esi, dword ptr [50403020h]".to_string(), - &[0xa0, 0xc0, 0xb0, 0xa0, 0x90] => "mov al, byte ptr [90A0B0C0h]".to_string(), - &[0x67, 0xa0, 0xc0, 0xb0] => "mov al, byte ptr [0B0C0h]".to_string(), - &[0x67, 0xa1, 0xc0, 0xb0] => "mov eax, dword ptr [0000B0C0h]".to_string(), - &[0x66, 0x67, 0xa1, 0xc0, 0xb0] => "mov ax, word ptr [0000B0C0h]".to_string(), - // same for wrssd - &[0x3e, 0x0f, 0x38, 0xf6, 0x23] => "wrssd dword ptr [ebx], esp".to_string(), - // dumpbin believes that rex.w works even in 32-bit code, thus prints `rorx rax, ..`. haha what a dingus - &[0xc4, 0xe3, 0xfb, 0xf0, 0x01, 0x05] => "rorx eax, dword ptr [ecx], 5".to_string(), - &[0xc4, 0xe2, 0xe3, 0xf5, 0x07] => "pdep eax, ebx, dword ptr [edi]".to_string(), - &[0xc4, 0xe2, 0xe3, 0xf6, 0x07] => "mulx eax, ebx, dword ptr [edi]".to_string(), - &[0xc4, 0xe2, 0xe3, 0xf7, 0x01] => "shrx eax, dword ptr [ecx], ebx".to_string(), - &[0xc4, 0xe2, 0xe2, 0xf5, 0x07] => "pext eax, ebx, dword ptr [edi]".to_string(), - &[0xc4, 0xe2, 0xe2, 0xf7, 0x01] => "sarx eax, dword ptr [ecx], ebx".to_string(), - &[0xc4, 0xe2, 0xe0, 0xf5, 0x07] => "bzhi eax, dword ptr [edi], ebx".to_string(), - &[0xc4, 0xe2, 0xe1, 0xf7, 0x01] => "shlx eax, dword ptr [ecx], ebx".to_string(), - &[0xc4, 0xe2, 0xe0, 0xf2, 0x01] => "andn eax, ebx, dword ptr [ecx]".to_string(), - &[0xc4, 0xe2, 0xf8, 0xf3, 0x09] => "blsr eax, dword ptr [ecx]".to_string(), - &[0xc4, 0xe2, 0xf8, 0xf3, 0x11] => "blsmsk eax, dword ptr [ecx]".to_string(), - &[0xc4, 0xe2, 0xf8, 0xf3, 0x19] => "blsi eax, dword ptr [ecx]".to_string(), - &[0xc4, 0xe2, 0xe0, 0xf7, 0x01] => "bextr eax, dword ptr [ecx], ebx".to_string(), - &[0xc4, 0xc3, 0x39, 0x0c, 0xca, 0x77] => "vblendps xmm1, xmm0, xmm2, 77h".to_string(), - // just have to decide we know better than dumpbin: masm does not accept an absolute far call/far jump destination, - // so we definitely can't round-trip by following dumpbin. dumpbin doesn't use hex suffixes here, instead printing - // "6655:44332211" as the destination. this is technically not ambiguous since `:` is a hint that this is a absolute - // far address and that both numbers are base 16, but that's ... subtle and easy to miss. so add some h's. - &[0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66] => "call 6655h:44332211h".to_string(), - &[0x66, 0x9a, 0x11, 0x22, 0x33, 0x44] => "call 4433h:2211h".to_string(), - // terribly unfortunate: masm reasonably encodes this instruction as a 32-bit offset, which causes yax to spell the offset - // as 0000AA55 instead of AA55. override dumpbin to use the (worse) encoding for the sake of matching with the test. - &[0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa] => "mov cx, word ptr [0AA55h]".to_string(), - // inexplicably, dumpbin spells this "aamb", for .. ascii adjust after multiplcation (byte) ??? - // additionally, masm does not accept an integer operand: it only supports `aam 10` as in d4 0a. so.. bummer. - &[0xd4, 0x01] => "aam 1".to_string(), - // same as above - &[0xd5, 0x01] => "aad 1".to_string(), - // dunno why dumpbin doesn't like this one.. - &[0xc5, 0b1_1111_100, 0x2e, 0b00_001_010] => "vucomiss xmm1, dword ptr [edx]".to_string(), - &[0xc5, 0b1_1111_100, 0x2f, 0b00_001_010] => "vcomiss xmm1, dword ptr [edx]".to_string(), - other => { - let dumpbin_res = tools::dumpbin(other, CodeModel::Bits32); - match dumpbin_res { - Ok(text) => text, - Err(e) => { - if vex_prefixed { - // this might be an instance of dumpbin not being great: consider vucomiss, as in "c5f82eca". - return; - } - - // otherwise: unexpected, what da heck. - panic!("{}: {e:?}", format!("could not get an instruction after dumpbining {other:x?}")); + let external_masm_ish = { + let dumpbin_res = tools::dumpbin(bytes, CodeModel::Bits32); + match dumpbin_res { + Ok(text) => text, + Err(e) => { + if vex_prefixed { + // this might be an instance of dumpbin not being great: consider vucomiss, as in "c5f82eca". + return; } + + // otherwise: unexpected, what da heck. + panic!("{}: {e:?}", format!("could not get an instruction after dumpbining {bytes:x?}")); } } }; @@ -633,61 +375,7 @@ fn check_decodes(decoder: &InstDecoder, decode_ok: bool, bytes: &[u8], disasm: & } let displayed_masm = decoder.decode_slice(bytes).expect("can decode").display_with(DisplayStyle::Masm).to_string(); - let masm_as_bytes = match displayed_masm.as_str() { - "nop zmmword ptr [eax]" => vec![0x0f, 0x18, 0x20], // MASM doesn't accept `nop zmmword ..`, no way to round trip 0f1820 - "sysenter" => vec![0x0f, 0x34], // MASM doesn't accept sysenter, but dumpbin prints it. - "sysexit" => vec![0x0f, 0x35], // MASM doesn't accept sysexit, but dumpbin prints it. - // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0. - "ud0 eax, ecx" => vec![0x66, 0x0f, 0xff, 0xc1], - "ud0 eax, dword ptr [ecx]" => vec![0x66, 0x0f, 0xff, 0x01], - "ud0 ebp, dword ptr [ebx - 54h]" => vec![0x0f, 0xff, 0x6b, 0xac], - // masm seems to not know about fstpnce/fstp1 at all. since this is an undocumented instruction anyway, assemble it ourselves.. - "fstpnce st(3), st(0)" => vec![0xd9, 0xdb], - // masm inserts a wait prefix here.. - "feni" => vec![0xdb, 0xe0], - "fdisi" => vec![0xdb, 0xe1], - "fsetpm" => vec![0xdb, 0xe4], - // masm doesn't know how to assemble address-size overrides..? - // > cannot use 16-bit register with a 32-bit address - "aesimc xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdb, 0x0f], - "aesenc xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdc, 0x0f], - "aesenclast xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdd, 0x0f], - "aesdec xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xde, 0x0f], - "aesdeclast xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdf, 0x0f], - "blendpd xmm7, xmmword ptr cs:[bx + si + 2FF0h], 7Ch" => vec![0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c], - // more - "movdir64b bp, zmmword ptr es:[di + 80Bh]" => vec![0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08], - "lss eax, fword ptr [bx + si]" => vec![0x67, 0x0f, 0xb2, 0x00], - "lddqu xmm4, xmmword ptr es:[bx + si + 5F1Bh]" => vec![0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f], - "lods byte ptr [si]" => vec![0x67, 0xac], - "scas byte ptr es:[di]" => vec![0x67, 0xae], - "rep movs byte ptr es:[di], byte ptr [si]" => vec![0x67, 0xf3, 0xa4], - "rep movs dword ptr es:[di], dword ptr [si]" => vec![0x67, 0xf3, 0xa5], - "movapd xmm0, xmmword ptr [bx + si]" => vec![0x67, 0x66, 0x0f, 0x28, 0x00], - "cvtdq2ps xmm0, xmmword ptr [bx + di]" => vec![0x67, 0x0f, 0x5b, 0x01], - // i tried really hard to find a MASM syntax for absolute far call/jump destinations! i turned up a bunch of blanks. - // https://mirrors.nycbug.org/pub/The_Unix_Archive/Unix_Usenet/comp.unix.xenix/1989-February/001910.html is the funniest, - // given that it is OS hackers experiencing the same issue and concluding they should emit the bytes themselves. - // so yax will emit something like bindump would, and we'll just swallow the text as if masm worked like i'd hope.. - "call 6655h:44332211h" => vec![0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66], - "call 4433h:2211h" => vec![0x66, 0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66], - // terribly unfortunate: masm reasonably encodes this instruction as a 32-bit offset, which causes yax to spell the offset - // as 0000AA55 instead of AA55. override dumpbin to use the (worse) encoding for the sake of matching with the test. - "mov cx, word ptr [0AA55h]" => vec![0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa], - // same deal, different instruction. - "mov al, byte ptr [0B0C0h]" => vec![0x67, 0xa0, 0xc0, 0xb0], - "mov eax, dword ptr [0000B0C0h]" => vec![0x67, 0xa1, 0xc0, 0xb0], - // if you operand-size override pushad/popad you get the 16-bit forms, pusha/popa. dumpbin reflects this, but in 32-bit mode - // accepts either as a way of spelling pushad/popad. override it here for tests to match up, but this is an unfortunately - // disastrous difference in round-tripping.. - "pusha" => vec![0x66, 0x60], - "popa" => vec![0x66, 0x61], - // masm does not accept an integer operand: it only supports `aam 10` as in d4 0a. so.. bummer. - "aam 1" => vec![0xd4, 0x01], - // same as above - "aad 1" => vec![0xd5, 0x01], - _other => { tools::masm(&displayed_masm, CodeModel::Bits32).expect("can assemble") } - }; + let masm_as_bytes = tools::masm(&displayed_masm, CodeModel::Bits32).expect("can assemble"); let masm_roundtrip = decoder.decode_slice(&masm_as_bytes).expect("can decode").display_with(DisplayStyle::Masm).to_string(); // chasing down differences in how dumpbin/yax write "qword" is not useful to anyone.. let external_masm_ish = external_masm_ish.replace(" mmword ", " qword "); |
