about | summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author iximeow <me@iximeow.net> 2026-07-05 01:31:55 +0000
committer iximeow <me@iximeow.net> 2026-07-05 01:31:55 +0000
commit a4e667b20eef547bfd010b8b112710120f64a0b8 (patch)
tree d8f605f6cbf7f1b2d83fc4d935815c2236dda447 /test
parent 97dbde69221127d2552cb4fc442b90a2c0ff2a95 (diff)
shove all the masm input/output patching into the masm tools bits..
Diffstat (limited to 'test')
-rw-r--r-- test/long_mode/mod.rs 343
-rw-r--r-- test/protected_mode/mod.rs 336
-rw-r--r-- test/tools.rs 671
3 files changed, 687 insertions, 663 deletions
diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs
index fdc4288..ff9020a 100644
--- a/test/long_mode/mod.rs
+++ b/test/long_mode/mod.rs
@@ -327,346 +327,11 @@ fn check_decodes(decoder: &InstDecoder, decode_ok: bool, bytes: &[u8], disasm: &
// `mlexe`, and `dumpbin.exe`.
// match against some testcases that are known to be wrong by MASM/dumpbin.
- let external_masm_ish = match bytes {
- &[0xf1] => "int 1".to_string(), // dumpbin does not know how to decode f1...
- &[0x4f, 0xe5, 0x99] => "in eax, 99h".to_string(), // this is a MASM/dumpbin bug. see notes on testcase.
- &[0x4f, 0xe7, 0x99] => "out 99h, eax".to_string(), // this is a MASM/dumpbin bug. see notes on testcase.
- // dumpbin prints the instruction as if it was encoded in 32-bit form regardless of object file, so overrule it.
- &[0xf3, 0x0f, 0xc7, 0xfd] => "rdpid rbp".to_string(),
- &[0x0f, 0x18, 0xc0] => "nop eax".to_string(), // dumpbin would love to call this "prefetchnta rax" ???
- &[0x0f, 0x18, 0xcc] => "nop esp".to_string(), // dumpbin would love to call this "prefetchnta rsp" ???
- &[0x0f, 0x18, 0x20] => "nop zmmword ptr [rax]".to_string(), // getting around dumpbin knowing about prefetchrst2..
- &[0x4f, 0x0f, 0x18, 0x20] => "nop zmmword ptr [r8]".to_string(), // getting around dumpbin knowing about prefetchrst2..
- &[0x2e, 0x36, 0x47, 0x0f, 0x18, 0xe7] => "nop r15d".to_string(), // getting around dumpbin knowing about prefetchrst2..
- &[0x0f, 0x19, 0x20] => "nop dword ptr [rax]".to_string(), // dumpbin doesn't know about 0f19..
- &[0x0f, 0x1a, 0x20] => "nop dword ptr [rax]".to_string(), // dumpbin wants to call this bndldx, yax doesn't do MPX yet
- &[0x0f, 0x1b, 0x20] => "nop dword ptr [rax]".to_string(), // dumpbin wants to call this bndstx, yax doesn't do MPX yet
- &[0x0f, 0x1c, 0x20] => "nop dword ptr [rax]".to_string(), // dumpbin doesn't know about 0f1c..
- &[0x0f, 0x1d, 0x20] => "nop dword ptr [rax]".to_string(), // dumpbin doesn't know about 0f1d..
- &[0x0f, 0x1e, 0x20] => "nop dword ptr [rax]".to_string(), // dumpbin doesn't know about 0f1e..
- &[0xf2, 0x66, 0x66, 0x4d, 0x0f, 0x10, 0xc0] => "movsd xmm8, xmm8".to_string(), // dumpbin does not love the prefixes
- &[0x4f, 0x66, 0x0f, 0x28, 0x00] => "movapd xmm0, xmmword ptr [rax]".to_string(), // dumpbin does not love the prefixes
- &[0x67, 0x4f, 0x66, 0x0f, 0x28, 0x00] => "movapd xmm0, xmmword ptr [eax]".to_string(), // dumpbin does not love the prefixes
- &[0xf3, 0x0f, 0x1e, 0xfc] => "nop".to_string(), // dumpbin does not tolerate this at all, redirect into a boring nop.
- &[0x4d, 0x0f, 0x43, 0xec] => "cmovnb r13, r12".to_string(), // dumpbin writes it "cmovae" instead of yax's cmovnb.
- &[0x65, 0x4c, 0x89, 0x04, 0x25, 0xa8, 0x01, 0x00, 0x00] => {
- "mov qword ptr gs:[000001A8h], r8".to_string() // dumpbin uses %016 formatting, masm happily accepts shorter.
- },
- &[0x0f, 0xbe, 0x83, 0xb4, 0x00, 0x00, 0x00] => {
- "movsx eax, byte ptr [rbx + 0B4h]".to_string() // dumpbin uses %016 formatting, masm happily accepts shorter.
- },
- &[0x46, 0x63, 0xc1] => "movsxd r8, ecx".to_string(), // dumpbin writes 32-bit destinations for this, but masm accepts either?
- &[0x62, 0xd2, 0x7e, 0x28, 0x3a, 0xca] => {
- "vpbroadcastmw2d ymm1, k2".to_string() // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
- },
- &[0x62, 0xd2, 0x7e, 0x08, 0x28, 0xc2] => {
- "vpmovm2b xmm0, k2".to_string() // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
- },
- &[0x0f, 0x01, 0x51, 0xff] => {
- "lgdt fword ptr [rcx - 1]".to_string() // dumpbin prints this as "tbyte", which masm does not accept.
- },
- &[0x0f, 0x01, 0x59, 0xff] => {
- "lidt fword ptr [rcx - 1]".to_string() // dumpbin prints this as "tbyte", which masm does not accept.
- },
- &[0x2e, 0x67, 0x65, 0x2e, 0x46, 0x0f, 0x01, 0xff] => {
- "tlbsync".to_string() // dumpbin does not exactly tolerate the extra prefixes.
- },
- &[0x0f, 0x0d, 0x00] => {
- // dumpbin interprets this as the 3DNow!-style PREFETCH instruction, but we're definitely not 3dnow..
- "nop zmmword ptr [rax]".to_string()
- }
- &[0xf2, 0x41, 0x0f, 0xbc, 0xd3] => {
- // masm doesn't like the extra prefix
- "bsf edx, r11d".to_string()
- }
- &[0x4f, 0x4e, 0x00, 0xcc] => {
- // masm doesn't like the extra prefix
- "add spl, r9b".to_string()
- }
- &[0xc4, 0x03, 0x3d, 0x0a, 0xca, 0x77] => {
- // dumpbin can't deal with this instruction..
- "vroundss xmm9, xmm8, xmm10, 77h".to_string()
- }
- &[0xc4, 0x03, 0x3d, 0x0b, 0xca, 0x77] => {
- // dumpbin can't deal with this instruction..
- "vroundsd xmm9, xmm8, xmm10, 77h".to_string()
- }
- &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc] => {
- // dumpbin really wants to use mmword here, but i really don't.
- "movq xmm11, qword ptr [r12 + r11 * 4 - 334455CCh]".to_string()
- }
- &[0x66, 0x0f, 0xd6, 0x01] => {
- // dumpbin really wants to use mmword here, but i really don't.
- "movq qword ptr [rcx], xmm0".to_string()
- }
- &[0x66, 0x4f, 0x0f, 0xd7, 0xc1] => {
- // yax bug? default operand size is 64-bit in 64-bit mode, so the register should be r8?
- "pmovmskb r8d, xmm9".to_string()
- }
- // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
- &[0x66, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(),
- &[0xf2, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(),
- &[0xf3, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(),
- &[0x66, 0x0f, 0xff, 0x01] => "ud0 eax, dword ptr [rcx]".to_string(),
- &[0x66, 0x4f, 0x0f, 0xff, 0xc1] => "ud0 r8d, r9d".to_string(),
- &[0x4c, 0x0f, 0xff, 0x6b, 0xac] => "ud0 r13d, dword ptr [rbx - 54h]".to_string(),
- // dumpbin does not tolerate the pointless rex prefix.
- &[0x4f, 0x66, 0x0f, 0x2a, 0xcf] => "cvtpi2pd xmm1, mm7".to_string(),
- // dumpbin does not tolerate the pointless rex prefix.
- &[0x4f, 0xf3, 0x0f, 0x2a, 0xcf] => "cvtsi2ss xmm1, edi".to_string(),
- // dumpbin does not tolerate the pointless rex prefix.
- &[0x4f, 0xf2, 0x0f, 0x2a, 0xcf] => "cvtsi2sd xmm1, edi".to_string(),
- // dumpbin does not tolerate the pointless rex prefix.
- &[0x4f, 0xf2, 0x0f, 0x2a, 0x00] => "cvtsi2sd xmm0, dword ptr [rax]".to_string(),
- // dumpbin does not tolerate the pointless rex prefix.
- &[0x4f, 0xf3, 0x0f, 0x2a, 0x00] => "cvtsi2ss xmm0, dword ptr [rax]".to_string(),
- // dumpbin does not tolerate the pointless rex prefix.
- &[0x4f, 0x66, 0x0f, 0x2a, 0x00] => "cvtpi2pd xmm0, mmword ptr [rax]".to_string(),
- // dumpbin does not tolerate the pointless prefixes.
- &[0x36, 0x36, 0x2e, 0x0f, 0x38, 0xf9, 0x55, 0x3e] => "movdiri dword ptr [rbp + 3Eh], edx".to_string(),
- // dumpbin does not tolerate the pointless prefixes.
- &[0x36, 0x26, 0x66, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => "movdir64b rbp, zmmword ptr [rbp + 729080Bh]".to_string(),
- // dumpbin does not tolerate the pointless prefixes.
- &[0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => "movdir64b ebp, zmmword ptr [ebp + 729080Bh]".to_string(),
- // dumpbin is super confused about the prefixing.
- &[0xf2, 0xf2, 0x2e, 0x36, 0x47, 0x0f, 0x38, 0xf8, 0x83, 0x09, 0x1c, 0x9d, 0x3f] => "enqcmd r8, zmmword ptr [r11 + 3F9D1C09h]".to_string(),
- // and again.
- &[0x3e, 0x64, 0xf3, 0x64, 0x0f, 0x38, 0xf8, 0x72, 0x54] => "enqcmds rsi, zmmword ptr fs:[rdx + 54h]".to_string(),
- // dumpbin shows a ds prefix; this is tolerated by masm but is kinda incorrect in x86_64. either way masm accepts it though.
- &[0x3e, 0x4f, 0x0f, 0x38, 0xf6, 0x23] => "wrssq qword ptr [r11], r12".to_string(),
- // prefixes confuse dumpbin again
- &[0x66, 0xf3, 0x0f, 0x01, 0xe8] => "setssbsy".to_string(),
- // prefixes confuse dumpbin again
- &[0x66, 0xf3, 0x0f, 0x01, 0xea] => "saveprevssp".to_string(),
- // prefixes confuse dumpbin again
- &[0xf3, 0x66, 0x0f, 0x01, 0xe8] => "setssbsy".to_string(), // TODO: yax does not support `serialize` (yet)
- // prefixes confuse dumpbin again
- &[0xf3, 0x66, 0x0f, 0x01, 0xea] => "saveprevssp".to_string(),
- // prefixes confuse dumpbin again
- &[0xf3, 0x66, 0x0f, 0x01, 0x29] => "rstorssp qword ptr [rcx]".to_string(),
- // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
- &[0xf2, 0x0f, 0xc0, 0xcc] => "xadd ah, cl".to_string(),
- // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
- &[0xf3, 0x0f, 0xc0, 0xcc] => "xadd ah, cl".to_string(),
- // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
- &[0xf2, 0x0f, 0xc1, 0xcc] => "xadd esp, ecx".to_string(),
- // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
- &[0xf3, 0x0f, 0xc1, 0xcc] => "xadd esp, ecx".to_string(),
- // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
- &[0xf2, 0x0f, 0xc7, 0x0f] => "cmpxchg8b qword ptr [rdi]".to_string(),
- // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
- &[0xf3, 0x0f, 0xc7, 0x0f] => "cmpxchg8b qword ptr [rdi]".to_string(),
- // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
- &[0x4f, 0x0f, 0xc7, 0x0f] => "cmpxchg16b xmmword ptr [r15]".to_string(),
- // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
- &[0x66, 0x4f, 0x0f, 0xc7, 0x0f] => "cmpxchg16b xmmword ptr [r15]".to_string(),
- // dumpbin prints out repne prefix, which does not round-trip.
- &[0xf2, 0x4f, 0x0f, 0xc7, 0x0f] => "cmpxchg16b xmmword ptr [r15]".to_string(),
- // dumpbin prints out rep prefix, which does not round-trip.
- &[0xf3, 0x4f, 0x0f, 0xc7, 0x0f] => "cmpxchg16b xmmword ptr [r15]".to_string(),
- // prefixes again..
- &[0x3e, 0x64, 0x64, 0x66, 0x4e, 0x0f, 0x3a, 0xcf, 0xba, 0x13, 0x23, 0x04, 0xba, 0x6b] => "gf2p8affineinvqb xmm15, xmmword ptr fs:[rdx - 45FBDCEDh], 6Bh".to_string(),
- &[0x66, 0x36, 0x0f, 0x3a, 0xce, 0x8c, 0x56, 0x9e, 0x82, 0xd1, 0xbe, 0xad] => "gf2p8affineqb xmm1, xmmword ptr [rsi + rdx * 2 - 412E7D62h], 0ADh".to_string(),
- &[0xf3, 0x64, 0x2e, 0x65, 0x0f, 0x38, 0xdc, 0xe8] => "loadiwkey xmm5, xmm0".to_string(),
- // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
- &[0x66, 0x0f, 0x38, 0x80, 0x01] => "invept rax, xmmword ptr [rcx]".to_string(),
- // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
- &[0x66, 0x0f, 0x38, 0x81, 0x01] => "invvpid rax, xmmword ptr [rcx]".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- // (and we print jnb instead of jae)
- &[0x73, 0x31] => "jnb $+33h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x72, 0x5a] => "jb $+5Ch".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x72, 0xf0] => "jb $-0Eh".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe8, 0x01, 0x00, 0x00, 0x00] => "call $+6".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe8, 0x80, 0x00, 0x00, 0x00] => "call near ptr $+85h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe8, 0xff, 0xff, 0xff, 0xff] => "call $+4".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe9, 0x01, 0x00, 0x00, 0x00] => "jmp $+6".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative. there's also the near ptr nonsense..
- &[0xe9, 0x80, 0x00, 0x00, 0x00] => "jmp near ptr $+85h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe9, 0xff, 0xff, 0xff, 0xff] => "jmp $+4".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00] => "jna $+191h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x0f, 0x85, 0x3b, 0x25, 0x00, 0x00] => "jnz $+2541h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x74, 0x47] => "jz $+49h".to_string(),
- // dumpbin invents a label for laughs.
- &[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00] => "call qword ptr [$ + 24727Eh]".to_string(),
- // dumpbin uses a really wide displacement .. for laughs..
- &[0xff, 0x24, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => "jmp qword ptr [rcx * 8 + 1BCA070h]".to_string(),
- // dumpbin uses a really wide displacement .. for laughs..
- &[0xff, 0x14, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => "call qword ptr [rcx * 8 + 1BCA070h]".to_string(),
- // dumpbin bug: 66-prefixed jmp/call does not pick 16-bit registers
- &[0x66, 0xff, 0xe0] => "jmp rax".to_string(),
- // dumpbin bug: 66-prefixed jmp/call does not pick 16-bit registers
- &[0x66, 0xff, 0xd0] => "call rax".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe0, 0x12] => "loopnz $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe1, 0x12] => "loopz $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe2, 0x12] => "loop $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe3, 0x12] => "jrcxz $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe3, 0xf0] => "jrcxz $-0Eh".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x67, 0xe3, 0x12] => "jecxz $+15h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x67, 0xe3, 0xf0] => "jecxz $-0Dh".to_string(),
- // dumpbin dislikes prefixes.
- &[0x66, 0xf2, 0x0f, 0x79, 0xcf] => "insertq xmm1, xmm7".to_string(),
- // rip-rel: oh dear
- &[0xf6, 0x05, 0x2c, 0x9b, 0xff, 0xff, 0x01] => "test byte ptr [$ - 64D4h], 1".to_string(),
- // yax uses wider immediates
- &[0x3d, 0x01, 0xf0, 0xff, 0xff] => "cmp eax, 0FFFFFFFFFFFFF001h".to_string(),
- // dumpbin doesn't print the $ of rip-rel :(
- &[0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => "xor eax, dword ptr [$ + 12345678h]".to_string(),
- &[0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => "xor eax, dword ptr [rcx + 43650123h]".to_string(),
- &[0x48, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => "xor rax, qword ptr [$ + 12345678h]".to_string(),
- &[0x48, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => "xor rax, qword ptr [rcx + 43650123h]".to_string(),
- &[0x44, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => "xor r8d, dword ptr [$ + 12345678h]".to_string(),
- &[0x44, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => "xor r8d, dword ptr [rcx + 43650123h]".to_string(),
- &[0x45, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => "xor r8d, dword ptr [$ + 12345678h]".to_string(),
- &[0x45, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => "xor r8d, dword ptr [r9 + 43650123h]".to_string(),
- &[0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(),
- &[0x41, 0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(),
- &[0x33, 0x84, 0xa5, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [rbp + 44332211h]".to_string(),
- &[0x41, 0x33, 0x84, 0xa5, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [r13 + 44332211h]".to_string(),
- &[0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(),
- &[0x41, 0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(),
- &[0x42, 0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => "xor esi, dword ptr [r12 + 50403020h]".to_string(),
- &[0x43, 0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => "xor esi, dword ptr [r12 + 50403020h]".to_string(),
- &[0x42, 0x33, 0xb4, 0x25, 0x20, 0x30, 0x40, 0x50] => "xor esi, dword ptr [rbp + r12 + 50403020h]".to_string(),
- &[0x43, 0x33, 0xb4, 0x25, 0x20, 0x30, 0x40, 0x50] => "xor esi, dword ptr [r13 + r12 + 50403020h]".to_string(),
- // dumpbin gets the size wrong
- &[0x62, 0xf2, 0xfd, 0x0f, 0x8a, 0x62, 0xf2] => "vcompresspd xmmword ptr [rdx - 70h]{k7}, xmm4".to_string(),
- // TODO: yax doesn't know about rdssp{d,q}?
- &[0xf3, 0x0f, 0x1e, 0x0f] => "nop".to_string(),
- // yax won't mention the pointless repne prefix
- &[0xf2, 0x0f, 0x06] => "clts".to_string(),
- // yax won't mention the pointless repne prefix
- &[0xf2, 0x0f, 0x07] => "sysret".to_string(),
- // dumpbin spells this mmword
- &[0x0f, 0x6f, 0x00] => "movq mm0, qword ptr [rax]".to_string(),
- &[0x66, 0x2e, 0xf2, 0xf0, 0x0f, 0xbb, 0x13] => "xacquire lock btc word ptr [rbx], dx".to_string(),
- // dumpbin handles this right (like this!) but the output is weird to parse
- &[0x45, 0x66, 0x0f, 0x21, 0xc8] => "mov rax, dr1".to_string(),
- // dumpbin says repne, but that doesn't round-trip.
- &[0x45, 0xf2, 0x0f, 0x21, 0xc8] => "mov rax, dr1".to_string(),
- // dumpbin says rep, but that doesn't round-trip.
- &[0x45, 0xf3, 0x0f, 0x21, 0xc8] => "mov rax, dr1".to_string(),
- // dumpbin prints with more.. flourish
- &[0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00] => "nop word ptr [rax + rax]".to_string(),
- // disp is wider from dumpbin
- &[0x48, 0x8d, 0xa4, 0xc7, 0x20, 0x00, 0x00, 0x12] => "lea rsp, [rdi + rax * 8 + 12000020h]".to_string(),
- &[0x0f, 0xfc, 0xaf, 0x40, 0x38, 0x25, 0xbf] => "paddb mm5, mmword ptr [rdi - 40DAC7C0h]".to_string(),
- &[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56] => "xbegin $+56341216h".to_string(),
- &[0x66, 0xc7, 0xf8, 0x10, 0x12] => "xbegin $+1215h".to_string(),
- &[0xf2, 0xf3, 0x66, 0x65, 0x4f, 0x25, 0x9b, 0x5e, 0xda, 0x44] => "and rax, 44DA5E9Bh".to_string(),
- &[0x65, 0x66, 0x66, 0x64, 0x48, 0x0f, 0x38, 0xdb, 0x0f] => "aesimc xmm1, xmmword ptr fs:[rdi]".to_string(),
- &[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c] => "pi2fw mm6, qword ptr [rax - 5]".to_string(), // more prefix confusion..
- // prefixes confuse dumpbin, and dumpbin says "qword" where we use mmword. masm accepts either
- &[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4] => "pfmax mm4, mmword ptr [rbx + rdx * 4 - 6Dh]".to_string(),
- // dumpbin calls this movq?
- &[0x4f, 0x0f, 0x7e, 0xcf] => "movd r15, mm1".to_string(),
- // dumpbin shows this as a wide register but it doesn't *really* matter and yax uses 32-bit always.
- &[0x4f, 0x0f, 0xd7, 0xcf] => "pmovmskb r9d, mm7".to_string(),
- // dumpbin shows this as a non-rip-rel offset :(
- &[0x0f, 0xe5, 0x3d, 0xaa, 0xbb, 0xcc, 0x77] => "pmulhw mm7, qword ptr [$ + 77CCBBAAh]".to_string(),
- // dumpbin confused about prefixes once again
- &[0x66, 0x3e, 0x26, 0x2e, 0x2e, 0x0f, 0x38, 0x2a, 0x2b] => "movntdqa xmm5, xmmword ptr [rbx]".to_string(),
- // prefixes.. cs: isn't real in 64-bit mode
- &[0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c, 0xf0, 0x63] => "blendpd xmm7, xmmword ptr [eax - 0F83D010h], 63h".to_string(),
- // prefixes confuse dumpbin
- &[0x66, 0x66, 0x64, 0x3e, 0x0f, 0x38, 0x23, 0x9d, 0x69, 0x0f, 0xa8, 0x2d] => "pmovsxwd xmm3, qword ptr fs:[rbp + 2DA80F69h]".to_string(),
- // prefixes confuse dumpbin
- &[0x2e, 0x66, 0x26, 0x64, 0x49, 0x0f, 0x3a, 0x21, 0x0b, 0xb1] => "insertps xmm1, dword ptr fs:[r11], 0FFFFFFFFFFFFFFB1h".to_string(),
- // prefixes confuse dumpbin
- &[0x66, 0x26, 0x45, 0x0f, 0x3a, 0x42, 0x96, 0x74, 0x29, 0x96, 0xf9, 0x6a] => "mpsadbw xmm10, xmmword ptr [r14 - 669D68Ch], 6Ah".to_string(),
- // prefixes confuse dumpbin
- &[0x67, 0x26, 0x66, 0x65, 0x0f, 0x38, 0x3f, 0x9d, 0xcc, 0x03, 0xb3, 0xfa] => "pmaxud xmm3, xmmword ptr gs:[ebp - 54CFC34h]".to_string(),
- // prefixes confuse dumpbin
- &[0x67, 0x66, 0x65, 0x3e, 0x0f, 0x6d, 0xd1] => "punpckhqdq xmm2, xmm1".to_string(),
- // prefixes confuse dumpbin
- &[0x2e, 0x66, 0x40, 0x0f, 0x3a, 0x0d, 0x40, 0x2d, 0x57] => "blendpd xmm0, xmmword ptr [rax + 2Dh], 57h".to_string(),
- // prefixes confuse dumpbin
- &[0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f, 0xcd, 0xd7] => "lddqu xmm4, xmmword ptr [eax - 2832A0E5h]".to_string(),
- // prefixes confuse dumpbin
- &[0x2e, 0x3e, 0x66, 0x3e, 0x49, 0x0f, 0x3a, 0x41, 0x30, 0x48] => "dppd xmm6, xmmword ptr [r8], 48h".to_string(),
- // dumpbin prints the order backwards =|
- &[0x65, 0xf0, 0x87, 0x0f] => "lock xchg dword ptr gs:[rdi], ecx".to_string(),
- // displacement gets a bunch of extra zeroes
- &[0x66, 0x4e, 0x0f, 0x3a, 0x44, 0x88, 0xb3, 0xad, 0x26, 0x35, 0x75] => "pclmulqdq xmm9, xmmword ptr [rax + 3526ADB3h], 75h".to_string(),
- // dumpbin knows about "fstpnce" as "fstp1", but masm does not.
- // since this is an undocumented instruction anyway, decode it ourselves..
- &[0xd9, 0xdb] => "fstpnce st(3), st(0)".to_string(),
- // dumpbin calls this "fcom2", but it's just an undocumented fcom alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdc, 0xd3] => "fcom st(3)".to_string(),
- // dumpbin calls this "fcomp3", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdc, 0xdb] => "fcomp st(3)".to_string(),
- // dumpbin calls this "fxch4", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdd, 0xcb] => "fxch st(3)".to_string(),
- // dumpbin calls this "fcomp5", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xde, 0xd3] => "fcomp st(3)".to_string(),
- // dumpbin calls this "fxch7", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdf, 0xcb] => "fxch st(3)".to_string(),
- // dumpbin calls this "fstp8", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdf, 0xd3] => "fstp st(3)".to_string(),
- // dumpbin calls this "fstp9", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdf, 0xdb] => "fstp st(3)".to_string(),
- // dunno why dumpbin doesn't like this one..
- &[0xc5, 0b0_1111_100, 0x2e, 0b00_001_010] => "vucomiss xmm9, dword ptr [rdx]".to_string(),
- &[0xc5, 0b0_1111_100, 0x2f, 0b00_001_010] => "vcomiss xmm9, dword ptr [rdx]".to_string(),
- other => {
- tools::dumpbin(other, CodeModel::Bits64).unwrap_or_else(|e| {
- panic!("{}: {e:?}", format!("could not get an instruction after dumpbining {other:x?}"));
- })
- }
- };
+ let external_masm_ish = tools::dumpbin(bytes, CodeModel::Bits64).unwrap_or_else(|e| {
+ panic!("{}: {e:?}", format!("could not get an instruction after dumpbining {bytes:x?}"));
+ });
let displayed_masm = decoder.decode_slice(bytes).expect("can decode").display_with(DisplayStyle::Masm).to_string();
- let masm_as_bytes = match displayed_masm.as_str() {
- "nop zmmword ptr [rax]" => vec![0x0f, 0x18, 0x20], // MASM doesn't accept `nop zmmword ..`, no way to round trip 0f1820
- "nop zmmword ptr [r8]" => vec![0x41, 0x0f, 0x18, 0x20], // MASM doesn't accept `nop zmmword ..`, no way to round trip 410f1820
- "sysenter" => vec![0x0f, 0x34], // MASM doesn't accept sysenter, but dumpbin prints it.
- "sysexit" => vec![0x0f, 0x35], // MASM doesn't accept sysexit, but dumpbin prints it.
- "vpscatterdd dword ptr [r15 + xmm29]{k6}, xmm8" => vec![0x62, 0x12, 0x7d, 0x06, 0xa0, 0x04, 0x2f], // MASM ...??? assembles vpscatter wrong???
- "vpscatterdd dword ptr [r15 + xmm25]{k6}, xmm10" => vec![0x62, 0x12, 0x7d, 0x06, 0xa0, 0x14, 0x0f], // MASM ...??? assembles vpscatter wrong???
- "vpscatterdd dword ptr [r15 + ymm25]{k6}, ymm10" => vec![0x62, 0x12, 0x7d, 0x26, 0xa0, 0x14, 0x0f], // MASM ...??? assembles vpscatter wrong???
- "vpscatterdd dword ptr [r15 + zmm25]{k6}, zmm10" => vec![0x62, 0x12, 0x7d, 0x46, 0xa0, 0x14, 0x0f], // MASM ...??? assembles vpscatter wrong???
- "vpscatterdq qword ptr [r15 + xmm25]{k6}, xmm10" => vec![0x62, 0x12, 0xfd, 0x46, 0xa0, 0x14, 0x0f], // MASM ...??? assembles vpscatter wrong???
- "vpscatterqd dword ptr [r15 + ymm25]{k6}, ymm10" => vec![0x62, 0x12, 0x7d, 0x46, 0xa1, 0x14, 0x0f], // MASM ...??? assembles vpscatter wrong???
- "vpscatterqq qword ptr [r15 + zmm25]{k6}, zmm10" => vec![0x62, 0x12, 0xfd, 0x46, 0xa1, 0x14, 0x0f], // MASM ...??? assembles vpscatter wrong???
-/*
- "vpinsrb xmm9, xmm8, r10d, 77h" => vec![0xc4, 0x03, 0x39, 0x20, 0xca, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpinsrb xmm9, xmm8, byte ptr [r10], 77h" => vec![0xc4, 0x03, 0x39, 0x20, 0x0a, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpinsrd xmm9, xmm8, r10d, 77h" => vec![0xc4, 0x03, 0x39, 0x22, 0xca, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpinsrd xmm9, xmm8, dword ptr [r10], 77h" => vec![0xc4, 0x03, 0x39, 0x22, 0x0a, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpinsrq xmm9, xmm8, r10, 77h" => vec![0xc4, 0x03, 0xb9, 0x22, 0xca, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpinsrq xmm9, xmm8, qword ptr [r10], 77h" => vec![0xc4, 0x03, 0xb9, 0x22, 0x0a, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpblendvb xmm9, xmm8, xmm10, xmm7" => vec![0xc4, 0x03, 0x39, 0x4c, 0xca, 0x77], // MASM ...??? assembles the extra register number bit wrong???
- "vpblendvb ymm9, ymm8, ymm10, ymm7" => vec![0xc4, 0x03, 0x3d, 0x4c, 0xca, 0x77], // MASM ...??? assembles the extra register number bit wrong???
-*/
- // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
- "ud0 eax, ecx" => vec![0x66, 0x0f, 0xff, 0xc1],
- "ud0 eax, dword ptr [rcx]" => vec![0x66, 0x0f, 0xff, 0x01],
- "ud0 r8d, r9d" => vec![0x66, 0x4f, 0x0f, 0xff, 0xc1],
- "ud0 r13d, dword ptr [rbx - 54h]" => vec![0x4c, 0x0f, 0xff, 0x6b, 0xac],
- // masm seems to not know about fstpnce/fstp1 at all. since this is an undocumented instruction anyway, assemble it ourselves..
- "fstpnce st(3), st(0)" => vec![0xd9, 0xdb],
- // masm inserts a wait prefix here..
- "feni" => vec![0xdb, 0xe0],
- "fdisi" => vec![0xdb, 0xe1],
- "fsetpm" => vec![0xdb, 0xe4],
- _other => { tools::masm(&displayed_masm, CodeModel::Bits64).expect("can assemble") }
- };
+ let masm_as_bytes = tools::masm(&displayed_masm, CodeModel::Bits64).expect("can assemble");
let masm_roundtrip = decoder.decode_slice(&masm_as_bytes).expect("can decode").display_with(DisplayStyle::Masm).to_string();
// chasing down differences in how dumpbin/yax write "qword" is not useful to anyone..
let external_masm_ish = external_masm_ish.replace(" mmword ", " qword ");
diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs
index 2b1cc70..0c7b8a5 100644
--- a/test/protected_mode/mod.rs
+++ b/test/protected_mode/mod.rs
@@ -333,276 +333,18 @@ fn check_decodes(decoder: &InstDecoder, decode_ok: bool, bytes: &[u8], disasm: &
let vex_prefixed = bytes[0] == 0xc4 || bytes[0] == 0xc5;
// match against some testcases that are known to be wrong by MASM/dumpbin.
- let external_masm_ish = match bytes {
- &[0xf1] => "int 1".to_string(), // dumpbin does not know how to decode f1...
- &[0xe5, 0x99] => "in eax, 99h".to_string(), // this is a MASM/dumpbin bug. see notes on testcase.
- &[0xe7, 0x99] => "out 99h, eax".to_string(), // this is a MASM/dumpbin bug. see notes on testcase.
- // dumpbin prints the instruction as if it was encoded in 32-bit form regardless of object file, so overrule it.
- &[0xf3, 0x0f, 0xc7, 0xfd] => "rdpid ebp".to_string(),
- &[0x0f, 0x18, 0xc0] => "nop eax".to_string(), // dumpbin would love to call this "prefetchnta eax" ???
- &[0x0f, 0x18, 0xcc] => "nop esp".to_string(), // dumpbin would love to call this "prefetchnta esp" ???
- &[0x0f, 0x18, 0x20] => "nop zmmword ptr [eax]".to_string(), // getting around dumpbin knowing about prefetchrst2..
- &[0x0f, 0x19, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f19..
- &[0x0f, 0x1a, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin wants to call this bndldx, yax doesn't do MPX yet
- &[0x0f, 0x1b, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin wants to call this bndstx, yax doesn't do MPX yet
- &[0x0f, 0x1c, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f1c..
- &[0x0f, 0x1d, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f1d..
- &[0x0f, 0x1e, 0x20] => "nop dword ptr [eax]".to_string(), // dumpbin doesn't know about 0f1e..
- &[0xf2, 0x66, 0x66, 0x0f, 0x10, 0xc0] => "movsd xmm0, xmm0".to_string(), // dumpbin does not love the prefixes
- &[0xf3, 0x0f, 0x1e, 0xfc] => "nop".to_string(), // dumpbin does not tolerate this at all, redirect into a boring nop.
- &[0x0f, 0x43, 0xec] => "cmovnb ebp, esp".to_string(), // dumpbin writes it "cmovae" instead of yax's cmovnb.
- &[0x2e, 0x36, 0x0f, 0x18, 0xe7] => "nop edi".to_string(), // dumpbin reports a mildly-confused prefetchrst2 rdi (even in 32-bit mode!)
- &[0x0f, 0xbe, 0x83, 0xb4, 0x00, 0x00, 0x00] => {
- "movsx eax, byte ptr [ebx + 0B4h]".to_string() // dumpbin uses %016 formatting, masm happily accepts shorter.
- },
- &[0x62, 0xd2, 0x7e, 0x28, 0x3a, 0xca] => {
- "vpbroadcastmw2d ymm1, k2".to_string() // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
- },
- &[0x62, 0xd2, 0x7e, 0x08, 0x28, 0xc2] => {
- "vpmovm2b xmm0, k2".to_string() // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
- },
- &[0x0f, 0x0d, 0x00] => {
- // dumpbin interprets this as the 3DNow!-style PREFETCH instruction, but we're definitely not 3dnow..
- "nop zmmword ptr [eax]".to_string()
- }
- &[0xc4, 0x03, 0x3d, 0x0a, 0xca, 0x77] => {
- // dumpbin can't deal with this instruction..
- "vroundss xmm9, xmm8, xmm10, 77h".to_string()
- }
- &[0xc4, 0x03, 0x3d, 0x0b, 0xca, 0x77] => {
- // dumpbin can't deal with this instruction..
- "vroundsd xmm9, xmm8, xmm10, 77h".to_string()
- }
- &[0x66, 0x0f, 0xd6, 0x01] => {
- // dumpbin really wants to use mmword here, but i really don't.
- "movq qword ptr [ecx], xmm0".to_string()
- }
- // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
- &[0x66, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(),
- &[0xf2, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(),
- &[0xf3, 0x0f, 0xff, 0xc1] => "ud0 eax, ecx".to_string(),
- &[0x66, 0x0f, 0xff, 0x01] => "ud0 eax, dword ptr [ecx]".to_string(),
- &[0x0f, 0xff, 0x6b, 0xac] => "ud0 ebp, dword ptr [ebx - 54h]".to_string(),
- // dumpbin does not tolerate the pointless prefixes.
- &[0x36, 0x36, 0x2e, 0x0f, 0x38, 0xf9, 0x55, 0x3e] => "movdiri dword ptr cs:[ebp + 3Eh], edx".to_string(),
- // dumpbin does not tolerate the pointless prefixes.
- &[0x36, 0x26, 0x66, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => "movdir64b ebp, zmmword ptr es:[ebp + 729080Bh]".to_string(),
- // dumpbin does not tolerate the pointless prefixes.
- &[0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08] => "movdir64b bp, zmmword ptr es:[di + 80Bh]".to_string(),
- // and again
- &[0xf2, 0xf2, 0x2e, 0x36, 0x0f, 0x38, 0xf8, 0x83, 0x09, 0x1c, 0x9d, 0x3f] => "enqcmd eax, zmmword ptr ss:[ebx + 3F9D1C09h]".to_string(),
- // and again.
- &[0x3e, 0x64, 0xf3, 0x64, 0x0f, 0x38, 0xf8, 0x72, 0x54] => "enqcmds esi, zmmword ptr fs:[edx + 54h]".to_string(),
- // prefixes confuse dumpbin again
- &[0x66, 0xf3, 0x0f, 0x01, 0xe8] => "setssbsy".to_string(),
- // prefixes confuse dumpbin again
- &[0x66, 0xf3, 0x0f, 0x01, 0xea] => "saveprevssp".to_string(),
- // prefixes confuse dumpbin again
- &[0xf3, 0x66, 0x0f, 0x01, 0xe8] => "setssbsy".to_string(), // TODO: yax does not support `serialize` (yet)
- // prefixes confuse dumpbin again
- &[0xf3, 0x66, 0x0f, 0x01, 0xea] => "saveprevssp".to_string(),
- // prefixes confuse dumpbin again
- &[0xf3, 0x66, 0x0f, 0x01, 0x29] => "rstorssp qword ptr [ecx]".to_string(),
- // dumpbin writes the repne, but it doesn't do anything..
- &[0xf2, 0x0f, 0x21, 0xc8] => "mov eax, dr1".to_string(),
- // dumpbin writes the rep, but it doesn't do anything..
- &[0xf3, 0x0f, 0x21, 0xc8] => "mov eax, dr1".to_string(),
- // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
- &[0xf2, 0x0f, 0xc0, 0xcc] => "xadd ah, cl".to_string(),
- // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
- &[0xf3, 0x0f, 0xc0, 0xcc] => "xadd ah, cl".to_string(),
- // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
- &[0xf2, 0x0f, 0xc1, 0xcc] => "xadd esp, ecx".to_string(),
- // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
- &[0xf3, 0x0f, 0xc1, 0xcc] => "xadd esp, ecx".to_string(),
- // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
- &[0xf2, 0x0f, 0xc7, 0x0f] => "cmpxchg8b qword ptr [edi]".to_string(),
- // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
- &[0xf3, 0x0f, 0xc7, 0x0f] => "cmpxchg8b qword ptr [edi]".to_string(),
- // prefixes again..
- &[0x66, 0x36, 0x0f, 0x3a, 0xce, 0x8c, 0x56, 0x9e, 0x82, 0xd1, 0xbe, 0xad] => "gf2p8affineqb xmm1, xmmword ptr ss:[esi + edx * 2 - 412E7D62h], 0ADh".to_string(),
- &[0x3e, 0x64, 0x64, 0x66, 0x0f, 0x3a, 0xcf, 0xba, 0x13, 0x23, 0x04, 0xba, 0x6b] => "gf2p8affineinvqb xmm7, xmmword ptr fs:[edx - 45FBDCEDh], 6Bh".to_string(),
- &[0xf3, 0x64, 0x2e, 0x65, 0x0f, 0x38, 0xdc, 0xe8] => "loadiwkey xmm5, xmm0".to_string(),
- // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
- &[0x66, 0x0f, 0x38, 0x80, 0x01] => "invept eax, xmmword ptr [ecx]".to_string(),
- // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
- &[0x66, 0x0f, 0x38, 0x81, 0x01] => "invvpid eax, xmmword ptr [ecx]".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- // (and we print jnb instead of jae)
- &[0x73, 0x31] => "jnb $+33h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x72, 0x5a] => "jb $+5Ch".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x72, 0xf0] => "jb $-0Eh".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe8, 0x01, 0x00, 0x00, 0x00] => "call $+6".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe8, 0x80, 0x00, 0x00, 0x00] => "call near ptr $+85h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe8, 0xff, 0xff, 0xff, 0xff] => "call $+4".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe9, 0x01, 0x00, 0x00, 0x00] => "jmp $+6".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative. there's also the near ptr nonsense..
- &[0xe9, 0x80, 0x00, 0x00, 0x00] => "jmp near ptr $+85h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe9, 0xff, 0xff, 0xff, 0xff] => "jmp $+4".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00] => "jna $+191h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x0f, 0x85, 0x3b, 0x25, 0x00, 0x00] => "jnz $+2541h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x74, 0x47] => "jz $+49h".to_string(),
- // dumpbin prints a ds: since this is an absolute address..
- &[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00] => "call dword ptr [0024727Eh]".to_string(),
- // dumpbin uses a really wide displacement .. for laughs..
- &[0xff, 0x24, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => "jmp dword ptr [ecx * 8 + 1BCA070h]".to_string(),
- // dumpbin uses a really wide displacement .. for laughs..
- &[0xff, 0x14, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => "call dword ptr [ecx * 8 + 1BCA070h]".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe0, 0x12] => "loopnz $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe1, 0x12] => "loopz $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe2, 0x12] => "loop $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe3, 0x12] => "jecxz $+14h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0xe3, 0xf0] => "jecxz $-0Eh".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x67, 0xe3, 0x12] => "jcxz $+15h".to_string(),
- // dumpbin uses absolute branch destinations, but yax uses relative.
- &[0x67, 0xe3, 0xf0] => "jcxz $-0Dh".to_string(),
- // dumpbin dislikes prefixes.
- &[0x66, 0xf2, 0x0f, 0x79, 0xcf] => "insertq xmm1, xmm7".to_string(),
- &[0xf6, 0x05, 0x2c, 0x9b, 0xff, 0xff, 0x01] => "test byte ptr [0FFFF9B2Ch], 1".to_string(),
- // yax uses wider immediates
- &[0x3d, 0x01, 0xf0, 0xff, 0xff] => "cmp eax, 0FFFFF001h".to_string(),
- // dumpbin gets the size wrong
- &[0x62, 0xf2, 0xfd, 0x0f, 0x8a, 0x62, 0xf2] => "vcompresspd xmmword ptr [edx - 70h]{k7}, xmm4".to_string(),
- // TODO: yax doesn't know about rdssp{d,q}?
- &[0xf3, 0x0f, 0x1e, 0x0f] => "nop".to_string(),
- // yax won't mention the pointless repne prefix
- &[0xf2, 0x0f, 0x06] => "clts".to_string(),
- // yax won't mention the pointless repne prefix
- &[0xf2, 0x0f, 0x07] => "sysret".to_string(),
- // dumpbin spells this mmword
- &[0x0f, 0x6f, 0x00] => "movq mm0, qword ptr [eax]".to_string(),
- &[0x66, 0x2e, 0xf2, 0xf0, 0x0f, 0xbb, 0x13] => "xacquire lock btc word ptr cs:[ebx], dx".to_string(),
- // dumpbin prints with more.. flourish
- &[0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00] => "nop word ptr cs:[eax + eax]".to_string(),
- // disp is wider from dumpbin
- &[0x0f, 0xfc, 0xaf, 0x40, 0x38, 0x25, 0xbf] => "paddb mm5, mmword ptr [edi - 40DAC7C0h]".to_string(),
- &[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56] => "xbegin $+56341216h".to_string(),
- &[0x66, 0xc7, 0xf8, 0x10, 0x12] => "xbegin $+1215h".to_string(),
- &[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c] => "pi2fw mm6, qword ptr ss:[eax - 5]".to_string(), // more prefix confusion..
- // prefixes confuse dumpbin, and dumpbin says "qword" where we use mmword. masm accepts either
- &[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4] => "pfmax mm4, mmword ptr cs:[ebx + edx * 4 - 6Dh]".to_string(),
- // dumpbin shows this as a non-rip-rel offset :(
- &[0x0f, 0xe5, 0x3d, 0xaa, 0xbb, 0xcc, 0x77] => "pmulhw mm7, qword ptr [77CCBBAAh]".to_string(),
- // dumpbin confused about prefixes once again
- &[0x66, 0x3e, 0x26, 0x2e, 0x2e, 0x0f, 0x38, 0x2a, 0x2b] => "movntdqa xmm5, xmmword ptr cs:[ebx]".to_string(),
- // prefixes.. cs: isn't real in 64-bit mode
- &[0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c] => "blendpd xmm7, xmmword ptr cs:[bx + si + 2FF0h], 7Ch".to_string(),
- // prefixes confuse dumpbin
- &[0x66, 0x66, 0x64, 0x3e, 0x0f, 0x38, 0x23, 0x9d, 0x69, 0x0f, 0xa8, 0x2d] => "pmovsxwd xmm3, qword ptr [ebp + 2DA80F69h]".to_string(),
- // prefixes confuse dumpbin
- &[0x2e, 0x66, 0x26, 0x64, 0x0f, 0x3a, 0x21, 0x0b, 0xb1] => "insertps xmm1, dword ptr fs:[ebx], 0B1h".to_string(),
- // prefixes confuse dumpbin
- &[0x66, 0x26, 0x0f, 0x3a, 0x42, 0x96, 0x74, 0x29, 0x96, 0xf9, 0x6a] => "mpsadbw xmm2, xmmword ptr es:[esi - 669D68Ch], 6Ah".to_string(),
- // prefixes confuse dumpbin
- &[0x67, 0x26, 0x66, 0x65, 0x0f, 0x38, 0x3f, 0x9d, 0xcc, 0x03] => "pmaxud xmm3, xmmword ptr gs:[di + 3CCh]".to_string(),
- // prefixes confuse dumpbin
- &[0x67, 0x66, 0x65, 0x3e, 0x0f, 0x6d, 0xd1] => "punpckhqdq xmm2, xmm1".to_string(),
- // prefixes confuse dumpbin
- &[0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f] => "lddqu xmm4, xmmword ptr es:[bx + si + 5F1Bh]".to_string(),
- // prefixes confuse dumpbin
- &[0x2e, 0x3e, 0x66, 0x3e, 0x0f, 0x3a, 0x41, 0x30, 0x48] => "dppd xmm6, xmmword ptr [eax], 48h".to_string(),
- // again prefixes confuse dumpbin
- &[0x65, 0x66, 0x66, 0x64, 0x0f, 0x38, 0xdb, 0x0f] => "aesimc xmm1, xmmword ptr fs:[edi]".to_string(),
- // dumpbin prints the order backwards =|
- &[0x65, 0xf0, 0x87, 0x0f] => "lock xchg dword ptr gs:[edi], ecx".to_string(),
- // dumpbin knows about "fstpnce" as "fstp1", but masm does not.
- // since this is an undocumented instruction anyway, decode it ourselves..
- &[0xd9, 0xdb] => "fstpnce st(3), st(0)".to_string(),
- // dumpbin calls this "fcom2", but it's just an undocumented fcom alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdc, 0xd3] => "fcom st(3)".to_string(),
- // dumpbin calls this "fcomp3", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdc, 0xdb] => "fcomp st(3)".to_string(),
- // dumpbin calls this "fxch4", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdd, 0xcb] => "fxch st(3)".to_string(),
- // dumpbin calls this "fcomp5", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xde, 0xd3] => "fcomp st(3)".to_string(),
- // dumpbin calls this "fxch7", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdf, 0xcb] => "fxch st(3)".to_string(),
- // dumpbin calls this "fstp8", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdf, 0xd3] => "fstp st(3)".to_string(),
- // dumpbin calls this "fstp9", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
- &[0xdf, 0xdb] => "fstp st(3)".to_string(),
- &[0xf2, 0x0f, 0xbc, 0xd3] => "bsf edx, ebx".to_string(),
- // mov abs in 32-bit mode gets a ds: prefix even though that's the default. masm does not need this prefix, so we round-trip fine without it.
- &[0xa0, 0x93, 0x62, 0xc4, 0x00] => "mov al, byte ptr [00C46293h]".to_string(),
- &[0x67, 0xa0, 0x93, 0x62] => "mov al, byte ptr [00006293h]".to_string(),
- &[0xa1, 0x93, 0x62, 0xc4, 0x00] => "mov eax, dword ptr [00C46293h]".to_string(),
- &[0x67, 0xa1, 0x93, 0x62] => "mov eax, dword ptr [00006293h]".to_string(),
- &[0xa2, 0x93, 0x62, 0xc4, 0x00] => "mov byte ptr [00C46293h], al".to_string(),
- &[0x67, 0xa2, 0x93, 0x62] => "mov byte ptr [00006293h], al".to_string(),
- &[0xa3, 0x93, 0x62, 0xc4, 0x00] => "mov dword ptr [00C46293h], eax".to_string(),
- &[0x67, 0xa3, 0x93, 0x62] => "mov dword ptr [00006293h], eax".to_string(),
- &[0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => "xor eax, dword ptr [12345678h]".to_string(),
- &[0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(),
- &[0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => "xor eax, dword ptr [44332211h]".to_string(),
- &[0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => "xor esi, dword ptr [50403020h]".to_string(),
- &[0xa0, 0xc0, 0xb0, 0xa0, 0x90] => "mov al, byte ptr [90A0B0C0h]".to_string(),
- &[0x67, 0xa0, 0xc0, 0xb0] => "mov al, byte ptr [0B0C0h]".to_string(),
- &[0x67, 0xa1, 0xc0, 0xb0] => "mov eax, dword ptr [0000B0C0h]".to_string(),
- &[0x66, 0x67, 0xa1, 0xc0, 0xb0] => "mov ax, word ptr [0000B0C0h]".to_string(),
- // same for wrssd
- &[0x3e, 0x0f, 0x38, 0xf6, 0x23] => "wrssd dword ptr [ebx], esp".to_string(),
- // dumpbin believes that rex.w works even in 32-bit code, thus prints `rorx rax, ..`. haha what a dingus
- &[0xc4, 0xe3, 0xfb, 0xf0, 0x01, 0x05] => "rorx eax, dword ptr [ecx], 5".to_string(),
- &[0xc4, 0xe2, 0xe3, 0xf5, 0x07] => "pdep eax, ebx, dword ptr [edi]".to_string(),
- &[0xc4, 0xe2, 0xe3, 0xf6, 0x07] => "mulx eax, ebx, dword ptr [edi]".to_string(),
- &[0xc4, 0xe2, 0xe3, 0xf7, 0x01] => "shrx eax, dword ptr [ecx], ebx".to_string(),
- &[0xc4, 0xe2, 0xe2, 0xf5, 0x07] => "pext eax, ebx, dword ptr [edi]".to_string(),
- &[0xc4, 0xe2, 0xe2, 0xf7, 0x01] => "sarx eax, dword ptr [ecx], ebx".to_string(),
- &[0xc4, 0xe2, 0xe0, 0xf5, 0x07] => "bzhi eax, dword ptr [edi], ebx".to_string(),
- &[0xc4, 0xe2, 0xe1, 0xf7, 0x01] => "shlx eax, dword ptr [ecx], ebx".to_string(),
- &[0xc4, 0xe2, 0xe0, 0xf2, 0x01] => "andn eax, ebx, dword ptr [ecx]".to_string(),
- &[0xc4, 0xe2, 0xf8, 0xf3, 0x09] => "blsr eax, dword ptr [ecx]".to_string(),
- &[0xc4, 0xe2, 0xf8, 0xf3, 0x11] => "blsmsk eax, dword ptr [ecx]".to_string(),
- &[0xc4, 0xe2, 0xf8, 0xf3, 0x19] => "blsi eax, dword ptr [ecx]".to_string(),
- &[0xc4, 0xe2, 0xe0, 0xf7, 0x01] => "bextr eax, dword ptr [ecx], ebx".to_string(),
- &[0xc4, 0xc3, 0x39, 0x0c, 0xca, 0x77] => "vblendps xmm1, xmm0, xmm2, 77h".to_string(),
- // just have to decide we know better than dumpbin: masm does not accept an absolute far call/far jump destination,
- // so we definitely can't round-trip by following dumpbin. dumpbin doesn't use hex suffixes here, instead printing
- // "6655:44332211" as the destination. this is technically not ambiguous since `:` is a hint that this is a absolute
- // far address and that both numbers are base 16, but that's ... subtle and easy to miss. so add some h's.
- &[0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66] => "call 6655h:44332211h".to_string(),
- &[0x66, 0x9a, 0x11, 0x22, 0x33, 0x44] => "call 4433h:2211h".to_string(),
- // terribly unfortunate: masm reasonably encodes this instruction as a 32-bit offset, which causes yax to spell the offset
- // as 0000AA55 instead of AA55. override dumpbin to use the (worse) encoding for the sake of matching with the test.
- &[0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa] => "mov cx, word ptr [0AA55h]".to_string(),
- // inexplicably, dumpbin spells this "aamb", for .. ascii adjust after multiplcation (byte) ???
- // additionally, masm does not accept an integer operand: it only supports `aam 10` as in d4 0a. so.. bummer.
- &[0xd4, 0x01] => "aam 1".to_string(),
- // same as above
- &[0xd5, 0x01] => "aad 1".to_string(),
- // dunno why dumpbin doesn't like this one..
- &[0xc5, 0b1_1111_100, 0x2e, 0b00_001_010] => "vucomiss xmm1, dword ptr [edx]".to_string(),
- &[0xc5, 0b1_1111_100, 0x2f, 0b00_001_010] => "vcomiss xmm1, dword ptr [edx]".to_string(),
- other => {
- let dumpbin_res = tools::dumpbin(other, CodeModel::Bits32);
- match dumpbin_res {
- Ok(text) => text,
- Err(e) => {
- if vex_prefixed {
- // this might be an instance of dumpbin not being great: consider vucomiss, as in "c5f82eca".
- return;
- }
-
- // otherwise: unexpected, what da heck.
- panic!("{}: {e:?}", format!("could not get an instruction after dumpbining {other:x?}"));
+ let external_masm_ish = {
+ let dumpbin_res = tools::dumpbin(bytes, CodeModel::Bits32);
+ match dumpbin_res {
+ Ok(text) => text,
+ Err(e) => {
+ if vex_prefixed {
+ // this might be an instance of dumpbin not being great: consider vucomiss, as in "c5f82eca".
+ return;
}
+
+ // otherwise: unexpected, what da heck.
+ panic!("{}: {e:?}", format!("could not get an instruction after dumpbining {bytes:x?}"));
}
}
};
@@ -633,61 +375,7 @@ fn check_decodes(decoder: &InstDecoder, decode_ok: bool, bytes: &[u8], disasm: &
}
let displayed_masm = decoder.decode_slice(bytes).expect("can decode").display_with(DisplayStyle::Masm).to_string();
- let masm_as_bytes = match displayed_masm.as_str() {
- "nop zmmword ptr [eax]" => vec![0x0f, 0x18, 0x20], // MASM doesn't accept `nop zmmword ..`, no way to round trip 0f1820
- "sysenter" => vec![0x0f, 0x34], // MASM doesn't accept sysenter, but dumpbin prints it.
- "sysexit" => vec![0x0f, 0x35], // MASM doesn't accept sysexit, but dumpbin prints it.
- // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
- "ud0 eax, ecx" => vec![0x66, 0x0f, 0xff, 0xc1],
- "ud0 eax, dword ptr [ecx]" => vec![0x66, 0x0f, 0xff, 0x01],
- "ud0 ebp, dword ptr [ebx - 54h]" => vec![0x0f, 0xff, 0x6b, 0xac],
- // masm seems to not know about fstpnce/fstp1 at all. since this is an undocumented instruction anyway, assemble it ourselves..
- "fstpnce st(3), st(0)" => vec![0xd9, 0xdb],
- // masm inserts a wait prefix here..
- "feni" => vec![0xdb, 0xe0],
- "fdisi" => vec![0xdb, 0xe1],
- "fsetpm" => vec![0xdb, 0xe4],
- // masm doesn't know how to assemble address-size overrides..?
- // > cannot use 16-bit register with a 32-bit address
- "aesimc xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdb, 0x0f],
- "aesenc xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdc, 0x0f],
- "aesenclast xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdd, 0x0f],
- "aesdec xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xde, 0x0f],
- "aesdeclast xmm1, xmmword ptr [bx]" => vec![0x67, 0x66, 0x0f, 0x38, 0xdf, 0x0f],
- "blendpd xmm7, xmmword ptr cs:[bx + si + 2FF0h], 7Ch" => vec![0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c],
- // more
- "movdir64b bp, zmmword ptr es:[di + 80Bh]" => vec![0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08],
- "lss eax, fword ptr [bx + si]" => vec![0x67, 0x0f, 0xb2, 0x00],
- "lddqu xmm4, xmmword ptr es:[bx + si + 5F1Bh]" => vec![0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f],
- "lods byte ptr [si]" => vec![0x67, 0xac],
- "scas byte ptr es:[di]" => vec![0x67, 0xae],
- "rep movs byte ptr es:[di], byte ptr [si]" => vec![0x67, 0xf3, 0xa4],
- "rep movs dword ptr es:[di], dword ptr [si]" => vec![0x67, 0xf3, 0xa5],
- "movapd xmm0, xmmword ptr [bx + si]" => vec![0x67, 0x66, 0x0f, 0x28, 0x00],
- "cvtdq2ps xmm0, xmmword ptr [bx + di]" => vec![0x67, 0x0f, 0x5b, 0x01],
- // i tried really hard to find a MASM syntax for absolute far call/jump destinations! i turned up a bunch of blanks.
- // https://mirrors.nycbug.org/pub/The_Unix_Archive/Unix_Usenet/comp.unix.xenix/1989-February/001910.html is the funniest,
- // given that it is OS hackers experiencing the same issue and concluding they should emit the bytes themselves.
- // so yax will emit something like bindump would, and we'll just swallow the text as if masm worked like i'd hope..
- "call 6655h:44332211h" => vec![0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66],
- "call 4433h:2211h" => vec![0x66, 0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66],
- // terribly unfortunate: masm reasonably encodes this instruction as a 32-bit offset, which causes yax to spell the offset
- // as 0000AA55 instead of AA55. override dumpbin to use the (worse) encoding for the sake of matching with the test.
- "mov cx, word ptr [0AA55h]" => vec![0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa],
- // same deal, different instruction.
- "mov al, byte ptr [0B0C0h]" => vec![0x67, 0xa0, 0xc0, 0xb0],
- "mov eax, dword ptr [0000B0C0h]" => vec![0x67, 0xa1, 0xc0, 0xb0],
- // if you operand-size override pushad/popad you get the 16-bit forms, pusha/popa. dumpbin reflects this, but in 32-bit mode
- // accepts either as a way of spelling pushad/popad. override it here for tests to match up, but this is an unfortunately
- // disastrous difference in round-tripping..
- "pusha" => vec![0x66, 0x60],
- "popa" => vec![0x66, 0x61],
- // masm does not accept an integer operand: it only supports `aam 10` as in d4 0a. so.. bummer.
- "aam 1" => vec![0xd4, 0x01],
- // same as above
- "aad 1" => vec![0xd5, 0x01],
- _other => { tools::masm(&displayed_masm, CodeModel::Bits32).expect("can assemble") }
- };
+ let masm_as_bytes = tools::masm(&displayed_masm, CodeModel::Bits32).expect("can assemble");
let masm_roundtrip = decoder.decode_slice(&masm_as_bytes).expect("can decode").display_with(DisplayStyle::Masm).to_string();
// chasing down differences in how dumpbin/yax write "qword" is not useful to anyone..
let external_masm_ish = external_masm_ish.replace(" mmword ", " qword ");
diff --git a/test/tools.rs b/test/tools.rs
index 34b194f..c58ec9a 100644
--- a/test/tools.rs
+++ b/test/tools.rs
@@ -58,6 +58,582 @@ mod imp {
use tempfile::NamedTempFile;
pub fn dumpbin(bytes: &[u8], codeness: CodeModel) -> Result<String, String> {
+ let replacement = match codeness {
+ CodeModel::Bits16 => {
+ // no replacements for 16-bit yet, because masm is little-tested in 16-bit mode..
+ None
+ }
+ CodeModel::Bits32 => {
+ match bytes {
+ &[0xf1] => Some("int 1"), // dumpbin does not know how to decode f1...
+ &[0xe5, 0x99] => Some("in eax, 99h"), // this is a MASM/dumpbin bug. see notes on testcase.
+ &[0xe7, 0x99] => Some("out 99h, eax"), // this is a MASM/dumpbin bug. see notes on testcase.
+ // dumpbin prints the instruction as if it was encoded in 32-bit form regardless of object file, so overrule it.
+ &[0xf3, 0x0f, 0xc7, 0xfd] => Some("rdpid ebp"),
+ &[0x0f, 0x18, 0xc0] => Some("nop eax"), // dumpbin would love to call this "prefetchnta eax" ???
+ &[0x0f, 0x18, 0xcc] => Some("nop esp"), // dumpbin would love to call this "prefetchnta esp" ???
+ &[0x0f, 0x18, 0x20] => Some("nop zmmword ptr [eax]"), // getting around dumpbin knowing about prefetchrst2..
+ &[0x0f, 0x19, 0x20] => Some("nop dword ptr [eax]"), // dumpbin doesn't know about 0f19..
+ &[0x0f, 0x1a, 0x20] => Some("nop dword ptr [eax]"), // dumpbin wants to call this bndldx, yax doesn't do MPX yet
+ &[0x0f, 0x1b, 0x20] => Some("nop dword ptr [eax]"), // dumpbin wants to call this bndstx, yax doesn't do MPX yet
+ &[0x0f, 0x1c, 0x20] => Some("nop dword ptr [eax]"), // dumpbin doesn't know about 0f1c..
+ &[0x0f, 0x1d, 0x20] => Some("nop dword ptr [eax]"), // dumpbin doesn't know about 0f1d..
+ &[0x0f, 0x1e, 0x20] => Some("nop dword ptr [eax]"), // dumpbin doesn't know about 0f1e..
+ &[0xf2, 0x66, 0x66, 0x0f, 0x10, 0xc0] => Some("movsd xmm0, xmm0"), // dumpbin does not love the prefixes
+ &[0xf3, 0x0f, 0x1e, 0xfc] => Some("nop"), // dumpbin does not tolerate this at all, redirect into a boring nop.
+ &[0x0f, 0x43, 0xec] => Some("cmovnb ebp, esp"), // dumpbin writes it "cmovae" instead of yax's cmovnb.
+ &[0x2e, 0x36, 0x0f, 0x18, 0xe7] => Some("nop edi"), // dumpbin reports a mildly-confused prefetchrst2 rdi (even in 32-bit mode!)
+ &[0x0f, 0xbe, 0x83, 0xb4, 0x00, 0x00, 0x00] => {
+ Some("movsx eax, byte ptr [ebx + 0B4h]") // dumpbin uses %016 formatting, masm happily accepts shorter.
+ },
+ &[0x62, 0xd2, 0x7e, 0x28, 0x3a, 0xca] => {
+ Some("vpbroadcastmw2d ymm1, k2") // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
+ },
+ &[0x62, 0xd2, 0x7e, 0x08, 0x28, 0xc2] => {
+ Some("vpmovm2b xmm0, k2") // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
+ },
+ &[0x0f, 0x0d, 0x00] => {
+ // dumpbin interprets this as the 3DNow!-style PREFETCH instruction, but we're definitely not 3dnow..
+ Some("nop zmmword ptr [eax]")
+ }
+ &[0xc4, 0x03, 0x3d, 0x0a, 0xca, 0x77] => {
+ // dumpbin can't deal with this instruction..
+ Some("vroundss xmm9, xmm8, xmm10, 77h")
+ }
+ &[0xc4, 0x03, 0x3d, 0x0b, 0xca, 0x77] => {
+ // dumpbin can't deal with this instruction..
+ Some("vroundsd xmm9, xmm8, xmm10, 77h")
+ }
+ &[0x66, 0x0f, 0xd6, 0x01] => {
+ // dumpbin really wants to use mmword here, but i really don't.
+ Some("movq qword ptr [ecx], xmm0")
+ }
+ // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
+ &[0x66, 0x0f, 0xff, 0xc1] => Some("ud0 eax, ecx"),
+ &[0xf2, 0x0f, 0xff, 0xc1] => Some("ud0 eax, ecx"),
+ &[0xf3, 0x0f, 0xff, 0xc1] => Some("ud0 eax, ecx"),
+ &[0x66, 0x0f, 0xff, 0x01] => Some("ud0 eax, dword ptr [ecx]"),
+ &[0x0f, 0xff, 0x6b, 0xac] => Some("ud0 ebp, dword ptr [ebx - 54h]"),
+ // dumpbin does not tolerate the pointless prefixes.
+ &[0x36, 0x36, 0x2e, 0x0f, 0x38, 0xf9, 0x55, 0x3e] => Some("movdiri dword ptr cs:[ebp + 3Eh], edx"),
+ // dumpbin does not tolerate the pointless prefixes.
+ &[0x36, 0x26, 0x66, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => Some("movdir64b ebp, zmmword ptr es:[ebp + 729080Bh]"),
+ // dumpbin does not tolerate the pointless prefixes.
+ &[0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08] => Some("movdir64b bp, zmmword ptr es:[di + 80Bh]"),
+ // and again
+ &[0xf2, 0xf2, 0x2e, 0x36, 0x0f, 0x38, 0xf8, 0x83, 0x09, 0x1c, 0x9d, 0x3f] => Some("enqcmd eax, zmmword ptr ss:[ebx + 3F9D1C09h]"),
+ // and again.
+ &[0x3e, 0x64, 0xf3, 0x64, 0x0f, 0x38, 0xf8, 0x72, 0x54] => Some("enqcmds esi, zmmword ptr fs:[edx + 54h]"),
+ // prefixes confuse dumpbin again
+ &[0x66, 0xf3, 0x0f, 0x01, 0xe8] => Some("setssbsy"),
+ // prefixes confuse dumpbin again
+ &[0x66, 0xf3, 0x0f, 0x01, 0xea] => Some("saveprevssp"),
+ // prefixes confuse dumpbin again
+ &[0xf3, 0x66, 0x0f, 0x01, 0xe8] => Some("setssbsy"), // TODO: yax does not support `serialize` (yet)
+ // prefixes confuse dumpbin again
+ &[0xf3, 0x66, 0x0f, 0x01, 0xea] => Some("saveprevssp"),
+ // prefixes confuse dumpbin again
+ &[0xf3, 0x66, 0x0f, 0x01, 0x29] => Some("rstorssp qword ptr [ecx]"),
+ // dumpbin writes the repne, but it doesn't do anything..
+ &[0xf2, 0x0f, 0x21, 0xc8] => Some("mov eax, dr1"),
+ // dumpbin writes the rep, but it doesn't do anything..
+ &[0xf3, 0x0f, 0x21, 0xc8] => Some("mov eax, dr1"),
+ // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
+ &[0xf2, 0x0f, 0xc0, 0xcc] => Some("xadd ah, cl"),
+ // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
+ &[0xf3, 0x0f, 0xc0, 0xcc] => Some("xadd ah, cl"),
+ // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
+ &[0xf2, 0x0f, 0xc1, 0xcc] => Some("xadd esp, ecx"),
+ // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
+ &[0xf3, 0x0f, 0xc1, 0xcc] => Some("xadd esp, ecx"),
+ // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
+ &[0xf2, 0x0f, 0xc7, 0x0f] => Some("cmpxchg8b qword ptr [edi]"),
+ // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
+ &[0xf3, 0x0f, 0xc7, 0x0f] => Some("cmpxchg8b qword ptr [edi]"),
+ // prefixes again..
+ &[0x66, 0x36, 0x0f, 0x3a, 0xce, 0x8c, 0x56, 0x9e, 0x82, 0xd1, 0xbe, 0xad] => Some("gf2p8affineqb xmm1, xmmword ptr ss:[esi + edx * 2 - 412E7D62h], 0ADh"),
+ &[0x3e, 0x64, 0x64, 0x66, 0x0f, 0x3a, 0xcf, 0xba, 0x13, 0x23, 0x04, 0xba, 0x6b] => Some("gf2p8affineinvqb xmm7, xmmword ptr fs:[edx - 45FBDCEDh], 6Bh"),
+ &[0xf3, 0x64, 0x2e, 0x65, 0x0f, 0x38, 0xdc, 0xe8] => Some("loadiwkey xmm5, xmm0"),
+ // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
+ &[0x66, 0x0f, 0x38, 0x80, 0x01] => Some("invept eax, xmmword ptr [ecx]"),
+ // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
+ &[0x66, 0x0f, 0x38, 0x81, 0x01] => Some("invvpid eax, xmmword ptr [ecx]"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ // (and we print jnb instead of jae)
+ &[0x73, 0x31] => Some("jnb $+33h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x72, 0x5a] => Some("jb $+5Ch"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x72, 0xf0] => Some("jb $-0Eh"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe8, 0x01, 0x00, 0x00, 0x00] => Some("call $+6"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe8, 0x80, 0x00, 0x00, 0x00] => Some("call near ptr $+85h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe8, 0xff, 0xff, 0xff, 0xff] => Some("call $+4"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe9, 0x01, 0x00, 0x00, 0x00] => Some("jmp $+6"),
+ // dumpbin uses absolute branch destinations, but yax uses relative. there's also the near ptr nonsense..
+ &[0xe9, 0x80, 0x00, 0x00, 0x00] => Some("jmp near ptr $+85h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe9, 0xff, 0xff, 0xff, 0xff] => Some("jmp $+4"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00] => Some("jna $+191h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x0f, 0x85, 0x3b, 0x25, 0x00, 0x00] => Some("jnz $+2541h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x74, 0x47] => Some("jz $+49h"),
+ // dumpbin prints a ds: since this is an absolute address..
+ &[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00] => Some("call dword ptr [0024727Eh]"),
+ // dumpbin uses a really wide displacement .. for laughs..
+ &[0xff, 0x24, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => Some("jmp dword ptr [ecx * 8 + 1BCA070h]"),
+ // dumpbin uses a really wide displacement .. for laughs..
+ &[0xff, 0x14, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => Some("call dword ptr [ecx * 8 + 1BCA070h]"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe0, 0x12] => Some("loopnz $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe1, 0x12] => Some("loopz $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe2, 0x12] => Some("loop $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe3, 0x12] => Some("jecxz $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe3, 0xf0] => Some("jecxz $-0Eh"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x67, 0xe3, 0x12] => Some("jcxz $+15h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x67, 0xe3, 0xf0] => Some("jcxz $-0Dh"),
+ // dumpbin dislikes prefixes.
+ &[0x66, 0xf2, 0x0f, 0x79, 0xcf] => Some("insertq xmm1, xmm7"),
+ &[0xf6, 0x05, 0x2c, 0x9b, 0xff, 0xff, 0x01] => Some("test byte ptr [0FFFF9B2Ch], 1"),
+ // yax uses wider immediates
+ &[0x3d, 0x01, 0xf0, 0xff, 0xff] => Some("cmp eax, 0FFFFF001h"),
+ // dumpbin gets the size wrong
+ &[0x62, 0xf2, 0xfd, 0x0f, 0x8a, 0x62, 0xf2] => Some("vcompresspd xmmword ptr [edx - 70h]{k7}, xmm4"),
+ // TODO: yax doesn't know about rdssp{d,q}?
+ &[0xf3, 0x0f, 0x1e, 0x0f] => Some("nop"),
+ // yax won't mention the pointless repne prefix
+ &[0xf2, 0x0f, 0x06] => Some("clts"),
+ // yax won't mention the pointless repne prefix
+ &[0xf2, 0x0f, 0x07] => Some("sysret"),
+ // dumpbin spells this mmword
+ &[0x0f, 0x6f, 0x00] => Some("movq mm0, qword ptr [eax]"),
+ &[0x66, 0x2e, 0xf2, 0xf0, 0x0f, 0xbb, 0x13] => Some("xacquire lock btc word ptr cs:[ebx], dx"),
+ // dumpbin prints with more.. flourish
+ &[0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00] => Some("nop word ptr cs:[eax + eax]"),
+ // disp is wider from dumpbin
+ &[0x0f, 0xfc, 0xaf, 0x40, 0x38, 0x25, 0xbf] => Some("paddb mm5, mmword ptr [edi - 40DAC7C0h]"),
+ &[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56] => Some("xbegin $+56341216h"),
+ &[0x66, 0xc7, 0xf8, 0x10, 0x12] => Some("xbegin $+1215h"),
+ &[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c] => Some("pi2fw mm6, qword ptr ss:[eax - 5]"), // more prefix confusion..
+ // prefixes confuse dumpbin, and dumpbin says "qword" where we use mmword. masm accepts either
+ &[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4] => Some("pfmax mm4, mmword ptr cs:[ebx + edx * 4 - 6Dh]"),
+ // dumpbin shows this as a non-rip-rel offset :(
+ &[0x0f, 0xe5, 0x3d, 0xaa, 0xbb, 0xcc, 0x77] => Some("pmulhw mm7, qword ptr [77CCBBAAh]"),
+ // dumpbin confused about prefixes once again
+ &[0x66, 0x3e, 0x26, 0x2e, 0x2e, 0x0f, 0x38, 0x2a, 0x2b] => Some("movntdqa xmm5, xmmword ptr cs:[ebx]"),
+ // prefixes.. cs: isn't real in 64-bit mode
+ &[0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c] => Some("blendpd xmm7, xmmword ptr cs:[bx + si + 2FF0h], 7Ch"),
+ // prefixes confuse dumpbin
+ &[0x66, 0x66, 0x64, 0x3e, 0x0f, 0x38, 0x23, 0x9d, 0x69, 0x0f, 0xa8, 0x2d] => Some("pmovsxwd xmm3, qword ptr [ebp + 2DA80F69h]"),
+ // prefixes confuse dumpbin
+ &[0x2e, 0x66, 0x26, 0x64, 0x0f, 0x3a, 0x21, 0x0b, 0xb1] => Some("insertps xmm1, dword ptr fs:[ebx], 0B1h"),
+ // prefixes confuse dumpbin
+ &[0x66, 0x26, 0x0f, 0x3a, 0x42, 0x96, 0x74, 0x29, 0x96, 0xf9, 0x6a] => Some("mpsadbw xmm2, xmmword ptr es:[esi - 669D68Ch], 6Ah"),
+ // prefixes confuse dumpbin
+ &[0x67, 0x26, 0x66, 0x65, 0x0f, 0x38, 0x3f, 0x9d, 0xcc, 0x03] => Some("pmaxud xmm3, xmmword ptr gs:[di + 3CCh]"),
+ // prefixes confuse dumpbin
+ &[0x67, 0x66, 0x65, 0x3e, 0x0f, 0x6d, 0xd1] => Some("punpckhqdq xmm2, xmm1"),
+ // prefixes confuse dumpbin
+ &[0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f] => Some("lddqu xmm4, xmmword ptr es:[bx + si + 5F1Bh]"),
+ // prefixes confuse dumpbin
+ &[0x2e, 0x3e, 0x66, 0x3e, 0x0f, 0x3a, 0x41, 0x30, 0x48] => Some("dppd xmm6, xmmword ptr [eax], 48h"),
+ // again prefixes confuse dumpbin
+ &[0x65, 0x66, 0x66, 0x64, 0x0f, 0x38, 0xdb, 0x0f] => Some("aesimc xmm1, xmmword ptr fs:[edi]"),
+ // dumpbin prints the order backwards =|
+ &[0x65, 0xf0, 0x87, 0x0f] => Some("lock xchg dword ptr gs:[edi], ecx"),
+ // dumpbin knows about "fstpnce" as "fstp1", but masm does not.
+ // since this is an undocumented instruction anyway, decode it ourselves..
+ &[0xd9, 0xdb] => Some("fstpnce st(3), st(0)"),
+ // dumpbin calls this "fcom2", but it's just an undocumented fcom alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdc, 0xd3] => Some("fcom st(3)"),
+ // dumpbin calls this "fcomp3", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdc, 0xdb] => Some("fcomp st(3)"),
+ // dumpbin calls this "fxch4", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdd, 0xcb] => Some("fxch st(3)"),
+ // dumpbin calls this "fcomp5", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xde, 0xd3] => Some("fcomp st(3)"),
+ // dumpbin calls this "fxch7", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdf, 0xcb] => Some("fxch st(3)"),
+ // dumpbin calls this "fstp8", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdf, 0xd3] => Some("fstp st(3)"),
+ // dumpbin calls this "fstp9", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdf, 0xdb] => Some("fstp st(3)"),
+ &[0xf2, 0x0f, 0xbc, 0xd3] => Some("bsf edx, ebx"),
+ // mov abs in 32-bit mode gets a ds: prefix even though that's the default. masm does not need this prefix, so we round-trip fine without it.
+ &[0xa0, 0x93, 0x62, 0xc4, 0x00] => Some("mov al, byte ptr [00C46293h]"),
+ &[0x67, 0xa0, 0x93, 0x62] => Some("mov al, byte ptr [00006293h]"),
+ &[0xa1, 0x93, 0x62, 0xc4, 0x00] => Some("mov eax, dword ptr [00C46293h]"),
+ &[0x67, 0xa1, 0x93, 0x62] => Some("mov eax, dword ptr [00006293h]"),
+ &[0xa2, 0x93, 0x62, 0xc4, 0x00] => Some("mov byte ptr [00C46293h], al"),
+ &[0x67, 0xa2, 0x93, 0x62] => Some("mov byte ptr [00006293h], al"),
+ &[0xa3, 0x93, 0x62, 0xc4, 0x00] => Some("mov dword ptr [00C46293h], eax"),
+ &[0x67, 0xa3, 0x93, 0x62] => Some("mov dword ptr [00006293h], eax"),
+ &[0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => Some("xor eax, dword ptr [12345678h]"),
+ &[0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [44332211h]"),
+ &[0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [44332211h]"),
+ &[0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => Some("xor esi, dword ptr [50403020h]"),
+ &[0xa0, 0xc0, 0xb0, 0xa0, 0x90] => Some("mov al, byte ptr [90A0B0C0h]"),
+ &[0x67, 0xa0, 0xc0, 0xb0] => Some("mov al, byte ptr [0B0C0h]"),
+ &[0x67, 0xa1, 0xc0, 0xb0] => Some("mov eax, dword ptr [0000B0C0h]"),
+ &[0x66, 0x67, 0xa1, 0xc0, 0xb0] => Some("mov ax, word ptr [0000B0C0h]"),
+ // same for wrssd
+ &[0x3e, 0x0f, 0x38, 0xf6, 0x23] => Some("wrssd dword ptr [ebx], esp"),
+ // dumpbin believes that rex.w works even in 32-bit code, thus prints `rorx rax, ..`. haha what a dingus
+ &[0xc4, 0xe3, 0xfb, 0xf0, 0x01, 0x05] => Some("rorx eax, dword ptr [ecx], 5"),
+ &[0xc4, 0xe2, 0xe3, 0xf5, 0x07] => Some("pdep eax, ebx, dword ptr [edi]"),
+ &[0xc4, 0xe2, 0xe3, 0xf6, 0x07] => Some("mulx eax, ebx, dword ptr [edi]"),
+ &[0xc4, 0xe2, 0xe3, 0xf7, 0x01] => Some("shrx eax, dword ptr [ecx], ebx"),
+ &[0xc4, 0xe2, 0xe2, 0xf5, 0x07] => Some("pext eax, ebx, dword ptr [edi]"),
+ &[0xc4, 0xe2, 0xe2, 0xf7, 0x01] => Some("sarx eax, dword ptr [ecx], ebx"),
+ &[0xc4, 0xe2, 0xe0, 0xf5, 0x07] => Some("bzhi eax, dword ptr [edi], ebx"),
+ &[0xc4, 0xe2, 0xe1, 0xf7, 0x01] => Some("shlx eax, dword ptr [ecx], ebx"),
+ &[0xc4, 0xe2, 0xe0, 0xf2, 0x01] => Some("andn eax, ebx, dword ptr [ecx]"),
+ &[0xc4, 0xe2, 0xf8, 0xf3, 0x09] => Some("blsr eax, dword ptr [ecx]"),
+ &[0xc4, 0xe2, 0xf8, 0xf3, 0x11] => Some("blsmsk eax, dword ptr [ecx]"),
+ &[0xc4, 0xe2, 0xf8, 0xf3, 0x19] => Some("blsi eax, dword ptr [ecx]"),
+ &[0xc4, 0xe2, 0xe0, 0xf7, 0x01] => Some("bextr eax, dword ptr [ecx], ebx"),
+ &[0xc4, 0xc3, 0x39, 0x0c, 0xca, 0x77] => Some("vblendps xmm1, xmm0, xmm2, 77h"),
+ // just have to decide we know better than dumpbin: masm does not accept an absolute far call/far jump destination,
+ // so we definitely can't round-trip by following dumpbin. dumpbin doesn't use hex suffixes here, instead printing
+                    // "6655:44332211" as the destination. this is technically not ambiguous since `:` is a hint that this is an absolute
+ // far address and that both numbers are base 16, but that's ... subtle and easy to miss. so add some h's.
+ &[0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66] => Some("call 6655h:44332211h"),
+ &[0x66, 0x9a, 0x11, 0x22, 0x33, 0x44] => Some("call 4433h:2211h"),
+ // terribly unfortunate: masm reasonably encodes this instruction as a 32-bit offset, which causes yax to spell the offset
+ // as 0000AA55 instead of AA55. override dumpbin to use the (worse) encoding for the sake of matching with the test.
+ &[0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa] => Some("mov cx, word ptr [0AA55h]"),
+                    // inexplicably, dumpbin spells this "aamb", for .. ascii adjust after multiplication (byte) ???
+ // additionally, masm does not accept an integer operand: it only supports `aam 10` as in d4 0a. so.. bummer.
+ &[0xd4, 0x01] => Some("aam 1"),
+ // same as above
+ &[0xd5, 0x01] => Some("aad 1"),
+ // dunno why dumpbin doesn't like this one..
+ &[0xc5, 0b1_1111_100, 0x2e, 0b00_001_010] => Some("vucomiss xmm1, dword ptr [edx]"),
+ &[0xc5, 0b1_1111_100, 0x2f, 0b00_001_010] => Some("vcomiss xmm1, dword ptr [edx]"),
+ _ => None,
+ }
+ }
+ CodeModel::Bits64 => {
+ match bytes {
+ &[0xf1] => Some("int 1"), // dumpbin does not know how to decode f1...
+ &[0x4f, 0xe5, 0x99] => Some("in eax, 99h"), // this is a MASM/dumpbin bug. see notes on testcase.
+ &[0x4f, 0xe7, 0x99] => Some("out 99h, eax"), // this is a MASM/dumpbin bug. see notes on testcase.
+ // dumpbin prints the instruction as if it was encoded in 32-bit form regardless of object file, so overrule it.
+ &[0xf3, 0x0f, 0xc7, 0xfd] => Some("rdpid rbp"),
+ &[0x0f, 0x18, 0xc0] => Some("nop eax"), // dumpbin would love to call this "prefetchnta rax" ???
+ &[0x0f, 0x18, 0xcc] => Some("nop esp"), // dumpbin would love to call this "prefetchnta rsp" ???
+ &[0x0f, 0x18, 0x20] => Some("nop zmmword ptr [rax]"), // getting around dumpbin knowing about prefetchrst2..
+ &[0x4f, 0x0f, 0x18, 0x20] => Some("nop zmmword ptr [r8]"), // getting around dumpbin knowing about prefetchrst2..
+ &[0x2e, 0x36, 0x47, 0x0f, 0x18, 0xe7] => Some("nop r15d"), // getting around dumpbin knowing about prefetchrst2..
+ &[0x0f, 0x19, 0x20] => Some("nop dword ptr [rax]"), // dumpbin doesn't know about 0f19..
+ &[0x0f, 0x1a, 0x20] => Some("nop dword ptr [rax]"), // dumpbin wants to call this bndldx, yax doesn't do MPX yet
+ &[0x0f, 0x1b, 0x20] => Some("nop dword ptr [rax]"), // dumpbin wants to call this bndstx, yax doesn't do MPX yet
+ &[0x0f, 0x1c, 0x20] => Some("nop dword ptr [rax]"), // dumpbin doesn't know about 0f1c..
+ &[0x0f, 0x1d, 0x20] => Some("nop dword ptr [rax]"), // dumpbin doesn't know about 0f1d..
+ &[0x0f, 0x1e, 0x20] => Some("nop dword ptr [rax]"), // dumpbin doesn't know about 0f1e..
+ &[0xf2, 0x66, 0x66, 0x4d, 0x0f, 0x10, 0xc0] => Some("movsd xmm8, xmm8"), // dumpbin does not love the prefixes
+ &[0x4f, 0x66, 0x0f, 0x28, 0x00] => Some("movapd xmm0, xmmword ptr [rax]"), // dumpbin does not love the prefixes
+ &[0x67, 0x4f, 0x66, 0x0f, 0x28, 0x00] => Some("movapd xmm0, xmmword ptr [eax]"), // dumpbin does not love the prefixes
+ &[0xf3, 0x0f, 0x1e, 0xfc] => Some("nop"), // dumpbin does not tolerate this at all, redirect into a boring nop.
+ &[0x4d, 0x0f, 0x43, 0xec] => Some("cmovnb r13, r12"), // dumpbin writes it "cmovae" instead of yax's cmovnb.
+ &[0x65, 0x4c, 0x89, 0x04, 0x25, 0xa8, 0x01, 0x00, 0x00] => {
+ Some("mov qword ptr gs:[000001A8h], r8") // dumpbin uses %016 formatting, masm happily accepts shorter.
+ },
+ &[0x0f, 0xbe, 0x83, 0xb4, 0x00, 0x00, 0x00] => {
+ Some("movsx eax, byte ptr [rbx + 0B4h]") // dumpbin uses %016 formatting, masm happily accepts shorter.
+ },
+ &[0x46, 0x63, 0xc1] => Some("movsxd r8, ecx"), // dumpbin writes 32-bit destinations for this, but masm accepts either?
+ &[0x62, 0xd2, 0x7e, 0x28, 0x3a, 0xca] => {
+ Some("vpbroadcastmw2d ymm1, k2") // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
+ },
+ &[0x62, 0xd2, 0x7e, 0x08, 0x28, 0xc2] => {
+ Some("vpmovm2b xmm0, k2") // dumpbin inexplicably uses "bnd2" as the source register??? MSVC 14.52.36328.
+ },
+ &[0x0f, 0x01, 0x51, 0xff] => {
+ Some("lgdt fword ptr [rcx - 1]") // dumpbin prints this as "tbyte", which masm does not accept.
+ },
+ &[0x0f, 0x01, 0x59, 0xff] => {
+ Some("lidt fword ptr [rcx - 1]") // dumpbin prints this as "tbyte", which masm does not accept.
+ },
+ &[0x2e, 0x67, 0x65, 0x2e, 0x46, 0x0f, 0x01, 0xff] => {
+ Some("tlbsync") // dumpbin does not exactly tolerate the extra prefixes.
+ },
+ &[0x0f, 0x0d, 0x00] => {
+ // dumpbin interprets this as the 3DNow!-style PREFETCH instruction, but we're definitely not 3dnow..
+ Some("nop zmmword ptr [rax]")
+ }
+ &[0xf2, 0x41, 0x0f, 0xbc, 0xd3] => {
+ // masm doesn't like the extra prefix
+ Some("bsf edx, r11d")
+ }
+ &[0x4f, 0x4e, 0x00, 0xcc] => {
+ // masm doesn't like the extra prefix
+ Some("add spl, r9b")
+ }
+ &[0xc4, 0x03, 0x3d, 0x0a, 0xca, 0x77] => {
+ // dumpbin can't deal with this instruction..
+ Some("vroundss xmm9, xmm8, xmm10, 77h")
+ }
+ &[0xc4, 0x03, 0x3d, 0x0b, 0xca, 0x77] => {
+ // dumpbin can't deal with this instruction..
+ Some("vroundsd xmm9, xmm8, xmm10, 77h")
+ }
+ &[0x66, 0x4f, 0x0f, 0x6e, 0x9c, 0x9c, 0x34, 0xaa, 0xbb, 0xcc] => {
+ // dumpbin really wants to use mmword here, but i really don't.
+ Some("movq xmm11, qword ptr [r12 + r11 * 4 - 334455CCh]")
+ }
+ &[0x66, 0x0f, 0xd6, 0x01] => {
+ // dumpbin really wants to use mmword here, but i really don't.
+ Some("movq qword ptr [rcx], xmm0")
+ }
+ &[0x66, 0x4f, 0x0f, 0xd7, 0xc1] => {
+ // yax bug? default operand size is 64-bit in 64-bit mode, so the register should be r8?
+ Some("pmovmskb r8d, xmm9")
+ }
+ // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
+ &[0x66, 0x0f, 0xff, 0xc1] => Some("ud0 eax, ecx"),
+ &[0xf2, 0x0f, 0xff, 0xc1] => Some("ud0 eax, ecx"),
+ &[0xf3, 0x0f, 0xff, 0xc1] => Some("ud0 eax, ecx"),
+ &[0x66, 0x0f, 0xff, 0x01] => Some("ud0 eax, dword ptr [rcx]"),
+ &[0x66, 0x4f, 0x0f, 0xff, 0xc1] => Some("ud0 r8d, r9d"),
+ &[0x4c, 0x0f, 0xff, 0x6b, 0xac] => Some("ud0 r13d, dword ptr [rbx - 54h]"),
+ // dumpbin does not tolerate the pointless rex prefix.
+ &[0x4f, 0x66, 0x0f, 0x2a, 0xcf] => Some("cvtpi2pd xmm1, mm7"),
+ // dumpbin does not tolerate the pointless rex prefix.
+ &[0x4f, 0xf3, 0x0f, 0x2a, 0xcf] => Some("cvtsi2ss xmm1, edi"),
+ // dumpbin does not tolerate the pointless rex prefix.
+ &[0x4f, 0xf2, 0x0f, 0x2a, 0xcf] => Some("cvtsi2sd xmm1, edi"),
+ // dumpbin does not tolerate the pointless rex prefix.
+ &[0x4f, 0xf2, 0x0f, 0x2a, 0x00] => Some("cvtsi2sd xmm0, dword ptr [rax]"),
+ // dumpbin does not tolerate the pointless rex prefix.
+ &[0x4f, 0xf3, 0x0f, 0x2a, 0x00] => Some("cvtsi2ss xmm0, dword ptr [rax]"),
+ // dumpbin does not tolerate the pointless rex prefix.
+ &[0x4f, 0x66, 0x0f, 0x2a, 0x00] => Some("cvtpi2pd xmm0, mmword ptr [rax]"),
+ // dumpbin does not tolerate the pointless prefixes.
+ &[0x36, 0x36, 0x2e, 0x0f, 0x38, 0xf9, 0x55, 0x3e] => Some("movdiri dword ptr [rbp + 3Eh], edx"),
+ // dumpbin does not tolerate the pointless prefixes.
+ &[0x36, 0x26, 0x66, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => Some("movdir64b rbp, zmmword ptr [rbp + 729080Bh]"),
+ // dumpbin does not tolerate the pointless prefixes.
+ &[0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08, 0x29, 0x07] => Some("movdir64b ebp, zmmword ptr [ebp + 729080Bh]"),
+ // dumpbin is super confused about the prefixing.
+ &[0xf2, 0xf2, 0x2e, 0x36, 0x47, 0x0f, 0x38, 0xf8, 0x83, 0x09, 0x1c, 0x9d, 0x3f] => Some("enqcmd r8, zmmword ptr [r11 + 3F9D1C09h]"),
+ // and again.
+ &[0x3e, 0x64, 0xf3, 0x64, 0x0f, 0x38, 0xf8, 0x72, 0x54] => Some("enqcmds rsi, zmmword ptr fs:[rdx + 54h]"),
+ // dumpbin shows a ds prefix; this is tolerated by masm but is kinda incorrect in x86_64. either way masm accepts it though.
+ &[0x3e, 0x4f, 0x0f, 0x38, 0xf6, 0x23] => Some("wrssq qword ptr [r11], r12"),
+ // prefixes confuse dumpbin again
+ &[0x66, 0xf3, 0x0f, 0x01, 0xe8] => Some("setssbsy"),
+ // prefixes confuse dumpbin again
+ &[0x66, 0xf3, 0x0f, 0x01, 0xea] => Some("saveprevssp"),
+ // prefixes confuse dumpbin again
+ &[0xf3, 0x66, 0x0f, 0x01, 0xe8] => Some("setssbsy"), // TODO: yax does not support `serialize` (yet)
+ // prefixes confuse dumpbin again
+ &[0xf3, 0x66, 0x0f, 0x01, 0xea] => Some("saveprevssp"),
+ // prefixes confuse dumpbin again
+ &[0xf3, 0x66, 0x0f, 0x01, 0x29] => Some("rstorssp qword ptr [rcx]"),
+ // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
+ &[0xf2, 0x0f, 0xc0, 0xcc] => Some("xadd ah, cl"),
+ // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
+ &[0xf3, 0x0f, 0xc0, 0xcc] => Some("xadd ah, cl"),
+ // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
+ &[0xf2, 0x0f, 0xc1, 0xcc] => Some("xadd esp, ecx"),
+ // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
+ &[0xf3, 0x0f, 0xc1, 0xcc] => Some("xadd esp, ecx"),
+ // dumpbin prints out an xacquire when there is no lock prefix, which causes the instruction to grow a lock prefix in round-tripping. no!
+ &[0xf2, 0x0f, 0xc7, 0x0f] => Some("cmpxchg8b qword ptr [rdi]"),
+ // dumpbin prints out an rep when one is not allowed, which fails round-tripping. yax doesn't.
+ &[0xf3, 0x0f, 0xc7, 0x0f] => Some("cmpxchg8b qword ptr [rdi]"),
+ // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
+ &[0x4f, 0x0f, 0xc7, 0x0f] => Some("cmpxchg16b xmmword ptr [r15]"),
+ // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
+ &[0x66, 0x4f, 0x0f, 0xc7, 0x0f] => Some("cmpxchg16b xmmword ptr [r15]"),
+ // dumpbin prints out repne prefix, which does not round-trip.
+ &[0xf2, 0x4f, 0x0f, 0xc7, 0x0f] => Some("cmpxchg16b xmmword ptr [r15]"),
+ // dumpbin prints out rep prefix, which does not round-trip.
+ &[0xf3, 0x4f, 0x0f, 0xc7, 0x0f] => Some("cmpxchg16b xmmword ptr [r15]"),
+ // prefixes again..
+ &[0x3e, 0x64, 0x64, 0x66, 0x4e, 0x0f, 0x3a, 0xcf, 0xba, 0x13, 0x23, 0x04, 0xba, 0x6b] => Some("gf2p8affineinvqb xmm15, xmmword ptr fs:[rdx - 45FBDCEDh], 6Bh"),
+ &[0x66, 0x36, 0x0f, 0x3a, 0xce, 0x8c, 0x56, 0x9e, 0x82, 0xd1, 0xbe, 0xad] => Some("gf2p8affineqb xmm1, xmmword ptr [rsi + rdx * 2 - 412E7D62h], 0ADh"),
+ &[0xf3, 0x64, 0x2e, 0x65, 0x0f, 0x38, 0xdc, 0xe8] => Some("loadiwkey xmm5, xmm0"),
+ // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
+ &[0x66, 0x0f, 0x38, 0x80, 0x01] => Some("invept rax, xmmword ptr [rcx]"),
+ // dumpbin prints out the memory size as "oword", but yax uses "xmmword". masm accepts either.
+ &[0x66, 0x0f, 0x38, 0x81, 0x01] => Some("invvpid rax, xmmword ptr [rcx]"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ // (and we print jnb instead of jae)
+ &[0x73, 0x31] => Some("jnb $+33h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x72, 0x5a] => Some("jb $+5Ch"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x72, 0xf0] => Some("jb $-0Eh"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe8, 0x01, 0x00, 0x00, 0x00] => Some("call $+6"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe8, 0x80, 0x00, 0x00, 0x00] => Some("call near ptr $+85h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe8, 0xff, 0xff, 0xff, 0xff] => Some("call $+4"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe9, 0x01, 0x00, 0x00, 0x00] => Some("jmp $+6"),
+ // dumpbin uses absolute branch destinations, but yax uses relative. there's also the near ptr nonsense..
+ &[0xe9, 0x80, 0x00, 0x00, 0x00] => Some("jmp near ptr $+85h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe9, 0xff, 0xff, 0xff, 0xff] => Some("jmp $+4"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00] => Some("jna $+191h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x0f, 0x85, 0x3b, 0x25, 0x00, 0x00] => Some("jnz $+2541h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x74, 0x47] => Some("jz $+49h"),
+ // dumpbin invents a label for laughs.
+ &[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00] => Some("call qword ptr [$ + 24727Eh]"),
+ // dumpbin uses a really wide displacement .. for laughs..
+ &[0xff, 0x24, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => Some("jmp qword ptr [rcx * 8 + 1BCA070h]"),
+ // dumpbin uses a really wide displacement .. for laughs..
+ &[0xff, 0x14, 0xcd, 0x70, 0xa0, 0xbc, 0x01] => Some("call qword ptr [rcx * 8 + 1BCA070h]"),
+ // dumpbin bug: 66-prefixed jmp/call does not pick 16-bit registers
+ &[0x66, 0xff, 0xe0] => Some("jmp rax"),
+ // dumpbin bug: 66-prefixed jmp/call does not pick 16-bit registers
+ &[0x66, 0xff, 0xd0] => Some("call rax"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe0, 0x12] => Some("loopnz $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe1, 0x12] => Some("loopz $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe2, 0x12] => Some("loop $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe3, 0x12] => Some("jrcxz $+14h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0xe3, 0xf0] => Some("jrcxz $-0Eh"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x67, 0xe3, 0x12] => Some("jecxz $+15h"),
+ // dumpbin uses absolute branch destinations, but yax uses relative.
+ &[0x67, 0xe3, 0xf0] => Some("jecxz $-0Dh"),
+ // dumpbin dislikes prefixes.
+ &[0x66, 0xf2, 0x0f, 0x79, 0xcf] => Some("insertq xmm1, xmm7"),
+ // rip-rel: oh dear
+ &[0xf6, 0x05, 0x2c, 0x9b, 0xff, 0xff, 0x01] => Some("test byte ptr [$ - 64D4h], 1"),
+ // yax uses wider immediates
+ &[0x3d, 0x01, 0xf0, 0xff, 0xff] => Some("cmp eax, 0FFFFFFFFFFFFF001h"),
+ // dumpbin doesn't print the $ of rip-rel :(
+ &[0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => Some("xor eax, dword ptr [$ + 12345678h]"),
+ &[0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => Some("xor eax, dword ptr [rcx + 43650123h]"),
+ &[0x48, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => Some("xor rax, qword ptr [$ + 12345678h]"),
+ &[0x48, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => Some("xor rax, qword ptr [rcx + 43650123h]"),
+ &[0x44, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => Some("xor r8d, dword ptr [$ + 12345678h]"),
+ &[0x44, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => Some("xor r8d, dword ptr [rcx + 43650123h]"),
+ &[0x45, 0x33, 0x05, 0x78, 0x56, 0x34, 0x12] => Some("xor r8d, dword ptr [$ + 12345678h]"),
+ &[0x45, 0x33, 0x81, 0x23, 0x01, 0x65, 0x43] => Some("xor r8d, dword ptr [r9 + 43650123h]"),
+ &[0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [44332211h]"),
+ &[0x41, 0x33, 0x04, 0x25, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [44332211h]"),
+ &[0x33, 0x84, 0xa5, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [rbp + 44332211h]"),
+ &[0x41, 0x33, 0x84, 0xa5, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [r13 + 44332211h]"),
+ &[0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [44332211h]"),
+ &[0x41, 0x33, 0x04, 0xe5, 0x11, 0x22, 0x33, 0x44] => Some("xor eax, dword ptr [44332211h]"),
+ &[0x42, 0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => Some("xor esi, dword ptr [r12 + 50403020h]"),
+ &[0x43, 0x33, 0x34, 0x25, 0x20, 0x30, 0x40, 0x50] => Some("xor esi, dword ptr [r12 + 50403020h]"),
+ &[0x42, 0x33, 0xb4, 0x25, 0x20, 0x30, 0x40, 0x50] => Some("xor esi, dword ptr [rbp + r12 + 50403020h]"),
+ &[0x43, 0x33, 0xb4, 0x25, 0x20, 0x30, 0x40, 0x50] => Some("xor esi, dword ptr [r13 + r12 + 50403020h]"),
+ // dumpbin gets the size wrong
+ &[0x62, 0xf2, 0xfd, 0x0f, 0x8a, 0x62, 0xf2] => Some("vcompresspd xmmword ptr [rdx - 70h]{k7}, xmm4"),
+ // TODO: yax doesn't know about rdssp{d,q}?
+ &[0xf3, 0x0f, 0x1e, 0x0f] => Some("nop"),
+ // yax won't mention the pointless repne prefix
+ &[0xf2, 0x0f, 0x06] => Some("clts"),
+ // yax won't mention the pointless repne prefix
+ &[0xf2, 0x0f, 0x07] => Some("sysret"),
+ // dumpbin spells this mmword
+ &[0x0f, 0x6f, 0x00] => Some("movq mm0, qword ptr [rax]"),
+ &[0x66, 0x2e, 0xf2, 0xf0, 0x0f, 0xbb, 0x13] => Some("xacquire lock btc word ptr [rbx], dx"),
+ // dumpbin handles this right (like this!) but the output is weird to parse
+ &[0x45, 0x66, 0x0f, 0x21, 0xc8] => Some("mov rax, dr1"),
+ // dumpbin says repne, but that doesn't round-trip.
+ &[0x45, 0xf2, 0x0f, 0x21, 0xc8] => Some("mov rax, dr1"),
+ // dumpbin says rep, but that doesn't round-trip.
+ &[0x45, 0xf3, 0x0f, 0x21, 0xc8] => Some("mov rax, dr1"),
+ // dumpbin prints with more.. flourish
+ &[0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00] => Some("nop word ptr [rax + rax]"),
+ // disp is wider from dumpbin
+ &[0x48, 0x8d, 0xa4, 0xc7, 0x20, 0x00, 0x00, 0x12] => Some("lea rsp, [rdi + rax * 8 + 12000020h]"),
+ &[0x0f, 0xfc, 0xaf, 0x40, 0x38, 0x25, 0xbf] => Some("paddb mm5, mmword ptr [rdi - 40DAC7C0h]"),
+ &[0xc7, 0xf8, 0x10, 0x12, 0x34, 0x56] => Some("xbegin $+56341216h"),
+ &[0x66, 0xc7, 0xf8, 0x10, 0x12] => Some("xbegin $+1215h"),
+ &[0xf2, 0xf3, 0x66, 0x65, 0x4f, 0x25, 0x9b, 0x5e, 0xda, 0x44] => Some("and rax, 44DA5E9Bh"),
+ &[0x65, 0x66, 0x66, 0x64, 0x48, 0x0f, 0x38, 0xdb, 0x0f] => Some("aesimc xmm1, xmmword ptr fs:[rdi]"),
+ &[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c] => Some("pi2fw mm6, qword ptr [rax - 5]"), // more prefix confusion..
+ // prefixes confuse dumpbin, and dumpbin says "qword" where we use mmword. masm accepts either
+ &[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4] => Some("pfmax mm4, mmword ptr [rbx + rdx * 4 - 6Dh]"),
+ // dumpbin calls this movq?
+ &[0x4f, 0x0f, 0x7e, 0xcf] => Some("movd r15, mm1"),
+ // dumpbin shows this as a wide register but it doesn't *really* matter and yax uses 32-bit always.
+ &[0x4f, 0x0f, 0xd7, 0xcf] => Some("pmovmskb r9d, mm7"),
+ // dumpbin shows this as a non-rip-rel offset :(
+ &[0x0f, 0xe5, 0x3d, 0xaa, 0xbb, 0xcc, 0x77] => Some("pmulhw mm7, qword ptr [$ + 77CCBBAAh]"),
+ // dumpbin confused about prefixes once again
+ &[0x66, 0x3e, 0x26, 0x2e, 0x2e, 0x0f, 0x38, 0x2a, 0x2b] => Some("movntdqa xmm5, xmmword ptr [rbx]"),
+ // prefixes.. cs: isn't real in 64-bit mode
+ &[0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c, 0xf0, 0x63] => Some("blendpd xmm7, xmmword ptr [eax - 0F83D010h], 63h"),
+ // prefixes confuse dumpbin
+ &[0x66, 0x66, 0x64, 0x3e, 0x0f, 0x38, 0x23, 0x9d, 0x69, 0x0f, 0xa8, 0x2d] => Some("pmovsxwd xmm3, qword ptr fs:[rbp + 2DA80F69h]"),
+ // prefixes confuse dumpbin
+ &[0x2e, 0x66, 0x26, 0x64, 0x49, 0x0f, 0x3a, 0x21, 0x0b, 0xb1] => Some("insertps xmm1, dword ptr fs:[r11], 0FFFFFFFFFFFFFFB1h"),
+ // prefixes confuse dumpbin
+ &[0x66, 0x26, 0x45, 0x0f, 0x3a, 0x42, 0x96, 0x74, 0x29, 0x96, 0xf9, 0x6a] => Some("mpsadbw xmm10, xmmword ptr [r14 - 669D68Ch], 6Ah"),
+ // prefixes confuse dumpbin
+ &[0x67, 0x26, 0x66, 0x65, 0x0f, 0x38, 0x3f, 0x9d, 0xcc, 0x03, 0xb3, 0xfa] => Some("pmaxud xmm3, xmmword ptr gs:[ebp - 54CFC34h]"),
+ // prefixes confuse dumpbin
+ &[0x67, 0x66, 0x65, 0x3e, 0x0f, 0x6d, 0xd1] => Some("punpckhqdq xmm2, xmm1"),
+ // prefixes confuse dumpbin
+ &[0x2e, 0x66, 0x40, 0x0f, 0x3a, 0x0d, 0x40, 0x2d, 0x57] => Some("blendpd xmm0, xmmword ptr [rax + 2Dh], 57h"),
+ // prefixes confuse dumpbin
+ &[0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f, 0xcd, 0xd7] => Some("lddqu xmm4, xmmword ptr [eax - 2832A0E5h]"),
+ // prefixes confuse dumpbin
+ &[0x2e, 0x3e, 0x66, 0x3e, 0x49, 0x0f, 0x3a, 0x41, 0x30, 0x48] => Some("dppd xmm6, xmmword ptr [r8], 48h"),
+ // dumpbin prints the order backwards =|
+ &[0x65, 0xf0, 0x87, 0x0f] => Some("lock xchg dword ptr gs:[rdi], ecx"),
+ // displacement gets a bunch of extra zeroes
+ &[0x66, 0x4e, 0x0f, 0x3a, 0x44, 0x88, 0xb3, 0xad, 0x26, 0x35, 0x75] => Some("pclmulqdq xmm9, xmmword ptr [rax + 3526ADB3h], 75h"),
+ // dumpbin knows about "fstpnce" as "fstp1", but masm does not.
+ // since this is an undocumented instruction anyway, decode it ourselves..
+ &[0xd9, 0xdb] => Some("fstpnce st(3), st(0)"),
+ // dumpbin calls this "fcom2", but it's just an undocumented fcom alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdc, 0xd3] => Some("fcom st(3)"),
+ // dumpbin calls this "fcomp3", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdc, 0xdb] => Some("fcomp st(3)"),
+ // dumpbin calls this "fxch4", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdd, 0xcb] => Some("fxch st(3)"),
+ // dumpbin calls this "fcomp5", but it's just an undocumented fcomp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xde, 0xd3] => Some("fcomp st(3)"),
+ // dumpbin calls this "fxch7", but it's just an undocumented fxch alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdf, 0xcb] => Some("fxch st(3)"),
+ // dumpbin calls this "fstp8", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdf, 0xd3] => Some("fstp st(3)"),
+ // dumpbin calls this "fstp9", but it's just an undocumented fstp alias. this round-trips to a different instruction but it's at least.. kinda right.
+ &[0xdf, 0xdb] => Some("fstp st(3)"),
+ // dunno why dumpbin doesn't like this one..
+ &[0xc5, 0b0_1111_100, 0x2e, 0b00_001_010] => Some("vucomiss xmm9, dword ptr [rdx]"),
+ &[0xc5, 0b0_1111_100, 0x2f, 0b00_001_010] => Some("vcomiss xmm9, dword ptr [rdx]"),
+ _other => {
+ None
+ }
+ }
+ }
+ };
+
+ if let Some(replacement) = replacement {
+ return Ok(replacement);
+ }
+
let mut source = String::new();
match codeness {
@@ -168,6 +744,101 @@ mod imp {
}
pub fn masm(text: &str, codeness: CodeModel) -> Result<Vec<u8>, String> {
+ let replacement = match codeness {
+ CodeModel::Bits16 => {
+ // no replacements for 16-bit yet, because masm is little-tested in 16-bit mode..
+ None
+ }
+ CodeModel::Bits32 => {
+ match text {
+ "nop zmmword ptr [eax]" => Some(vec![0x0f, 0x18, 0x20]), // MASM doesn't accept `nop zmmword ..`, no way to round trip 0f1820
+ "sysenter" => Some(vec![0x0f, 0x34]), // MASM doesn't accept sysenter, but dumpbin prints it.
+ "sysexit" => Some(vec![0x0f, 0x35]), // MASM doesn't accept sysexit, but dumpbin prints it.
+ // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
+ "ud0 eax, ecx" => Some(vec![0x66, 0x0f, 0xff, 0xc1]),
+ "ud0 eax, dword ptr [ecx]" => Some(vec![0x66, 0x0f, 0xff, 0x01]),
+ "ud0 ebp, dword ptr [ebx - 54h]" => Some(vec![0x0f, 0xff, 0x6b, 0xac]),
+ // masm seems to not know about fstpnce/fstp1 at all. since this is an undocumented instruction anyway, assemble it ourselves..
+ "fstpnce st(3), st(0)" => Some(vec![0xd9, 0xdb]),
+ // masm inserts a wait prefix here..
+ "feni" => Some(vec![0xdb, 0xe0]),
+ "fdisi" => Some(vec![0xdb, 0xe1]),
+ "fsetpm" => Some(vec![0xdb, 0xe4]),
+ // masm doesn't know how to assemble address-size overrides..?
+ // > cannot use 16-bit register with a 32-bit address
+ "aesimc xmm1, xmmword ptr [bx]" => Some(vec![0x67, 0x66, 0x0f, 0x38, 0xdb, 0x0f]),
+ "aesenc xmm1, xmmword ptr [bx]" => Some(vec![0x67, 0x66, 0x0f, 0x38, 0xdc, 0x0f]),
+ "aesenclast xmm1, xmmword ptr [bx]" => Some(vec![0x67, 0x66, 0x0f, 0x38, 0xdd, 0x0f]),
+ "aesdec xmm1, xmmword ptr [bx]" => Some(vec![0x67, 0x66, 0x0f, 0x38, 0xde, 0x0f]),
+ "aesdeclast xmm1, xmmword ptr [bx]" => Some(vec![0x67, 0x66, 0x0f, 0x38, 0xdf, 0x0f]),
+ "blendpd xmm7, xmmword ptr cs:[bx + si + 2FF0h], 7Ch" => Some(vec![0x66, 0x2e, 0x67, 0x0f, 0x3a, 0x0d, 0xb8, 0xf0, 0x2f, 0x7c]),
+ // more 16-bit-addressed (67h-prefixed) forms, hand-assembled for the same reason as the ones above.
+ "movdir64b bp, zmmword ptr es:[di + 80Bh]" => Some(vec![0x36, 0x26, 0x66, 0x67, 0x0f, 0x38, 0xf8, 0xad, 0x0b, 0x08]),
+ "lss eax, fword ptr [bx + si]" => Some(vec![0x67, 0x0f, 0xb2, 0x00]),
+ "lddqu xmm4, xmmword ptr es:[bx + si + 5F1Bh]" => Some(vec![0xf2, 0x3e, 0x26, 0x67, 0x0f, 0xf0, 0xa0, 0x1b, 0x5f]),
+ "lods byte ptr [si]" => Some(vec![0x67, 0xac]),
+ "scas byte ptr es:[di]" => Some(vec![0x67, 0xae]),
+ "rep movs byte ptr es:[di], byte ptr [si]" => Some(vec![0x67, 0xf3, 0xa4]),
+ "rep movs dword ptr es:[di], dword ptr [si]" => Some(vec![0x67, 0xf3, 0xa5]),
+ "movapd xmm0, xmmword ptr [bx + si]" => Some(vec![0x67, 0x66, 0x0f, 0x28, 0x00]),
+ "cvtdq2ps xmm0, xmmword ptr [bx + di]" => Some(vec![0x67, 0x0f, 0x5b, 0x01]),
+ // i tried really hard to find a MASM syntax for absolute far call/jump destinations! i turned up a bunch of blanks.
+ // https://mirrors.nycbug.org/pub/The_Unix_Archive/Unix_Usenet/comp.unix.xenix/1989-February/001910.html is the funniest,
+ // given that it is OS hackers experiencing the same issue and concluding they should emit the bytes themselves.
+ // so yax will emit something like dumpbin would, and we'll just swallow the text as if masm worked like i'd hope..
+ "call 6655h:44332211h" => Some(vec![0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66]),
+ "call 4433h:2211h" => Some(vec![0x66, 0x9a, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66]),
+ // terribly unfortunate: masm reasonably encodes this instruction as a 32-bit offset, which causes yax to spell the offset
+ // as 0000AA55 instead of AA55. override dumpbin to use the (worse) encoding for the sake of matching with the test.
+ "mov cx, word ptr [0AA55h]" => Some(vec![0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa]),
+ // same deal, different instruction.
+ "mov al, byte ptr [0B0C0h]" => Some(vec![0x67, 0xa0, 0xc0, 0xb0]),
+ "mov eax, dword ptr [0000B0C0h]" => Some(vec![0x67, 0xa1, 0xc0, 0xb0]),
+ // if you operand-size override pushad/popad you get the 16-bit forms, pusha/popa. dumpbin reflects this, but in 32-bit mode
+ // accepts either as a way of spelling pushad/popad. override it here for tests to match up, but this is an unfortunately
+ // disastrous difference in round-tripping..
+ "pusha" => Some(vec![0x66, 0x60]),
+ "popa" => Some(vec![0x66, 0x61]),
+ // masm does not accept an integer operand: it only supports `aam 10` as in d4 0a. so.. bummer.
+ "aam 1" => Some(vec![0xd4, 0x01]),
+ // same as above
+ "aad 1" => Some(vec![0xd5, 0x01]),
+ _ => None,
+ }
+ }
+ CodeModel::Bits64 => {
+ match text {
+ "nop zmmword ptr [rax]" => Some(vec![0x0f, 0x18, 0x20]), // MASM doesn't accept `nop zmmword ..`, no way to round trip 0f1820
+ "nop zmmword ptr [r8]" => Some(vec![0x41, 0x0f, 0x18, 0x20]), // MASM doesn't accept `nop zmmword ..`, no way to round trip 410f1820
+ "sysenter" => Some(vec![0x0f, 0x34]), // MASM doesn't accept sysenter, but dumpbin prints it.
+ "sysexit" => Some(vec![0x0f, 0x35]), // MASM doesn't accept sysexit, but dumpbin prints it.
+ "vpscatterdd dword ptr [r15 + xmm29]{k6}, xmm8" => Some(vec![0x62, 0x12, 0x7d, 0x06, 0xa0, 0x04, 0x2f]), // MASM ...??? assembles vpscatter wrong???
+ "vpscatterdd dword ptr [r15 + xmm25]{k6}, xmm10" => Some(vec![0x62, 0x12, 0x7d, 0x06, 0xa0, 0x14, 0x0f]), // MASM ...??? assembles vpscatter wrong???
+ "vpscatterdd dword ptr [r15 + ymm25]{k6}, ymm10" => Some(vec![0x62, 0x12, 0x7d, 0x26, 0xa0, 0x14, 0x0f]), // MASM ...??? assembles vpscatter wrong???
+ "vpscatterdd dword ptr [r15 + zmm25]{k6}, zmm10" => Some(vec![0x62, 0x12, 0x7d, 0x46, 0xa0, 0x14, 0x0f]), // MASM ...??? assembles vpscatter wrong???
+ "vpscatterdq qword ptr [r15 + xmm25]{k6}, xmm10" => Some(vec![0x62, 0x12, 0xfd, 0x46, 0xa0, 0x14, 0x0f]), // MASM ...??? assembles vpscatter wrong???
+ "vpscatterqd dword ptr [r15 + ymm25]{k6}, ymm10" => Some(vec![0x62, 0x12, 0x7d, 0x46, 0xa1, 0x14, 0x0f]), // MASM ...??? assembles vpscatter wrong???
+ "vpscatterqq qword ptr [r15 + zmm25]{k6}, zmm10" => Some(vec![0x62, 0x12, 0xfd, 0x46, 0xa1, 0x14, 0x0f]), // MASM ...??? assembles vpscatter wrong???
+ // dumpbin doesn't know how to decode, and masm doesn't know how to *en*code, ud0.
+ "ud0 eax, ecx" => Some(vec![0x66, 0x0f, 0xff, 0xc1]),
+ "ud0 eax, dword ptr [rcx]" => Some(vec![0x66, 0x0f, 0xff, 0x01]),
+ "ud0 r8d, r9d" => Some(vec![0x66, 0x4f, 0x0f, 0xff, 0xc1]),
+ "ud0 r13d, dword ptr [rbx - 54h]" => Some(vec![0x4c, 0x0f, 0xff, 0x6b, 0xac]),
+ // masm seems to not know about fstpnce/fstp1 at all. since this is an undocumented instruction anyway, assemble it ourselves..
+ "fstpnce st(3), st(0)" => Some(vec![0xd9, 0xdb]),
+ // masm inserts a wait prefix here..
+ "feni" => Some(vec![0xdb, 0xe0]),
+ "fdisi" => Some(vec![0xdb, 0xe1]),
+ "fsetpm" => Some(vec![0xdb, 0xe4]),
+ _other => None,
+ }
+ }
+ };
+
+ if let Some(replacement) = replacement {
+ return Ok(replacement);
+ }
+
let mut source = String::new();
match codeness {