diff options
| author | iximeow <me@iximeow.net> | 2024-03-16 12:09:15 -0700 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2024-03-16 12:09:15 -0700 | 
| commit | 507a1c14b335a273304070289cb35a4bef7d1de3 (patch) | |
| tree | 72b46bd1647d978046e2f0df98497c02f8794dda /differential-tests | |
| parent | 97c724d483c309b95cba75dae3445b069e8b7915 (diff) | |
multithread differential disassembly and support pc-relative operands, remove a few more exceptions
Diffstat (limited to 'differential-tests')
| -rw-r--r-- | differential-tests/tests/capstone-differential.rs | 558 | 
1 files changed, 297 insertions, 261 deletions
| diff --git a/differential-tests/tests/capstone-differential.rs b/differential-tests/tests/capstone-differential.rs index 8457bce..cb97fef 100644 --- a/differential-tests/tests/capstone-differential.rs +++ b/differential-tests/tests/capstone-differential.rs @@ -4,17 +4,21 @@  use capstone::prelude::*;  use yaxpeax_arch::{Arch, Decoder}; +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering};  use std::num::ParseIntError;  #[derive(Debug)]  enum ParsedOperand {      Register { size: char, num: u8 },      Memory(String), +    MemoryWithOffset { base: String, offset: u32, writeback: bool },      SIMDRegister { size: char, num: u8 },  //    SIMDRegisterElements { num: u8, elems: u8, elem_size: char },  //    SIMDRegisterElement { num: u8, elem_size: char, elem: u8 },      SIMDElementLane { elem: String, lane_selector: u8 },      Immediate(i64), +    PCRel(i64),      Float(f64),      Other(String),      RegisterFamily(String), @@ -31,9 +35,28 @@ impl PartialEq for ParsedOperand {              (Memory(l), Memory(r)) => {                  l == r              }, +            ( +                MemoryWithOffset { base: base_l, offset: offset_l, writeback: writeback_l }, +                MemoryWithOffset { base: base_r, offset: offset_r, writeback: writeback_r }, +            ) => { +                base_l == base_r && +                offset_l == offset_r && +                writeback_l == writeback_r +            },              (Immediate(l), Immediate(r)) => {                  l == r              }, +            (PCRel(l), PCRel(r)) => { +                l == r +            }, +            (Immediate(l), PCRel(r)) => { +                // assume pc=0 as capstone does by default +                *l == 0 + r +            }, +            (PCRel(l), Immediate(r)) => { +                // assume pc=0 as capstone does by default +                0 + l == *r +            },              (Float(l), Float(r)) => {                  l.to_ne_bytes() == r.to_ne_bytes()              }, @@ -121,6 +144,16 @@ impl ParsedOperand {                  let imm = parse_hex_or_dec(imm_str);                  (ParsedOperand::Immediate(imm), end)              } +        } else if s.as_bytes()[0] == b'$' { +            let end = s.find(',').unwrap_or(s.len()); +            let imm_str = &s[1..end]; +            let imm_str = if imm_str.starts_with("+") { +                &imm_str[1..] +            } else { +                imm_str +            }; +            let imm = parse_hex_or_dec(imm_str); +            (ParsedOperand::PCRel(imm), end)          } else if s.as_bytes()[0] == b'[' {              let mut end = s.find(']').map(|x| x + 1).unwrap_or(s.len());              if s.as_bytes().get(end) == Some(&b'!') { @@ -244,324 +277,327 @@ impl ParsedDisassembly {  #[test]  fn capstone_differential() { -    let cs = Capstone::new() -        .arm64() -        .mode(capstone::arch::arm64::ArchMode::Arm) -        .build() -        .expect("can create capstone"); - -    let yax = <yaxpeax_arm::armv8::a64::ARMv8 as Arch>::Decoder::default(); - -    let mut mismatch = 0; -    let mut good = 0; -    let mut yax_reject = 0; -    let mut missed_incomplete = 0; - -    for i in 0x00_00_00_00..u32::MAX { -        let bytes = &i.to_le_bytes(); -        if i % 0x00_10_00_00 == 0 { -            eprintln!("case {:08x}", i); -        } +    struct Stats { +        mismatch: AtomicUsize, +        good: AtomicUsize, +        yax_reject: AtomicUsize, +        missed_incomplete: AtomicUsize, +    } +    let mut yax_reject = AtomicUsize::new(0); +    let mut missed_incomplete = AtomicUsize::new(0); + +    let stats = Stats { +        mismatch: AtomicUsize::new(0), +        good: AtomicUsize::new(0), +        yax_reject: AtomicUsize::new(0), +        missed_incomplete: AtomicUsize::new(0), +    }; + +    fn test_range(start: u64, end: u64, stats: Arc<Stats>) { +        let cs = Capstone::new() +            .arm64() +            .mode(capstone::arch::arm64::ArchMode::Arm) +            .build() +            .expect("can create capstone"); + +        let yax = <yaxpeax_arm::armv8::a64::ARMv8 as Arch>::Decoder::default(); + +        for i in start..=end { +            let i = i as u32; +            let bytes = &i.to_le_bytes(); +            if i % 0x00_10_00_00 == 0 { +                eprintln!("case {:08x}", i); +            } -        let res = cs.disasm_all(bytes, 0); -        if let Ok(insts) = &res { -            let insts_slice = insts.as_ref(); -            if insts_slice.len() == 1 { -                // then yax should also succeed.. -                // and it should only be one instruction -                let cs_text = format!("{}", insts_slice[0]); -                let cs_text = &cs_text[5..]; - -                let yax_res = yax.decode(&mut yaxpeax_arch::U8Reader::new(bytes)); -                let yax_text = if let Ok(inst) = yax_res { -                    format!("{}", inst) -                } else if let Err(yaxpeax_arm::armv8::a64::DecodeError::IncompleteDecoder) = yax_res { -                    missed_incomplete += 1; -                    continue; -                } else { -                    panic!("yax errored where capstone succeeded. cs text: '{}', bytes: {:x?}", cs_text, bytes); -                }; +            let res = cs.disasm_all(bytes, 0); +            if let Ok(insts) = &res { +                let insts_slice = insts.as_ref(); +                if insts_slice.len() == 1 { +                    // then yax should also succeed.. +                    // and it should only be one instruction +                    let cs_text = format!("{}", insts_slice[0]); +                    let cs_text = &cs_text[5..]; + +                    let yax_res = yax.decode(&mut yaxpeax_arch::U8Reader::new(bytes)); +                    let yax_text = if let Ok(inst) = yax_res { +                        format!("{}", inst) +                    } else if let Err(yaxpeax_arm::armv8::a64::DecodeError::IncompleteDecoder) = yax_res { +                        stats.missed_incomplete.fetch_add(1, Ordering::SeqCst); +                        continue; +                    } else { +                        panic!("yax errored where capstone succeeded. cs text: '{}', bytes: {:x?}", cs_text, bytes); +                    }; -                fn acceptable_match(yax_text: &str, cs_text: &str) -> bool { -                    if yax_text == cs_text { -                        return true; -                    } +                    fn acceptable_match(yax_text: &str, cs_text: &str) -> bool { +                        if yax_text == cs_text { +                            return true; +                        } -                    let parsed_yax = ParsedDisassembly::parse(yax_text); -                    let parsed_cs = ParsedDisassembly::parse(cs_text); +                        let parsed_yax = ParsedDisassembly::parse(yax_text); +                        let parsed_cs = ParsedDisassembly::parse(cs_text); -                    if parsed_yax == parsed_cs { -                        return true; -                    } - -//                    eprintln!("yax: {} -> {:?}", yax_text, parsed_yax); -//                    eprintln!("cs: {} -> {:?}", cs_text, parsed_cs); +                        if parsed_yax == parsed_cs { +                            return true; +                        } -                    if cs_text -                        .replace("uxtw #0", "uxtw") -                        .replace("uxtx #0", "uxtx") == yax_text { +                        if false { +                            eprintln!("yax: {} -> {:?}", yax_text, parsed_yax); +                            eprintln!("cs: {} -> {:?}", cs_text, parsed_cs); +                        } -                        return true; -                    } +                        if cs_text +                            .replace("uxtw #0", "uxtw") +                            .replace("uxtx #0", "uxtx") == yax_text { -                    // capstone discards uxtw in some circumstances for reasons i don't yet -                    // know -                    if let Some(yax_text) = yax_text.strip_suffix(", uxtw") { -                        if yax_text == cs_text {                              return true;                          } -                    } -                    if let Some(cs_text) = cs_text.strip_suffix(", uxtw") { -                        if yax_text == cs_text { -                            return true; + +                        // capstone discards uxtw in some circumstances for reasons i don't yet +                        // know +                        if let Some(yax_text) = yax_text.strip_suffix(", uxtw") { +                            if yax_text == cs_text { +                                return true; +                            } +                        } +                        if let Some(cs_text) = cs_text.strip_suffix(", uxtw") { +                            if yax_text == cs_text { +                                return true; +                            }                          } -                    } -                    if yax_text.replace("lsl", "uxtw") == cs_text { -                        return true; -                    } -                    if let Some(yax_text) = yax_text.strip_suffix(" #0") { -                        if yax_text == cs_text { +                        if yax_text.replace("lsl", "uxtw") == cs_text {                              return true;                          } -                    } -                    if let Some(cs_text) = cs_text.strip_suffix(" #0") { -                        if yax_text == cs_text { + +                        if cs_text.starts_with("ubfx ") {                              return true;                          } -                    } -                    // TODO: what kind of cases is this for? -                    if cs_text.starts_with(yax_text) && cs_text.ends_with("000") { -                        return true; -                    }; -                    if cs_text.starts_with("ubfx ") { -                        return true; -                    } +                        if yax_text.starts_with("adrp ") { +                            return true; +                        } -                    if yax_text.starts_with("adrp ") { -                        return true; -                    } +                        if yax_text.starts_with("adr ") { +                            return true; +                        } -                    if yax_text.starts_with("adr ") { -                        return true; -                    } +                        // some instructions like `11400000` have an immeidate lsl #12 as their +                        // last operand. yax normalizes this to an unshifted `imm << 12`, capstone +                        // just prints lsl #12. +                        if cs_text.starts_with(yax_text) && cs_text.ends_with(", lsl #12") { +                            return true; +                        } -                    if yax_text.starts_with("b ") { -                        return true; -                    } +                        // yax and capstone deal with immediates in `mov reg, imm` a little +                        // differently. they're correct, but displayed differently (0xffffffff +                        // instead of -1) +                        if cs_text.starts_with("mov ") && yax_text.starts_with("mov ") { +                            return true; +                        } -                    if yax_text.starts_with("bl ") { -                        return true; -                    } +                        // capstone just shows empty string for unrecognized prf{,u}m immediates, +                        // leaving broken text +                        if cs_text.starts_with("prfum ") && yax_text.starts_with("prfum ") { +                            return true; +                        } +                        if cs_text.starts_with("prfm ") && yax_text.starts_with("prfm ") { +                            return true; +                        } -                    // some instructions like `11400000` have an immeidate lsl #12 as their -                    // last operand. yax normalizes this to an unshifted `imm << 12`, capstone -                    // just prints lsl #12. -                    if cs_text.starts_with(yax_text) && cs_text.ends_with(", lsl #12") { -                        return true; -                    } +                        // don't totally understand aliasing rules for `ORR (immediate)` and mov.. +                        if cs_text.starts_with("mov ") && yax_text.starts_with("orr ") || +                            cs_text.starts_with("orr ") && yax_text.starts_with("mov ") +                        { +                            return true; +                        } -                    // yax and capstone deal with immediates in `mov reg, imm` a little -                    // differently. they're correct, but displayed differently (0xffffffff -                    // instead of -1) -                    if cs_text.starts_with("mov ") && yax_text.starts_with("mov ") { -                        return true; -                    } +                        // yax notmalizes movn to mov +                        if cs_text.starts_with("movn ") && yax_text.starts_with("mov ") { +                            return true; +                        } -                    // capstone just shows empty string for unrecognized prf{,u}m immediates, -                    // leaving broken text -                    if cs_text.starts_with("prfum ") && yax_text.starts_with("prfum ") { -                        return true; -                    } -                    if cs_text.starts_with("prfm ") && yax_text.starts_with("prfm ") { -                        return true; -                    } +                        // yax notmalizes movz to mov +                        if cs_text.starts_with("movz ") && yax_text.starts_with("mov ") { +                            return true; +                        } -                    // don't totally understand aliasing rules for `ORR (immediate)` and mov.. -                    if cs_text.starts_with("mov ") && yax_text.starts_with("orr ") || -                        cs_text.starts_with("orr ") && yax_text.starts_with("mov ") -                    { -                        return true; -                    } +                        // differences on displaying immediates.. +                        let new_cs_text = cs_text +                            .replace("#0x", "") +                            .replace("#-0x", "") +                            .replace("#-", "") +                            .replace("#", ""); +                        let new_yax_text = yax_text +                            .replace("#0x", "") +                            .replace("#-0x", "") +                            .replace("#-", "") +                            .replace("#", "") +                            .replace("$+0x", ""); +                        if new_cs_text == new_yax_text { +                            return true; +                        } -                    // yax notmalizes movn to mov -                    if cs_text.starts_with("movn ") && yax_text.starts_with("mov ") { -                        return true; -                    } +                        if cs_text.len() > 7 && yax_text.len() > 7 { +                            if &cs_text[..7] == &yax_text[..7] && (cs_text.contains("#-") || yax_text.contains("#-")) { +                                return true; +                            } +                            if &cs_text[..7] == &yax_text[..7] && (cs_text.contains("shll") || yax_text.contains("shll")) { +                                return true; +                            } +                        } -                    // yax notmalizes movz to mov -                    if cs_text.starts_with("movz ") && yax_text.starts_with("mov ") { -                        return true; -                    } +                        if parsed_yax.opcode == "mov" && parsed_cs.opcode == "dup" { +                            if parsed_yax.operands == parsed_cs.operands { +                                return true; +                            } +                        } +    //                    if cs_text.starts_with("dup") && yax_text.starts_with("mov ") && cs_text.replace("dup ", "mov ") == yax_text { +    //                        return true; +    //                    } +                        // capstone bug! e0030033 is `bfxil w0, wzr, #0, #1`, but capstone picks +                        // the bfc alias instead. skip these, generally. +                        if yax_text.starts_with("bfxil") && (cs_text.starts_with("bfc") || cs_text.starts_with("bfi")) { +                            return true; +                        } -                    // differences on displaying immediates.. -                    let new_cs_text = cs_text -                        .replace("#0x", "") -                        .replace("#-0x", "") -                        .replace("#-", "") -                        .replace("#", ""); -                    let new_yax_text = yax_text -                        .replace("#0x", "") -                        .replace("#-0x", "") -                        .replace("#-", "") -                        .replace("#", "") -                        .replace("$+0x", ""); -                    if new_cs_text == new_yax_text { -                        return true; -                    } +                        if cs_text.len() > 10 && yax_text.len() > 10 { +                            // eh they're probably the same but yax has a signed hex and capstone has +                            // unsigned +                            if &cs_text[..10] == &yax_text[..10] && cs_text.contains("ffffffff") && yax_text.contains("#-0x") { +                                return true; +                            } +                            // yax, for reg + shifted-reg operands, does not omit shift amount +                            if &cs_text[..10] == &yax_text[..10] && yax_text.contains(" #0x0]") { +                                return true; +                            } + +                            // postindex offsets are base 10 in capstone sometimes? +                            if yax_text.contains("], #0x") && cs_text.contains("], #") && +                                &cs_text[..20] == &yax_text[..20] { +                                return true; +                            } +                        } -                    if cs_text.len() > 7 && yax_text.len() > 7 { -                        if &cs_text[..7] == &yax_text[..7] && (cs_text.contains("#-") || yax_text.contains("#-")) { +                        // yax omits `uxt{w,x}` for extended reg where extension matches the +                        // register size +                        if cs_text.starts_with(yax_text) && (cs_text.ends_with("uxtx") || cs_text.ends_with("uxtw")) {                              return true;                          } -                        if &cs_text[..7] == &yax_text[..7] && (cs_text.contains("shll") || yax_text.contains("shll")) { + +                        if cs_text.starts_with(yax_text) && cs_text.ends_with("0") {                              return true;                          } -                    } -                    // capstone doesn't show relative offsets, always makes absolute for some -                    // ip -                    if yax_text.contains("$-0x") || yax_text.contains("$+0x") { -                        return true; -                    } - -                    if yax_text.contains("esb") { -                        return true; -                    } -                    if yax_text.contains("movi") { -                        return true; -                    } +                        // S being present or not has no bearing on the shift amount, #0 either +                        // way. +                        // yax will not print shift because of its ineffectual nature. +                        if (cs_text.starts_with("strb") || cs_text.starts_with("ldrb") || cs_text.starts_with("ldrsb") || cs_text.starts_with("ldr b") || cs_text.starts_with("str b")) && cs_text.contains(" lsl #0]") { +                            return true; +                        } -                    if parsed_yax.opcode == "mov" && parsed_cs.opcode == "dup" { -                        if parsed_yax.operands == parsed_cs.operands { +                        if cs_text == yax_text.replace(" #0", "") {                              return true;                          } -                    } -//                    if cs_text.starts_with("dup") && yax_text.starts_with("mov ") && cs_text.replace("dup ", "mov ") == yax_text { -//                        return true; -//                    } -                    // capstone bug! e0030033 is `bfxil w0, wzr, #0, #1`, but capstone picks -                    // the bfc alias instead. skip these, generally. -                    if yax_text.starts_with("bfxil") && (cs_text.starts_with("bfc") || cs_text.starts_with("bfi")) { -                        return true; -                    } -                    if cs_text.len() > 10 && yax_text.len() > 10 { -                        // eh they're probably the same but yax has a signed hex and capstone has -                        // unsigned -                        if &cs_text[..10] == &yax_text[..10] && cs_text.contains("ffffffff") && yax_text.contains("#-0x") { +                        // yax uses lsl instead of uxtx when the reg size is uxtx. same for +                        // uxtw/w-regs +                        if cs_text.replace("uxtx", "lsl") == yax_text || +                            cs_text.replace("uxtw", "lsl") == yax_text {                              return true;                          } -                        // yax, for reg + shifted-reg operands, does not omit shift amount -                        if &cs_text[..10] == &yax_text[..10] && yax_text.contains(" #0x0]") { + +                        // yax shows dcps{1,2} operand, capstone does not? +                        if yax_text.starts_with("dcps") {                              return true;                          } -                        // postindex offsets are base 10 in capstone sometimes? -                        if yax_text.contains("], #0x") && cs_text.contains("], #") && -                            &cs_text[..20] == &yax_text[..20] { +                        if cs_text.starts_with("msr ") {                              return true;                          } -                    } -                    // yax omits `uxt{w,x}` for extended reg where extension matches the -                    // register size -                    if cs_text.starts_with(yax_text) && (cs_text.ends_with("uxtx") || cs_text.ends_with("uxtw")) { -                        return true; -                    } +                        // yax does not handle aliases for msr instructions yet +                        if yax_text.starts_with("msr ") { +                            return true; +                        } -                    if cs_text.starts_with(yax_text) && cs_text.ends_with("0") { -                        return true; -                    } +                        // some kinda bug to deal with hint value width +                        if cs_text.starts_with("hint ") { +                            return true; +                        } +                        if cs_text.starts_with("dsb ") { +                            return true; +                        } +                        if cs_text.starts_with("clrex ") { +                            return true; +                        } +                        if yax_text.starts_with("sys ") { +                            return true; +                        } +                        if cs_text.starts_with("yield ") { +                            return true; +                        } +                        if cs_text.starts_with("wfe ") { +                            return true; +                        } +                        if cs_text.starts_with("wfi ") { +                            return true; +                        } +                        if cs_text.starts_with("sev ") { +                            return true; +                        } +                        if cs_text.starts_with("mrs ") { +                            return true; +                        } +                        if cs_text.starts_with("sysl ") { +                            return true; +                        } +                        if yax_text.starts_with("hint ") { +                            return true; +                        } -                    // S being present or not has no bearing on the shift amount, #0 either -                    // way. -                    // yax will not print shift because of its ineffectual nature. -                    if (cs_text.starts_with("strb") || cs_text.starts_with("ldrb") || cs_text.starts_with("ldrsb") || cs_text.starts_with("ldr b") || cs_text.starts_with("str b")) && cs_text.contains(" lsl #0]") { -                        return true; -                    } +                        if yax_text == &cs_text[..cs_text.len() - 1] && cs_text.ends_with(" ") { +                            return true; +                        } -                    if cs_text == yax_text.replace(" #0", "") { -                        return true; +                        return false;                      } -                    // yax uses lsl instead of uxtx when the reg size is uxtx. same for -                    // uxtw/w-regs -                    if cs_text.replace("uxtx", "lsl") == yax_text || -                        cs_text.replace("uxtw", "lsl") == yax_text { -                        return true; +    //                eprintln!("{}", yax_text); +                    if !acceptable_match(&yax_text, cs_text) { +                        eprintln!("disassembly mismatch: {} != {}. bytes: {:x?}", yax_text, cs_text, bytes); +                        std::process::abort(); +                    } else { +                        stats.good.fetch_add(1, Ordering::SeqCst);                      } +                } else { +                    // yax should also fail? +                } +            } +        } +    } -                    // yax shows dcps{1,2} operand, capstone does not? -                    if yax_text.starts_with("dcps") { -                        return true; -                    } +    const NR_THREADS: u64 = 64; -                    if cs_text.starts_with("msr ") { -                        return true; -                    } +    let range_size = (u32::MAX as u64 + 1) / NR_THREADS; -                    // yax does not handle aliases for msr instructions yet -                    if yax_text.starts_with("msr ") { -                        return true; -                    } +    let mut handles = Vec::new(); -                    // some kinda bug to deal with hint value width -                    if cs_text.starts_with("hint ") { -                        return true; -                    } -                    if cs_text.starts_with("dsb ") { -                        return true; -                    } -                    if cs_text.starts_with("clrex ") { -                        return true; -                    } -                    if yax_text.starts_with("sys ") { -                        return true; -                    } -                    if cs_text.starts_with("yield ") { -                        return true; -                    } -                    if cs_text.starts_with("wfe ") { -                        return true; -                    } -                    if cs_text.starts_with("wfi ") { -                        return true; -                    } -                    if cs_text.starts_with("sev ") { -                        return true; -                    } -                    if cs_text.starts_with("mrs ") { -                        return true; -                    } -                    if cs_text.starts_with("sysl ") { -                        return true; -                    } -                    if yax_text.starts_with("hint ") { -                        return true; -                    } +    let stats = Arc::new(stats); -                    if yax_text == &cs_text[..cs_text.len() - 1] && cs_text.ends_with(" ") { -                        return true; -                    } +    test_range(0x54_80_00_00, 0x54_80_00_10, Arc::clone(&stats)); -                    return false; -                } +    for i in 0..NR_THREADS { +        let stats = Arc::clone(&stats); +        let handle = std::thread::spawn(move || test_range(i * range_size, i * range_size + range_size, stats)); +        handles.push(handle); +    } -//                eprintln!("{}", yax_text); -                if !acceptable_match(&yax_text, cs_text) { -                    panic!("disassembly mismatch: {} != {}. bytes: {:x?}", yax_text, cs_text, bytes); -                } else { -                    good += 1; -                } -            } else { -                // yax should also fail? -            } -        } +    while let Some(handle) = handles.pop() { +        handle.join().unwrap();      } -    eprintln!("match:      {}", good); -    eprintln!("mismatch:   {}", mismatch); -    eprintln!("bad reject: {}", yax_reject); -    eprintln!("incomplete: {}", missed_incomplete); + +    eprintln!("match:      {}", stats.good.load(Ordering::SeqCst)); +    eprintln!("mismatch:   {}", stats.mismatch.load(Ordering::SeqCst)); +    eprintln!("bad reject: {}", stats.yax_reject.load(Ordering::SeqCst)); +    eprintln!("incomplete: {}", stats.missed_incomplete.load(Ordering::SeqCst));  } | 
