aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2022-01-02 13:52:53 -0800
committeriximeow <me@iximeow.net>2022-01-02 13:52:53 -0800
commit27d01bf05c16c96ad4bd211a29142e7f43069c37 (patch)
treed5d72571c0ef54f186db2694c10c2bf072d9e8bc
parent0bcea6031e3d0b48acb15f6a7dfdab670ec8fcfc (diff)
add differential tests against capstone
whoo boy these are gross sorry about that
-rw-r--r--Cargo.toml7
-rw-r--r--differential-tests/Cargo.toml12
-rw-r--r--differential-tests/tests/capstone-differential.rs307
3 files changed, 326 insertions, 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 3f266c9..b576755 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,13 @@ description = "arm decoders for the yaxpeax project"
keywords = ["disassembler", "decoder", "armv7", "armv8"]
edition = "2018"
+[lib]
+
+[workspace]
+members = [
+ "differential-tests"
+]
+
[dependencies]
yaxpeax-arch = { version = "0.2.2", default-features = false, features = [] }
bitvec = "0.19"
diff --git a/differential-tests/Cargo.toml b/differential-tests/Cargo.toml
new file mode 100644
index 0000000..419f230
--- /dev/null
+++ b/differential-tests/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "differential-tests"
+version = "0.1.0"
+authors = ["iximeow <yax@iximeow.net>"]
+edition = "2018"
+description = "differential testing between yaxpeax-arm and other disassemblers"
+
+[dependencies]
+yaxpeax-arm = { path = "../", version = "*" }
+yaxpeax-arch = { version = "0.2.2", default-features = false, features = [] }
+
+capstone = "*"
diff --git a/differential-tests/tests/capstone-differential.rs b/differential-tests/tests/capstone-differential.rs
new file mode 100644
index 0000000..73b0f2b
--- /dev/null
+++ b/differential-tests/tests/capstone-differential.rs
@@ -0,0 +1,307 @@
+//! this is a distinct set of tests from the `yaxpeax-arm` root tests because i don't want extra
+//! (optional!) dependencies in the disassembler's dependency tree.
+
+use capstone::prelude::*;
+use yaxpeax_arch::{Arch, Decoder};
+
+#[test]
+fn capstone_differential() {
+ let cs = Capstone::new()
+ .arm64()
+ .mode(capstone::arch::arm64::ArchMode::Arm)
+ .build()
+ .expect("can create capstone");
+
+ let yax = <yaxpeax_arm::armv8::a64::ARMv8 as Arch>::Decoder::default();
+
+ let mut mismatch = 0;
+ let mut good = 0;
+ let mut yax_reject = 0;
+ let mut missed_incomplete = 0;
+
+ for i in 0x00_00_00_00..u32::MAX {
+ let bytes = &i.to_le_bytes();
+ if i % 0x00_10_00_00 == 0 {
+ eprintln!("case {:08x}", i);
+ }
+
+ let res = cs.disasm_all(bytes, 0);
+ if let Ok(insts) = &res {
+ let insts_slice = insts.as_ref();
+ if insts_slice.len() == 1 {
+ // then yax should also succeed..
+ // and it should only be one instruction
+ let cs_text = format!("{}", insts_slice[0]);
+ let cs_text = &cs_text[5..];
+
+ let yax_res = yax.decode(&mut yaxpeax_arch::U8Reader::new(bytes));
+ let yax_text = if let Ok(inst) = yax_res {
+ format!("{}", inst)
+ } else if let Err(yaxpeax_arm::armv8::a64::DecodeError::IncompleteDecoder) = yax_res {
+ missed_incomplete += 1;
+ continue;
+ } else {
+ panic!("yax errored where capstone succeeded. cs text: '{}', bytes: {:x?}", cs_text, bytes);
+ };
+
+ fn acceptable_match(yax_text: &str, cs_text: &str) -> bool {
+ if yax_text == cs_text {
+ return true;
+ }
+
+ if cs_text
+ .replace("uxtw #0", "uxtw")
+ .replace("uxtx #0", "uxtx") == yax_text {
+
+ return true;
+ }
+
+ // capstone discards uxtw in some circumstances for reasons i don't yet
+ // know
+ if yax_text.ends_with("uxtw") &&
+ &yax_text[..yax_text.len() - 6] == cs_text {
+ return true;
+ }
+ if cs_text.ends_with("uxtw") &&
+ &cs_text[..cs_text.len() - 6] == yax_text {
+ return true;
+ }
+ if yax_text.replace("lsl", "uxtw") == cs_text {
+ return true;
+ }
+ if yax_text.ends_with("#0") &&
+ &yax_text[..yax_text.len() - 3] == cs_text {
+ return true;
+ }
+ if cs_text.ends_with("#0") &&
+ &cs_text[..cs_text.len() - 3] == yax_text {
+ return true;
+ }
+ if cs_text.starts_with(yax_text) && cs_text.ends_with("000") {
+ return true;
+ };
+
+ if cs_text.starts_with("ubfx ") {
+ return true;
+ }
+
+ if yax_text.starts_with("adrp ") {
+ return true;
+ }
+
+ if yax_text.starts_with("adr ") {
+ return true;
+ }
+
+ if yax_text.starts_with("b ") {
+ return true;
+ }
+
+ if yax_text.starts_with("bl ") {
+ return true;
+ }
+
+ // some instructions like `11400000` have an immeidate lsl #12 as their
+ // last operand. yax normalizes this to an unshifted `imm << 12`, capstone
+ // just prints lsl #12.
+ if cs_text.starts_with(yax_text) && cs_text.ends_with(", lsl #12") {
+ return true;
+ }
+
+ // yax and capstone deal with immediates in `mov reg, imm` a little
+ // differently. they're correct, but displayed differently (0xffffffff
+ // instead of -1)
+ if cs_text.starts_with("mov ") && yax_text.starts_with("mov ") {
+ return true;
+ }
+
+ // capstone just shows empty string for unrecognized prf{,u}m immediates,
+ // leaving broken text
+ if cs_text.starts_with("prfum ") && yax_text.starts_with("prfum ") {
+ return true;
+ }
+ if cs_text.starts_with("prfm ") && yax_text.starts_with("prfm ") {
+ return true;
+ }
+
+ // don't totally understand aliasing rules for `ORR (immediate)` and mov..
+ if cs_text.starts_with("mov ") && yax_text.starts_with("orr ") ||
+ cs_text.starts_with("orr ") && yax_text.starts_with("mov ")
+ {
+ return true;
+ }
+
+ // yax notmalizes movn to mov
+ if cs_text.starts_with("movn ") && yax_text.starts_with("mov ") {
+ return true;
+ }
+
+ // yax notmalizes movz to mov
+ if cs_text.starts_with("movz ") && yax_text.starts_with("mov ") {
+ return true;
+ }
+
+ // differences on displaying immediates..
+ let new_cs_text = cs_text
+ .replace("#0x", "")
+ .replace("#-0x", "")
+ .replace("#-", "")
+ .replace("#", "");
+ let new_yax_text = yax_text
+ .replace("#0x", "")
+ .replace("#-0x", "")
+ .replace("#-", "")
+ .replace("#", "")
+ .replace("$+0x", "");
+ if new_cs_text == new_yax_text {
+ return true;
+ }
+
+ if cs_text.len() > 7 && yax_text.len() > 7 {
+ if &cs_text[..7] == &yax_text[..7] && (cs_text.contains("#-") || yax_text.contains("#-")) {
+ return true;
+ }
+ if &cs_text[..7] == &yax_text[..7] && (cs_text.contains("shll") || yax_text.contains("shll")) {
+ return true;
+ }
+ }
+ // capstone doesn't show relative offsets, always makes absolute for some
+ // ip
+ if yax_text.contains("$-0x") || yax_text.contains("$+0x") {
+ return true;
+ }
+
+ if yax_text.contains("esb") {
+ return true;
+ }
+
+ if yax_text.contains("movi") {
+ return true;
+ }
+
+ if cs_text.starts_with("dup") && yax_text.starts_with("mov ") && cs_text.replace("dup ", "mov ") == yax_text {
+ return true;
+ }
+ // capstone bug! e0030033 is `bfxil w0, wzr, #0, #1`, but capstone picks
+ // the bfc alias instead. skip these, generally.
+ if yax_text.starts_with("bfxil") && (cs_text.starts_with("bfc") || cs_text.starts_with("bfi")) {
+ return true;
+ }
+
+ if cs_text.len() > 10 && yax_text.len() > 10 {
+ // eh they're probably the same but yax has a signed hex and capstone has
+ // unsigned
+ if &cs_text[..10] == &yax_text[..10] && cs_text.contains("ffffffff") && yax_text.contains("#-0x") {
+ return true;
+ }
+ // yax, for reg + shifted-reg operands, does not omit shift amount
+ if &cs_text[..10] == &yax_text[..10] && yax_text.contains(" #0x0]") {
+ return true;
+ }
+
+ // postindex offsets are base 10 in capstone sometimes?
+ if yax_text.contains("], #0x") && cs_text.contains("], #") &&
+ &cs_text[..20] == &yax_text[..20] {
+ return true;
+ }
+ }
+
+ // yax omits `uxt{w,x}` for extended reg where extension matches the
+ // register size
+ if cs_text.starts_with(yax_text) && (cs_text.ends_with("uxtx") || cs_text.ends_with("uxtw")) {
+ return true;
+ }
+
+ if cs_text.starts_with(yax_text) && cs_text.ends_with("0") {
+ return true;
+ }
+
+ // S being present or not has no bearing on the shift amount, #0 either
+ // way.
+ // yax will not print shift because of its ineffectual nature.
+ if (cs_text.starts_with("strb") || cs_text.starts_with("ldrb") || cs_text.starts_with("ldrsb") || cs_text.starts_with("ldr b") || cs_text.starts_with("str b")) && cs_text.contains(" lsl #0]") {
+ return true;
+ }
+
+ if cs_text == yax_text.replace(" #0", "") {
+ return true;
+ }
+
+ // yax uses lsl instead of uxtx when the reg size is uxtx. same for
+ // uxtw/w-regs
+ if cs_text.replace("uxtx", "lsl") == yax_text ||
+ cs_text.replace("uxtw", "lsl") == yax_text {
+ return true;
+ }
+
+ // yax shows dcps{1,2} operand, capstone does not?
+ if yax_text.starts_with("dcps") {
+ return true;
+ }
+
+ if cs_text.starts_with("msr ") {
+ return true;
+ }
+
+ // yax does not handle aliases for msr instructions yet
+ if yax_text.starts_with("msr ") {
+ return true;
+ }
+
+ // some kinda bug to deal with hint value width
+ if cs_text.starts_with("hint ") {
+ return true;
+ }
+ if cs_text.starts_with("dsb ") {
+ return true;
+ }
+ if cs_text.starts_with("clrex ") {
+ return true;
+ }
+ if yax_text.starts_with("sys ") {
+ return true;
+ }
+ if cs_text.starts_with("yield ") {
+ return true;
+ }
+ if cs_text.starts_with("wfe ") {
+ return true;
+ }
+ if cs_text.starts_with("wfi ") {
+ return true;
+ }
+ if cs_text.starts_with("sev ") {
+ return true;
+ }
+ if cs_text.starts_with("mrs ") {
+ return true;
+ }
+ if cs_text.starts_with("sysl ") {
+ return true;
+ }
+ if yax_text.starts_with("hint ") {
+ return true;
+ }
+
+ if yax_text == &cs_text[..cs_text.len() - 1] && cs_text.ends_with(" ") {
+ return true;
+ }
+
+ return false;
+ }
+
+ if !acceptable_match(&yax_text, cs_text) {
+ panic!("disassembly mismatch: {} != {}. bytes: {:x?}", yax_text, cs_text, bytes);
+ } else {
+ good += 1;
+ }
+ } else {
+ // yax should also fail?
+ }
+ }
+ }
+ eprintln!("match: {}", good);
+ eprintln!("mismatch: {}", mismatch);
+ eprintln!("bad reject: {}", yax_reject);
+ eprintln!("incomplete: {}", missed_incomplete);
+}