diff options
| author | iximeow <me@iximeow.net> | 2025-10-19 20:50:09 +0000 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2025-10-19 23:29:05 +0000 | 
| commit | 153b5b3383d016a8a1440b2b932815efed25f847 (patch) | |
| tree | dd861216ad571e479b74a8bb48cab0c35c091592 | |
| parent | d1fefcc4461363b30f69b3cab386254c77de08e0 (diff) | |
tag instruction decode fixes, differential test precision
| -rw-r--r-- | CHANGELOG | 7 | ||||
| -rw-r--r-- | differential-tests/tests/capstone-differential.rs | 73 | ||||
| -rw-r--r-- | src/armv8/a64.rs | 20 | ||||
| -rw-r--r-- | tests/armv8/a64.rs | 34 | 
4 files changed, 119 insertions, 15 deletions
| @@ -1,3 +1,10 @@ +## 0.3.3 + +* fix some issues with tag instruction decoding +  - the raw tag offset was reported, rather than the shifted-by-four value that +    corresponds to a real address +  - ldg was displayed as "ldm" (thank you @martin-fink!) +  ## 0.3.2  * fully support PAC instructions diff --git a/differential-tests/tests/capstone-differential.rs b/differential-tests/tests/capstone-differential.rs index 3602ac5..9f85e83 100644 --- a/differential-tests/tests/capstone-differential.rs +++ b/differential-tests/tests/capstone-differential.rs @@ -523,16 +523,6 @@ fn capstone_differential() {                          panic!("text: {}, bytes: {:?}", yax_text, bytes);                      } -                    if cs_text.contains("stz2g ") || cs_text.contains("stzg ") || cs_text.contains("st2g ") || cs_text.contains("stg ") || cs_text.contains("irg ") || cs_text.contains("ssbb") { -                        // TODO yax might not scale the offset right? -                        continue; -                    } - -                    if cs_text.contains("ldg ") { -                        // TODO: yax says ldm, cs says ldg -                        continue; -                    } -                      if cs_text.contains("dfb ") {                          // TODO: yax and cs disagree?                          continue; @@ -683,9 +673,9 @@ fn capstone_differential() {                          if cs_text.starts_with("sev ") {                              return true;                          } -//                        if yax_text.starts_with("hint ") { -//                            return true; -//                        } +                        if yax_text.starts_with("hint ") { +                            return true; +                        }                          // fmlal v0.2s, v0.4h, v0.h[0] != fmlal v0.2s, v0.2h, v0.h[0]. 
bytes: [0,                          // 0, 80, f] @@ -694,13 +684,68 @@ fn capstone_differential() {                              return true;                          } +                        if cs_text.starts_with("irg") && yax_text.starts_with(cs_text) && yax_text.ends_with(", xzr") { +                            // a trailing xzr is implicit in irg if omitted in the instruction +                            // text. yax includes xzr in this case, capstone does not. +                            return true; +                        } + +                        const TAG_INSTRUCTIONS: [&'static str; 8] = [ +                            "stzgm", "stg", "ldg", "stzg", +                            "stgm", "st2g", "ldgm", "stz2g" +                        ]; + +                        for mnemonic in TAG_INSTRUCTIONS { +                            if parsed_yax.opcode != mnemonic || parsed_cs.opcode != mnemonic { +                                continue; +                            } + +                            for (yax_op, cs_op) in parsed_yax.operands.iter().zip(parsed_cs.operands.iter()) { +                                if yax_op == cs_op { +                                    continue; +                                } +                                match (yax_op, cs_op) { +                                    ( +                                        Some(ParsedOperand::MemoryWithOffset { +                                            base: cs_base, offset: Some(cs_offset), writeback: cs_writeback +                                        }), +                                        Some(ParsedOperand::MemoryWithOffset { +                                            base: yax_base, offset: Some(yax_offset), writeback: yax_writeback +                                        }) +                                    ) => { +                                        if cs_base != yax_base || cs_writeback != yax_writeback { +                                           
 // a pretty fundamental decode mismatch.. +                                            return false; +                                        } + +                                        if *cs_offset != *yax_offset && *yax_offset < 0 { +                                            // capstone decodes offsets as an unsigned integer, not +                                            // clamping to the signed range -4096 to 4080. yaxpeax +                                            // decodes the operand into this range. +                                            if (!*yax_offset + 1) & 0x1fff != *cs_offset { +                                                return false; +                                            } +                                        } +                                    } +                                    _ => { +                                        // some operands differ +                                        return false; +                                    } +                                } +                            } + +                            // operand matching above looks OK, opcodes are the same, we're good +                            // here. +                            return true; +                        } +                          return false;                      }  //                    eprintln!("{}", yax_text);                      if !acceptable_match(&yax_text, &cs_text) {                          eprintln!("disassembly mismatch: {} != {}. 
bytes: {:x?}", yax_text, cs_text, bytes); -//                        std::process::abort(); +                        std::process::abort();                      } else {                          stats.good.fetch_add(1, Ordering::Relaxed);                      } diff --git a/src/armv8/a64.rs b/src/armv8/a64.rs index 56d9ef2..8775d77 100644 --- a/src/armv8/a64.rs +++ b/src/armv8/a64.rs @@ -1263,7 +1263,10 @@ pub enum Opcode {      DSB(u8),      DMB(u8),      SB, +    #[deprecated(since = "0.4.0", note = "i spelled `ssbb` incorrectly.")]      SSSB, +    PSSBB, +    SSBB,      HINT,      CLREX,      CSEL, @@ -1867,6 +1870,10 @@ impl Display for Opcode {              Opcode::MRS => "mrs",              Opcode::ISB => "isb",              Opcode::SB => "sb", +            Opcode::SSBB => "ssbb", +            Opcode::PSSBB => "pssbb", +            // this arm should never, never, never be hit +            #[allow(deprecated)]              Opcode::SSSB => "sssb",              Opcode::CLREX => "clrex",              Opcode::CSEL => "csel", @@ -8669,6 +8676,9 @@ impl Decoder<ARMv8> for InstDecoder {                              let Rt = ((word >> 0) & 0b11111) as u16;                              let simm = (((imm9 as i16) << 7) >> 7) as i32; +                            // tag granularity is 16 bytes, so tags are encoded with the low four +                            // (`LOG2_TAG_GRANULE `) zeroes shifted out for all tag instructions. 
+                            let simm = simm << 4;                              let opcode = &[                                  Opcode::STZGM, Opcode::STG, Opcode::STG, Opcode::STG, @@ -10597,7 +10607,15 @@ impl Decoder<ARMv8> for InstDecoder {                                                      },                                                      0b100 => {                                                          if CRm == 0b0000 { -                                                            inst.opcode = Opcode::SSSB; +                                                            inst.opcode = Opcode::SSBB; +                                                            inst.operands = [ +                                                                Operand::Nothing, +                                                                Operand::Nothing, +                                                                Operand::Nothing, +                                                                Operand::Nothing, +                                                            ]; +                                                        } else if CRm == 0b0100 { +                                                            inst.opcode = Opcode::PSSBB;                                                              inst.operands = [                                                                  Operand::Nothing,                                                                  Operand::Nothing, diff --git a/tests/armv8/a64.rs b/tests/armv8/a64.rs index baf841b..81d4a2e 100644 --- a/tests/armv8/a64.rs +++ b/tests/armv8/a64.rs @@ -83,6 +83,21 @@ fn test_barrier() {      test_display([0xbf, 0x3f, 0x03, 0xd5], "dmb sy");      // only with FEAT_SB      test_display([0xff, 0x30, 0x03, 0xd5], "sb"); + +    test_display([0x9f, 0x34, 0x03, 0xd5], "pssbb"); + +    // when printing the instruction the third operand defaults to xzr if omitted, so yax probably +    // could/should 
omit it. but it's not *wrong*.. +    test_display([0x00, 0x10, 0xdf, 0x9a], "irg x0, x0, xzr"); +    test_display([0x90, 0x10, 0xdf, 0x9a], "irg x16, x4, xzr"); +    test_display([0x90, 0x10, 0xcf, 0x9a], "irg x16, x4, x15"); + +    test_display([0x00, 0x10, 0x60, 0xd9], "ldg x0, [x0, #0x10]"); +    test_display([0x90, 0x10, 0x60, 0xd9], "ldg x16, [x4, #0x10]"); +    test_display([0x90, 0x90, 0x60, 0xd9], "ldg x16, [x4, #0x90]"); +    // the immediate offset in tag instructions is a signed offset in the range of -4096 to 4080. +    // yax decodes it as signed in this range, capstone does not. +    test_display([0x90, 0x90, 0x7f, 0xd9], "ldg x16, [x4, #-0x70]");  }  #[test] @@ -5026,3 +5041,22 @@ fn test_bitfield() {      assert!(errs.is_empty());  } + +#[test] +fn test_tags() { +    const TESTS: &[([u8; 4], &'static str)] = &[ +        ([0x00, 0x10, 0x60, 0xd9], "ldg x0, [x0, #0x10]"), +        ([0x00, 0x20, 0x60, 0xd9], "ldg x0, [x0, #0x20]"), +        ([0x00, 0x21, 0x60, 0xd9], "ldg x0, [x8, #0x20]"), +        ([0x03, 0x21, 0x60, 0xd9], "ldg x3, [x8, #0x20]"), +        ([0x03, 0x21, 0x7f, 0xd9], "ldg x3, [x8, #-0xe0]"), +    ]; + +    let errs = run_tests(TESTS); + +    for err in errs.iter() { +        println!("{}", err); +    } + +    assert!(errs.is_empty()); +} | 
