aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2025-10-19 20:50:09 +0000
committeriximeow <me@iximeow.net>2025-10-19 23:29:05 +0000
commit153b5b3383d016a8a1440b2b932815efed25f847 (patch)
treedd861216ad571e479b74a8bb48cab0c35c091592
parentd1fefcc4461363b30f69b3cab386254c77de08e0 (diff)
tag instruction decode fixes, differential test precision
-rw-r--r--CHANGELOG7
-rw-r--r--differential-tests/tests/capstone-differential.rs73
-rw-r--r--src/armv8/a64.rs20
-rw-r--r--tests/armv8/a64.rs34
4 files changed, 119 insertions, 15 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 588429a..9958744 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,10 @@
+## 0.3.3
+
+* fix some issues with tag instruction decoding
+ - the raw tag offset was reported, rather than the shifted-by-four value that
+ corresponds to a real address
+ - ldg was displayed as "ldm" (thank you @martin-fink!)
+
## 0.3.2
* fully support PAC instructions
diff --git a/differential-tests/tests/capstone-differential.rs b/differential-tests/tests/capstone-differential.rs
index 3602ac5..9f85e83 100644
--- a/differential-tests/tests/capstone-differential.rs
+++ b/differential-tests/tests/capstone-differential.rs
@@ -523,16 +523,6 @@ fn capstone_differential() {
panic!("text: {}, bytes: {:?}", yax_text, bytes);
}
- if cs_text.contains("stz2g ") || cs_text.contains("stzg ") || cs_text.contains("st2g ") || cs_text.contains("stg ") || cs_text.contains("irg ") || cs_text.contains("ssbb") {
- // TODO yax might not scale the offset right?
- continue;
- }
-
- if cs_text.contains("ldg ") {
- // TODO: yax says ldm, cs says ldg
- continue;
- }
-
if cs_text.contains("dfb ") {
// TODO: yax and cs disagree?
continue;
@@ -683,9 +673,9 @@ fn capstone_differential() {
if cs_text.starts_with("sev ") {
return true;
}
-// if yax_text.starts_with("hint ") {
-// return true;
-// }
+ if yax_text.starts_with("hint ") {
+ return true;
+ }
// fmlal v0.2s, v0.4h, v0.h[0] != fmlal v0.2s, v0.2h, v0.h[0]. bytes: [0,
// 0, 80, f]
@@ -694,13 +684,68 @@ fn capstone_differential() {
return true;
}
+ if cs_text.starts_with("irg") && yax_text.starts_with(cs_text) && yax_text.ends_with(", xzr") {
+ // a trailing xzr is implicit in irg if omitted in the instruction
+ // text. yax includes xzr in this case, capstone does not.
+ return true;
+ }
+
+ const TAG_INSTRUCTIONS: [&'static str; 8] = [
+ "stzgm", "stg", "ldg", "stzg",
+ "stgm", "st2g", "ldgm", "stz2g"
+ ];
+
+ for mnemonic in TAG_INSTRUCTIONS {
+ if parsed_yax.opcode != mnemonic || parsed_cs.opcode != mnemonic {
+ continue;
+ }
+
+ for (yax_op, cs_op) in parsed_yax.operands.iter().zip(parsed_cs.operands.iter()) {
+ if yax_op == cs_op {
+ continue;
+ }
+ match (yax_op, cs_op) {
+ (
+ Some(ParsedOperand::MemoryWithOffset {
+ base: cs_base, offset: Some(cs_offset), writeback: cs_writeback
+ }),
+ Some(ParsedOperand::MemoryWithOffset {
+ base: yax_base, offset: Some(yax_offset), writeback: yax_writeback
+ })
+ ) => {
+ if cs_base != yax_base || cs_writeback != yax_writeback {
+ // a pretty fundamental decode mismatch..
+ return false;
+ }
+
+ if *cs_offset != *yax_offset && *yax_offset < 0 {
+ // capstone reports the offset as an unsigned integer rather than
+ // sign-extending it into the signed range -4096 to 4080. yaxpeax
+ // decodes the operand into this signed range.
+ if (!*yax_offset + 1) & 0x1fff != *cs_offset {
+ return false;
+ }
+ }
+ }
+ _ => {
+ // some operands differ
+ return false;
+ }
+ }
+ }
+
+ // operand matching above looks OK, opcodes are the same, we're good
+ // here.
+ return true;
+ }
+
return false;
}
// eprintln!("{}", yax_text);
if !acceptable_match(&yax_text, &cs_text) {
eprintln!("disassembly mismatch: {} != {}. bytes: {:x?}", yax_text, cs_text, bytes);
-// std::process::abort();
+ std::process::abort();
} else {
stats.good.fetch_add(1, Ordering::Relaxed);
}
diff --git a/src/armv8/a64.rs b/src/armv8/a64.rs
index 56d9ef2..8775d77 100644
--- a/src/armv8/a64.rs
+++ b/src/armv8/a64.rs
@@ -1263,7 +1263,10 @@ pub enum Opcode {
DSB(u8),
DMB(u8),
SB,
+ #[deprecated(since = "0.4.0", note = "i spelled `ssbb` incorrectly.")]
SSSB,
+ PSSBB,
+ SSBB,
HINT,
CLREX,
CSEL,
@@ -1867,6 +1870,10 @@ impl Display for Opcode {
Opcode::MRS => "mrs",
Opcode::ISB => "isb",
Opcode::SB => "sb",
+ Opcode::SSBB => "ssbb",
+ Opcode::PSSBB => "pssbb",
+ // this arm should never, never, never be hit
+ #[allow(deprecated)]
Opcode::SSSB => "sssb",
Opcode::CLREX => "clrex",
Opcode::CSEL => "csel",
@@ -8669,6 +8676,9 @@ impl Decoder<ARMv8> for InstDecoder {
let Rt = ((word >> 0) & 0b11111) as u16;
let simm = (((imm9 as i16) << 7) >> 7) as i32;
+ // tag granularity is 16 bytes, so tag offsets are encoded with the low four
+ // (`LOG2_TAG_GRANULE`) zero bits shifted out for all tag instructions.
+ let simm = simm << 4;
let opcode = &[
Opcode::STZGM, Opcode::STG, Opcode::STG, Opcode::STG,
@@ -10597,7 +10607,15 @@ impl Decoder<ARMv8> for InstDecoder {
},
0b100 => {
if CRm == 0b0000 {
- inst.opcode = Opcode::SSSB;
+ inst.opcode = Opcode::SSBB;
+ inst.operands = [
+ Operand::Nothing,
+ Operand::Nothing,
+ Operand::Nothing,
+ Operand::Nothing,
+ ];
+ } else if CRm == 0b0100 {
+ inst.opcode = Opcode::PSSBB;
inst.operands = [
Operand::Nothing,
Operand::Nothing,
diff --git a/tests/armv8/a64.rs b/tests/armv8/a64.rs
index baf841b..81d4a2e 100644
--- a/tests/armv8/a64.rs
+++ b/tests/armv8/a64.rs
@@ -83,6 +83,21 @@ fn test_barrier() {
test_display([0xbf, 0x3f, 0x03, 0xd5], "dmb sy");
// only with FEAT_SB
test_display([0xff, 0x30, 0x03, 0xd5], "sb");
+
+ test_display([0x9f, 0x34, 0x03, 0xd5], "pssbb");
+
+ // when printing the instruction the third operand defaults to xzr if omitted, so yax probably
+ // could/should omit it. but it's not *wrong*..
+ test_display([0x00, 0x10, 0xdf, 0x9a], "irg x0, x0, xzr");
+ test_display([0x90, 0x10, 0xdf, 0x9a], "irg x16, x4, xzr");
+ test_display([0x90, 0x10, 0xcf, 0x9a], "irg x16, x4, x15");
+
+ test_display([0x00, 0x10, 0x60, 0xd9], "ldg x0, [x0, #0x10]");
+ test_display([0x90, 0x10, 0x60, 0xd9], "ldg x16, [x4, #0x10]");
+ test_display([0x90, 0x90, 0x60, 0xd9], "ldg x16, [x4, #0x90]");
+ // the immediate offset in tag instructions is a signed multiple of 16 in the range of -4096 to 4080.
+ // yax decodes it as signed in this range, capstone does not.
+ test_display([0x90, 0x90, 0x7f, 0xd9], "ldg x16, [x4, #-0x70]");
}
#[test]
@@ -5026,3 +5041,22 @@ fn test_bitfield() {
assert!(errs.is_empty());
}
+
+#[test]
+fn test_tags() {
+ const TESTS: &[([u8; 4], &'static str)] = &[
+ ([0x00, 0x10, 0x60, 0xd9], "ldg x0, [x0, #0x10]"),
+ ([0x00, 0x20, 0x60, 0xd9], "ldg x0, [x0, #0x20]"),
+ ([0x00, 0x21, 0x60, 0xd9], "ldg x0, [x8, #0x20]"),
+ ([0x03, 0x21, 0x60, 0xd9], "ldg x3, [x8, #0x20]"),
+ ([0x03, 0x21, 0x7f, 0xd9], "ldg x3, [x8, #-0xe0]"),
+ ];
+
+ let errs = run_tests(TESTS);
+
+ for err in errs.iter() {
+ println!("{}", err);
+ }
+
+ assert!(errs.is_empty());
+}