aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2023-12-16 13:26:48 -0800
committeriximeow <me@iximeow.net>2023-12-16 13:26:48 -0800
commit110f797005cca70e18cbcc0975397d26d8045245 (patch)
treea3ff79c0c3a7519d00e19d213447c268614cef00
parent85668b222582ef1edae537beea452d5e1c933389 (diff)
fix opportunity for unhandled register synonyms
registers `al`, `cl`, `dl`, and `bl` could have two different representations - with `rex.w` and without. these two forms of `RegSpec` would not compare equal, nor has the same, so for code relying on `RegSpec` to faithfully represent a 1-1 mapping to x86 registers, these synonyms would introduce bugs in register analysis. for example, in `yaxpeax-core`, this would result in instructions writing to `rex.w al` not being visible as definitions for a future read of `!rex.w al`. fix this in `x86_64` code, add new test cases about the confusion, adjust register names to make this situation more clearly a bug, and introduce two new fuzz targets that would have helped spot this error.
-rw-r--r--fuzz/Cargo.toml12
-rw-r--r--fuzz/fuzz_targets/does_not_decode_invalid_registers.rs30
-rw-r--r--fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs56
-rw-r--r--src/long_mode/display.rs4
-rw-r--r--src/long_mode/mod.rs68
-rw-r--r--test/long_mode/mod.rs30
6 files changed, 180 insertions, 20 deletions
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 53bcb81..60690f6 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -32,3 +32,15 @@ name = "display_c_does_not_panic"
path = "fuzz_targets/display_c_does_not_panic.rs"
test = false
doc = false
+
+[[bin]]
+name = "does_not_decode_invalid_registers"
+path = "fuzz_targets/does_not_decode_invalid_registers.rs"
+test = false
+doc = false
+
+[[bin]]
+name = "small_reg_is_always_old_bank_if_possible"
+path = "fuzz_targets/small_reg_is_always_old_bank_if_possible.rs"
+test = false
+doc = false
diff --git a/fuzz/fuzz_targets/does_not_decode_invalid_registers.rs b/fuzz/fuzz_targets/does_not_decode_invalid_registers.rs
new file mode 100644
index 0000000..0f32f73
--- /dev/null
+++ b/fuzz/fuzz_targets/does_not_decode_invalid_registers.rs
@@ -0,0 +1,30 @@
+//! instruction text should never include the word BUG - this is a symptom of selecting an invalid
+//! RegSpec while disassembling.
+
+#![no_main]
+#[macro_use] extern crate libfuzzer_sys;
+extern crate yaxpeax_x86;
+
+fuzz_target!(|data: &[u8]| {
+ let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default();
+ let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default();
+ let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default();
+
+ if let Ok(inst) = x86_64_decoder.decode_slice(data) {
+ let mut res = String::new();
+ inst.write_to(&mut res).expect("format does not panic");
+ assert!(!res.contains("BUG"));
+ };
+
+ if let Ok(inst) = x86_32_decoder.decode_slice(data) {
+ let mut res = String::new();
+ inst.write_to(&mut res).expect("format does not panic");
+ assert!(!res.contains("BUG"));
+ };
+
+ if let Ok(inst) = x86_16_decoder.decode_slice(data) {
+ let mut res = String::new();
+ inst.write_to(&mut res).expect("format does not panic");
+ assert!(!res.contains("BUG"));
+ };
+});
diff --git a/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs b/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs
new file mode 100644
index 0000000..a143205
--- /dev/null
+++ b/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs
@@ -0,0 +1,56 @@
+//! if a register has a single-byte register operand, and it's one of `al`, `bl`, `cl`, or `dl`, it
+//! should compare equal to the `RegSpec` produced by `RegSpec::al()` and so on.
+//!
+//! at one point this was a bug; `RegSpec::al()` would use `RegisterBank::B`, but an instruction
+//! with `rex.w` set could get an `al` backed by a `RegSpec` in `RegisterBank::rB`.
+
+#![no_main]
+#[macro_use] extern crate libfuzzer_sys;
+extern crate yaxpeax_x86;
+
+// this test is not meaningful for 32-bit or 16-bit modes, there are no register synonyms in those
+// cases. leaving them in for fuzz targets to match other cases, and In Case Of Future Change.
+fuzz_target!(|data: &[u8]| {
+ let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default();
+ let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default();
+ let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default();
+
+ if let Ok(inst) = x86_64_decoder.decode_slice(data) {
+ for i in 0..inst.operand_count() {
+ match inst.operand(i) {
+ yaxpeax_x86::long_mode::Operand::Register(reg) => {
+ if reg.num() < 4 && reg.class() == yaxpeax_x86::long_mode::register_class::RB {
+ assert!(false, "instruction has rex.w register that aliases old byte registers");
+ } else {
+ /* not a potentially-unwanted register */
+ }
+ },
+ _ => { /* not a relevant operand kind. immediate or memory of some kind. */ }
+ }
+ }
+ };
+
+ /*
+ if let Ok(inst) = x86_32_decoder.decode_slice(data) {
+ for i in 0..inst.operand_count() {
+ match inst.operand(i) {
+ Operand::Register(_reg) => {
+ /* not a potentially-unwanted register */
+ },
+ _ => { /* not a relevant operand kind. immediate or memory of some kind. */ }
+ }
+ }
+ };
+
+ if let Ok(inst) = x86_16_decoder.decode_slice(data) {
+ for i in 0..inst.operand_count() {
+ match inst.operand(i) {
+ Operand::Register(_reg) => {
+ /* not a potentially-unwanted register */
+ },
+ _ => { /* not a relevant operand kind. immediate or memory of some kind. */ }
+ }
+ }
+ };
+ */
+});
diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs
index b1aeee2..9c6795e 100644
--- a/src/long_mode/display.rs
+++ b/src/long_mode/display.rs
@@ -105,11 +105,11 @@ impl fmt::Display for Segment {
// register names are grouped by indices scaled by 16.
// xmm, ymm, zmm all get two indices.
const REG_NAMES: &[&'static str] = &[
- "", "", "", "", "", "", "", "",
+ "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
"ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w",
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
- "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
+ "BUG", "BUG", "BUG", "BUG", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "cr8", "cr9", "cr10", "cr11", "cr12", "cr13", "cr14", "cr15",
"dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", "dr8", "dr9", "dr10", "dr11", "dr12", "dr13", "dr14", "dr15",
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index 7f9719c..4eb0a36 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -883,9 +883,9 @@ pub mod register_class {
pub const D: RegisterClass = RegisterClass { kind: RegisterBank::D };
/// word registers: ax through r15w
pub const W: RegisterClass = RegisterClass { kind: RegisterBank::W };
- /// byte registers: al, cl, dl, bl, ah, ch, dh, bh. `B` registers do *not* have a rex prefix.
+ /// byte registers: al, cl, dl, bl, ah, ch, dh, bh.
pub const B: RegisterClass = RegisterClass { kind: RegisterBank::B };
- /// byte registers with rex prefix present: al through r15b. `RB` registers have a rex prefix.
+ /// byte registers added in x86_64: spl through r15b.
pub const RB: RegisterClass = RegisterClass { kind: RegisterBank::rB };
/// control registers cr0 through cr15.
pub const CR: RegisterClass = RegisterClass { kind: RegisterBank::CR};
@@ -6730,18 +6730,27 @@ fn read_operands<
if operand_code.is_only_modrm_operands() {
let bank;
+ let modrm = read_modrm(words)?;
+ let rrr = (modrm >> 3) & 7;
+ self.rrr = rrr;
+ let num = rrr + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 };
+ instruction.regs[0].num = num;
+
// cool! we can precompute width and know we need to read_E.
if !operand_code.has_byte_operands() {
// further, this is an vdq E
bank = self.vqp_size();
instruction.mem_size = bank as u8;
+ instruction.regs[0].bank = bank;
} else {
- bank = self.rb_size();
instruction.mem_size = 1;
+ bank = self.rb_size();
+ if num < 4 {
+ instruction.regs[0].bank = RegisterBank::B;
+ } else {
+ instruction.regs[0].bank = bank;
+ }
};
- let modrm = read_modrm(words)?;
- instruction.regs[0].bank = bank;
- instruction.regs[0].num = ((modrm >> 3) & 7) + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 };
// for some encodings, the rrr field selects an opcode, not an operand
if operand_code.bits() != OperandCode::ModRM_0xc1_Ev_Ib as u16 && operand_code.bits() != OperandCode::ModRM_0xff_Ev as u16 {
@@ -6767,6 +6776,11 @@ fn read_operands<
} else {
read_M(words, instruction, modrm, sink)?
};
+ if mem_oper == OperandSpec::RegMMM {
+ if instruction.regs[1].bank == RegisterBank::rB && instruction.regs[1].num < 4 {
+ instruction.regs[1].bank = RegisterBank::B;
+ }
+ }
if !operand_code.has_reg_mem() {
instruction.operands[0] = mem_oper;
instruction.operands[1] = OperandSpec::RegRRR;
@@ -6811,20 +6825,27 @@ fn read_operands<
let mut mem_oper = OperandSpec::Nothing;
if operand_code.has_read_E() {
let bank;
+ let modrm = read_modrm(words)?;
+ let rrr = (modrm >> 3) & 7;
+ self.rrr = rrr;
+ let num = rrr + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 };
+ instruction.regs[0].num = num;
+
// cool! we can precompute width and know we need to read_E.
if !operand_code.has_byte_operands() {
// further, this is an vdq E
bank = self.vqp_size();
instruction.mem_size = bank as u8;
+ instruction.regs[0].bank = bank;
} else {
- bank = self.rb_size();
instruction.mem_size = 1;
+ bank = self.rb_size();
+ if num < 4 {
+ instruction.regs[0].bank = RegisterBank::B;
+ } else {
+ instruction.regs[0].bank = bank;
+ }
};
- let modrm = read_modrm(words)?;
- instruction.regs[0].bank = bank;
- let rrr = (modrm >> 3) & 7;
- self.rrr = rrr;
- instruction.regs[0].num = rrr + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 };
// for some encodings, the rrr field selects an opcode, not an operand
if operand_code.bits() != OperandCode::ModRM_0xc1_Ev_Ib as u16 && operand_code.bits() != OperandCode::ModRM_0xff_Ev as u16 {
@@ -6850,6 +6871,11 @@ fn read_operands<
} else {
read_M(words, instruction, modrm, sink)?
};
+ if mem_oper == OperandSpec::RegMMM {
+ if instruction.regs[1].bank == RegisterBank::rB && instruction.regs[1].num < 4 {
+ instruction.regs[1].bank = RegisterBank::B;
+ }
+ }
if !operand_code.has_reg_mem() {
instruction.operands[0] = mem_oper;
instruction.operands[1] = OperandSpec::RegRRR;
@@ -6944,11 +6970,13 @@ fn read_operands<
}
2 => {
// these are Zb_Ib_R
- instruction.regs[0] =
- RegSpec {
- num: reg + if instruction.prefixes.rex_unchecked().b() { 0b1000 } else { 0 },
- bank: self.rb_size()
- };
+ let num = reg + if instruction.prefixes.rex_unchecked().b() { 0b1000 } else { 0 };
+ let bank = if num < 4 {
+ RegisterBank::B
+ } else {
+ self.rb_size()
+ };
+ instruction.regs[0] = RegSpec { num, bank };
sink.record(
opcode_start,
opcode_start + 2,
@@ -7425,7 +7453,11 @@ fn read_operands<
);
if instruction.operands[1] == OperandSpec::RegMMM {
instruction.mem_size = 0;
- instruction.regs[1].bank = w;
+ if instruction.regs[1].num < 4 {
+ instruction.regs[1].bank = RegisterBank::B;
+ } else {
+ instruction.regs[1].bank = w;
+ }
} else {
instruction.mem_size = 1;
}
diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs
index 1e8a72e..25a303c 100644
--- a/test/long_mode/mod.rs
+++ b/test/long_mode/mod.rs
@@ -3456,6 +3456,36 @@ fn from_reports() {
test_display(&[0xf3, 0x0f, 0x1e, 0x0f], "nop dword [rdi], ecx");
}
+/// the first four single byte registers (`al`, `cl`, `dl`, `bl`) can be described as either the
+/// first four registers of the old `B` bank (`al..bh`) or the first four registers of the new `rB`
+/// bank, used when `rex.w` is present on the instruction. `RegSpec` relies on the bank matching to
+/// compare equality, as well as for `Hash` and `Ord` to work correctly, so having two spellings of
+/// the register `al` (and friends) can be a correctness issue for dependent code.
+#[test]
+fn register_synonyms_use_old_bank() {
+ // a few instructions where adding a rex.w prefix would yield a synonymous byte-size register
+ let cases: &'static [&'static [u8]] = &[
+ &[0x30, 0x00],
+ &[0x30, 0xc1],
+ &[0x0f, 0xb6, 0xc0],
+ &[0x0f, 0xbe, 0xc0],
+ &[0xb2, 0xff], // from `does_not_decode_invalid_registers` fuzz target 🎉
+ ];
+
+ for case in cases.iter() {
+ let mut case_with_rexw: Vec<u8> = vec![0x48];
+ case_with_rexw.extend_from_slice(case);
+
+ let mut reader = yaxpeax_arch::U8Reader::new(case);
+ let no_rexw = InstDecoder::default().decode(&mut reader).expect("can disassemble test instruction");
+
+ let mut reader = yaxpeax_arch::U8Reader::new(case);
+ let with_rexw = InstDecoder::default().decode(&mut reader).expect("can disassemble test instruction");
+
+ assert_eq!(no_rexw, with_rexw);
+ }
+}
+
mod reg_specs {
use yaxpeax_x86::long_mode::RegSpec;