diff options
| -rw-r--r-- | fuzz/Cargo.toml | 12 | ||||
| -rw-r--r-- | fuzz/fuzz_targets/does_not_decode_invalid_registers.rs | 30 | ||||
| -rw-r--r-- | fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs | 56 | ||||
| -rw-r--r-- | src/long_mode/display.rs | 4 | ||||
| -rw-r--r-- | src/long_mode/mod.rs | 68 | ||||
| -rw-r--r-- | test/long_mode/mod.rs | 30 | 
6 files changed, 180 insertions, 20 deletions
| diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 53bcb81..60690f6 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -32,3 +32,15 @@ name = "display_c_does_not_panic"  path = "fuzz_targets/display_c_does_not_panic.rs"  test = false  doc = false + +[[bin]] +name = "does_not_decode_invalid_registers" +path = "fuzz_targets/does_not_decode_invalid_registers.rs" +test = false +doc = false + +[[bin]] +name = "small_reg_is_always_old_bank_if_possible" +path = "fuzz_targets/small_reg_is_always_old_bank_if_possible.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/does_not_decode_invalid_registers.rs b/fuzz/fuzz_targets/does_not_decode_invalid_registers.rs new file mode 100644 index 0000000..0f32f73 --- /dev/null +++ b/fuzz/fuzz_targets/does_not_decode_invalid_registers.rs @@ -0,0 +1,30 @@ +//! instruction text should never include the word BUG - this is a symptom of selecting an invalid +//! RegSpec while disassembling. + +#![no_main] +#[macro_use] extern crate libfuzzer_sys; +extern crate yaxpeax_x86; + +fuzz_target!(|data: &[u8]| { +    let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default(); +    let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); +    let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); + +    if let Ok(inst) = x86_64_decoder.decode_slice(data) { +        let mut res = String::new(); +        inst.write_to(&mut res).expect("format does not panic"); +        assert!(!res.contains("BUG")); +    }; + +    if let Ok(inst) = x86_32_decoder.decode_slice(data) { +        let mut res = String::new(); +        inst.write_to(&mut res).expect("format does not panic"); +        assert!(!res.contains("BUG")); +    }; + +    if let Ok(inst) = x86_16_decoder.decode_slice(data) { +        let mut res = String::new(); +        inst.write_to(&mut res).expect("format does not panic"); +        assert!(!res.contains("BUG")); +    }; +}); diff --git a/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs b/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs new file mode 100644 index 0000000..a143205 --- /dev/null +++ b/fuzz/fuzz_targets/small_reg_is_always_old_bank_if_possible.rs @@ -0,0 +1,56 @@ +//! if a register has a single-byte register operand, and it's one of `al`, `bl`, `cl`, or `dl`, it +//! should compare equal to the `RegSpec` produced by `RegSpec::al()` and so on. +//! +//! at one point this was a bug; `RegSpec::al()` would use `RegisterBank::B`, but an instruction +//! with `rex.w` set could get an `al` backed by a `RegSpec` in `RegisterBank::rB`. + +#![no_main] +#[macro_use] extern crate libfuzzer_sys; +extern crate yaxpeax_x86; + +// this test is not meaningful for 32-bit or 16-bit modes, there are no register synonyms in those +// cases. leaving them in for fuzz targets to match other cases, and In Case Of Future Change. +fuzz_target!(|data: &[u8]| { +    let x86_64_decoder = yaxpeax_x86::long_mode::InstDecoder::default(); +    let x86_32_decoder = yaxpeax_x86::protected_mode::InstDecoder::default(); +    let x86_16_decoder = yaxpeax_x86::real_mode::InstDecoder::default(); + +    if let Ok(inst) = x86_64_decoder.decode_slice(data) { +        for i in 0..inst.operand_count() { +            match inst.operand(i) { +                yaxpeax_x86::long_mode::Operand::Register(reg) => { +                    if reg.num() < 4 && reg.class() == yaxpeax_x86::long_mode::register_class::RB { +                        assert!(false, "instruction has rex.w register that aliases old byte registers"); +                    } else { +                        /* not a potentially-unwanted register */ +                    } +                }, +                _ => { /* not a relevant operand kind. immediate or memory of some kind. */ } +            } +        } +    }; + +    /* +    if let Ok(inst) = x86_32_decoder.decode_slice(data) { +        for i in 0..inst.operand_count() { +            match inst.operand(i) { +                Operand::Register(_reg) => { +                    /* not a potentially-unwanted register */ +                }, +                _ => { /* not a relevant operand kind. immediate or memory of some kind. */ } +            } +        } +    }; + +    if let Ok(inst) = x86_16_decoder.decode_slice(data) { +        for i in 0..inst.operand_count() { +            match inst.operand(i) { +                Operand::Register(_reg) => { +                    /* not a potentially-unwanted register */ +                }, +                _ => { /* not a relevant operand kind. immediate or memory of some kind. */ } +            } +        } +    }; +    */ +}); diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index b1aeee2..9c6795e 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -105,11 +105,11 @@ impl fmt::Display for Segment {  // register names are grouped by indices scaled by 16.  // xmm, ymm, zmm all get two indices.  const REG_NAMES: &[&'static str] = &[ -    "", "", "", "", "", "", "", "", +    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",      "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",      "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w",      "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d", -    "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b", +    "BUG", "BUG", "BUG", "BUG", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",      "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",      "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", "cr8", "cr9", "cr10", "cr11", "cr12", "cr13", "cr14", "cr15",      "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", "dr8", "dr9", "dr10", "dr11", "dr12", "dr13", "dr14", "dr15", diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 7f9719c..4eb0a36 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -883,9 +883,9 @@ pub mod register_class {      pub const D: RegisterClass = RegisterClass { kind: RegisterBank::D };      /// word registers: ax through r15w      pub const W: RegisterClass = RegisterClass { kind: RegisterBank::W }; -    /// byte registers: al, cl, dl, bl, ah, ch, dh, bh. `B` registers do *not* have a rex prefix. +    /// byte registers: al, cl, dl, bl, ah, ch, dh, bh.      pub const B: RegisterClass = RegisterClass { kind: RegisterBank::B }; -    /// byte registers with rex prefix present: al through r15b. `RB` registers have a rex prefix. +    /// byte registers added in x86_64: spl through r15b.      pub const RB: RegisterClass = RegisterClass { kind: RegisterBank::rB };      /// control registers cr0 through cr15.      pub const CR: RegisterClass = RegisterClass { kind: RegisterBank::CR}; @@ -6730,18 +6730,27 @@ fn read_operands<      if operand_code.is_only_modrm_operands() {          let bank; +        let modrm = read_modrm(words)?; +        let rrr = (modrm >> 3) & 7; +        self.rrr = rrr; +        let num = rrr + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 }; +        instruction.regs[0].num = num; +          // cool! we can precompute width and know we need to read_E.          if !operand_code.has_byte_operands() {              // further, this is an vdq E              bank = self.vqp_size();              instruction.mem_size = bank as u8; +            instruction.regs[0].bank = bank;          } else { -            bank = self.rb_size();              instruction.mem_size = 1; +            bank = self.rb_size(); +            if num < 4 { +                instruction.regs[0].bank = RegisterBank::B; +            } else { +                instruction.regs[0].bank = bank; +            }          }; -        let modrm = read_modrm(words)?; -        instruction.regs[0].bank = bank; -        instruction.regs[0].num = ((modrm >> 3) & 7) + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 };          // for some encodings, the rrr field selects an opcode, not an operand          if operand_code.bits() != OperandCode::ModRM_0xc1_Ev_Ib as u16 && operand_code.bits() != OperandCode::ModRM_0xff_Ev as u16 { @@ -6767,6 +6776,11 @@ fn read_operands<          } else {              read_M(words, instruction, modrm, sink)?          }; +        if mem_oper == OperandSpec::RegMMM { +            if instruction.regs[1].bank == RegisterBank::rB && instruction.regs[1].num < 4 { +                instruction.regs[1].bank = RegisterBank::B; +            } +        }          if !operand_code.has_reg_mem() {              instruction.operands[0] = mem_oper;              instruction.operands[1] = OperandSpec::RegRRR; @@ -6811,20 +6825,27 @@ fn read_operands<      let mut mem_oper = OperandSpec::Nothing;      if operand_code.has_read_E() {          let bank; +        let modrm = read_modrm(words)?; +        let rrr = (modrm >> 3) & 7; +        self.rrr = rrr; +        let num = rrr + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 }; +        instruction.regs[0].num = num; +          // cool! we can precompute width and know we need to read_E.          if !operand_code.has_byte_operands() {              // further, this is an vdq E              bank = self.vqp_size();              instruction.mem_size = bank as u8; +            instruction.regs[0].bank = bank;          } else { -            bank = self.rb_size();              instruction.mem_size = 1; +            bank = self.rb_size(); +            if num < 4 { +                instruction.regs[0].bank = RegisterBank::B; +            } else { +                instruction.regs[0].bank = bank; +            }          }; -        let modrm = read_modrm(words)?; -        instruction.regs[0].bank = bank; -        let rrr = (modrm >> 3) & 7; -        self.rrr = rrr; -        instruction.regs[0].num = rrr + if instruction.prefixes.rex_unchecked().r() { 0b1000 } else { 0 };          // for some encodings, the rrr field selects an opcode, not an operand          if operand_code.bits() != OperandCode::ModRM_0xc1_Ev_Ib as u16 && operand_code.bits() != OperandCode::ModRM_0xff_Ev as u16 { @@ -6850,6 +6871,11 @@ fn read_operands<          } else {              read_M(words, instruction, modrm, sink)?          }; +        if mem_oper == OperandSpec::RegMMM { +            if instruction.regs[1].bank == RegisterBank::rB && instruction.regs[1].num < 4 { +                instruction.regs[1].bank = RegisterBank::B; +            } +        }          if !operand_code.has_reg_mem() {              instruction.operands[0] = mem_oper;              instruction.operands[1] = OperandSpec::RegRRR; @@ -6944,11 +6970,13 @@ fn read_operands<              }              2 => {                  // these are Zb_Ib_R -                instruction.regs[0] = -                    RegSpec { -                        num: reg + if instruction.prefixes.rex_unchecked().b() { 0b1000 } else { 0 }, -                        bank: self.rb_size() -                    }; +                let num = reg + if instruction.prefixes.rex_unchecked().b() { 0b1000 } else { 0 }; +                let bank = if num < 4 { +                    RegisterBank::B +                } else { +                    self.rb_size() +                }; +                instruction.regs[0] = RegSpec { num, bank };                  sink.record(                      opcode_start,                      opcode_start + 2, @@ -7425,7 +7453,11 @@ fn read_operands<              );              if instruction.operands[1] == OperandSpec::RegMMM {                  instruction.mem_size = 0; -                instruction.regs[1].bank = w; +                if instruction.regs[1].num < 4 { +                    instruction.regs[1].bank = RegisterBank::B; +                } else { +                    instruction.regs[1].bank = w; +                }              } else {                  instruction.mem_size = 1;              } diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 1e8a72e..25a303c 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -3456,6 +3456,36 @@ fn from_reports() {      test_display(&[0xf3, 0x0f, 0x1e, 0x0f], "nop dword [rdi], ecx");  } +/// the first four single byte registers (`al`, `cl`, `dl`, `bl`) can be described as either the +/// first four registers of the old `B` bank (`al..bh`) or the first four registers of the new `rB` +/// bank, used when `rex.w` is present on the instruction. `RegSpec` relies on the bank matching to +/// compare equality, as well as for `Hash` and `Ord` to work correctly, so having two spellings of +/// the register `al` (and friends) can be a correctness issue for dependent code. +#[test] +fn register_synonyms_use_old_bank() { +    // a few instructions where adding a rex.w prefix would yield a synonymous byte-size register +    let cases: &'static [&'static [u8]] = &[ +        &[0x30, 0x00], +        &[0x30, 0xc1], +        &[0x0f, 0xb6, 0xc0], +        &[0x0f, 0xbe, 0xc0], +        &[0xb2, 0xff], // from `does_not_decode_invalid_registers` fuzz target 🎉 +    ]; + +    for case in cases.iter() { +        let mut case_with_rexw: Vec<u8> = vec![0x48]; +        case_with_rexw.extend_from_slice(case); + +        let mut reader = yaxpeax_arch::U8Reader::new(case); +        let no_rexw = InstDecoder::default().decode(&mut reader).expect("can disassemble test instruction"); + +        let mut reader = yaxpeax_arch::U8Reader::new(case); +        let with_rexw = InstDecoder::default().decode(&mut reader).expect("can disassemble test instruction"); + +        assert_eq!(no_rexw, with_rexw); +    } +} +  mod reg_specs {      use yaxpeax_x86::long_mode::RegSpec; | 
