diff options
author | iximeow <me@iximeow.net> | 2021-08-20 01:59:03 -0700 |
---|---|---|
committer | iximeow <me@iximeow.net> | 2021-08-21 19:20:41 -0700 |
commit | 796e955cdc4f17d191b39da55ce704f90c6f37da (patch) | |
tree | 5f16c9152c6998156aabbce1abc34dfe046b19f0 /src/real_mode | |
parent | 49a09cccdd2d4920154e3bc68048dc85754640e3 (diff) |
extend decoder annotation through all of 64-, 32-, and 16-bit modes
Diffstat (limited to 'src/real_mode')
-rw-r--r-- | src/real_mode/mod.rs | 777 |
1 files changed, 678 insertions, 99 deletions
diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 0af0b0b..7404195 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -4105,7 +4105,18 @@ impl Decoder<Arch> for InstDecoder { Ok(instr) } fn decode_into<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(&self, instr: &mut Instruction, words: &mut T) -> Result<(), <Arch as yaxpeax_arch::Arch>::DecodeError> { - read_with_annotations(self, words, instr, &mut NullSink)?; + self.decode_with_annotation(instr, words, &mut NullSink) + } +} + +impl AnnotatingDecoder<Arch> for InstDecoder { + type FieldDescription = FieldDescription; + + fn decode_with_annotation< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<Self::FieldDescription> + >(&self, instr: &mut Instruction, words: &mut T, sink: &mut S) -> Result<(), <Arch as yaxpeax_arch::Arch>::DecodeError> { + read_with_annotations(self, words, instr, sink)?; instr.length = words.offset() as u8; if words.offset() > 15 { @@ -5510,52 +5521,70 @@ const OPCODES: [OpcodeRecord; 256] = [ ]; #[allow(non_snake_case)] -pub(self) fn read_E<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8, width: u8) -> Result<OperandSpec, DecodeError> { +pub(self) fn read_E< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instr: &mut Instruction, modrm: u8, width: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { let bank = width_to_gp_reg_bank(width); if modrm >= 0b11000000 { read_modrm_reg(instr, modrm, bank) } else { - read_M(words, instr, modrm) + read_M(words, instr, modrm, sink) } } #[allow(non_snake_case)] -pub(self) fn read_E_mm<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, 
DecodeError> { +pub(self) fn read_E_mm< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instr: &mut Instruction, modrm: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { if modrm >= 0b11000000 { read_modrm_reg(instr, modrm, RegisterBank::MM) } else { - read_M(words, instr, modrm) + read_M(words, instr, modrm, sink) } } #[allow(non_snake_case)] -pub(self) fn read_E_st<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, DecodeError> { +pub(self) fn read_E_st< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instr: &mut Instruction, modrm: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { if modrm >= 0b11000000 { read_modrm_reg(instr, modrm, RegisterBank::ST) } else { - read_M(words, instr, modrm) + read_M(words, instr, modrm, sink) } } #[allow(non_snake_case)] -pub(self) fn read_E_xmm<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, DecodeError> { +pub(self) fn read_E_xmm< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instr: &mut Instruction, modrm: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { if modrm >= 0b11000000 { read_modrm_reg(instr, modrm, RegisterBank::X) } else { - read_M(words, instr, modrm) + read_M(words, instr, modrm, sink) } } #[allow(non_snake_case)] -pub(self) fn read_E_ymm<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, DecodeError> { +pub(self) fn read_E_ymm< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, 
<Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instr: &mut Instruction, modrm: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { if modrm >= 0b11000000 { read_modrm_reg(instr, modrm, RegisterBank::Y) } else { - read_M(words, instr, modrm) + read_M(words, instr, modrm, sink) } } #[allow(non_snake_case)] -pub(self) fn read_E_vex<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8, bank: RegisterBank) -> Result<OperandSpec, DecodeError> { +pub(self) fn read_E_vex< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instr: &mut Instruction, modrm: u8, bank: RegisterBank, sink: &mut S) -> Result<OperandSpec, DecodeError> { if modrm >= 0b11000000 { read_modrm_reg(instr, modrm, bank) } else { - let res = read_M(words, instr, modrm)?; + let res = read_M(words, instr, modrm, sink)?; if (modrm & 0b01_000_000) == 0b01_000_000 { instr.prefixes.apply_compressed_disp(true); } @@ -5569,37 +5598,110 @@ fn read_modrm_reg(instr: &mut Instruction, modrm: u8, reg_bank: RegisterBank) -> Ok(OperandSpec::RegMMM) } -#[allow(non_snake_case)] -fn read_sib<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, DecodeError> { - let modbits = modrm >> 6; - let sibbyte = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; - instr.regs[1].num |= sibbyte & 7; - instr.regs[2].num |= (sibbyte >> 3) & 7; +#[inline(always)] +fn read_sib_disp< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(instr: &Instruction, words: &mut T, modbits: u8, sibbyte: u8, sink: &mut S) -> Result<i32, DecodeError> { + let sib_start = words.offset() as u32 * 8 - 8; + let modbit_addr = words.offset() as u32 * 
8 - 10; + let disp_start = words.offset() as u32 * 8; let disp = if modbits == 0b00 { if (sibbyte & 7) == 0b101 { - read_num(words, 4)? as i32 + sink.record(modbit_addr, modbit_addr + 1, + InnerDescription::Misc("4-byte displacement").with_id(sib_start + 0)); + sink.record(sib_start, sib_start + 2, + InnerDescription::Misc("4-byte displacement").with_id(sib_start + 0)); + let disp = read_num(words, 4)? as i32; + sink.record(disp_start, disp_start + 31, + InnerDescription::Number("displacement", disp as i64).with_id(sib_start + 1)); + disp } else { 0 } } else if modbits == 0b01 { - read_num(words, 1)? as i8 as i32 + sink.record(modbit_addr, modbit_addr + 1, + InnerDescription::Misc("1-byte displacement").with_id(sib_start + 0)); + if instr.prefixes.evex().is_some() { + sink.record(modbit_addr, modbit_addr + 1, + InnerDescription::Misc("EVEX prefix implies displacement is scaled by vector size") + .with_id(sib_start + 0)); + } + let disp = read_num(words, 1)? as i8 as i32; + sink.record(disp_start, disp_start + 7, + InnerDescription::Number("displacement", disp as i64).with_id(sib_start + 1)); + disp } else { - read_num(words, 4)? as i32 + sink.record(modbit_addr, modbit_addr + 1, + InnerDescription::Misc("4-byte displacement").with_id(sib_start + 0)); + let disp = read_num(words, 4)? 
as i32; + sink.record(disp_start, disp_start + 31, + InnerDescription::Number("displacement", disp as i64).with_id(sib_start + 1)); + disp }; + + Ok(disp) +} + +#[allow(non_snake_case)] +#[inline(always)] +fn read_sib< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription> +>(words: &mut T, instr: &mut Instruction, modrm: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { + let modrm_start = words.offset() as u32 * 8 - 8; + let sib_start = words.offset() as u32 * 8; + + let modbits = modrm >> 6; + let sibbyte = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + instr.regs[1].num |= sibbyte & 7; + instr.regs[2].num |= (sibbyte >> 3) & 7; + + let disp = read_sib_disp(instr, words, modbits, sibbyte, sink)?; instr.disp = disp as u32; let scale = 1u8 << (sibbyte >> 6); instr.scale = scale; let op_spec = if (sibbyte & 7) == 0b101 { + sink.record( + sib_start, + sib_start + 2, + InnerDescription::Misc("bbb selects displacement in address") + .with_id(sib_start + 0) + ); if ((sibbyte >> 3) & 7) == 0b100 { + sink.record( + sib_start + 3, + sib_start + 5, + InnerDescription::Misc("iii selects no index register") + .with_id(sib_start + 0) + ); if modbits == 0b00 { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mod bits select no base register, absolute [disp32] only") + .with_id(sib_start + 0) + ); OperandSpec::DispU32 } else { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) + .with_id(sib_start + 0) + ); instr.regs[1].num |= 0b101; if disp == 0 { + sink.record( + sib_start, + sib_start + 2, + InnerDescription::Misc("memory access is [rbp + disp] but displacement is 0") + .with_id(sib_start + 0) + ); OperandSpec::Deref } else { OperandSpec::RegDisp @@ -5609,27 +5711,70 @@ fn read_sib<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ar instr.regs[1].num |= 0b101; 
instr.regs[2].num |= (sibbyte >> 3) & 7; + sink.record( + sib_start + 3, + sib_start + 5, + InnerDescription::RegisterNumber("iii", instr.regs[2].num & 0b111, instr.regs[2]) + .with_id(sib_start + 0) + ); + let scale = 1u8 << (sibbyte >> 6); instr.scale = scale; if disp == 0 { if modbits == 0 { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mod bits select no base register, [index+disp] only, but displacement is 0") + .with_id(sib_start + 0) + ); OperandSpec::RegScale } else { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) + .with_id(sib_start + 0) + ); OperandSpec::RegIndexBaseScale } } else { if modbits == 0 { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mod bits select no base register, [index+disp] only") + .with_id(sib_start + 0) + ); OperandSpec::RegScaleDisp } else { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::RegisterNumber("mod", 0b101, instr.regs[1]) + .with_id(sib_start + 0) + ); OperandSpec::RegIndexBaseScaleDisp } } } } else { instr.regs[1].num |= sibbyte & 7; + sink.record( + sib_start + 0, + sib_start + 2, + InnerDescription::RegisterNumber("bbb", instr.regs[1].num & 0b111, instr.regs[2]) + .with_id(sib_start + 0) + ); if ((sibbyte >> 3) & 7) == 0b100 { + sink.record( + sib_start + 3, + sib_start + 5, + InnerDescription::Misc("iii selects no index register") + .with_id(sib_start + 0) + ); if disp == 0 { OperandSpec::Deref } else { @@ -5637,6 +5782,12 @@ fn read_sib<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ar } } else { instr.regs[2].num |= (sibbyte >> 3) & 7; + sink.record( + sib_start + 3, + sib_start + 5, + InnerDescription::RegisterNumber("iii", instr.regs[2].num & 0b111, instr.regs[2]) + .with_id(sib_start + 0) + ); let scale = 1u8 << (sibbyte >> 6); instr.scale = scale; @@ -5736,7 +5887,12 @@ fn read_M_16bit<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, 
<Arch as yaxpea } #[allow(non_snake_case)] -fn read_M<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, DecodeError> { +fn read_M< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription> +>(words: &mut T, instr: &mut Instruction, modrm: u8, sink: &mut S) -> Result<OperandSpec, DecodeError> { + let modrm_start = words.offset() as u32 * 8 - 8; + // by default read M as a 16-bit size address if !instr.prefixes.address_size() { return read_M_16bit(words, instr, modrm); @@ -5745,21 +5901,74 @@ fn read_M<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch let modbits = modrm >> 6; let mmm = modrm & 7; let op_spec = if mmm == 4 { - return read_sib(words, instr, modrm); + sink.record( + modrm_start, + modrm_start + 2, + InnerDescription::Misc("`mmm` field selects sib access") + .with_id(modrm_start + 2) + ); + return read_sib(words, instr, modrm, sink); } else if mmm == 5 && modbits == 0b00 { + // TODO: uhhhh + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("absolute disp32") + .with_id(modrm_start + 0) + ); + sink.record( + modrm_start + 0, + modrm_start + 2, + InnerDescription::Misc("absolute disp32") + .with_id(modrm_start + 0) + ); + instr.disp = read_num(words, 4)?; OperandSpec::DispU32 } else { instr.regs[1].num |= mmm; + sink.record( + modrm_start, + modrm_start + 2, + InnerDescription::RegisterNumber("mmm", modrm & 7, instr.regs[1]) + .with_id(modrm_start + 2) + ); if modbits == 0b00 { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mmm field is a simple register dereference (mod bits: 00)") + .with_id(modrm_start + 0) + ); OperandSpec::Deref } else { + let disp_start = words.offset(); let disp = if modbits == 0b01 { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("memory operand 
is [reg+disp8] indexed by register selected by `mmm` (mod bits: 01)") + .with_id(modrm_start + 0) + ); read_num(words, 1)? as i8 as i32 } else { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("memory operand is [reg+disp32] indexed by register selected by `mmm` (mod bits: 10)") + .with_id(modrm_start + 0) + ); read_num(words, 4)? as i32 }; + let disp_end = words.offset(); + + sink.record( + disp_start as u32 * 8, + disp_end as u32 * 8 - 1, + InnerDescription::Number("displacement", disp as i64) + .with_id(words.offset() as u32 * 8 + 3) + ); if disp == 0 { OperandSpec::Deref } else { @@ -7097,6 +7306,7 @@ enum InnerDescription { RegisterNumber(&'static str, u8, RegSpec), Misc(&'static str), Number(&'static str, i64), + Boundary(&'static str), } impl InnerDescription { @@ -7137,6 +7347,9 @@ impl fmt::Display for InnerDescription { InnerDescription::RegisterNumber(name, num, reg) => { write!(f, "`{}` (`{}` selects register number {})", reg, name, num) } + InnerDescription::Boundary(desc) => { + write!(f, "{}", desc) + } } } } @@ -7152,7 +7365,11 @@ impl yaxpeax_arch::FieldDescription for FieldDescription { self.id } fn is_separator(&self) -> bool { - false + if let InnerDescription::Boundary(_) = &self.desc { + true + } else { + false + } } } @@ -7182,6 +7399,13 @@ fn read_with_annotations< let record: OpcodeRecord = loop { let record = next_rec; if let Interpretation::Instruction(_) = record.0 { + if words.offset() > 1 { + sink.record( + words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1, + InnerDescription::Boundary("prefixes end") + .with_id(words.offset() as u32 * 8 - 9) + ); + } break record; } else { let b = nextb; @@ -7189,6 +7413,13 @@ fn read_with_annotations< return Err(DecodeError::TooLong); } if b == 0x0f { + if words.offset() > 1 { + sink.record( + words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1, + InnerDescription::Boundary("prefixes end") + .with_id(words.offset() as u32 * 8 - 9) + ); 
+ } let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; if b == 0x38 { let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; @@ -7308,6 +7539,8 @@ fn read_operands< T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, S: DescriptionSink<FieldDescription> >(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, sink: &mut S) -> Result<(), DecodeError> { + let modrm_start = words.offset() as u32 * 8; + let opcode_start = modrm_start + 8; instruction.operands[0] = OperandSpec::RegRRR; instruction.operand_count = 2; let operand_code = OperandCodeBuilder::from_bits(operand_code as u16); @@ -7326,6 +7559,12 @@ fn read_operands< instruction.regs[0] = RegSpec::from_parts(reg, bank); instruction.mem_size = 2; + sink.record( + opcode_start + 0, + opcode_start + 2, + InnerDescription::RegisterNumber("zzz", reg, instruction.regs[0]) + .with_id(opcode_start + 2) + ); instruction.operand_count = 1; } 1 => { @@ -7340,14 +7579,45 @@ fn read_operands< instruction.operands[1] = OperandSpec::RegMMM; instruction.regs[1] = RegSpec::from_parts(reg, bank); + sink.record( + opcode_start + 0, + opcode_start + 2, + InnerDescription::RegisterNumber("zzz", reg, instruction.regs[1]) + .with_id(opcode_start + 2) + ); + sink.record( + opcode_start + 3, + opcode_start + 7, + InnerDescription::Misc("opcode selects `ax` operand") + .with_id(opcode_start + 2) + ); + if instruction.prefixes.operand_size() { + sink.record( + opcode_start + 3, + opcode_start + 7, + InnerDescription::Misc("operand-size prefix override selects `eax`") + .with_id(opcode_start + 2) + ); + } instruction.operand_count = 2; } 2 => { // these are Zb_Ib_R instruction.regs[0] = RegSpec::from_parts(reg, RegisterBank::B); + sink.record( + opcode_start, + opcode_start + 2, + InnerDescription::RegisterNumber("zzz", reg, instruction.regs[0]) + .with_id(opcode_start + 1) + ); instruction.imm = read_imm_unsigned(words, 1)?; + sink.record( + 
words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - 8)); instruction.operands[1] = OperandSpec::ImmU8; } 3 => { @@ -7355,14 +7625,38 @@ fn read_operands< if instruction.prefixes.operand_size() { instruction.regs[0] = RegSpec::from_parts(reg, RegisterBank::D); + sink.record( + opcode_start, + opcode_start + 2, + InnerDescription::RegisterNumber("zzz", reg, instruction.regs[0]) + .with_id(opcode_start + 2) + ); instruction.imm = read_imm_unsigned(words, 4)?; + sink.record( + words.offset() as u32 * 8 - 32, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - 32 + 1) + ); instruction.operands[1] = OperandSpec::ImmI32; } else { instruction.regs[0] = RegSpec::from_parts(reg, RegisterBank::W); + sink.record( + opcode_start, + opcode_start + 2, + InnerDescription::RegisterNumber("zzz", reg, instruction.regs[0]) + .with_id(opcode_start + 2) + ); instruction.imm = read_imm_unsigned(words, 2)?; + sink.record( + words.offset() as u32 * 8 - 16, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - 16 + 1) + ); instruction.operands[1] = OperandSpec::ImmI16; } } @@ -7400,14 +7694,33 @@ fn read_operands< modrm = read_modrm(words)?; instruction.regs[0].bank = bank; instruction.regs[0].num = (modrm >> 3) & 7; + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::RegisterNumber("rrr", (modrm >> 3) & 7, instruction.regs[0]) + .with_id(modrm_start + 1) + ); mem_oper = if modrm >= 0b11000000 { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mmm field is a register number (mod bits: 11)") + .with_id(modrm_start + 0) + ); if operand_code.bits() == (OperandCode::Gv_M as u16) { return Err(DecodeError::InvalidOperand); } - read_modrm_reg(instruction, modrm, bank)? 
+ let res = read_modrm_reg(instruction, modrm, bank)?; + sink.record( + modrm_start, + modrm_start + 2, + InnerDescription::RegisterNumber("mmm", modrm & 7, instruction.regs[1]) + .with_id(modrm_start + 2) + ); + res } else { - read_M(words, instruction, modrm)? + read_M(words, instruction, modrm, sink)? }; instruction.operands[1] = mem_oper; } @@ -7415,6 +7728,22 @@ fn read_operands< if let Some((only_imm, immsz)) = operand_code.has_imm() { instruction.imm = read_imm_signed(words, 1 << (immsz * 1))? as u32; + // TODO: hmmm + if immsz == 0 { + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("1-byte immediate", instruction.imm as i64) + .with_id(words.offset() as u32 * 8), + ); + } else { + sink.record( + words.offset() as u32 * 8 - 16, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("2-byte immediate", instruction.imm as i64) + .with_id(words.offset() as u32 * 8), + ); + } if only_imm { if immsz == 0 { instruction.operands[0] = OperandSpec::ImmI8; @@ -7445,6 +7774,19 @@ fn read_operands< instruction.opcode = base_opcode_map((modrm >> 3) & 7); instruction.operands[0] = mem_oper; instruction.operands[1] = OperandSpec::ImmI8; + + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - 8) + ); instruction.operand_count = 2; } 2 => { @@ -7456,6 +7798,18 @@ fn read_operands< }; instruction.imm = read_imm_signed(words, numwidth)? 
as u32; instruction.opcode = base_opcode_map((modrm >> 3) & 7); + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); + sink.record( + words.offset() as u32 * 8 - (numwidth as u32 * 8), + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - (numwidth as u32 * 8)) + ); instruction.operands[1] = match numwidth { 2 => OperandSpec::ImmI16, 4 => OperandSpec::ImmI32, @@ -7467,17 +7821,35 @@ fn read_operands< if modrm == 0xf8 { instruction.opcode = Opcode::XABORT; instruction.imm = read_imm_signed(words, 1)? as u32; + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - 8) + ); instruction.operands[0] = OperandSpec::ImmI8; instruction.operand_count = 1; return Ok(()); } if (modrm & 0b00111000) != 0 { + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Misc("invalid rrr field: must be zero") + .with_id(modrm_start - 8) + ); return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string()); } instruction.operands[0] = mem_oper; instruction.opcode = Opcode::MOV; instruction.imm = read_imm_signed(words, 1)? as u32; + sink.record( + modrm_start + 8, + modrm_start + 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start + 8) + ); instruction.operands[1] = OperandSpec::ImmI8; instruction.operand_count = 2; } @@ -7485,8 +7857,21 @@ fn read_operands< if modrm == 0xf8 { instruction.opcode = Opcode::XBEGIN; instruction.imm = if instruction.prefixes.operand_size() { + sink.record( + modrm_start + 8, + modrm_start + 8 + 32 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start + 8) + ); + // TODO: 5? read_imm_signed(words, 5)? 
as i32 as u32 } else { + sink.record( + modrm_start + 8, + modrm_start + 8 + 16 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start + 8) + ); read_imm_signed(words, 2)? as i16 as i32 as u32 }; instruction.operands[0] = OperandSpec::ImmI32; @@ -7494,15 +7879,33 @@ fn read_operands< return Ok(()); } if (modrm & 0b00111000) != 0 { + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Misc("invalid rrr field: must be zero") + .with_id(modrm_start - 8) + ); return Err(DecodeError::InvalidOperand); // Err("Invalid modr/m for opcode 0xc7".to_string()); } instruction.operands[0] = mem_oper; instruction.opcode = Opcode::MOV; if !instruction.prefixes.operand_size() { + sink.record( + modrm_start + 8, + modrm_start + 8 + 16 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start + 8) + ); instruction.imm = read_imm_signed(words, 2)? as u32; instruction.operands[1] = OperandSpec::ImmI16; } else { + sink.record( + modrm_start + 8, + modrm_start + 8 + 32 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start + 8) + ); instruction.imm = read_imm_signed(words, 4)? 
as u32; instruction.operands[1] = OperandSpec::ImmI32; } @@ -7515,11 +7918,29 @@ fn read_operands< op @ 10 => { instruction.operands[0] = mem_oper; instruction.opcode = BITWISE_OPCODE_MAP[((modrm >> 3) & 7) as usize].clone(); + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); if op == 10 { instruction.regs[0] = RegSpec::cl(); + sink.record( + modrm_start - 8, + modrm_start - 1, + InnerDescription::RegisterNumber("reg", 1, instruction.regs[0]) + .with_id(modrm_start - 7) + ); instruction.operands[1] = OperandSpec::RegRRR; } else if op == 9 { instruction.regs[0] = RegSpec::cl(); + sink.record( + modrm_start - 8, + modrm_start - 1, + InnerDescription::RegisterNumber("reg", 1, instruction.regs[0]) + .with_id(modrm_start - 7) + ); instruction.operands[1] = OperandSpec::RegRRR; } else { let num = match op { @@ -7532,6 +7953,22 @@ fn read_operands< 1 } }; + // TODO: op == 6? + if op == 5 { + sink.record( + modrm_start - 8, + modrm_start - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start - 8) + ); + } else { + sink.record( + modrm_start - 8, + modrm_start - 1, + InnerDescription::Misc("opcode specifies integer immediate 1") + .with_id(modrm_start - 8) + ); + } instruction.imm = num; instruction.operands[1] = OperandSpec::ImmI8; } @@ -7555,6 +7992,12 @@ fn read_operands< ]; let rrr = (modrm >> 3) & 7; instruction.opcode = TABLE[rrr as usize]; + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); if rrr < 2 { instruction.opcode = Opcode::TEST; let numwidth = if opwidth == 8 { 4 } else { opwidth }; @@ -7565,6 +8008,12 @@ fn read_operands< 4 => OperandSpec::ImmI32, _ => unsafe { unreachable_unchecked() } }; + sink.record( + modrm_start + 8, + modrm_start + 8 + numwidth as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(modrm_start + 8) + ); } else { 
 instruction.operand_count = 1; } @@ -7573,12 +8022,24 @@ fn read_operands< instruction.operands[0] = mem_oper; let r = (modrm >> 3) & 7; if r >= 2 { + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Misc("invalid rrr: opcode requires rrr < 0b010") + .with_id(modrm_start - 8) + ); return Err(DecodeError::InvalidOpcode); } instruction.opcode = [ Opcode::INC, Opcode::DEC, ][r as usize]; + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); instruction.operand_count = 1; } 14 => { @@ -7596,6 +8057,12 @@ fn read_operands< Opcode::JMPF, Opcode::PUSH, ][r as usize]; + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(opcode) + .with_id(modrm_start - 8) + ); if instruction.operands[0] == OperandSpec::RegMMM { // in real mode, `xed` reports that operand-size does in fact override from word to // dword. unlike larger modes, operand-size can't shrink the call operand down. 
@@ -7622,12 +8089,18 @@ fn read_operands< 15 => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E(words, instruction, modrm, 1)?; + instruction.operands[1] = read_E(words, instruction, modrm, 1, sink)?; instruction.regs[0] = if instruction.prefixes.operand_size() || instruction.opcode == Opcode::CRC32 { RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::D) } else { RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::W) }; + sink.record( + modrm_start as u32 + 3, + modrm_start as u32 + 5, + InnerDescription::RegisterNumber("rrr", (modrm >> 3) & 7, instruction.regs[0]) + .with_id(modrm_start as u32 + 3) + ); if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 1; } @@ -7636,12 +8109,18 @@ fn read_operands< 16 => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E(words, instruction, modrm, 2)?; + instruction.operands[1] = read_E(words, instruction, modrm, 2, sink)?; instruction.regs[0] = if instruction.prefixes.operand_size() { RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::D) } else { RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::W) }; + sink.record( + modrm_start as u32 + 3, + modrm_start as u32 + 5, + InnerDescription::RegisterNumber("rrr", (modrm >> 3) & 7, instruction.regs[0]) + .with_id(modrm_start as u32 + 3) + ); if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 2; } @@ -7657,6 +8136,12 @@ fn read_operands< instruction.operands[1] = OperandSpec::RegRRR; instruction.operand_count = 2; if instruction.operands[0] == OperandSpec::RegMMM { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mod bits 0b11 select register operand, width fixed to xmm") + .with_id(modrm_start as u32 + 1) + ); // fix the register to XMM instruction.regs[1].bank = RegisterBank::X; } else { @@ -7671,6 +8156,12 @@ fn read_operands< if op == 20 { return Err(DecodeError::InvalidOperand); } else { + sink.record( + modrm_start + 6, + modrm_start + 7, + 
InnerDescription::Misc("mod bits 0b11 select register operand, width fixed to xmm") + .with_id(modrm_start as u32 + 1) + ); // fix the register to XMM instruction.regs[1].bank = RegisterBank::X; } @@ -7685,11 +8176,23 @@ fn read_operands< 22 => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[1] = read_E_xmm(words, instruction, modrm, sink)?; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::X); + sink.record( + modrm_start as u32 + 3, + modrm_start as u32 + 5, + InnerDescription::RegisterNumber("rrr", (modrm >> 3) & 7, instruction.regs[0]) + .with_id(modrm_start as u32 + 3) + ); instruction.imm = read_num(words, 1)? as u8 as u32; + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - 8 + 1) + ); if instruction.operands[1] != OperandSpec::RegMMM { if instruction.opcode == Opcode::CMPSS { instruction.mem_size = 4; @@ -7725,6 +8228,12 @@ fn read_operands< 4 => OperandSpec::ImmI32, _ => unsafe { unreachable_unchecked() } }; + sink.record( + words.offset() as u32 * 8 - opwidth as u32 * 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - opwidth as u32 * 8 + 1) + ); instruction.operand_count = 2; } 25 => { @@ -7735,6 +8244,12 @@ fn read_operands< }; instruction.imm = read_imm_unsigned(words, opwidth)?; + sink.record( + words.offset() as u32 * 8 - opwidth as u32 * 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("imm", instruction.imm as i64) + .with_id(words.offset() as u32 * 8 - opwidth as u32 * 8 + 1) + ); instruction.operands[0] = match opwidth { 2 => OperandSpec::ImmI16, 4 => OperandSpec::ImmI32, @@ -7745,10 +8260,22 @@ fn read_operands< 26 => { instruction.operands[0] = mem_oper; instruction.opcode = base_opcode_map((modrm >> 3) & 7); + sink.record( + 
modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); instruction.operands[1] = OperandSpec::ImmI8; instruction.operand_count = 2; }, 27 => { + sink.record( + modrm_start - 8, + modrm_start - 1, + InnerDescription::Number("int", 3 as i64) + .with_id(modrm_start - 1) + ); instruction.imm = 3; instruction.operands[0] = OperandSpec::ImmU8; instruction.operand_count = 1; @@ -7776,6 +8303,12 @@ fn read_operands< if instruction.opcode == Opcode::MOVD { instruction.regs[1].bank = RegisterBank::D; } else { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mod bits 0b11 select register operand, width fixed to xmm") + .with_id(modrm_start as u32 + 1) + ); instruction.regs[1].bank = RegisterBank::X; } } else { @@ -7786,18 +8319,36 @@ fn read_operands< instruction.operands[0] = mem_oper; let r = (modrm >> 3) & 7; if r >= 1 { + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Misc("rrr field > 0b000 for this opcode is illegal, except with XOP extensions") + .with_id(modrm_start - 8) + ); // TODO: this is where XOP decoding would occur return Err(DecodeError::IncompleteDecoder); } instruction.opcode = [ Opcode::POP, ][r as usize]; + sink.record( + modrm_start + 3, + modrm_start + 5, + InnerDescription::Opcode(instruction.opcode) + .with_id(modrm_start - 8) + ); instruction.operand_count = 1; } 31 => { instruction.regs[0].bank = RegisterBank::X; instruction.operand_count = 2; if instruction.operands[1] == OperandSpec::RegMMM { + sink.record( + modrm_start + 6, + modrm_start + 7, + InnerDescription::Misc("mod bits 0b11 select register operand, width fixed to xmm") + .with_id(modrm_start as u32 + 1) + ); // fix the register to XMM instruction.regs[1].bank = RegisterBank::X; } else { @@ -7821,7 +8372,7 @@ fn unlikely_operands< OperandCode::G_E_mm_Ib => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E_mm(words, instruction, modrm)?; + 
instruction.operands[1] = read_E_mm(words, instruction, modrm, sink)?; instruction.regs[0] = RegSpec { bank: RegisterBank::MM, num: (modrm >> 3) & 7 }; if instruction.operands[1] == OperandSpec::RegMMM { instruction.regs[1].bank = RegisterBank::MM; @@ -7835,7 +8386,7 @@ fn unlikely_operands< OperandCode::G_Ev_xmm_Ib => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[1] = read_E_xmm(words, instruction, modrm, sink)?; instruction.regs[0] = RegSpec { bank: RegisterBank::X, num: (modrm >> 3) & 7 }; instruction.imm = read_num(words, 1)? as u8 as u32; if instruction.operands[1] != OperandSpec::RegMMM { @@ -7859,7 +8410,7 @@ fn unlikely_operands< instruction.regs[0] = RegSpec { bank: RegisterBank::X, num: (modrm >> 3) & 7 }; instruction.operands[1] = OperandSpec::RegRRR; - instruction.operands[0] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[0] = read_E_xmm(words, instruction, modrm, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { if [].contains(&instruction.opcode) { instruction.mem_size = 2; @@ -7877,7 +8428,7 @@ fn unlikely_operands< instruction.regs[0] = RegSpec { bank: RegisterBank::X, num: (modrm >> 3) & 7 }; instruction.operands[0] = OperandSpec::RegRRR; - instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[1] = read_E_xmm(words, instruction, modrm, sink)?; if instruction.opcode == Opcode::CVTTSD2SI || instruction.opcode == Opcode::CVTSD2SI { instruction.regs[0].bank = RegisterBank::D; } @@ -7903,7 +8454,7 @@ fn unlikely_operands< instruction.regs[0] = RegSpec { bank: RegisterBank::D, num: (modrm >> 3) & 7 }; instruction.operands[0] = OperandSpec::RegRRR; - instruction.operands[1] = read_M(words, instruction, modrm)?; + instruction.operands[1] = read_M(words, instruction, modrm, sink)?; if [Opcode::LFS, Opcode::LGS, Opcode::LSS].contains(&instruction.opcode) { if instruction.prefixes.operand_size() { instruction.mem_size = 
4; @@ -7925,6 +8476,12 @@ fn unlikely_operands< // prefixes and then vex is invalid! reject it. return Err(DecodeError::InvalidPrefixes); } else { + sink.record( + words.offset() as u32 * 8 - 16, + words.offset() as u32 * 8 - 9, + InnerDescription::Misc("three-byte vex prefix (0xc4)") + .with_id(words.offset() as u32 * 8 - 16) + ); vex::three_byte_vex(words, modrm, instruction, sink)?; if decoder != &InstDecoder::default() { @@ -7936,7 +8493,7 @@ fn unlikely_operands< // LES instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, if instruction.prefixes.operand_size() { RegisterBank::D } else { RegisterBank::W }); instruction.operands[0] = OperandSpec::RegRRR; - instruction.operands[1] = read_M(words, instruction, modrm)?; + instruction.operands[1] = read_M(words, instruction, modrm, sink)?; if instruction.prefixes.operand_size() { instruction.mem_size = 6; } else { @@ -7952,6 +8509,12 @@ fn unlikely_operands< // prefixes and then vex is invalid! reject it. return Err(DecodeError::InvalidPrefixes); } else { + sink.record( + words.offset() as u32 * 8 - 16, + words.offset() as u32 * 8 - 9, + InnerDescription::Misc("two-byte vex prefix (0xc5)") + .with_id(words.offset() as u32 * 8 - 16) + ); vex::two_byte_vex(words, modrm, instruction, sink)?; if decoder != &InstDecoder::default() { @@ -7963,7 +8526,7 @@ fn unlikely_operands< // LDS instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, if instruction.prefixes.operand_size() { RegisterBank::D } else { RegisterBank::W }); instruction.operands[0] = OperandSpec::RegRRR; - instruction.operands[1] = read_M(words, instruction, modrm)?; + instruction.operands[1] = read_M(words, instruction, modrm, sink)?; if instruction.prefixes.operand_size() { instruction.mem_size = 6; } else { @@ -7974,7 +8537,7 @@ fn unlikely_operands< OperandCode::G_U_xmm_Ub => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[1] = read_E_xmm(words, instruction, modrm, 
sink)?; if instruction.operands[1] != OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -8036,7 +8599,7 @@ fn unlikely_operands< OperandCode::G_E_xmm_Ub => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[1] = read_E_xmm(words, instruction, modrm, sink)?; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::X); instruction.imm = @@ -8112,7 +8675,7 @@ fn unlikely_operands< OperandCode::G_mm_Ew_Ib => { let modrm = read_modrm(words)?; - instruction.operands[1] = read_E(words, instruction, modrm, 4)?; + instruction.operands[1] = read_E(words, instruction, modrm, 4, sink)?; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::MM); if instruction.operands[1] == OperandSpec::RegMMM { @@ -8159,7 +8722,7 @@ fn unlikely_operands< RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::W); }; - instruction.operands[1] = read_E(words, instruction, modrm, 2)?; + instruction.operands[1] = read_E(words, instruction, modrm, 2, sink)?; // lsl is weird. the full register width is written, but only the low 16 bits are used. 
if instruction.operands[1] == OperandSpec::RegMMM { instruction.regs[1].bank = RegisterBank::W; @@ -8176,7 +8739,7 @@ fn unlikely_operands< } else { 2 }; - instruction.operands[1] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[1] = read_E(words, instruction, modrm, opwidth, sink)?; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::D); instruction.operand_count = 2; @@ -8287,7 +8850,7 @@ fn unlikely_operands< let modrm = read_modrm(words)?; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; instruction.operands[1] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::D); @@ -8306,7 +8869,7 @@ fn unlikely_operands< instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::D); - instruction.operands[1] = read_E(words, instruction, modrm, 4)?; + instruction.operands[1] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { instruction.mem_size = 4; } @@ -8329,7 +8892,7 @@ fn unlikely_operands< let modrm = read_modrm(words)?; instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, RegisterBank::X); - instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + instruction.operands[1] = read_E_xmm(words, instruction, modrm, sink)?; if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 8; } @@ -8352,7 +8915,7 @@ fn unlikely_operands< instruction.opcode = Opcode::NOP; } } - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { instruction.mem_size = 64; } @@ -8363,7 +8926,7 @@ fn unlikely_operands< // instruction. 
let modrm = read_modrm(words)?; - instruction.operands[1] = read_E_mm(words, instruction, modrm)?; + instruction.operands[1] = read_E_mm(words, instruction, modrm, sink)?; instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec { bank: RegisterBank::MM, num: (modrm >> 3) & 7 }; if instruction.operands[1] != OperandSpec::RegMMM { @@ -8463,7 +9026,7 @@ fn unlikely_operands< instruction.opcode = Opcode::CMPXCHG8B; instruction.mem_size = 8; instruction.operand_count = 1; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; } return Ok(()); } @@ -8490,13 +9053,13 @@ fn unlikely_operands< instruction.opcode = Opcode::CMPXCHG8B; instruction.mem_size = 8; instruction.operand_count = 1; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; } return Ok(()); } 6 => { instruction.opcode = Opcode::VMCLEAR; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { // this would be invalid as `vmclear`, so fall back to the parse as // 66-prefixed rdrand. this is a register operand, so just demote it to the @@ -8510,7 +9073,7 @@ fn unlikely_operands< return Ok(()); } 7 => { - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { // this would be invalid as `vmclear`, so fall back to the parse as // 66-prefixed rdrand. 
this is a register operand, so just demote it to the @@ -8547,12 +9110,12 @@ fn unlikely_operands< instruction.opcode = Opcode::CMPXCHG8B; instruction.mem_size = 8; instruction.operand_count = 1; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; } } 6 => { instruction.opcode = Opcode::VMXON; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { // invalid as `vmxon`, reg-form is `senduipi` instruction.opcode = Opcode::SENDUIPI; @@ -8565,7 +9128,7 @@ fn unlikely_operands< } 7 => { instruction.opcode = Opcode::RDPID; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -8644,7 +9207,7 @@ fn unlikely_operands< } else { 2 }; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; }, OperandCode::ModRM_0x0f71 => { if instruction.prefixes.rep() || instruction.prefixes.repnz() { @@ -8777,7 +9340,7 @@ fn unlikely_operands< } instruction.mem_size = 48; instruction.opcode = Opcode::AESENCWIDE128KL; - instruction.operands[0] = read_M(words, instruction, modrm)?; + instruction.operands[0] = read_M(words, instruction, modrm, sink)?; return Ok(()); } 0b001 => { @@ -8786,7 +9349,7 @@ fn unlikely_operands< } instruction.mem_size = 48; instruction.opcode = Opcode::AESDECWIDE128KL; - instruction.operands[0] = read_M(words, instruction, modrm)?; + instruction.operands[0] = read_M(words, instruction, modrm, sink)?; return Ok(()); } 0b010 => { @@ -8795,7 +9358,7 @@ fn unlikely_operands< } instruction.mem_size = 64; instruction.opcode = Opcode::AESENCWIDE256KL; - 
instruction.operands[0] = read_M(words, instruction, modrm)?; + instruction.operands[0] = read_M(words, instruction, modrm, sink)?; return Ok(()); } 0b011 => { @@ -8804,7 +9367,7 @@ fn unlikely_operands< } instruction.mem_size = 64; instruction.opcode = Opcode::AESDECWIDE256KL; - instruction.operands[0] = read_M(words, instruction, modrm)?; + instruction.operands[0] = read_M(words, instruction, modrm, sink)?; return Ok(()); } _ => { @@ -9157,7 +9720,7 @@ fn unlikely_operands< instruction.regs[0] = RegSpec { bank: RegisterBank::W, num: (modrm >> 3) & 7 }; - instruction.operands[0] = read_E(words, instruction, modrm, 2)?; + instruction.operands[0] = read_E(words, instruction, modrm, 2, sink)?; instruction.operands[1] = OperandSpec::RegRRR; instruction.mem_size = 2; instruction.operand_count = 2; @@ -9183,7 +9746,7 @@ fn unlikely_operands< RegSpec { bank: RegisterBank::W, num: modrm & 7}; instruction.operands[0] = OperandSpec::RegMMM; } else { - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; instruction.mem_size = 2; } }, @@ -9217,7 +9780,7 @@ fn unlikely_operands< RegSpec { bank: RegisterBank::W, num: modrm & 7}; instruction.operands[1] = OperandSpec::RegMMM; } else { - instruction.operands[1] = read_M(words, instruction, modrm)?; + instruction.operands[1] = read_M(words, instruction, modrm, sink)?; instruction.mem_size = 2; } }, @@ -9282,7 +9845,7 @@ fn unlikely_operands< } else { unreachable!("r <= 8"); } - instruction.operands[0] = read_E(words, instruction, modrm, 2)?; + instruction.operands[0] = read_E(words, instruction, modrm, 2, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { instruction.mem_size = 2; } @@ -9329,7 +9892,7 @@ fn unlikely_operands< instruction.opcode = Opcode::SGDT; instruction.operand_count = 1; instruction.mem_size = 63; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = 
read_E(words, instruction, modrm, opwidth, sink)?; } } else if r == 1 { let mod_bits = modrm >> 6; @@ -9384,7 +9947,7 @@ fn unlikely_operands< instruction.opcode = Opcode::SIDT; instruction.operand_count = 1; instruction.mem_size = 63; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; } } else if r == 2 { let mod_bits = modrm >> 6; @@ -9423,7 +9986,7 @@ fn unlikely_operands< instruction.opcode = Opcode::LGDT; instruction.operand_count = 1; instruction.mem_size = 63; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; } } else if r == 3 { let mod_bits = modrm >> 6; @@ -9487,7 +10050,7 @@ fn unlikely_operands< instruction.opcode = Opcode::LIDT; instruction.operand_count = 1; instruction.mem_size = 63; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; } } else if r == 4 { // TODO: this permits storing only to word-size registers @@ -9495,7 +10058,7 @@ fn unlikely_operands< instruction.opcode = Opcode::SMSW; instruction.operand_count = 1; instruction.mem_size = 2; - instruction.operands[0] = read_E(words, instruction, modrm, 2)?; + instruction.operands[0] = read_E(words, instruction, modrm, 2, sink)?; } else if r == 5 { let mod_bits = modrm >> 6; if mod_bits != 0b11 { @@ -9503,7 +10066,7 @@ fn unlikely_operands< return Err(DecodeError::InvalidOpcode); } instruction.opcode = Opcode::RSTORSSP; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; instruction.mem_size = 8; instruction.operand_count = 1; return Ok(()); @@ -9603,7 +10166,7 @@ fn unlikely_operands< instruction.opcode = Opcode::LMSW; instruction.operand_count = 1; instruction.mem_size = 2; - instruction.operands[0] = 
read_E(words, instruction, modrm, 2)?; + instruction.operands[0] = read_E(words, instruction, modrm, 2, sink)?; } else if r == 7 { let mod_bits = modrm >> 6; let m = modrm & 7; @@ -9687,7 +10250,7 @@ fn unlikely_operands< instruction.opcode = Opcode::INVLPG; instruction.operand_count = 1; instruction.mem_size = 1; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; } } else { unreachable!("r <= 8"); @@ -9712,7 +10275,7 @@ fn unlikely_operands< return Err(DecodeError::InvalidOpcode); } }; - instruction.operands[0] = read_E(words, instruction, modrm, 1 /* opwidth */)?; + instruction.operands[0] = read_E(words, instruction, modrm, 1 /* opwidth */, sink)?; instruction.mem_size = 64; instruction.operand_count = 1; } else { @@ -9724,7 +10287,7 @@ fn unlikely_operands< return Err(DecodeError::InvalidOpcode); } }; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; instruction.operand_count = 1; } @@ -9759,7 +10322,7 @@ fn unlikely_operands< return Err(DecodeError::InvalidOpcode); } instruction.opcode = Opcode::PTWRITE; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { instruction.mem_size = 4; } @@ -9818,7 +10381,7 @@ fn unlikely_operands< match r { 6 => { instruction.opcode = Opcode::CLRSSBSY; - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; instruction.operand_count = 1; instruction.mem_size = 8; return Ok(()); @@ -9891,7 +10454,7 @@ fn unlikely_operands< ][r as usize]; instruction.opcode = opcode; instruction.mem_size = mem_size; - instruction.operands[0] = read_M(words, instruction, modrm)?; + instruction.operands[0] = read_M(words, instruction, modrm, 
sink)?; } }, OperandCode::ModRM_0x0fba => { @@ -9923,7 +10486,7 @@ fn unlikely_operands< } } - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, opwidth, sink)?; if instruction.operands[0] != OperandSpec::RegMMM { instruction.mem_size = opwidth; } @@ -10125,7 +10688,7 @@ fn unlikely_operands< OperandCode::x87_dd | OperandCode::x87_de | OperandCode::x87_df => { - return decode_x87(words, instruction, operand_code); + return decode_x87(words, instruction, operand_code, sink); } OperandCode::M_Gv => { // `lea` operands (`Gv_M`) opportunistically reject a register form of `mmm` early, but @@ -10152,13 +10715,19 @@ fn unlikely_operands< } instruction.operands[0] = OperandSpec::RegRRR; - instruction.operands[1] = read_M(words, instruction, modrm)?; + instruction.operands[1] = read_M(words, instruction, modrm, sink)?; instruction.operand_count = 2; } else { let prefixes = &instruction.prefixes; if prefixes.lock() || prefixes.operand_size() || prefixes.rep_any() { return Err(DecodeError::InvalidPrefixes); } else { + sink.record( + words.offset() as u32 * 8 - 16, + words.offset() as u32 * 8 - 9, + InnerDescription::Misc("evex prefix (0x62)") + .with_id(words.offset() as u32 * 8 - 16) + ); evex::read_evex(words, instruction, Some(modrm), sink)?; } } @@ -10178,7 +10747,17 @@ fn unlikely_operands< Ok(()) } -fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instruction: &mut Instruction, operand_code: OperandCode) -> Result<(), DecodeError> { +fn decode_x87< + T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, + S: DescriptionSink<FieldDescription>, +>(words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, sink: &mut S) -> Result<(), DecodeError> { + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Misc("x87 opcode") + 
.with_id(words.offset() as u32 * 8 - 1) + ); + #[allow(non_camel_case_types)] enum OperandCodeX87 { Est, @@ -10525,19 +11104,19 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ match x87_operands { OperandCodeX87::Est => { - instruction.operands[0] = read_E_st(words, instruction, modrm)?; + instruction.operands[0] = read_E_st(words, instruction, modrm, sink)?; instruction.operand_count = 1; } OperandCodeX87::St_Est => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E_st(words, instruction, modrm)?; + instruction.operands[1] = read_E_st(words, instruction, modrm, sink)?; instruction.operand_count = 2; } OperandCodeX87::St_Edst => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E_st(words, instruction, modrm)?; + instruction.operands[1] = read_E_st(words, instruction, modrm, sink)?; if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 4; } @@ -10546,7 +11125,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ OperandCodeX87::St_Eqst => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E_st(words, instruction, modrm)?; + instruction.operands[1] = read_E_st(words, instruction, modrm, sink)?; if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 8; } @@ -10555,7 +11134,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ OperandCodeX87::St_Ew => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E(words, instruction, modrm, 2)?; + instruction.operands[1] = read_E(words, instruction, modrm, 2, sink)?; if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 2; } @@ -10564,7 +11143,7 @@ fn decode_x87<T: Reader<<Arch as 
yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ OperandCodeX87::St_Mm => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E(words, instruction, modrm, 4)?; + instruction.operands[1] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[1] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10574,7 +11153,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ OperandCodeX87::St_Mq => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E(words, instruction, modrm, 4)?; + instruction.operands[1] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[1] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10584,7 +11163,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ OperandCodeX87::St_Md => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E(words, instruction, modrm, 4)?; + instruction.operands[1] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[1] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10594,7 +11173,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ OperandCodeX87::St_Mw => { instruction.operands[0] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); - instruction.operands[1] = read_E(words, instruction, modrm, 4)?; + instruction.operands[1] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[1] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10602,20 +11181,20 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ instruction.operand_count = 2; } OperandCodeX87::Ew => { - instruction.operands[0] = read_E(words, instruction, modrm, 2)?; + 
instruction.operands[0] = read_E(words, instruction, modrm, 2, sink)?; instruction.operand_count = 1; if instruction.operands[0] != OperandSpec::RegMMM { instruction.mem_size = 2; } } OperandCodeX87::Est_St => { - instruction.operands[0] = read_E_st(words, instruction, modrm)?; + instruction.operands[0] = read_E_st(words, instruction, modrm, sink)?; instruction.operands[1] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); instruction.operand_count = 2; } OperandCodeX87::Edst_St => { - instruction.operands[0] = read_E_st(words, instruction, modrm)?; + instruction.operands[0] = read_E_st(words, instruction, modrm, sink)?; instruction.operands[1] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); instruction.operand_count = 2; @@ -10624,7 +11203,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ } } OperandCodeX87::Eqst_St => { - instruction.operands[0] = read_E_st(words, instruction, modrm)?; + instruction.operands[0] = read_E_st(words, instruction, modrm, sink)?; instruction.operands[1] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); instruction.operand_count = 2; @@ -10633,7 +11212,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ } } OperandCodeX87::Ed_St => { - instruction.operands[0] = read_E_st(words, instruction, modrm)?; + instruction.operands[0] = read_E_st(words, instruction, modrm, sink)?; instruction.operands[1] = OperandSpec::RegRRR; instruction.regs[0] = RegSpec::st(0); if instruction.operands[0] != OperandSpec::RegMMM { @@ -10642,7 +11221,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ instruction.operand_count = 2; } OperandCodeX87::Mm_St => { - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10652,7 +11231,7 @@ fn 
decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ instruction.operand_count = 2; } OperandCodeX87::Mq_St => { - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10662,7 +11241,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ instruction.operand_count = 2; } OperandCodeX87::Md_St => { - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10672,7 +11251,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ instruction.operand_count = 2; } OperandCodeX87::Mw_St => { - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; if instruction.operands[0] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -10682,7 +11261,7 @@ fn decode_x87<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_ instruction.operand_count = 2; } OperandCodeX87::Ex87S => { - instruction.operands[0] = read_E(words, instruction, modrm, 4)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4, sink)?; instruction.operand_count = 1; if instruction.operands[0] == OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); |