From a07098315bea8b880688831f931bc5213ebb6a5f Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 13 Aug 2021 23:44:28 -0700 Subject: extend annotation reporting to 32- and 16-bit modes, kinda --- src/long_mode/mod.rs | 9 ++-- src/long_mode/vex.rs | 12 ++++- src/protected_mode/mod.rs | 118 +++++++++++++++++++++++++++++++++++++++------- src/protected_mode/vex.rs | 12 ++++- src/real_mode/mod.rs | 118 +++++++++++++++++++++++++++++++++++++++------- src/real_mode/vex.rs | 12 ++++- src/shared/evex.in | 8 +++- 7 files changed, 245 insertions(+), 44 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 25a1735..54d83e8 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -7462,6 +7462,9 @@ impl yaxpeax_arch::FieldDescription for FieldDescription { fn id(&self) -> u32 { self.id } + fn is_separator(&self) -> bool { + false + } } impl fmt::Display for FieldDescription { @@ -7547,7 +7550,7 @@ fn read_with_annotations< return Err(DecodeError::InvalidPrefixes); } else { instruction.prefixes = prefixes; - vex::two_byte_vex(words, instruction)?; + vex::two_byte_vex(words, instruction, sink)?; return Ok(()); } } else if b == 0xc4 { @@ -7556,7 +7559,7 @@ fn read_with_annotations< return Err(DecodeError::InvalidPrefixes); } else { instruction.prefixes = prefixes; - vex::three_byte_vex(words, instruction)?; + vex::three_byte_vex(words, instruction, sink)?; return Ok(()); } } else if b == 0x62 { @@ -7565,7 +7568,7 @@ fn read_with_annotations< return Err(DecodeError::InvalidPrefixes); } else { instruction.prefixes = prefixes; - evex::read_evex(words, instruction, None)?; + evex::read_evex(words, instruction, None, sink)?; return Ok(()); } } diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index 9649e72..893d624 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -1,8 +1,10 @@ use yaxpeax_arch::Reader; +use yaxpeax_arch::DescriptionSink; use crate::long_mode::Arch; use crate::long_mode::OperandSpec; use crate::long_mode::DecodeError; +use crate::long_mode::FieldDescription; use crate::long_mode::RegSpec; use crate::long_mode::RegisterBank; use crate::long_mode::Instruction; @@ -100,7 +102,10 @@ enum VEXOperandCode { } #[inline(never)] -pub(crate) fn three_byte_vex::Address, ::Word>>(words: &mut T, instruction: &mut Instruction) -> Result<(), DecodeError> { +pub(crate) fn three_byte_vex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { let vex_byte_one = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let vex_byte_two = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let p = vex_byte_two & 0x03; @@ -130,7 +135,10 @@ pub(crate) fn three_byte_vex::Address, ::Address, ::Word>>(words: &mut T, instruction: &mut Instruction) -> Result<(), DecodeError> { +pub(crate) fn two_byte_vex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { let vex_byte = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let p = vex_byte & 0x03; let p = match p { diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 2d37bc6..ab55460 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -13,6 +13,7 @@ use core::cmp::PartialEq; use core::hint::unreachable_unchecked; use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction}; +use yaxpeax_arch::{AnnotatingDecoder, DescriptionSink, NullSink}; use yaxpeax_arch::{DecodeError as ArchDecodeError}; use core::fmt; @@ -4090,7 +4091,7 @@ impl Default for InstDecoder { impl Decoder for InstDecoder { fn decode::Address, ::Word>>(&self, words: &mut T) -> Result::DecodeError> { let mut instr = Instruction::invalid(); - read_instr(self, words, &mut instr)?; + read_with_annotations(self, words, &mut instr, &mut NullSink)?; instr.length = words.offset() as u8; if words.offset() > 15 { @@ -4104,7 +4105,7 @@ impl Decoder for InstDecoder { Ok(instr) } fn decode_into::Address, ::Word>>(&self, instr: &mut Instruction, words: &mut T) -> Result<(), ::DecodeError> { - read_instr(self, words, instr)?; + read_with_annotations(self, words, instr, &mut NullSink)?; instr.length = words.offset() as u8; if words.offset() > 15 { @@ -4822,7 +4823,7 @@ impl OperandCodeBuilder { // | // ---------------------------> read modr/m? #[repr(u16)] -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] enum OperandCode { Ivs = OperandCodeBuilder::new().special_case(25).bits(), I_3 = OperandCodeBuilder::new().special_case(27).bits(), @@ -7086,7 +7087,84 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord { }; } -fn read_instr::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction) -> Result<(), DecodeError> { +#[derive(Clone, Debug, PartialEq, Eq)] +enum InnerDescription { + RexPrefix(u8), + SegmentPrefix(Segment), + Opcode(Opcode), + OperandCode(OperandCode), + RegisterNumber(&'static str, u8, RegSpec), + Misc(&'static str), + Number(&'static str, i64), +} + +impl InnerDescription { + fn with_id(self, id: u32) -> FieldDescription { + FieldDescription { + desc: self, + id, + } + } +} + +impl fmt::Display for InnerDescription { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + InnerDescription::RexPrefix(bits) => { + write!(f, "rex prefix: {}{}{}{}", + if bits & 0x8 != 0 { "w" } else { "-" }, + if bits & 0x4 != 0 { "r" } else { "-" }, + if bits & 0x2 != 0 { "x" } else { "-" }, + if bits & 0x1 != 0 { "b" } else { "-" }, + ) + } + InnerDescription::SegmentPrefix(segment) => { + write!(f, "segment override: {}", segment) + } + InnerDescription::Misc(text) => { + f.write_str(text) + } + InnerDescription::Number(text, num) => { + write!(f, "{}: {:#x}", text, num) + } + InnerDescription::Opcode(opc) => { + write!(f, "opcode `{}`", opc) + } + InnerDescription::OperandCode(code) => { + write!(f, "operand code `{:?}`", code) + } + InnerDescription::RegisterNumber(name, num, reg) => { + write!(f, "`{}` (`{}` selects register number {})", reg, name, num) + } + } + } +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct FieldDescription { + desc: InnerDescription, + id: u32, +} + +impl yaxpeax_arch::FieldDescription for FieldDescription { + fn id(&self) -> u32 { + self.id + } + fn is_separator(&self) -> bool { + false + } +} + +impl fmt::Display for FieldDescription { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.desc, f) + } +} + +fn read_with_annotations< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { words.mark(); let mut nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let mut next_rec = OPCODES[nextb as usize]; @@ -7173,7 +7251,7 @@ fn read_instr::Address, 15 { return Err(DecodeError::TooLong); @@ -7225,7 +7303,10 @@ fn read_instr::Address, 30 */ -fn read_operands::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode) -> Result<(), DecodeError> { +fn read_operands< + T: Reader<::Address, ::Word>, + S: DescriptionSink +>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, sink: &mut S) -> Result<(), DecodeError> { instruction.operands[0] = OperandSpec::RegRRR; instruction.operand_count = 2; let operand_code = OperandCodeBuilder::from_bits(operand_code as u16); @@ -7725,14 +7806,17 @@ fn read_operands::Address, { let operand_code: OperandCode = unsafe { core::mem::transmute(operand_code.bits()) }; - unlikely_operands(decoder, words, instruction, operand_code, mem_oper)?; + unlikely_operands(decoder, words, instruction, operand_code, mem_oper, sink)?; } }; } Ok(()) } -fn unlikely_operands::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec) -> Result<(), DecodeError> { +fn unlikely_operands< + T: Reader<::Address, ::Word>, + S: DescriptionSink +>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec, sink: &mut S) -> Result<(), DecodeError> { match operand_code { OperandCode::G_E_mm_Ib => { let modrm = read_modrm(words)?; @@ -7840,7 +7924,7 @@ fn unlikely_operands::Address, ::Address, ::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { instruction.opcode = Opcode::LOADIWKEY; } else { @@ -8734,7 +8818,7 @@ fn unlikely_operands::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8743,7 +8827,7 @@ fn unlikely_operands::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8752,7 +8836,7 @@ fn unlikely_operands::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8762,13 +8846,13 @@ fn unlikely_operands::Address, { instruction.opcode = Opcode::ENCODEKEY128; - read_operands(decoder, words, instruction, OperandCode::G_U_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_U_xmm, sink)?; instruction.regs[0].bank = RegisterBank::D; instruction.regs[1].bank = RegisterBank::D; } OperandCode::ModRM_0xf30f38fb => { instruction.opcode = Opcode::ENCODEKEY256; - read_operands(decoder, words, instruction, OperandCode::G_U_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_U_xmm, sink)?; instruction.regs[0].bank = RegisterBank::D; instruction.regs[1].bank = RegisterBank::D; } @@ -10072,7 +10156,7 @@ fn unlikely_operands::Address, ::Address, ::Word>>(words: &mut T, vex_byte_one: u8, instruction: &mut Instruction) -> Result<(), DecodeError> { +pub(crate) fn three_byte_vex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, vex_byte_one: u8, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { let vex_byte_two = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let p = vex_byte_two & 0x03; let p = match p { @@ -129,7 +134,10 @@ pub(crate) fn three_byte_vex::Address, ::Address, ::Word>>(words: &mut T, vex_byte: u8, instruction: &mut Instruction) -> Result<(), DecodeError> { +pub(crate) fn two_byte_vex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, vex_byte: u8, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { let p = vex_byte & 0x03; let p = match p { 0x00 => VEXOpcodePrefix::None, diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 7f3ad42..0af0b0b 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -13,6 +13,7 @@ use core::cmp::PartialEq; use core::hint::unreachable_unchecked; use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction}; +use yaxpeax_arch::{AnnotatingDecoder, DescriptionSink, NullSink}; use yaxpeax_arch::{DecodeError as ArchDecodeError}; use core::fmt; @@ -4090,7 +4091,7 @@ impl Default for InstDecoder { impl Decoder for InstDecoder { fn decode::Address, ::Word>>(&self, words: &mut T) -> Result::DecodeError> { let mut instr = Instruction::invalid(); - read_instr(self, words, &mut instr)?; + read_with_annotations(self, words, &mut instr, &mut NullSink)?; instr.length = words.offset() as u8; if words.offset() > 15 { @@ -4104,7 +4105,7 @@ impl Decoder for InstDecoder { Ok(instr) } fn decode_into::Address, ::Word>>(&self, instr: &mut Instruction, words: &mut T) -> Result<(), ::DecodeError> { - read_instr(self, words, instr)?; + read_with_annotations(self, words, instr, &mut NullSink)?; instr.length = words.offset() as u8; if words.offset() > 15 { @@ -4822,7 +4823,7 @@ impl OperandCodeBuilder { // | // ---------------------------> read modr/m? #[repr(u16)] -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] enum OperandCode { Ivs = OperandCodeBuilder::new().special_case(25).bits(), I_3 = OperandCodeBuilder::new().special_case(27).bits(), @@ -7087,7 +7088,84 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord { }; } -fn read_instr::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction) -> Result<(), DecodeError> { +#[derive(Clone, Debug, PartialEq, Eq)] +enum InnerDescription { + RexPrefix(u8), + SegmentPrefix(Segment), + Opcode(Opcode), + OperandCode(OperandCode), + RegisterNumber(&'static str, u8, RegSpec), + Misc(&'static str), + Number(&'static str, i64), +} + +impl InnerDescription { + fn with_id(self, id: u32) -> FieldDescription { + FieldDescription { + desc: self, + id, + } + } +} + +impl fmt::Display for InnerDescription { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + InnerDescription::RexPrefix(bits) => { + write!(f, "rex prefix: {}{}{}{}", + if bits & 0x8 != 0 { "w" } else { "-" }, + if bits & 0x4 != 0 { "r" } else { "-" }, + if bits & 0x2 != 0 { "x" } else { "-" }, + if bits & 0x1 != 0 { "b" } else { "-" }, + ) + } + InnerDescription::SegmentPrefix(segment) => { + write!(f, "segment override: {}", segment) + } + InnerDescription::Misc(text) => { + f.write_str(text) + } + InnerDescription::Number(text, num) => { + write!(f, "{}: {:#x}", text, num) + } + InnerDescription::Opcode(opc) => { + write!(f, "opcode `{}`", opc) + } + InnerDescription::OperandCode(code) => { + write!(f, "operand code `{:?}`", code) + } + InnerDescription::RegisterNumber(name, num, reg) => { + write!(f, "`{}` (`{}` selects register number {})", reg, name, num) + } + } + } +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct FieldDescription { + desc: InnerDescription, + id: u32, +} + +impl yaxpeax_arch::FieldDescription for FieldDescription { + fn id(&self) -> u32 { + self.id + } + fn is_separator(&self) -> bool { + false + } +} + +impl fmt::Display for FieldDescription { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.desc, f) + } +} + +fn read_with_annotations< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { words.mark(); let mut nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let mut next_rec = OPCODES[nextb as usize]; @@ -7174,7 +7252,7 @@ fn read_instr::Address, 15 { return Err(DecodeError::TooLong); @@ -7226,7 +7304,10 @@ fn read_instr::Address, 30 */ -fn read_operands::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode) -> Result<(), DecodeError> { +fn read_operands< + T: Reader<::Address, ::Word>, + S: DescriptionSink +>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, sink: &mut S) -> Result<(), DecodeError> { instruction.operands[0] = OperandSpec::RegRRR; instruction.operand_count = 2; let operand_code = OperandCodeBuilder::from_bits(operand_code as u16); @@ -7725,14 +7806,17 @@ fn read_operands::Address, { let operand_code: OperandCode = unsafe { core::mem::transmute(operand_code.bits()) }; - unlikely_operands(decoder, words, instruction, operand_code, mem_oper)?; + unlikely_operands(decoder, words, instruction, operand_code, mem_oper, sink)?; } }; } Ok(()) } -fn unlikely_operands::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec) -> Result<(), DecodeError> { +fn unlikely_operands< + T: Reader<::Address, ::Word>, + S: DescriptionSink +>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec, sink: &mut S) -> Result<(), DecodeError> { match operand_code { OperandCode::G_E_mm_Ib => { let modrm = read_modrm(words)?; @@ -7841,7 +7925,7 @@ fn unlikely_operands::Address, ::Address, ::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { instruction.opcode = Opcode::LOADIWKEY; } else { @@ -8738,7 +8822,7 @@ fn unlikely_operands::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8747,7 +8831,7 @@ fn unlikely_operands::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8756,7 +8840,7 @@ fn unlikely_operands::Address, { - read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm, sink)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8766,13 +8850,13 @@ fn unlikely_operands::Address, { instruction.opcode = Opcode::ENCODEKEY128; - read_operands(decoder, words, instruction, OperandCode::G_U_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_U_xmm, sink)?; instruction.regs[0].bank = RegisterBank::D; instruction.regs[1].bank = RegisterBank::D; } OperandCode::ModRM_0xf30f38fb => { instruction.opcode = Opcode::ENCODEKEY256; - read_operands(decoder, words, instruction, OperandCode::G_U_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_U_xmm, sink)?; instruction.regs[0].bank = RegisterBank::D; instruction.regs[1].bank = RegisterBank::D; } @@ -10075,7 +10159,7 @@ fn unlikely_operands::Address, ::Address, ::Word>>(words: &mut T, vex_byte_one: u8, instruction: &mut Instruction) -> Result<(), DecodeError> { +pub(crate) fn three_byte_vex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, vex_byte_one: u8, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { let vex_byte_two = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let p = vex_byte_two & 0x03; let p = match p { @@ -129,7 +134,10 @@ pub(crate) fn three_byte_vex::Address, ::Address, ::Word>>(words: &mut T, vex_byte: u8, instruction: &mut Instruction) -> Result<(), DecodeError> { +pub(crate) fn two_byte_vex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, vex_byte: u8, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { let p = vex_byte & 0x03; let p = match p { 0x00 => VEXOpcodePrefix::None, diff --git a/src/shared/evex.in b/src/shared/evex.in index 8a1e4eb..2f1cbe5 100644 --- a/src/shared/evex.in +++ b/src/shared/evex.in @@ -1,11 +1,17 @@ use super::OperandSpec; +use super::FieldDescription; + +use yaxpeax_arch::DescriptionSink; // `evex_byte_one` is an option because the caller *may* have already read it, // but may have not. `long_mode` can decide immediately that `0x62` should be read // as an `EVEX` instruction, but for other modes we can only make this // determination when reading a `bound`'s `modrm` byte. #[inline(never)] -pub(crate) fn read_evex::Address, ::Word>>(words: &mut T, instruction: &mut Instruction, evex_byte_one: Option) -> Result<(), DecodeError> { +pub(crate) fn read_evex< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(words: &mut T, instruction: &mut Instruction, evex_byte_one: Option, sink: &mut S) -> Result<(), DecodeError> { let evex_byte_one = if let Some(b) = evex_byte_one { b } else { -- cgit v1.1