diff options
| -rw-r--r-- | CHANGELOG | 19 | ||||
| -rw-r--r-- | src/long_mode/mod.rs | 50 | ||||
| -rw-r--r-- | src/protected_mode/mod.rs | 52 | ||||
| -rw-r--r-- | src/real_mode/mod.rs | 52 | 
4 files changed, 161 insertions, 12 deletions
| @@ -1,5 +1,24 @@  ## 1.1.0 +* implement `AnnotatingDecoder` from `yaxpeax-arch=0.2.6` and later. +  this is a relatively involved addition. for rustc reasons, there are several +  additional `inline(always)` attributes applied to keep non-annotating decoder +  calls yielding the same generated code (and performance) as before. + +  annotations are produced for much but not all of 16-, 32-, and 64-bit x86, +  describing prefixes, opcodes, operand encoding, and for more common +  instructions, operand encoding as well. descriptions provided are described +  by the `FieldDescription` struct in all architectures. `id` generally matches +  some kind of parse order for the instruction, typically the order that +  `yaxpeax-x86` considers bit fields in decoding an instruction. prefixes will +  have lower id than opcodes, opcodes will have lower id than operands, +  immediates will have the highest id due to being last values read in an +  instruction. + +  between prefixes, opcodes, and operands, "Boundary" field descriptions are +  reported as a hint to library clients that a logical grouping of descriptions +  has ended. +  * `pub const fn` builders for all general-purpose registers, segment registers, and ip/flags registers.    - this corrects a spotty and inconsistent set of builders filled in on-demand.  * `DisplayStyle::Intel` now shows relative offsets as `$+0xXX`, rather than `0xXX`. diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 8ec2b7f..dee759d 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4898,6 +4898,11 @@ impl OperandCodeBuilder {      }  } +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: OperandCode } +  #[allow(non_camel_case_types)]  // might be able to pack these into a u8, but with `Operand` being u16 as well now there's little  // point. table entries will have a padding byte per record already. @@ -7366,15 +7371,45 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord {      };  } +/// the actual description for a selection of bits involved in decoding a [`long_mode::Instruction`]. +/// +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption.  #[derive(Clone, Debug, PartialEq, Eq)] -enum InnerDescription { +pub enum InnerDescription { +    /// the literal byte read for a `rex` prefix, `0x4_`.      RexPrefix(u8), +    /// the segment selected by a segment override prefix. this is not necessarily the actual +    /// segment used in the instruction's memory accesses, if any are made.      SegmentPrefix(Segment), +    /// the opcode read for this instruction. this may be reported multiple times in an instruction +    /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two +    /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from +    /// decoding the same instruction. this invariant is not well-tested, and may occur in +    /// practice.      Opcode(Opcode), -    OperandCode(OperandCode), +    /// the operand code indicating how to read operands for this instruction. this is an internal +    /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the +    /// decoding process. `OperandCode` names are unstable, and this variant is only useful for +    /// displaying for human consumption. 
+    OperandCode(OperandCodeWrapper), +    /// a decoded register: a name for the bits used to decode it, the register number those bits +    /// specify, and the fully-constructed [`long_mode::RegSpec`] that was decoded.      RegisterNumber(&'static str, u8, RegSpec), +    /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, +    /// internal details of a prefix, error or constraints on an opcode, operand encoding details, +    /// or other items involved in an instruction.      Misc(&'static str), +    /// a number involved in the instruction: typically either a displacement or immediate. the +    /// string describes which. the `i64` member is typically a sign-extended value from the +    /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign +    /// extending to `-1`. bug reports are highly encouraged for unexpected values.      Number(&'static str, i64), +    /// a boundary between two logically distinct sections of an instruction. these typically +    /// separate the leading prefix string (if any), opcode, and operands (if any). the included +    /// string describes which boundary this is. boundary names should not be considered stable, +    /// and are useful at most for displaying for human consumption.      Boundary(&'static str),  } @@ -7410,7 +7445,7 @@ impl fmt::Display for InnerDescription {              InnerDescription::Opcode(opc) => {                  write!(f, "opcode `{}`", opc)              } -            InnerDescription::OperandCode(code) => { +            InnerDescription::OperandCode(OperandCodeWrapper { code }) => {                  write!(f, "operand code `{:?}`", code)              }              InnerDescription::RegisterNumber(name, num, reg) => { @@ -7429,6 +7464,13 @@ pub struct FieldDescription {      id: u32,  } +impl FieldDescription { +    /// the actual description associated with this bitfield. 
+    pub fn desc(&self) -> &InnerDescription { +        &self.desc +    } +} +  impl yaxpeax_arch::FieldDescription for FieldDescription {      fn id(&self) -> u32 {          self.id @@ -7496,7 +7538,7 @@ fn read_with_annotations<                  });              }              sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { -                desc: InnerDescription::OperandCode(record.1), +                desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),                  id: words.offset() as u32 * 8 - 8 + 1,              });              break record; diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 61aca45..422f6d9 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -4817,6 +4817,11 @@ impl OperandCodeBuilder {      }  } +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: OperandCode } +  #[allow(non_camel_case_types)]  // might be able to pack these into a u8, but with `Operand` being u16 as well now there's little  // point. table entries will have a padding byte per record already. @@ -7381,15 +7386,47 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord {      };  } +/// the actual description for a selection of bits involved in decoding a [`protected_mode::Instruction`]. +/// +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption.  #[derive(Clone, Debug, PartialEq, Eq)] -enum InnerDescription { +pub enum InnerDescription { +    /// the literal byte read for a `rex` prefix, `0x4_`. 
while 32-bit code does not have `rex` +    /// prefixes, this description is also used for the implied `rex`-type bits in `vex` and `evex` +    /// prefixes.      RexPrefix(u8), +    /// the segment selected by a segment override prefix. this is not necessarily the actual +    /// segment used in the instruction's memory accesses, if any are made.      SegmentPrefix(Segment), +    /// the opcode read for this instruction. this may be reported multiple times in an instruction +    /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two +    /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from +    /// decoding the same instruction. this invariant is not well-tested, and may occur in +    /// practice.      Opcode(Opcode), -    OperandCode(OperandCode), +    /// the operand code indicating how to read operands for this instruction. this is an internal +    /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the +    /// decoding process. `OperandCode` names are unstable, and this variant is only useful for +    /// displaying for human consumption. +    OperandCode(OperandCodeWrapper), +    /// a decoded register: a name for the bits used to decode it, the register number those bits +    /// specify, and the fully-constructed [`protected_mode::RegSpec`] that was decoded.      RegisterNumber(&'static str, u8, RegSpec), +    /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, +    /// internal details of a prefix, error or constraints on an opcode, operand encoding details, +    /// or other items involved in an instruction.      Misc(&'static str), +    /// a number involved in the instruction: typically either a displacement or immediate. the +    /// string describes which. 
the `i64` member is typically a sign-extended value from the +    /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign +    /// extending to `-1`. bug reports are highly encouraged for unexpected values.      Number(&'static str, i64), +    /// a boundary between two logically distinct sections of an instruction. these typically +    /// separate the leading prefix string (if any), opcode, and operands (if any). the included +    /// string describes which boundary this is. boundary names should not be considered stable, +    /// and are useful at most for displaying for human consumption.      Boundary(&'static str),  } @@ -7425,7 +7462,7 @@ impl fmt::Display for InnerDescription {              InnerDescription::Opcode(opc) => {                  write!(f, "opcode `{}`", opc)              } -            InnerDescription::OperandCode(code) => { +            InnerDescription::OperandCode(OperandCodeWrapper { code }) => {                  write!(f, "operand code `{:?}`", code)              }              InnerDescription::RegisterNumber(name, num, reg) => { @@ -7444,6 +7481,13 @@ pub struct FieldDescription {      id: u32,  } +impl FieldDescription { +    /// the actual description associated with this bitfield. 
+    pub fn desc(&self) -> &InnerDescription { +        &self.desc +    } +} +  impl yaxpeax_arch::FieldDescription for FieldDescription {      fn id(&self) -> u32 {          self.id @@ -7497,7 +7541,7 @@ fn read_with_annotations<                  });              }              sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { -                desc: InnerDescription::OperandCode(record.1), +                desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),                  id: words.offset() as u32 * 8 - 8 + 1,              });              break record; diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index b60e3ee..3d78fa3 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -4817,6 +4817,11 @@ impl OperandCodeBuilder {      }  } +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: OperandCode } +  #[allow(non_camel_case_types)]  // might be able to pack these into a u8, but with `Operand` being u16 as well now there's little  // point. table entries will have a padding byte per record already. @@ -7383,15 +7388,47 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord {      };  } +/// the actual description for a selection of bits involved in decoding a [`real_mode::Instruction`]. +/// +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption.  #[derive(Clone, Debug, PartialEq, Eq)] -enum InnerDescription { +pub enum InnerDescription { +    /// the literal byte read for a `rex` prefix, `0x4_`. 
while 32-bit code does not have `rex` +    /// prefixes, this description is also used for the implied `rex`-type bits in `vex` and `evex` +    /// prefixes.      RexPrefix(u8), +    /// the segment selected by a segment override prefix. this is not necessarily the actual +    /// segment used in the instruction's memory accesses, if any are made.      SegmentPrefix(Segment), +    /// the opcode read for this instruction. this may be reported multiple times in an instruction +    /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two +    /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from +    /// decoding the same instruction. this invariant is not well-tested, and may occur in +    /// practice.      Opcode(Opcode), -    OperandCode(OperandCode), +    /// the operand code indicating how to read operands for this instruction. this is an internal +    /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the +    /// decoding process. `OperandCode` names are unstable, and this variant is only useful for +    /// displaying for human consumption. +    OperandCode(OperandCodeWrapper), +    /// a decoded register: a name for the bits used to decode it, the register number those bits +    /// specify, and the fully-constructed [`real_mode::RegSpec`] that was decoded.      RegisterNumber(&'static str, u8, RegSpec), +    /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, +    /// internal details of a prefix, error or constraints on an opcode, operand encoding details, +    /// or other items involved in an instruction.      Misc(&'static str), +    /// a number involved in the instruction: typically either a displacement or immediate. the +    /// string describes which. 
the `i64` member is typically a sign-extended value from the +    /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign +    /// extending to `-1`. bug reports are highly encouraged for unexpected values.      Number(&'static str, i64), +    /// a boundary between two logically distinct sections of an instruction. these typically +    /// separate the leading prefix string (if any), opcode, and operands (if any). the included +    /// string describes which boundary this is. boundary names should not be considered stable, +    /// and are useful at most for displaying for human consumption.      Boundary(&'static str),  } @@ -7427,7 +7464,7 @@ impl fmt::Display for InnerDescription {              InnerDescription::Opcode(opc) => {                  write!(f, "opcode `{}`", opc)              } -            InnerDescription::OperandCode(code) => { +            InnerDescription::OperandCode(OperandCodeWrapper { code }) => {                  write!(f, "operand code `{:?}`", code)              }              InnerDescription::RegisterNumber(name, num, reg) => { @@ -7446,6 +7483,13 @@ pub struct FieldDescription {      id: u32,  } +impl FieldDescription { +    /// the actual description associated with this bitfield. +    pub fn desc(&self) -> &InnerDescription { +        &self.desc +    } +} +  impl yaxpeax_arch::FieldDescription for FieldDescription {      fn id(&self) -> u32 {          self.id @@ -7499,7 +7543,7 @@ fn read_with_annotations<                  });              }              sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { -                desc: InnerDescription::OperandCode(record.1), +                desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),                  id: words.offset() as u32 * 8 - 8 + 1,              });              break record; | 
