From 39eef01e04e478ec5cfa3c8f520c831631ecd67d Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 21 Aug 2021 22:17:24 -0700 Subject: add `AnnotatingDecoder` note to CHANGELOG and publicize descriptions --- CHANGELOG | 19 +++++++++++++++++ src/long_mode/mod.rs | 50 +++++++++++++++++++++++++++++++++++++++++---- src/protected_mode/mod.rs | 52 +++++++++++++++++++++++++++++++++++++++++++---- src/real_mode/mod.rs | 52 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 161 insertions(+), 12 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c5fcfd0..a647897 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,24 @@ ## 1.1.0 +* implement `AnnotatingDecoder` from `yaxpeax-arch=0.2.6` and later. + this is a relatively involved addition. for rustc reasons, there are several + additional `inline(always)` attributes applied to keep non-annotating decoder + calls yielding the same generated code (and performance) as before. + + annotations are produced for much but not all of 16-, 32-, and 64-bit x86, + describing prefixes, opcodes, operand encoding, and for more common + instructions, operand encoding as well. descriptions provided are described + by the `FieldDescription` struct in all architectures. `id` generally matches + some kind of parse order for the instruction, typically the order that + `yaxpeax-x86` considers bit fields in decoding an instruction. prefixes will + have lower id than opcodes, opcodes will have lower id than operands, + immediates will have the highest id due to being last values read in an + instruction. + + between prefixes, opcodes, and operands, "Boundary" field descriptions are + reported as a hint to library clients that a logical grouping of descriptions + has ended. + * `pub const fn` builders for all general-purpose registers, segment registers, and ip/flags registers. - this corrects a spotty and inconsistent set of builders filled in on-demand. * `DisplayStyle::Intel` now shows relative offsets as `$+0xXX`, rather than `0xXX`. diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 8ec2b7f..dee759d 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4898,6 +4898,11 @@ impl OperandCodeBuilder { } } +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: OperandCode } + #[allow(non_camel_case_types)] // might be able to pack these into a u8, but with `Operand` being u16 as well now there's little // point. table entries will have a padding byte per record already. @@ -7366,15 +7371,45 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord { }; } +/// the actual description for a selection of bits involved in decoding an [`long_mode::Instruction`]. +/// +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption. #[derive(Clone, Debug, PartialEq, Eq)] -enum InnerDescription { +pub enum InnerDescription { + /// the literal byte read for a `rex` prefix, `0x4_`. RexPrefix(u8), + /// the segment selected by a segment override prefix. this is not necessarily the actual + /// segement used in the instruction's memory accesses, if any are made. SegmentPrefix(Segment), + /// the opcode read for this instruction. this may be reported multiple times in an instruction + /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two + /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from + /// decoding the same instruction. this invariant is not well-tested, and may occur in + /// practice. Opcode(Opcode), - OperandCode(OperandCode), + /// the operand code indicating how to read operands for this instruction. this is an internal + /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the + /// decoding process. `OperandCode` names are unstable, and this variant is only useful for + /// displaying for human consumption. + OperandCode(OperandCodeWrapper), + /// a decoded register: a name for the bits used to decode it, the register number those bits + /// specify, and the fully-constructed [`long_mode::RegSpec`] that was decoded. RegisterNumber(&'static str, u8, RegSpec), + /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, + /// internal details of a prefix, error or constraints on an opcode, operand encoding details, + /// or other items involved in an instruction. Misc(&'static str), + /// a number involved in the instruction: typically either a disaplacement or immediate. the + /// string describes which. the `i64` member is typically a sign-extended value from the + /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign + /// extending to `-1`. bug reports are highly encouraged for unexpected values. Number(&'static str, i64), + /// a boundary between two logically distinct sections of an instruction. these typically + /// separate the leading prefix string (if any), opcode, and operands (if any). the included + /// string describes which boundary this is. boundary names should not be considered stable, + /// and are useful at most for displaying for human consumption. Boundary(&'static str), } @@ -7410,7 +7445,7 @@ impl fmt::Display for InnerDescription { InnerDescription::Opcode(opc) => { write!(f, "opcode `{}`", opc) } - InnerDescription::OperandCode(code) => { + InnerDescription::OperandCode(OperandCodeWrapper { code }) => { write!(f, "operand code `{:?}`", code) } InnerDescription::RegisterNumber(name, num, reg) => { @@ -7429,6 +7464,13 @@ pub struct FieldDescription { id: u32, } +impl FieldDescription { + /// the actual description associated with this bitfield. + pub fn desc(&self) -> &InnerDescription { + &self.desc + } +} + impl yaxpeax_arch::FieldDescription for FieldDescription { fn id(&self) -> u32 { self.id @@ -7496,7 +7538,7 @@ fn read_with_annotations< }); } sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::OperandCode(record.1), + desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }), id: words.offset() as u32 * 8 - 8 + 1, }); break record; diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 61aca45..422f6d9 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -4817,6 +4817,11 @@ impl OperandCodeBuilder { } } +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: OperandCode } + #[allow(non_camel_case_types)] // might be able to pack these into a u8, but with `Operand` being u16 as well now there's little // point. table entries will have a padding byte per record already. @@ -7381,15 +7386,47 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord { }; } +/// the actual description for a selection of bits involved in decoding an [`long_mode::Instruction`]. +/// +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption. #[derive(Clone, Debug, PartialEq, Eq)] -enum InnerDescription { +pub enum InnerDescription { + /// the literal byte read for a `rex` prefix, `0x4_`. while 32-bit code does not have `rex` + /// prefixes, this description is also used for the implied `rex`-type bits in `vex` and `evex` + /// prefixes. RexPrefix(u8), + /// the segment selected by a segment override prefix. this is not necessarily the actual + /// segement used in the instruction's memory accesses, if any are made. SegmentPrefix(Segment), + /// the opcode read for this instruction. this may be reported multiple times in an instruction + /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two + /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from + /// decoding the same instruction. this invariant is not well-tested, and may occur in + /// practice. Opcode(Opcode), - OperandCode(OperandCode), + /// the operand code indicating how to read operands for this instruction. this is an internal + /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the + /// decoding process. `OperandCode` names are unstable, and this variant is only useful for + /// displaying for human consumption. + OperandCode(OperandCodeWrapper), + /// a decoded register: a name for the bits used to decode it, the register number those bits + /// specify, and the fully-constructed [`long_mode::RegSpec`] that was decoded. RegisterNumber(&'static str, u8, RegSpec), + /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, + /// internal details of a prefix, error or constraints on an opcode, operand encoding details, + /// or other items involved in an instruction. Misc(&'static str), + /// a number involved in the instruction: typically either a disaplacement or immediate. the + /// string describes which. the `i64` member is typically a sign-extended value from the + /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign + /// extending to `-1`. bug reports are highly encouraged for unexpected values. Number(&'static str, i64), + /// a boundary between two logically distinct sections of an instruction. these typically + /// separate the leading prefix string (if any), opcode, and operands (if any). the included + /// string describes which boundary this is. boundary names should not be considered stable, + /// and are useful at most for displaying for human consumption. Boundary(&'static str), } @@ -7425,7 +7462,7 @@ impl fmt::Display for InnerDescription { InnerDescription::Opcode(opc) => { write!(f, "opcode `{}`", opc) } - InnerDescription::OperandCode(code) => { + InnerDescription::OperandCode(OperandCodeWrapper { code }) => { write!(f, "operand code `{:?}`", code) } InnerDescription::RegisterNumber(name, num, reg) => { @@ -7444,6 +7481,13 @@ pub struct FieldDescription { id: u32, } +impl FieldDescription { + /// the actual description associated with this bitfield. + pub fn desc(&self) -> &InnerDescription { + &self.desc + } +} + impl yaxpeax_arch::FieldDescription for FieldDescription { fn id(&self) -> u32 { self.id @@ -7497,7 +7541,7 @@ fn read_with_annotations< }); } sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::OperandCode(record.1), + desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }), id: words.offset() as u32 * 8 - 8 + 1, }); break record; diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index b60e3ee..3d78fa3 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -4817,6 +4817,11 @@ impl OperandCodeBuilder { } } +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: OperandCode } + #[allow(non_camel_case_types)] // might be able to pack these into a u8, but with `Operand` being u16 as well now there's little // point. table entries will have a padding byte per record already. @@ -7383,15 +7388,47 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord { }; } +/// the actual description for a selection of bits involved in decoding an [`long_mode::Instruction`]. +/// +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption. #[derive(Clone, Debug, PartialEq, Eq)] -enum InnerDescription { +pub enum InnerDescription { + /// the literal byte read for a `rex` prefix, `0x4_`. while 32-bit code does not have `rex` + /// prefixes, this description is also used for the implied `rex`-type bits in `vex` and `evex` + /// prefixes. RexPrefix(u8), + /// the segment selected by a segment override prefix. this is not necessarily the actual + /// segement used in the instruction's memory accesses, if any are made. SegmentPrefix(Segment), + /// the opcode read for this instruction. this may be reported multiple times in an instruction + /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two + /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from + /// decoding the same instruction. this invariant is not well-tested, and may occur in + /// practice. Opcode(Opcode), - OperandCode(OperandCode), + /// the operand code indicating how to read operands for this instruction. this is an internal + /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the + /// decoding process. `OperandCode` names are unstable, and this variant is only useful for + /// displaying for human consumption. + OperandCode(OperandCodeWrapper), + /// a decoded register: a name for the bits used to decode it, the register number those bits + /// specify, and the fully-constructed [`long_mode::RegSpec`] that was decoded. RegisterNumber(&'static str, u8, RegSpec), + /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, + /// internal details of a prefix, error or constraints on an opcode, operand encoding details, + /// or other items involved in an instruction. Misc(&'static str), + /// a number involved in the instruction: typically either a disaplacement or immediate. the + /// string describes which. the `i64` member is typically a sign-extended value from the + /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign + /// extending to `-1`. bug reports are highly encouraged for unexpected values. Number(&'static str, i64), + /// a boundary between two logically distinct sections of an instruction. these typically + /// separate the leading prefix string (if any), opcode, and operands (if any). the included + /// string describes which boundary this is. boundary names should not be considered stable, + /// and are useful at most for displaying for human consumption. Boundary(&'static str), } @@ -7427,7 +7464,7 @@ impl fmt::Display for InnerDescription { InnerDescription::Opcode(opc) => { write!(f, "opcode `{}`", opc) } - InnerDescription::OperandCode(code) => { + InnerDescription::OperandCode(OperandCodeWrapper { code }) => { write!(f, "operand code `{:?}`", code) } InnerDescription::RegisterNumber(name, num, reg) => { @@ -7446,6 +7483,13 @@ pub struct FieldDescription { id: u32, } +impl FieldDescription { + /// the actual description associated with this bitfield. + pub fn desc(&self) -> &InnerDescription { + &self.desc + } +} + impl yaxpeax_arch::FieldDescription for FieldDescription { fn id(&self) -> u32 { self.id @@ -7499,7 +7543,7 @@ fn read_with_annotations< }); } sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::OperandCode(record.1), + desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }), id: words.offset() as u32 * 8 - 8 + 1, }); break record; -- cgit v1.1