aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2021-08-21 22:17:24 -0700
committeriximeow <me@iximeow.net>2021-08-21 22:17:24 -0700
commit39eef01e04e478ec5cfa3c8f520c831631ecd67d (patch)
tree51f5c5fc7282b47b16cf6fcaf597a69bbcfe8605
parent1c06541a85fadbc5b9fc0a3bfee10cec3c8e5667 (diff)
add `AnnotatingDecoder` note to CHANGELOG and publicize descriptions
-rw-r--r--CHANGELOG19
-rw-r--r--src/long_mode/mod.rs50
-rw-r--r--src/protected_mode/mod.rs52
-rw-r--r--src/real_mode/mod.rs52
4 files changed, 161 insertions, 12 deletions
diff --git a/CHANGELOG b/CHANGELOG
index c5fcfd0..a647897 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,24 @@
## 1.1.0
+* implement `AnnotatingDecoder` from `yaxpeax-arch=0.2.6` and later.
+ this is a relatively involved addition. for rustc reasons, there are several
+ additional `inline(always)` attributes applied to keep non-annotating decoder
+ calls yielding the same generated code (and performance) as before.
+
+ annotations are produced for much but not all of 16-, 32-, and 64-bit x86,
+ describing prefixes, opcodes, operand codes, and for more common
+ instructions, operand encoding as well. descriptions are represented
+ by the `FieldDescription` struct in all architectures. `id` generally matches
+ some kind of parse order for the instruction, typically the order that
+ `yaxpeax-x86` considers bit fields in decoding an instruction. prefixes will
+ have lower id than opcodes, opcodes will have lower id than operands,
+ immediates will have the highest id due to being last values read in an
+ instruction.
+
+ between prefixes, opcodes, and operands, "Boundary" field descriptions are
+ reported as a hint to library clients that a logical grouping of descriptions
+ has ended.
+
* `pub const fn` builders for all general-purpose registers, segment registers, and ip/flags registers.
- this corrects a spotty and inconsistent set of builders filled in on-demand.
* `DisplayStyle::Intel` now shows relative offsets as `$+0xXX`, rather than `0xXX`.
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index 8ec2b7f..dee759d 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -4898,6 +4898,11 @@ impl OperandCodeBuilder {
}
}
+/// a wrapper to hide internal library implementation details. this is only useful for the inner
+/// content's `Display` impl, which itself is unstable and suitable only for human consumption.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct OperandCodeWrapper { code: OperandCode }
+
#[allow(non_camel_case_types)]
// might be able to pack these into a u8, but with `Operand` being u16 as well now there's little
// point. table entries will have a padding byte per record already.
@@ -7366,15 +7371,45 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord {
};
}
+/// the actual description for a selection of bits involved in decoding a [`long_mode::Instruction`].
+///
+/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full
+/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable
+/// and only useful for displaying for human consumption.
#[derive(Clone, Debug, PartialEq, Eq)]
-enum InnerDescription {
+pub enum InnerDescription {
+ /// the literal byte read for a `rex` prefix, `0x4_`.
RexPrefix(u8),
+ /// the segment selected by a segment override prefix. this is not necessarily the actual
+ /// segment used in the instruction's memory accesses, if any are made.
SegmentPrefix(Segment),
+ /// the opcode read for this instruction. this may be reported multiple times in an instruction
+ /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two
+ /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from
+ /// decoding the same instruction. this invariant is not well-tested, and violations may occur
+ /// in practice.
Opcode(Opcode),
- OperandCode(OperandCode),
+ /// the operand code indicating how to read operands for this instruction. this is an internal
+ /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the
+ /// decoding process. `OperandCode` names are unstable, and this variant is only useful for
+ /// displaying for human consumption.
+ OperandCode(OperandCodeWrapper),
+ /// a decoded register: a name for the bits used to decode it, the register number those bits
+ /// specify, and the fully-constructed [`long_mode::RegSpec`] that was decoded.
RegisterNumber(&'static str, u8, RegSpec),
+ /// a miscellaneous string describing some bits of the instruction. this may describe a prefix,
+ /// internal details of a prefix, errors or constraints on an opcode, operand encoding details,
+ /// or other items involved in an instruction.
Misc(&'static str),
+ /// a number involved in the instruction: typically either a displacement or immediate. the
+ /// string describes which. the `i64` member is typically a sign-extended value from the
+ /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign
+ /// extending to `-1`. bug reports are highly encouraged for unexpected values.
Number(&'static str, i64),
+ /// a boundary between two logically distinct sections of an instruction. these typically
+ /// separate the leading prefix string (if any), opcode, and operands (if any). the included
+ /// string describes which boundary this is. boundary names should not be considered stable,
+ /// and are useful at most for displaying for human consumption.
Boundary(&'static str),
}
@@ -7410,7 +7445,7 @@ impl fmt::Display for InnerDescription {
InnerDescription::Opcode(opc) => {
write!(f, "opcode `{}`", opc)
}
- InnerDescription::OperandCode(code) => {
+ InnerDescription::OperandCode(OperandCodeWrapper { code }) => {
write!(f, "operand code `{:?}`", code)
}
InnerDescription::RegisterNumber(name, num, reg) => {
@@ -7429,6 +7464,13 @@ pub struct FieldDescription {
id: u32,
}
+impl FieldDescription {
+ /// the actual description associated with this bitfield.
+ pub fn desc(&self) -> &InnerDescription {
+ &self.desc
+ }
+}
+
impl yaxpeax_arch::FieldDescription for FieldDescription {
fn id(&self) -> u32 {
self.id
@@ -7496,7 +7538,7 @@ fn read_with_annotations<
});
}
sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
- desc: InnerDescription::OperandCode(record.1),
+ desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),
id: words.offset() as u32 * 8 - 8 + 1,
});
break record;
diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs
index 61aca45..422f6d9 100644
--- a/src/protected_mode/mod.rs
+++ b/src/protected_mode/mod.rs
@@ -4817,6 +4817,11 @@ impl OperandCodeBuilder {
}
}
+/// a wrapper to hide internal library implementation details. this is only useful for the inner
+/// content's `Display` impl, which itself is unstable and suitable only for human consumption.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct OperandCodeWrapper { code: OperandCode }
+
#[allow(non_camel_case_types)]
// might be able to pack these into a u8, but with `Operand` being u16 as well now there's little
// point. table entries will have a padding byte per record already.
@@ -7381,15 +7386,47 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord {
};
}
+/// the actual description for a selection of bits involved in decoding a [`protected_mode::Instruction`].
+///
+/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full
+/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable
+/// and only useful for displaying for human consumption.
#[derive(Clone, Debug, PartialEq, Eq)]
-enum InnerDescription {
+pub enum InnerDescription {
+ /// the literal byte read for a `rex` prefix, `0x4_`. while 32-bit code does not have `rex`
+ /// prefixes, this description is also used for the implied `rex`-type bits in `vex` and `evex`
+ /// prefixes.
RexPrefix(u8),
+ /// the segment selected by a segment override prefix. this is not necessarily the actual
+ /// segment used in the instruction's memory accesses, if any are made.
SegmentPrefix(Segment),
+ /// the opcode read for this instruction. this may be reported multiple times in an instruction
+ /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two
+ /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from
+ /// decoding the same instruction. this invariant is not well-tested, and violations may occur
+ /// in practice.
Opcode(Opcode),
- OperandCode(OperandCode),
+ /// the operand code indicating how to read operands for this instruction. this is an internal
+ /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the
+ /// decoding process. `OperandCode` names are unstable, and this variant is only useful for
+ /// displaying for human consumption.
+ OperandCode(OperandCodeWrapper),
+ /// a decoded register: a name for the bits used to decode it, the register number those bits
+ /// specify, and the fully-constructed [`protected_mode::RegSpec`] that was decoded.
RegisterNumber(&'static str, u8, RegSpec),
+ /// a miscellaneous string describing some bits of the instruction. this may describe a prefix,
+ /// internal details of a prefix, errors or constraints on an opcode, operand encoding details,
+ /// or other items involved in an instruction.
Misc(&'static str),
+ /// a number involved in the instruction: typically either a displacement or immediate. the
+ /// string describes which. the `i64` member is typically a sign-extended value from the
+ /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign
+ /// extending to `-1`. bug reports are highly encouraged for unexpected values.
Number(&'static str, i64),
+ /// a boundary between two logically distinct sections of an instruction. these typically
+ /// separate the leading prefix string (if any), opcode, and operands (if any). the included
+ /// string describes which boundary this is. boundary names should not be considered stable,
+ /// and are useful at most for displaying for human consumption.
Boundary(&'static str),
}
@@ -7425,7 +7462,7 @@ impl fmt::Display for InnerDescription {
InnerDescription::Opcode(opc) => {
write!(f, "opcode `{}`", opc)
}
- InnerDescription::OperandCode(code) => {
+ InnerDescription::OperandCode(OperandCodeWrapper { code }) => {
write!(f, "operand code `{:?}`", code)
}
InnerDescription::RegisterNumber(name, num, reg) => {
@@ -7444,6 +7481,13 @@ pub struct FieldDescription {
id: u32,
}
+impl FieldDescription {
+ /// the actual description associated with this bitfield.
+ pub fn desc(&self) -> &InnerDescription {
+ &self.desc
+ }
+}
+
impl yaxpeax_arch::FieldDescription for FieldDescription {
fn id(&self) -> u32 {
self.id
@@ -7497,7 +7541,7 @@ fn read_with_annotations<
});
}
sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
- desc: InnerDescription::OperandCode(record.1),
+ desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),
id: words.offset() as u32 * 8 - 8 + 1,
});
break record;
diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs
index b60e3ee..3d78fa3 100644
--- a/src/real_mode/mod.rs
+++ b/src/real_mode/mod.rs
@@ -4817,6 +4817,11 @@ impl OperandCodeBuilder {
}
}
+/// a wrapper to hide internal library implementation details. this is only useful for the inner
+/// content's `Display` impl, which itself is unstable and suitable only for human consumption.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct OperandCodeWrapper { code: OperandCode }
+
#[allow(non_camel_case_types)]
// might be able to pack these into a u8, but with `Operand` being u16 as well now there's little
// point. table entries will have a padding byte per record already.
@@ -7383,15 +7388,47 @@ fn read_0f3a_opcode(opcode: u8, prefixes: &mut Prefixes) -> OpcodeRecord {
};
}
+/// the actual description for a selection of bits involved in decoding a [`real_mode::Instruction`].
+///
+/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full
+/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable
+/// and only useful for displaying for human consumption.
#[derive(Clone, Debug, PartialEq, Eq)]
-enum InnerDescription {
+pub enum InnerDescription {
+ /// the literal byte read for a `rex` prefix, `0x4_`. while 16-bit code does not have `rex`
+ /// prefixes, this description is also used for the implied `rex`-type bits in `vex` and `evex`
+ /// prefixes.
RexPrefix(u8),
+ /// the segment selected by a segment override prefix. this is not necessarily the actual
+ /// segment used in the instruction's memory accesses, if any are made.
SegmentPrefix(Segment),
+ /// the opcode read for this instruction. this may be reported multiple times in an instruction
+ /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two
+ /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from
+ /// decoding the same instruction. this invariant is not well-tested, and violations may occur
+ /// in practice.
Opcode(Opcode),
- OperandCode(OperandCode),
+ /// the operand code indicating how to read operands for this instruction. this is an internal
+ /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the
+ /// decoding process. `OperandCode` names are unstable, and this variant is only useful for
+ /// displaying for human consumption.
+ OperandCode(OperandCodeWrapper),
+ /// a decoded register: a name for the bits used to decode it, the register number those bits
+ /// specify, and the fully-constructed [`real_mode::RegSpec`] that was decoded.
RegisterNumber(&'static str, u8, RegSpec),
+ /// a miscellaneous string describing some bits of the instruction. this may describe a prefix,
+ /// internal details of a prefix, errors or constraints on an opcode, operand encoding details,
+ /// or other items involved in an instruction.
Misc(&'static str),
+ /// a number involved in the instruction: typically either a displacement or immediate. the
+ /// string describes which. the `i64` member is typically a sign-extended value from the
+ /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign
+ /// extending to `-1`. bug reports are highly encouraged for unexpected values.
Number(&'static str, i64),
+ /// a boundary between two logically distinct sections of an instruction. these typically
+ /// separate the leading prefix string (if any), opcode, and operands (if any). the included
+ /// string describes which boundary this is. boundary names should not be considered stable,
+ /// and are useful at most for displaying for human consumption.
Boundary(&'static str),
}
@@ -7427,7 +7464,7 @@ impl fmt::Display for InnerDescription {
InnerDescription::Opcode(opc) => {
write!(f, "opcode `{}`", opc)
}
- InnerDescription::OperandCode(code) => {
+ InnerDescription::OperandCode(OperandCodeWrapper { code }) => {
write!(f, "operand code `{:?}`", code)
}
InnerDescription::RegisterNumber(name, num, reg) => {
@@ -7446,6 +7483,13 @@ pub struct FieldDescription {
id: u32,
}
+impl FieldDescription {
+ /// the actual description associated with this bitfield.
+ pub fn desc(&self) -> &InnerDescription {
+ &self.desc
+ }
+}
+
impl yaxpeax_arch::FieldDescription for FieldDescription {
fn id(&self) -> u32 {
self.id
@@ -7499,7 +7543,7 @@ fn read_with_annotations<
});
}
sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
- desc: InnerDescription::OperandCode(record.1),
+ desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),
id: words.offset() as u32 * 8 - 8 + 1,
});
break record;