From 83f07b4e70efc45b2495d66a58f5d6ff0e5b7221 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 22 Aug 2021 14:43:06 -0700 Subject: move annotation stuff to its own module --- CHANGELOG | 6 +++ src/annotation/mod.rs | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 63 ++----------------------- src/reader.rs | 2 +- 4 files changed, 137 insertions(+), 60 deletions(-) create mode 100644 src/annotation/mod.rs diff --git a/CHANGELOG b/CHANGELOG index 2ffb6d6..0df9315 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,12 @@ TODO: Reader::next_n should return the number of items read as Err(ReadError::In TODO: Reader::offset should return an AddressDiff
, not a bare Address TODO: impls of `fn one` and `fn zero` so downstream users don't have to import num_traits directly +## 0.2.7 + +moved `AnnotatingDecoder` and its associated types to `annotation/`, for module-level documentation about that feature. + +yanked 0.2.6 because there was not yet a user of it other than myself, and it had this feature in the wrong location in the crate. + ## 0.2.6 added `AnnotatingDecoder` and associated traits `FieldDescription` and `DescriptionSink` for architectures to report meanings for bit ranges in decoded instructions. diff --git a/src/annotation/mod.rs b/src/annotation/mod.rs new file mode 100644 index 0000000..0139cf3 --- /dev/null +++ b/src/annotation/mod.rs @@ -0,0 +1,126 @@ +//! traits (and convenient impls) for decoders that also produce descriptions of parsed bit fields. +//! +//! the design of this API is discussed in [`yaxpeax-arch` +//! documentation](https://github.com/iximeow/yaxpeax-arch/blob/no-gods-no-/docs/0001-AnnotatingDecoder.md#descriptionsink). +//! +//! ## usage +//! +//! [`AnnotatingDecoder::decode_with_annotation`] decodes an instruction much like +//! [`crate::Decoder::decode_into`], but also reports descriptions of bit fields to a provided +//! [`DescriptionSink`]. [`VecSink`] is likely the `DescriptionSink` of interest to retain fields; +//! decoders are not required to make any guarantees about the order of descriptions, either by the +//! description's associated [`FieldDescription::id`], or with respect to the bits a +//! `FieldDescription` is reported against. fields may be described by multiple `FieldDescription` +//! with matching `id` and `desc` -- this is to describe data in an instruction where +//! non-contiguous bits are taken together for a single detail. for these cases, the various +//! `FieldDescription` must compare equal, and users of `yaxpeax-arch` can rely on this equivalence +//! for grouping bit ranges. +//! +//! 
in a generic setting, there isn't much to do with a `FieldDescription` other than display it. a +//! typical use might look something like: +//! ``` +//! fn show_field_descriptions<A: Arch>(decoder: A::Decoder, buf: &[u8]) +//! where +//! A::Decoder: AnnotatingDecoder<A>, +//! A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader<A::Address, A::Word>, +//! { +//! let mut inst = A::Instruction::default(); +//! let mut reader = U8Reader::new(buf); +//! let mut sink: VecSink<<A::Decoder as AnnotatingDecoder<A>>::FieldDescription> = VecSink::new(); +//! +//! decoder.decode_with_annotation(&mut inst, &mut reader, &mut sink).unwrap(); +//! +//! println!("decoded instruction {}", inst); +//! for (start, end, desc) in sink.records.iter() { +//! println!(" bits [{}, {}]: {}", start, end, desc); +//! } +//! } +//! ``` +//! +//! note that the range `[start, end]` for a reported span is _inclusive_. the `end`-th bit of +//! an instruction's bit stream is described by the description. +//! +//! ## implementation guidance +//! +//! the typical implementation pattern is that an architecture's `Decoder` implements [`crate::Decoder`] +//! _and_ [`AnnotatingDecoder`], then callers are free to choose which style of decoding they want. +//! [`NullSink`] has a blanket impl of [`DescriptionSink`] for all possible descriptions, and +//! discards reported field descriptions. `decode_with_annotation` with annotations reported to a +//! `NullSink` must be functionally identical to a call to `Decoder::decode_into`. +//! +//! the important points: +//! +//! * `AnnotatingDecoder` is an **optional** implementation for decoders. +//! * `FieldDescription` in general is oriented towards human-directed output, but implementations +//! can be as precise as they want. +//! * since bit/byte order varies from architecture to architecture, a field's `start` and `end` +//! are defined with some ordering from the corresponding decoder crate. crates should describe the +//! 
bit ordering they select, and where possible, the bit ordering they describe should match +//! relevant ISA manuals. +//! * `FieldDescription`s that return true for [`FieldDescription::is_separator`] are an exception +//! to bit span inclusivity: for these descriptions, the bit range should be `[b, b]` where `b` is +//! the last bit before the boundary being delimited. unlike other descriptions, `is_separator` +//! descriptions describe the space between bits `b` and `b+1`. +//! * if a description is to cover multiple bit fields, the reported `FieldDescription` must +//! be identical on `id` and `desc` for all involved bit fields. + +use crate::{Arch, Reader}; + +use core::fmt::Display; + +/// implementors of `DescriptionSink` receive descriptions of an instruction's disassembly process +/// and relevant offsets in the bitstream being decoded. descriptions are architecture-specific, and +/// architectures are expected to be able to turn the bit-level `start` and `end` values into a +/// meaningful description of bits in the original instruction stream. +pub trait DescriptionSink { + /// inform this `DescriptionSink` of a `description` that was informed by bits `start` to + /// `end` from the start of an instruction's decoding. `start` and `end` are only relative to the + /// instruction being decoded when this `DescriptionSink` was provided, so they will have no + /// relation to the position in an underlying data stream used for past or future instructions. 
+ fn record(&mut self, start: u32, end: u32, description: Descriptor); +} + +pub struct NullSink; + +impl DescriptionSink for NullSink { + fn record(&mut self, _start: u32, _end: u32, _description: T) { } +} + +#[cfg(feature = "std")] +pub struct VecSink { + pub records: std::vec::Vec<(u32, u32, T)> +} + +#[cfg(feature = "std")] +impl VecSink { + pub fn new() -> Self { + VecSink { records: std::vec::Vec::new() } + } +} + +#[cfg(feature = "std")] +impl DescriptionSink for VecSink { + fn record(&mut self, start: u32, end: u32, description: T) { + self.records.push((start, end, description)); + } +} + +pub trait FieldDescription { + fn id(&self) -> u32; + fn is_separator(&self) -> bool; +} + +/// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the +/// decoder able to report descriptions of bits or fields in the instruction to a sink implementing +/// [`DescriptionSink`]. the sink may be [`NullSink`] to discard provided data. decoding with a +/// `NullSink` should behave identically to `Decoder::decode_into`. implementors are recommended to +/// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_annotation` if +/// implementing both traits. +pub trait AnnotatingDecoder { + type FieldDescription: FieldDescription + Clone + Display + PartialEq; + + fn decode_with_annotation< + T: Reader, + S: DescriptionSink + >(&self, inst: &mut A::Instruction, words: &mut T, sink: &mut S) -> Result<(), A::DecodeError>; +} diff --git a/src/lib.rs b/src/lib.rs index 0fe090e..88370bc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,8 @@ pub use address::{AddressDisplayUsize, AddressDisplayU64, AddressDisplayU32, Add #[cfg(feature="address-parse")] pub use address::AddrParse; +pub mod annotation; + mod color; pub use color::{Colorize, NoColors, YaxColors}; @@ -136,14 +138,14 @@ impl fmt::Display for NoDescription { /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. 
errors are /// the architecture-defined [`DecodeError`] implemention. pub trait Decoder { - /// decode one instruction for this architecture from the [`yaxpeax_arch::Reader`] of this + /// decode one instruction for this architecture from the [`crate::Reader`] of this /// architecture's `Word`. fn decode>(&self, words: &mut T) -> Result { let mut inst = A::Instruction::default(); self.decode_into(&mut inst, words).map(|_: ()| inst) } - /// decode one instruction for this architecture from the [`yaxpeax_arch::Reader`] of this + /// decode one instruction for this architecture from the [`crate::Reader`] of this /// architecture's `Word`, writing into the provided `inst`. /// /// SAFETY: @@ -155,63 +157,6 @@ pub trait Decoder { fn decode_into>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>; } -/// implementors of `DescriptionSink` receive descriptions of an instruction's disassembly process -/// and relevant offsets in the bitstream being decoded. descriptions are archtecture-specific, and -/// architectures are expected to be able to turn the bit-level `start` and `width` values into a -/// meaningful description of bits in the original instruction stream. -pub trait DescriptionSink { - /// inform this `DescriptionSink` of a `description` that was informed by bits `start` to - /// `end` from the start of an instruction's decoding. `start` and `end` are only relative the - /// instruction being decoded when this sink `DescriptionSink` provided, so they will have no - /// relation to the position in an underlying data stream used for past or future instructions. 
- fn record(&mut self, start: u32, end: u32, description: Descriptor); -} - -pub struct NullSink; - -impl DescriptionSink for NullSink { - fn record(&mut self, _start: u32, _end: u32, _description: T) { } -} - -#[cfg(feature = "std")] -pub struct VecSink { - pub records: std::vec::Vec<(u32, u32, T)> -} - -#[cfg(feature = "std")] -impl VecSink { - pub fn new() -> Self { - VecSink { records: std::vec::Vec::new() } - } -} - -#[cfg(feature = "std")] -impl DescriptionSink for VecSink { - fn record(&mut self, start: u32, end: u32, description: T) { - self.records.push((start, end, description)); - } -} - -pub trait FieldDescription { - fn id(&self) -> u32; - fn is_separator(&self) -> bool; -} - -/// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the -/// decoder able to report descriptions of bits or fields in the instruction to a sink implementing -/// [`DescriptionSink`]. the sink may be [`NullSink`] which discards provided data. decoding with a -/// `NullSink` should behave identically to `Decoder::decode_into`. implementors are recommended to -/// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_fields` if -/// implementing both traits. -pub trait AnnotatingDecoder { - type FieldDescription: FieldDescription + Clone + Display + PartialEq; - - fn decode_with_annotation< - T: Reader, - S: DescriptionSink - >(&self, inst: &mut A::Instruction, words: &mut T, sink: &mut S) -> Result<(), A::DecodeError>; -} - #[cfg(feature = "use-serde")] pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {} #[cfg(not(feature = "use-serde"))] diff --git a/src/reader.rs b/src/reader.rs index acb0146..b9514ae 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -13,7 +13,7 @@ pub enum ReadError { } /// a trait defining how `Item`-sized words are read at `Address`-positioned offsets into some -/// stream of data. 
for *most* uses, [`yaxpeax_arch::U8Reader`] probably is sufficient. when +/// stream of data. for *most* uses, [`crate::U8Reader`] probably is sufficient. when /// reading from data sources that aren't `&[u8]`, `Address` isn't a multiple of `u8`, or `Item` /// isn't a multiple of 8 bits, `U8Reader` won't be sufficient. pub trait Reader { -- cgit v1.1