diff options
| -rw-r--r-- | CHANGELOG | 84 | ||||
| -rw-r--r-- | Cargo.toml | 16 | ||||
| -rw-r--r-- | Makefile | 19 | ||||
| -rw-r--r-- | README.md | 6 | ||||
| -rw-r--r-- | fuzz/.gitignore | 3 | ||||
| -rw-r--r-- | fuzz/Cargo.toml | 25 | ||||
| -rw-r--r-- | fuzz/fuzz_targets/write_helpers_are_correct.rs | 96 | ||||
| -rw-r--r-- | goodfile | 40 | ||||
| -rw-r--r-- | src/annotation/mod.rs | 43 | ||||
| -rw-r--r-- | src/color_new.rs | 281 | ||||
| -rw-r--r-- | src/display.rs | 58 | ||||
| -rw-r--r-- | src/display/display_sink.rs | 1017 | ||||
| -rw-r--r-- | src/display/display_sink/imp_generic.rs | 26 | ||||
| -rw-r--r-- | src/display/display_sink/imp_x86.rs | 187 | ||||
| -rw-r--r-- | src/lib.rs | 28 | ||||
| -rw-r--r-- | src/reader.rs | 5 | ||||
| -rw-r--r-- | src/safer_unchecked.rs | 40 | ||||
| -rw-r--r-- | src/testkit.rs | 10 | ||||
| -rw-r--r-- | src/testkit/display.rs | 192 | ||||
| -rw-r--r-- | tests/display.rs | 143 | ||||
| -rw-r--r-- | tests/lib.rs | 50 | 
21 files changed, 2315 insertions, 54 deletions
| @@ -1,8 +1,88 @@ -## 0.3.0 +## TODO -TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted +~~TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted~~ +* a reader's `.offset()` should reflect the amount of items that were consumed, if any. if a reader can quickly determine +  there is not enough input, should it return Incomplete(0) or ExhaustedInput? Incomplete(0) vs ExhaustedInput may still +  imply that some state was changed (an access mode, for example). this needs more thought.  TODO: Reader::offset should return an AddressDiff<Address>, not a bare Address +* quick look seems reasonable enough, should be changed in concert with +  yaxpeax-core though and that's more than i'm signing up for today  TODO: impls of `fn one` and `fn zero` so downstream users don't have to import num_traits directly +* seems nice at first but this means that there are conflicting functions when Zero or One are in scope +  ... assuming that the idea at the time was to add `fn one` and `fn zero` to `AddressBase`. +TODO: 0.4.0 or later: +  * remove `mod colors`, crossterm dependency, related feature flags + +## 0.3.2 + +fix yaxpeax-arch not building for non-x86 targets when alloc is not enabled + +## 0.3.1 + +fix InstructionTextSink::write_char to not panic in debug builds + +## 0.3.0 + +added a new crate feature flag, `alloc`. +  this flag is for any features that do not require std, but do require +  containers from `liballoc`. good examples are `alloc::string::String` or +  `alloc::vec::Vec`. + +added `yaxpeax_arch::display::DisplaySink` after revisiting output colorization. +  `DisplaySink` is better suited for general markup, rather than being focused +  specifically on ANSI/console text coloring. 
`YaxColors` also simply does not +  style text in some unfortunate circumstances, such as when the console that +  needs to be styled is only written to after intermediate buffering. + +  `DisplaySink` also includes specializable functions for writing text to an +  output, and the implementation for `alloc::string::String` takes advantage of +  this: writing through `impl DisplaySink for String` will often be substantially +  more performant than writing through `fmt::Write`. + +added `mod color_new`: +  this includes an alternate vision for `YaxColors` and better fits with the +  new `DisplaySink` machinery; ANSI-style text markup can be done through the +  new `yaxpeax_arch::color_new::ansi::AnsiDisplaySink`. + +  this provides more flexibility than i'd initially expected! yours truly will +  be using this to render instructions with HTML spans (rather than ANSI +  sequences) to colorize dis.yaxpeax.net. + +  in the future, `mod colored` will be removed, `mod color_new` will be renamed +  to `mod color`. + +deprecated `mod colored`: +  generally, colorization of text is a presentation issue; `trait Colorize` +  mixed formatting of data to text with how that text is presented, but that is +  at odds with the same text being presented in different ways for which +  colorization is not generic. for example, rendering an instruction as marked +  up HTML involves coloring in an entirely different way than rendering an +  instruction with ANSI sequences for a VT100-like terminal. + +added `yaxpeax_arch::safer_unchecked` to aid in testing use of unchecked methods +  these were originally added to improve yaxpeax-x86 testing: +  https://github.com/iximeow/yaxpeax-x86/pull/17, but are being pulled into +  yaxpeax-arch as they're generally applicable and overall wonderful tools. +  thank you again 522! + +added `mod testkit`: +  this module contains tools to validate the correctness of crates implementing +  `yaxpeax-arch` traits. 
these initial tools are focused on validating the +  correctness of functions that write to `DisplaySink`, especially that span +  management is correct. + +  `yaxpeax-x86`, for example, will imminently have fuzz targets to use these +  types for its own validation. + +made VecSink's `records` private. instead of extracting records from the struct +  by accessing this field directly, call `VecSink::into_inner()`. + +made VecSink is now available through the `alloc` feature flag as well as `std`. + +meta: the major omission in this release is an architecture-agnostic way to +format an instruction into a `DisplaySink`. i haven't been able to figure out +quite the right shape for that! it is fully expected in the future, and will +probably end up somehow referenced through `yaxpeax_arch::Arch`.  ## 0.2.8 @@ -7,7 +7,7 @@ keywords = ["disassembly", "disassembler"]  license = "0BSD"  name = "yaxpeax-arch"  repository = "https://git.iximeow.net/yaxpeax-arch/" -version = "0.2.8" +version = "0.3.2"  [dependencies]  "num-traits" = { version = "0.2", default-features = false } @@ -23,14 +23,24 @@ thiserror = "1.0.26"  lto = true  [features] -default = ["std", "use-serde", "colors", "address-parse"] +default = ["std", "alloc", "use-serde", "color-new", "address-parse"] -std = [] +std = ["alloc"] + +alloc = []  # enables the (optional) use of Serde for bounds on  # Arch and Arch::Address  use-serde = ["serde", "serde_derive"] +# feature flag for the existing but misfeature'd initial support for output +# coloring.  the module this gates will be removed in 0.4.0, which includes +# removing `trait Colorize`, and requires a major version bump for any +# dependency that moves forward.  colors = ["crossterm"] +# feature flag for revised output colorizing support, which will replace the +# existing `colors` feature in 0.4.0. 
+color-new = [] +  address-parse = [] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..57c8615 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +test: build-smoketest test-std test-no-std test-serde-no-std test-colors-no-std test-color-new-no-std test-alloc-no-std + +build-smoketest: +	cargo build +	cargo build --no-default-features +	cargo build --no-default-features --target wasm32-wasi + +test-std: +	cargo test +test-no-std: +	cargo test --no-default-features +test-serde-no-std: +	cargo test --no-default-features --features "serde" +test-colors-no-std: +	cargo test --no-default-features --features "colors" +test-color-new-no-std: +	cargo test --no-default-features --features "color-new" +test-alloc-no-std: +	cargo test --no-default-features --features "alloc" @@ -9,7 +9,8 @@ typically this crate is only interesting if you're writing code to operate on mu  `yaxpeax-arch` has several crate features, which implementers are encouraged to also support:  * `std`: opt-in for `std`-specific support - in this crate, `std` enables a [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html) requirement on `DecodeError`, allowing users to `?`-unwrap decode results. -* `colors`: enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled. +* `color_new`: enables traits and structs to stylize formatted instructions, including ANSI colorization. +* ~`colors`~: DEPRECATED. enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled.  * `address-parse`: enable a requirement that `yaxpeax_arch::Address` be parsable from `&str`. 
this is useful for use cases that, for example, read addresses from humans.  * `use-serde`: enable [`serde`](https://docs.rs/serde/latest/serde/) serialization and deserialization bounds for types like `Address`. @@ -54,12 +55,15 @@ there are numerous architectures for which decoders are implemented, at varying  | `sh`/`sh2`/`j2`/`sh3`/`sh4` | [yaxpeax-superh](https://git.sr.ht/~nabijaczleweli/yaxpeax-superh) | 🥳 | 🚧 | ❓ | contributed by [наб](https://nabijaczleweli.xyz) |  | `MOS 6502` | [yaxpeax-6502](https://github.com/cr1901/yaxpeax-6502) | ⚠️ | ❓ | ❓ | contributed by [@cr1901](https://www.twitter.com/cr1901) |  | `lc87` | [yaxpeax-lc87](https://www.github.com/iximeow/yaxpeax-lc87) | 🥳 | ⚠️ | ❓ | | +| `rx` | [yaxpeax-rx](https://www.github.com/iximeow/yaxpeax-rx) | 🥳 | ⚠️ | ❓ | | +| `"avnera"` | [yaxpeax-avnera](https://www.github.com/iximeow/yaxpeax-avnera) | ⚠️ | ⚠️ | ❓ | |undocumented architecture in some Avnera (now Skyworks) Bluetooth modules |  #### feature support  `yaxpeax-arch` defines a few typically-optional features that decoders can also implement, in addition to simple `(bytes) -> instruction` decoding. these are `yaxpeax-arch` traits (or collections thereof) which architectures implement, not crate features.  `description_spans`: implementation of [`AnnotatingDecoder`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.AnnotatingDecoder.html), to decode instructions with bit-level details of what incoming bitstreams mean. +  `contextualize`: implementation of [`ShowContextual`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.ShowContextual.html), to display instructions with user-defined information in place of default instruction data. typically expected to show label names instead of relative branch addresses. **i do not recommend implementing this trait**, it needs significant reconsideration.  
| architecture | `description_spans` | `contextualize` | diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..a092511 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..67ffa43 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "yaxpeax-arch-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.yaxpeax-arch] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "write_helpers_are_correct" +path = "fuzz_targets/write_helpers_are_correct.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/write_helpers_are_correct.rs b/fuzz/fuzz_targets/write_helpers_are_correct.rs new file mode 100644 index 0000000..41e27bd --- /dev/null +++ b/fuzz/fuzz_targets/write_helpers_are_correct.rs @@ -0,0 +1,96 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use yaxpeax_arch::display::DisplaySink; + +use std::convert::TryInto; + +fuzz_target!(|data: &[u8]| { +    let mut buf = String::new(); +    match data.len() { +        1 => { +            let i = data[0]; + +            buf.clear(); +            buf.write_u8(i).expect("write succeeds"); +            assert_eq!(buf, format!("{:x}", i)); + +            buf.clear(); +            buf.write_prefixed_u8(i).expect("write succeeds"); +            assert_eq!(buf, format!("0x{:x}", i)); + +            let expected = if (i as i8) < 0 { +                format!("-0x{:x}", (i as i8).unsigned_abs()) +            } else { +                format!("0x{:x}", i) +            }; + +            buf.clear(); +            buf.write_prefixed_i8(i as i8).expect("write succeeds"); +            assert_eq!(buf, expected); +        }, +        2 => { +            let i: u16 = 
u16::from_le_bytes(data.try_into().expect("checked the size is right")); + +            buf.clear(); +            buf.write_u16(i).expect("write succeeds"); +            assert_eq!(buf, format!("{:x}", i)); + +            buf.clear(); +            buf.write_prefixed_u16(i).expect("write succeeds"); +            assert_eq!(buf, format!("0x{:x}", i)); + +            let expected = if (i as i16) < 0 { +                format!("-0x{:x}", (i as i16).unsigned_abs()) +            } else { +                format!("0x{:x}", i) +            }; + +            buf.clear(); +            buf.write_prefixed_i16(i as i16).expect("write succeeds"); +            assert_eq!(buf, expected); +        } +        4 => { +            let i: u32 = u32::from_le_bytes(data.try_into().expect("checked the size is right")); + +            buf.clear(); +            buf.write_u32(i).expect("write succeeds"); +            assert_eq!(buf, format!("{:x}", i)); + +            buf.clear(); +            buf.write_prefixed_u32(i).expect("write succeeds"); +            assert_eq!(buf, format!("0x{:x}", i)); + +            let expected = if (i as i32) < 0 { +                format!("-0x{:x}", (i as i32).unsigned_abs()) +            } else { +                format!("0x{:x}", i) +            }; + +            buf.clear(); +            buf.write_prefixed_i32(i as i32).expect("write succeeds"); +            assert_eq!(buf, expected); +        }, +        8 => { +            let i: u64 = u64::from_le_bytes(data.try_into().expect("checked the size is right")); + +            buf.clear(); +            buf.write_u64(i).expect("write succeeds"); +            assert_eq!(buf, format!("{:x}", i)); + +            buf.clear(); +            buf.write_prefixed_u64(i).expect("write succeeds"); +            assert_eq!(buf, format!("0x{:x}", i)); + +            let expected = if (i as i64) < 0 { +                format!("-0x{:x}", (i as i64).unsigned_abs()) +            } else { +                format!("0x{:x}", i) +     
       }; + +            buf.clear(); +            buf.write_prefixed_i64(i as i64).expect("write succeeds"); +            assert_eq!(buf, expected); +        }, +        _ => {} +    } +}); @@ -1,19 +1,35 @@ -Build.dependencies({"git", "make", "rustc", "cargo"}) +Build.dependencies({"git", "make", "rustc", "cargo", "rustup"})  Step.start("crate")  Step.push("build")  Build.run({"cargo", "build"}) +-- and now that some code is conditional on target arch, at least try to build +-- for other architectures even if we might not be able to run on them. +Build.run({"rustup", "target", "add", "wasm32-wasi"}) +Build.run({"cargo", "build", "--no-default-features", "--target", "wasm32-wasi"})  Step.advance("test") +-- TODO: set `-D warnings` here and below...  Build.run({"cargo", "test"}, {name="test default features"}) -Build.run({"cargo", "test", "--no-default-features"}, {name="test no features"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std"}, {name="test std only"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "colors"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "use-serde"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,colors"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,use-serde"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,colors,address-parse"}, {name="test feature combinations"}) 
-Build.run({"cargo", "test", "--no-default-features", "--features", "std,use-serde,colors"}, {name="test feature combinations"}) + +-- `cargo test` ends up running doc tests. great! but yaxpeax-arch's docs reference items in std only. +-- so for other feature combinations, skip doc tests. do this by passing `--tests` explicitly, +-- which disables the automagic "run everything" settings. +Build.run({"cargo", "test", "--no-default-features", "--tests"}, {name="test no features"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std"}, {name="test std only"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "colors"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", 
"--features", "use-serde,colors,address-parse,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new,alloc"}, {name="test feature combinations"}) diff --git a/src/annotation/mod.rs b/src/annotation/mod.rs index 0248b94..af8b4bf 100644 --- a/src/annotation/mod.rs +++ b/src/annotation/mod.rs @@ -19,6 +19,8 @@  //! in a generic setting, there isn't much to do with a `FieldDescription` other than display it. a  //! typical use might look something like:  //! ``` +//! #[cfg(feature="std")] +//! # {  //! use core::fmt;  //!  //! use yaxpeax_arch::annotation::{AnnotatingDecoder, VecSink}; @@ -40,6 +42,7 @@  //!         println!("  bits [{}, {}]: {}", start, end, desc);  //!     }  //! } +//! # }  //! ```  //!  //! note that the range `[start, end]` for a reported span is _inclusive_. the `end`-th bit of a @@ -73,7 +76,7 @@ use crate::{Arch, Reader};  use core::fmt::Display; -/// implementors of `DescriptionSink` receive descriptions of an instruction's disassembly process +/// implementers of `DescriptionSink` receive descriptions of an instruction's disassembly process  /// and relevant offsets in the bitstream being decoded. 
descriptions are archtecture-specific, and  /// architectures are expected to be able to turn the bit-level `start` and `width` values into a  /// meaningful description of bits in the original instruction stream. @@ -91,24 +94,34 @@ impl<T> DescriptionSink<T> for NullSink {      fn record(&mut self, _start: u32, _end: u32, _description: T) { }  } -#[cfg(feature = "std")] -pub struct VecSink<T: Clone + Display> { -    pub records: std::vec::Vec<(u32, u32, T)> -} +#[cfg(feature = "alloc")] +mod vec_sink { +    use alloc::vec::Vec; +    use core::fmt::Display; +    use crate::annotation::DescriptionSink; -#[cfg(feature = "std")] -impl<T: Clone + Display> VecSink<T> { -    pub fn new() -> Self { -        VecSink { records: std::vec::Vec::new() } +    pub struct VecSink<T: Clone + Display> { +        pub records: Vec<(u32, u32, T)> +    } + +    impl<T: Clone + Display> VecSink<T> { +        pub fn new() -> Self { +            VecSink { records: Vec::new() } +        } + +        pub fn into_inner(self) -> Vec<(u32, u32, T)> { +            self.records +        }      } -} -#[cfg(feature = "std")] -impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> { -    fn record(&mut self, start: u32, end: u32, description: T) { -        self.records.push((start, end, description)); +    impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> { +        fn record(&mut self, start: u32, end: u32, description: T) { +            self.records.push((start, end, description)); +        }      }  } +#[cfg(feature = "alloc")] +pub use vec_sink::VecSink;  pub trait FieldDescription {      fn id(&self) -> u32; @@ -118,7 +131,7 @@ pub trait FieldDescription {  /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the  /// decoder able to report descriptions of bits or fields in the instruction to a sink implementing  /// [`DescriptionSink`]. the sink may be [`NullSink`] to discard provided data. 
decoding with a -/// `NullSink` should behave identically to `Decoder::decode_into`. implementors are recommended to +/// `NullSink` should behave identically to `Decoder::decode_into`. implementers are recommended to  /// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_annotation` if  /// implementing both traits.  pub trait AnnotatingDecoder<A: Arch + ?Sized> { diff --git a/src/color_new.rs b/src/color_new.rs new file mode 100644 index 0000000..1d3e358 --- /dev/null +++ b/src/color_new.rs @@ -0,0 +1,281 @@ +#[non_exhaustive] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub enum Color { +    Black, +    DarkGrey, +    Red, +    DarkRed, +    Green, +    DarkGreen, +    Yellow, +    DarkYellow, +    Blue, +    DarkBlue, +    Magenta, +    DarkMagenta, +    Cyan, +    DarkCyan, +    White, +    Grey, +} + +pub trait YaxColors { +    fn arithmetic_op(&self) -> Color; +    fn stack_op(&self) -> Color; +    fn nop_op(&self) -> Color; +    fn stop_op(&self) -> Color; +    fn control_flow_op(&self) -> Color; +    fn data_op(&self) -> Color; +    fn comparison_op(&self) -> Color; +    fn invalid_op(&self) -> Color; +    fn platform_op(&self) -> Color; +    fn misc_op(&self) -> Color; + +    fn register(&self) -> Color; +    fn program_counter(&self) -> Color; +    fn number(&self) -> Color; +    fn zero(&self) -> Color; +    fn one(&self) -> Color; +    fn minus_one(&self) -> Color; +    fn address(&self) -> Color; +    fn symbol(&self) -> Color; +    fn function(&self) -> Color; +} + +/// support for colorizing text with ANSI control sequences. +/// +/// the most useful item in this module is [`ansi::AnsiDisplaySink`], which interprets span entry +/// and exit as points at which ANSI sequences may need to be written into the output it wraps - +/// that output may be any type implementing [`crate::display::DisplaySink`], including +/// [`crate::display::FmtSink`] to adapt any implementer of `fmt::Write` such as standard out. 
+/// +/// ## example +/// +/// to write colored text to standard out: +/// +/// ``` +/// # #[cfg(feature="alloc")] +/// # { +/// # extern crate alloc; +/// # use alloc::string::String; +/// use yaxpeax_arch::color_new::DefaultColors; +/// use yaxpeax_arch::color_new::ansi::AnsiDisplaySink; +/// use yaxpeax_arch::display::FmtSink; +/// +/// let mut s = String::new(); +/// let mut s_sink = FmtSink::new(&mut s); +/// +/// let mut writer = AnsiDisplaySink::new(&mut s_sink, DefaultColors); +/// +/// // this might be a yaxpeax crate's `display_into`, or other library implementation code +/// mod fake_yaxpeax_crate { +///     use yaxpeax_arch::display::DisplaySink; +/// +///     pub fn format_memory_operand<T: DisplaySink>(out: &mut T) -> core::fmt::Result { +///         out.span_start_immediate(); +///         out.write_prefixed_u8(0x80)?; +///         out.span_end_immediate(); +///         out.write_fixed_size("(")?; +///         out.span_start_register(); +///         out.write_fixed_size("rbp")?; +///         out.span_end_register(); +///         out.write_fixed_size(")")?; +///         Ok(()) +///     } +/// } +/// +/// // this might be how a user uses `AnsiDisplaySink`, which will write ANSI-ful text to `s` and +/// // print it. +/// +/// fake_yaxpeax_crate::format_memory_operand(&mut writer).expect("write succeeds"); +/// +/// println!("{}", s); +/// # } +/// ``` +pub mod ansi { +    use crate::color_new::Color; + +    // color sequences as described by ECMA-48 and, apparently, `man 4 console_codes` +    /// translate [`yaxpeax_arch::color_new::Color`] to an ANSI control code that changes the +    /// foreground color to match. +    #[allow(dead_code)] // allowing this to be dead code because if colors are enabled and alloc is not, there will not be an AnsiDisplaySink, which is the sole user of this function. 
+    fn color2ansi(color: Color) -> &'static str { +        // for most of these, in 256 color space the darker color can be picked by the same color +        // index as the brighter form (from the 8 color command set). dark grey is an outlier, +        // where 38;5;0 and 30 both are black. there is no "grey" in the shorter command set to +        // map to. but it turns out that 38;5;m is exactly the darker grey to use. +        match color { +            Color::Black => "\x1b[30m", +            Color::DarkGrey => "\x1b[38;5;8m", +            Color::Red => "\x1b[31m", +            Color::DarkRed => "\x1b[38;5;1m", +            Color::Green => "\x1b[32m", +            Color::DarkGreen => "\x1b[38;5;2m", +            Color::Yellow => "\x1b[33m", +            Color::DarkYellow => "\x1b[38;5;3m", +            Color::Blue => "\x1b[34m", +            Color::DarkBlue => "\x1b[38;5;4m", +            Color::Magenta => "\x1b[35m", +            Color::DarkMagenta => "\x1b[38;5;5m", +            Color::Cyan => "\x1b[36m", +            Color::DarkCyan => "\x1b[38;5;6m", +            Color::White => "\x1b[37m", +            Color::Grey => "\x1b[38;5;7m", +        } +    } + +    // could reasonably be always present, but only used if feature="alloc" +    #[cfg(feature="alloc")] +    const DEFAULT_FG: &'static str = "\x1b[39m"; + +    #[cfg(feature="alloc")] +    mod ansi_display_sink { +        use crate::color_new::{Color, YaxColors}; +        use crate::display::DisplaySink; + +        /// adapter to insert ANSI color command sequences in formatted text to style printed +        /// instructions. +        /// +        /// this enables similar behavior as the deprecated [`crate::Colorize`] trait, +        /// for outputs that can process ANSI color commands. +        /// +        /// `AnsiDisplaySink` will silently ignore errors from writes to the underlying `T: +        /// DisplaySink`. 
when writing to a string or other growable buffer, errors are likely +        /// inseparable from `abort()`. when writing to stdout or stderr, write failures likely +        /// mean output is piped to a process which has closed the pipe but are otherwise harmless. +        /// `span_enter_*` and `span_exit_*` don't have error reporting mechanisms in their return +        /// type, so the only available error mechanism would be to also `abort()`. +        /// +        /// if this turns out to be a bad decision, it'll have to be rethought! +        pub struct AnsiDisplaySink<'sink, T: DisplaySink, Y: YaxColors> { +            out: &'sink mut T, +            span_stack: alloc::vec::Vec<Color>, +            colors: Y +        } + +        impl<'sink, T: DisplaySink, Y: YaxColors> AnsiDisplaySink<'sink, T, Y> { +            pub fn new(out: &'sink mut T, colors: Y) -> Self { +                Self { +                    out, +                    span_stack: alloc::vec::Vec::new(), +                    colors, +                } +            } + +            fn push_color(&mut self, color: Color) { +                self.span_stack.push(color); +                let _ = self.out.write_fixed_size(super::color2ansi(color)); +            } + +            fn restore_prev_color(&mut self) { +                let _ = self.span_stack.pop(); +                if let Some(prev_color) = self.span_stack.last() { +                    let _ = self.out.write_fixed_size(super::color2ansi(*prev_color)); +                } else { +                    let _ = self.out.write_fixed_size(super::DEFAULT_FG); +                }; +            } +        } + +        impl<'sink, T: DisplaySink, Y: YaxColors> core::fmt::Write for AnsiDisplaySink<'sink, T, Y> { +            fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { +                self.out.write_str(s) +            } +            fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { +                
self.out.write_char(c) +            } +        } + +        impl<'sink, T: DisplaySink, Y: YaxColors> DisplaySink for AnsiDisplaySink<'sink, T, Y> { +            fn span_start_immediate(&mut self) { self.push_color(self.colors.number()); } +            fn span_end_immediate(&mut self) { self.restore_prev_color() } + +            fn span_start_register(&mut self) { self.push_color(self.colors.register()); } +            fn span_end_register(&mut self) { self.restore_prev_color() } + +            // ah.. the right way, currently, to colorize opcodes would be to collect text while in the +            // opcode span, and request some kind of user-provided decoder ring to translate mnemonics +            // into the right color. that's very unfortunate. maybe there should be another span for +            // `opcode_kind(u8)` for impls to report what kind of opcode they'll be emitting.. +            fn span_start_opcode(&mut self) { self.push_color(self.colors.misc_op()); } +            fn span_end_opcode(&mut self) { self.restore_prev_color() } + +            fn span_start_program_counter(&mut self) { self.push_color(self.colors.program_counter()); } +            fn span_end_program_counter(&mut self) { self.restore_prev_color() } + +            fn span_start_number(&mut self) { self.push_color(self.colors.number()); } +            fn span_end_number(&mut self) { self.restore_prev_color() } + +            fn span_start_address(&mut self) { self.push_color(self.colors.address()); } +            fn span_end_address(&mut self) { self.restore_prev_color() } + +            fn span_start_function_expr(&mut self) { self.push_color(self.colors.function()); } +            fn span_end_function_expr(&mut self) { self.restore_prev_color() } +        } +    } +    #[cfg(feature="alloc")] +    pub use ansi_display_sink::AnsiDisplaySink; +} + +pub struct DefaultColors; + +impl YaxColors for DefaultColors { +    fn arithmetic_op(&self) -> Color { +        Color::Yellow +    } +    fn 
stack_op(&self) -> Color { +        Color::DarkMagenta +    } +    fn nop_op(&self) -> Color { +        Color::DarkBlue +    } +    fn stop_op(&self) -> Color { +        Color::Red +    } +    fn control_flow_op(&self) -> Color { +        Color::DarkGreen +    } +    fn data_op(&self) -> Color { +        Color::Magenta +    } +    fn comparison_op(&self) -> Color { +        Color::DarkYellow +    } +    fn invalid_op(&self) -> Color { +        Color::DarkRed +    } +    fn misc_op(&self) -> Color { +        Color::Cyan +    } +    fn platform_op(&self) -> Color { +        Color::DarkCyan +    } + +    fn register(&self) -> Color { +        Color::DarkCyan +    } +    fn program_counter(&self) -> Color { +        Color::DarkRed +    } +    fn number(&self) -> Color { +        Color::White +    } +    fn zero(&self) -> Color { +        Color::White +    } +    fn one(&self) -> Color { +        Color::White +    } +    fn minus_one(&self) -> Color { +        Color::White +    } +    fn address(&self) -> Color { +        Color::DarkGreen +    } +    fn symbol(&self) -> Color { +        Color::Green +    } +    fn function(&self) -> Color { +        Color::Green +    } +} diff --git a/src/display.rs b/src/display.rs index 789919e..754d3e6 100644 --- a/src/display.rs +++ b/src/display.rs @@ -1,9 +1,35 @@ +// allow use of deprecated items in this module since some functions using `SignedHexDisplay` still +// exist here +#![allow(deprecated)] +  use crate::YaxColors;  use core::fmt;  use core::num::Wrapping;  use core::ops::Neg; +mod display_sink; + +pub use display_sink::{DisplaySink, FmtSink}; +#[cfg(feature = "alloc")] +pub use display_sink::InstructionTextSink; + +/// translate a byte in range `[0, 15]` to a lowercase base-16 digit. +/// +/// if `c` is in range, the output is always valid as the sole byte in a utf-8 string. if `c` is out +/// of range, the returned character might not be a valid single-byte utf-8 codepoint. 
+#[cfg(feature = "alloc")] // this function is of course not directly related to alloc, but it's only needed by impls that themselves are only present with alloc. +fn u8_to_hex(c: u8) -> u8 { +    // this conditional branch is faster than a lookup for... most architectures (especially x86 +    // with cmov) +    if c < 10 { +        b'0' + c +    } else { +        b'a' + c - 10 +    } +} + +#[deprecated(since="0.3.0", note="format_number_i32 does not optimize as expected and will be removed in the future. see DisplaySink instead.")]  pub enum NumberStyleHint {      Signed,      HexSigned, @@ -17,36 +43,37 @@ pub enum NumberStyleHint {      HexUnsignedWithSign  } -pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result { +#[deprecated(since="0.3.0", note="format_number_i32 is both slow and incorrect: YaxColors may not result in correct styling when writing anywhere other than a terminal, and both stylin and formatting does not inline as well as initially expected. 
see DisplaySink instead.")] +pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(_colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result {      match hint {          NumberStyleHint::Signed => { -            write!(f, "{}", colors.number(i)) +            write!(f, "{}", (i))          },          NumberStyleHint::HexSigned => { -            write!(f, "{}", colors.number(signed_i32_hex(i))) +            write!(f, "{}", signed_i32_hex(i))          },          NumberStyleHint::Unsigned => { -            write!(f, "{}", colors.number(i as u32)) +            write!(f, "{}", i as u32)          },          NumberStyleHint::HexUnsigned => { -            write!(f, "{}", colors.number(u32_hex(i as u32))) +            write!(f, "{}", u32_hex(i as u32))          },          NumberStyleHint::SignedWithSignSplit => {              if i == core::i32::MIN { -                write!(f, "- {}", colors.number("2147483647")) +                write!(f, "- {}", "2147483647")              } else if i < 0 { -                write!(f, "- {}", colors.number(-Wrapping(i))) +                write!(f, "- {}", -Wrapping(i))              } else { -                write!(f, "+ {}", colors.number(i)) +                write!(f, "+ {}", i)              }          }          NumberStyleHint::HexSignedWithSignSplit => {              if i == core::i32::MIN { -                write!(f, "- {}", colors.number("0x7fffffff")) +                write!(f, "- {}", ("0x7fffffff"))              } else if i < 0 { -                write!(f, "- {}", colors.number(u32_hex((-Wrapping(i)).0 as u32))) +                write!(f, "- {}", u32_hex((-Wrapping(i)).0 as u32))              } else { -                write!(f, "+ {}", colors.number(u32_hex(i as u32))) +                write!(f, "+ {}", u32_hex(i as u32))              }          },          NumberStyleHint::HexSignedWithSign => { @@ -64,6 +91,7 @@ pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i:      }  } 
+#[deprecated(since="0.3.0", note="SignedHexDisplay does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub struct SignedHexDisplay<T: core::fmt::LowerHex + Neg> {      value: T,      negative: bool @@ -79,6 +107,7 @@ impl<T: fmt::LowerHex + Neg + Copy> fmt::Display for SignedHexDisplay<T> where W      }  } +#[deprecated(since="0.3.0", note="u8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> {      SignedHexDisplay {          value: value as i8, @@ -86,6 +115,7 @@ pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> {      }  } +#[deprecated(since="0.3.0", note="signed_i8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> {      SignedHexDisplay {          value: imm, @@ -93,6 +123,7 @@ pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> {      }  } +#[deprecated(since="0.3.0", note="u16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> {      SignedHexDisplay {          value: value as i16, @@ -100,6 +131,7 @@ pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> {      }  } +#[deprecated(since="0.3.0", note="signed_i16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> {      SignedHexDisplay {          value: imm, @@ -107,6 +139,7 @@ pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> {      }  } +#[deprecated(since="0.3.0", note="u32_hex does not optimize like expected and will be removed in the future. 
see DisplaySink instead.")]  pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> {      SignedHexDisplay {          value: value as i32, @@ -114,6 +147,7 @@ pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> {      }  } +#[deprecated(since="0.3.0", note="signed_i32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> {      SignedHexDisplay {          value: imm, @@ -121,6 +155,7 @@ pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> {      }  } +#[deprecated(since="0.3.0", note="u64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> {      SignedHexDisplay {          value: value as i64, @@ -128,6 +163,7 @@ pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> {      }  } +#[deprecated(since="0.3.0", note="signed_i64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]  pub fn signed_i64_hex(imm: i64) -> SignedHexDisplay<i64> {      SignedHexDisplay {          value: imm, diff --git a/src/display/display_sink.rs b/src/display/display_sink.rs new file mode 100644 index 0000000..9aa3c85 --- /dev/null +++ b/src/display/display_sink.rs @@ -0,0 +1,1017 @@ +use core::fmt; + +// `imp_x86.rs` has `asm!()` macros, and so is not portable at all. +#[cfg(all(feature="alloc", target_arch = "x86_64"))] +#[path="./display_sink/imp_x86.rs"] +mod imp; + +// for other architectures, fall back on possibly-slower portable functions. +#[cfg(all(feature="alloc", not(target_arch = "x86_64")))] +#[path="./display_sink/imp_generic.rs"] +mod imp; + + +/// `DisplaySink` allows client code to collect output and minimal markup. 
this is currently used +/// in formatting instructions for two reasons: +/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at +///   the same time as collecting output itself. +/// * `DisplaySink` implementations provide specialized functions for writing strings in +///   circumstances where a simple "use `core::fmt`" might incur unwanted overhead. +/// +/// ## spans +/// +/// spans are out-of-band indicators for the meaning of data written to this sink. when a +/// `span_start_<foo>` function is called, data written until a matching `span_end_<foo>` can be +/// considered the text corresponding to `<foo>`. +/// +/// spans are entered and exited in a FILO manner. implementations of `DisplaySink` are explicitly +/// allowed to depend on this fact. functions writing to a `DisplaySink` must exit spans in reverse +/// order to when they are entered. a function that has a call sequence like +/// ```text +/// sink.span_start_operand(); +/// sink.span_start_immediate(); +/// sink.span_end_operand(); +/// ``` +/// is in error. +/// +/// spans are reported through the `span_start_*` and `span_end_*` families of functions to avoid +/// constraining implementations into tracking current output offset (which may not be knowable) or +/// span size (which may be knowable, but incur additional overhead to compute or track). if the +/// task for a span is to simply emit VT100 color codes, for example, implementations avoid the +/// overhead of tracking offsets. +/// +/// default implementations of the `span_start_*` and `span_end_*` functions are to do nothing. a +/// no-op `span_start_*` or `span_end_*` allows rustc to eliminate such calls at compile time for +/// `DisplaySink` implementations that are uninterested in the corresponding span type. 
+/// +/// # write helpers (`write_*`) +/// +/// the `write_*` helpers on `DisplaySink` may be able to take advantage of constraints described in +/// documentation here to better support writing some kinds of inputs than a fully-general solution +/// (such as `core::fmt`) might be able to yield. +/// +/// currently there are two motivating factors for `write_*` helpers: +/// +/// instruction formatting often involves writing small but variable-size strings, such as register +/// names, which is something of a pathological case for string appending as Rust currently exists: +/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an +/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: +/// <https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232> +/// +/// there are similar papercuts around formatting integers as base-16 numbers, such as +/// <https://github.com/rust-lang/rust/pull/122770>. in isolation and in most applications these are +/// not a significant source of overhead. but for programs bounded on decoding and printing +/// instructions, these can add up to significant overhead - on the order of 10-20% of total +/// runtime. +/// +/// ## example +/// +/// a simple call sequence to `DisplaySink` might look something like: +/// ```compile_fail +/// sink.span_start_operand() +/// sink.write_char('[') +/// sink.span_start_register() +/// sink.write_fixed_size("rbp") +/// sink.span_end_register() +/// sink.write_char(']') +/// sink.span_end_operand() +/// ``` +/// which writes the text `[rbp]`, telling sinks that the operand begins at `[`, ends after `]`, +/// and `rbp` is a register in that operand. +/// +/// ## extensibility +/// +/// additional `span_{start,end}_*` helpers may be added over time - in the above example, one +/// future addition might be to add a new `effective_address` span that is started before +/// `register` and ended after `register`. 
for an operand like `\[rbp\]` the effective address span +/// would exactly match a corresponding register span, but in more complicated scenarios like +/// `[rsp + rdi * 4 + 0x50]` the effective address would be all of `rsp + rdi * 4 + 0x50`. +/// +/// additional spans are expected to be added as needed. it is not immediately clear how to add +/// support for more architecture-specific concepts (such as itanium predicate registers) would be +/// supported yet, and so architecture-specific concepts may be expressed on `DisplaySink` if the +/// need arises. +/// +/// new `span_{start,end}_*` helpers will be defaulted as no-op. additions to this trait will be +/// minor version bumps, so users should take care to not add custom functions starting with +/// `span_start_` or `span_end_` to structs implementing `DisplaySink`. +pub trait DisplaySink: fmt::Write { +    #[inline(always)] +    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } + +    /// write a string to this sink that is less than 32 bytes. this is provided for optimization +    /// opportunities when writing a variable-length string with known max size. +    /// +    /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than +    /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a +    /// utf-8 string. this may corrupt Rust strings. +    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } +    /// write a string to this sink that is less than 16 bytes. this is provided for optimization +    /// opportunities when writing a variable-length string with known max size. +    /// +    /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than +    /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a +    /// utf-8 string. 
this may corrupt Rust strings. +    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } +    /// write a string to this sink that is less than 8 bytes. this is provided for optimization +    /// opportunities when writing a variable-length string with known max size. +    /// +    /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than +    /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a +    /// utf-8 string. this may corrupt Rust strings. +    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } + +    /// write a u8 to the output as a base-16 integer. +    /// +    /// this corresponds to the Rust format specifier `{:x}` - see [`std::fmt::LowerHex`] for more. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u8 to the output as a base-16 integer with leading `0x`. +    /// +    /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { +        self.write_fixed_size("0x")?; +        self.write_u8(v) +    } +    /// write an i8 to the output as a base-16 integer with leading `0x`, and leading `-` if the +    /// value is negative. 
+    /// +    /// there is no matching `std` formatter, so some examples here: +    /// ```text +    /// sink.write_prefixed_i8(-0x60); // writes `-0x60` to the sink +    /// sink.write_prefixed_i8(127); // writes `0x7f` to the sink +    /// sink.write_prefixed_i8(-128); // writes `-0x80` to the sink +    /// ``` +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_i8(&mut self, v: i8) -> Result<(), core::fmt::Error> { +        let v = if v < 0 { +            self.write_char('-')?; +            v.unsigned_abs() +        } else { +            v as u8 +        }; +        self.write_prefixed_u8(v) +    } +    /// write a u16 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u16 to the output as a base-16 integer with leading `0x`. +    /// +    /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { +        self.write_fixed_size("0x")?; +        self.write_u16(v) +    } +    /// write an i16 to the output as a base-16 integer with leading `0x`, and leading `-` if the +    /// value is negative. 
+    /// +    /// there is no matching `std` formatter, so some examples here: +    /// ```text +    /// sink.write_prefixed_i16(-0x60); // writes `-0x60` to the sink +    /// sink.write_prefixed_i16(127); // writes `0x7f` to the sink +    /// sink.write_prefixed_i16(-128); // writes `-0x80` to the sink +    /// ``` +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_i16(&mut self, v: i16) -> Result<(), core::fmt::Error> { +        let v = if v < 0 { +            self.write_char('-')?; +            v.unsigned_abs() +        } else { +            v as u16 +        }; +        self.write_prefixed_u16(v) +    } +    /// write a u32 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u32 to the output as a base-16 integer with leading `0x`. +    /// +    /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { +        self.write_fixed_size("0x")?; +        self.write_u32(v) +    } +    /// write an i32 to the output as a base-16 integer with leading `0x`, and leading `-` if the +    /// value is negative. 
+    /// +    /// there is no matching `std` formatter, so some examples here: +    /// ```text +    /// sink.write_prefixed_i32(-0x60); // writes `-0x60` to the sink +    /// sink.write_prefixed_i32(127); // writes `0x7f` to the sink +    /// sink.write_prefixed_i32(-128); // writes `-0x80` to the sink +    /// ``` +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_i32(&mut self, v: i32) -> Result<(), core::fmt::Error> { +        let v = if v < 0 { +            self.write_char('-')?; +            v.unsigned_abs() +        } else { +            v as u32 +        }; +        self.write_prefixed_u32(v) +    } +    /// write a u64 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u64 to the output as a base-16 integer with leading `0x`. +    /// +    /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { +        self.write_fixed_size("0x")?; +        self.write_u64(v) +    } +    /// write an i64 to the output as a base-16 integer with leading `0x`, and leading `-` if the +    /// value is negative. 
+    /// +    /// there is no matching `std` formatter, so some examples here: +    /// ```text +    /// sink.write_prefixed_i64(-0x60); // writes `-0x60` to the sink +    /// sink.write_prefixed_i64(127); // writes `0x7f` to the sink +    /// sink.write_prefixed_i64(-128); // writes `-0x80` to the sink +    /// ``` +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_prefixed_i64(&mut self, v: i64) -> Result<(), core::fmt::Error> { +        let v = if v < 0 { +            self.write_char('-')?; +            v.unsigned_abs() +        } else { +            v as u64 +        }; +        self.write_prefixed_u64(v) +    } + +    /// enter a region inside which output corresponds to an immediate. +    fn span_start_immediate(&mut self) { } +    /// end a region where an immediate was written. see docs on [`DisplaySink`] for more. +    fn span_end_immediate(&mut self) { } + +    /// enter a region inside which output corresponds to a register. +    fn span_start_register(&mut self) { } +    /// end a region where a register was written. see docs on [`DisplaySink`] for more. +    fn span_end_register(&mut self) { } + +    /// enter a region inside which output corresponds to an opcode. +    fn span_start_opcode(&mut self) { } +    /// end a region where an opcode was written. see docs on [`DisplaySink`] for more. +    fn span_end_opcode(&mut self) { } + +    /// enter a region inside which output corresponds to the program counter. +    fn span_start_program_counter(&mut self) { } +    /// end a region where the program counter was written. see docs on [`DisplaySink`] for more. +    fn span_end_program_counter(&mut self) { } + +    /// enter a region inside which output corresponds to a number, such as a memory offset or +    /// immediate. 
+    fn span_start_number(&mut self) { } +    /// end a region where a number was written. see docs on [`DisplaySink`] for more. +    fn span_end_number(&mut self) { } + +    /// enter a region inside which output corresponds to an address. this is a best guess; +    /// instructions like x86's `lea` may involve an "address" that is not, and arithmetic +    /// instructions may operate on addresses held in registers. +    /// +    /// where possible, the presence of this span will be informed by ISA semantics - if an +    /// instruction has a memory operand, the effective address calculation of that operand should +    /// be in an address span. +    fn span_start_address(&mut self) { } +    /// end a region where an address was written. the specifics of an "address" are ambiguous and +    /// best-effort; see [`DisplaySink::span_start_address`] for more about this. otherwise, see +    /// docs on [`DisplaySink`] for more about spans. +    fn span_end_address(&mut self) { } + +    /// enter a region inside which output corresponds to a function address, or expression +    /// evaluating to a function address. this is a best guess; instructions like `call` may call +    /// to a non-function address, `jmp` may jump to a function (as with tail calls), function +    /// addresses may be computed via table lookup without semantic hints. +    /// +    /// where possible, the presence of this span will be informed by ISA semantics - if an +    /// instruction is like a "call", an address operand should be a `function` span. if other +    /// instructions can be expected to handle subroutine starting addresses purely from ISA +    /// semantics, address operand(s) should be in a `function` span. +    fn span_start_function_expr(&mut self) { } +    /// end a region where function address expression was written. the specifics of a "function +    /// address" are ambiguous and best-effort; see [`DisplaySink::span_start_function_expr`] for more +    /// about this. 
otherwise, see docs on [`DisplaySink`] for more about spans. +    fn span_end_function_expr(&mut self) { } +} + +/// `FmtSink` can be used to adapt any `fmt::Write`-implementing type into a `DisplaySink` to +/// format an instruction while discarding all span information at zero cost. +pub struct FmtSink<'a, T: fmt::Write> { +    out: &'a mut T, +} + +impl<'a, T: fmt::Write> FmtSink<'a, T> { +    pub fn new(f: &'a mut T) -> Self { +        Self { out: f } +    } + +    pub fn inner_ref(&self) -> &T { +        &self.out +    } +} + +/// blanket impl that discards all span information, forwards writes to the underlying `fmt::Write` +/// type. +impl<'a, T: fmt::Write> DisplaySink for FmtSink<'a, T> { } + +impl<'a, T: fmt::Write> fmt::Write for FmtSink<'a, T> { +    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.out.write_str(s) +    } +    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { +        self.out.write_char(c) +    } +    fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { +        self.out.write_fmt(f) +    } +} + +#[cfg(feature = "alloc")] +mod instruction_text_sink { +    use core::fmt; + +    use super::{DisplaySink, u8_to_hex}; + +    /// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the +    /// disassemblers, do not use this struct. do not depend on this struct existing. this struct is +    /// not stable. this struct is not safe for general use. if you use this struct you and your +    /// program will be eaten by gremlins. +    /// +    /// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct +    /// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. +    /// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are +    /// not bounds-checked, and the buffer is never grown. 
+    /// +    /// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is +    /// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be +    /// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* +    /// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would +    /// be written to without being rewound first. +    /// +    /// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect +    /// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the +    /// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of +    /// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. +    /// +    /// this is intended to be hidden in docs. if you see this in docs, it's a bug. +    #[doc(hidden)] +    pub struct InstructionTextSink<'buf> { +        buf: &'buf mut alloc::string::String +    } + +    impl<'buf> InstructionTextSink<'buf> { +        /// create an `InstructionTextSink` using the provided buffer for storage. +        /// +        /// SAFETY: callers must ensure that this sink will never have more content written than +        /// this buffer can hold. while the buffer may appear growable, `write_*` methods here may +        /// *bypass bounds checks* and so will never trigger the buffer to grow. writing more data +        /// than the buffer's size when provided to `new` will cause out-of-bounds writes and +        /// memory corruption. 
+        pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { +            Self { buf } +        } +    } + +    impl<'buf> fmt::Write for InstructionTextSink<'buf> { +        fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { +            self.buf.write_str(s) +        } +        fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + 1 { +                    panic!("InstructionTextSink::write_char would overflow output"); +                } +            } + +            // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` +            // is valid for writing, but may be uninitialized. +            // +            // this function is essentially equivalent to `Vec::push` specialized for the case that +            // `len < buf.capacity()`: +            // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 +            unsafe { +                let underlying = self.buf.as_mut_vec(); +                // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to +                // write single ASCII characters. this is wrong in the general case, but `write_char` +                // here is not going to be used in the general case. +                if cfg!(debug_assertions) { +                    if c > '\x7f' { +                        panic!("InstructionTextSink::write_char would truncate output"); +                    } +                } +                let to_push = c as u8; +                // `ptr::write` here because `underlying.add(underlying.len())` may not point to an +                // initialized value, which would mean that turning that pointer into a `&mut u8` to +                // store through would be UB. `ptr::write` avoids taking the mut ref. 
+                underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); +                // we have initialized all (one) bytes that `set_len` is increasing the length to +                // include. +                underlying.set_len(underlying.len() + 1); +            } +            Ok(()) +        } +    } + +    impl<'buf> DisplaySink for InstructionTextSink<'buf> { +        #[inline(always)] +        fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_fixed_size would overflow output"); +                } +            } + +            // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +            // be valid utf8 +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_bytes = s.as_bytes(); + +            if new_bytes.len() == 0 { +                return Ok(()); +            } + +            unsafe { +                let dest = buf.as_mut_ptr().offset(buf.len() as isize); + +                // this used to be enough to bamboozle llvm away from +                // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +                // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped +                // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` +                // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this +                // unrolls into some kind of appropriate series of `mov`. 
+                dest.offset(0 as isize).write(new_bytes[0]); +                for i in 1..new_bytes.len() { +                    dest.offset(i as isize).write(new_bytes[i]); +                } + +                buf.set_len(buf.len() + new_bytes.len()); +            } + +            Ok(()) +        } +        unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_lt_32 would overflow output"); +                } +            } + +            // Safety: `new` requires callers promise there is enough space to hold `s`. +            unsafe { +                super::imp::append_string_lt_32_unchecked(&mut self.buf, s); +            } + +            Ok(()) +        } +        unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_lt_16 would overflow output"); +                } +            } + +            // Safety: `new` requires callers promise there is enough space to hold `s`. +            unsafe { +                super::imp::append_string_lt_16_unchecked(&mut self.buf, s); +            } + +            Ok(()) +        } +        unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_lt_8 would overflow output"); +                } +            } + +            // Safety: `new` requires callers promise there is enough space to hold `s`. 
+            unsafe { +                super::imp::append_string_lt_8_unchecked(&mut self.buf, s); +            } + +            Ok(()) +        } +        /// write a u8 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. +            let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u8 would overflow output"); +                } +            } + +            // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +            // be valid utf8 +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; + +            // Safety: there is no way to exit this function without initializing all bytes up to +            // `new_len` +            unsafe { +                buf.set_len(new_len); +            } +            // Safety: `new()` requires callers promise there is space through to `new_len` +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +                // 
to a location valid for writing. +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                } +            } + +            Ok(()) +        } +        /// write a u16 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } + +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. 
+            let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u16 would overflow output"); +                } +            } + +            // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +            // be valid utf8 +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; + +            // Safety: there is no way to exit this function without initializing all bytes up to +            // `new_len` +            unsafe { +                buf.set_len(new_len); +            } +            // Safety: `new()` requires callers promise there is space through to `new_len` +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +                // to a location valid for writing. +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                } +            } + +            Ok(()) +        } +        /// write a u32 to the output as a base-16 integer. 
+        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } + +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. +            let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u32 would overflow output"); +                } +            } + +            // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +            // be valid utf8 +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; + +            // Safety: there is no way to exit this function without initializing all bytes up to +            // `new_len` +            unsafe { +                buf.set_len(new_len); +            } +            // Safety: `new()` requires callers promise there is space through to `new_len` +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +                // to a location valid for writing. 
+                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                } +            } + +            Ok(()) +        } +        /// write a u64 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } + +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. +            let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u64 would overflow output"); +                } +            } + +            // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +            // be valid utf8 +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; + +            // Safety: there is no way to exit this function without initializing all bytes up to +            // `new_len` +            unsafe { +                buf.set_len(new_len); +            } +            // Safety: `new()` requires callers promise there is space through to `new_len` +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit 
as u8); +                // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +                // to a location valid for writing. +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                } +            } + +            Ok(()) +        } +    } +} +#[cfg(feature = "alloc")] +pub use instruction_text_sink::InstructionTextSink; + + +#[cfg(feature = "alloc")] +use crate::display::u8_to_hex; + +/// this [`DisplaySink`] impl exists to support somewhat more performant buffering of the kinds of +/// strings `yaxpeax-x86` uses in formatting instructions. +/// +/// span information is discarded at zero cost. +#[cfg(feature = "alloc")] +impl DisplaySink for alloc::string::String { +    #[inline(always)] +    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.reserve(s.len()); +        // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +        // be valid utf8 +        let buf = unsafe { self.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        if new_bytes.len() == 0 { +            return Ok(()); +        } + +        // Safety: we have reserved space for all `buf` bytes, above. +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); + +            // this used to be enough to bamboozle llvm away from +            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped +            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` +            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this +            // unrolls into some kind of appropriate series of `mov`. 
+            dest.offset(0 as isize).write(new_bytes[0]); +            for i in 1..new_bytes.len() { +                dest.offset(i as isize).write(new_bytes[i]); +            } + +            // Safety: we have initialized all bytes from where `self` initially ended, through to +            // all `new_bytes` additional elements. +            buf.set_len(buf.len() + new_bytes.len()); +        } + +        Ok(()) +    } +    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.reserve(s.len()); + +        // Safety: we have reserved enough space for `s`. +        unsafe { +            imp::append_string_lt_32_unchecked(self, s); +        } + +        Ok(()) +    } +    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.reserve(s.len()); + +        // Safety: we have reserved enough space for `s`. +        unsafe { +            imp::append_string_lt_16_unchecked(self, s); +        } + +        Ok(()) +    } +    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.reserve(s.len()); + +        // Safety: we have reserved enough space for `s`. +        unsafe { +            imp::append_string_lt_8_unchecked(self, s); +        } + +        Ok(()) +    } +    /// write a u8 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. 
+        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +        // be valid utf8 +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        // Safety: there is no way to exit this function without initializing all bytes up to +        // `new_len` +        unsafe { +            buf.set_len(new_len); +        } +        // Safety: we have reserved space through to `new_len` by calling `reserve` above. +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = u8_to_hex(digit as u8); +            // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +            // to a location valid for writing. +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u16 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. 
+        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +        // be valid utf8 +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        // Safety: there is no way to exit this function without initializing all bytes up to +        // `new_len` +        unsafe { +            buf.set_len(new_len); +        } +        // Safety: we have reserved space through to `new_len` by calling `reserve` above. +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = u8_to_hex(digit as u8); +            // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +            // to a location valid for writing. +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u32 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. 
+        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +        // be valid utf8 +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        // Safety: there is no way to exit this function without initializing all bytes up to +        // `new_len` +        unsafe { +            buf.set_len(new_len); +        } +        // Safety: we have reserved space through to `new_len` by calling `reserve` above. +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = u8_to_hex(digit as u8); +            // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +            // to a location valid for writing. +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u64 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. 
+        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +        // be valid utf8 +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        // Safety: there is no way to exit this function without initializing all bytes up to +        // `new_len` +        unsafe { +            buf.set_len(new_len); +        } +        // Safety: we have reserved space through to `new_len` by calling `reserve` above. +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = u8_to_hex(digit as u8); +            // Safety: `p` will not move before `buf`'s length at function entry, so `p` points +            // to a location valid for writing. +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +} diff --git a/src/display/display_sink/imp_generic.rs b/src/display/display_sink/imp_generic.rs new file mode 100644 index 0000000..8819243 --- /dev/null +++ b/src/display/display_sink/imp_generic.rs @@ -0,0 +1,26 @@ +/// append `data` to `buf`, assuming `data` is less than 8 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. +#[inline(always)] +pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) { +    buf.push_str(data); +} + +/// append `data` to `buf`, assuming `data` is less than 16 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. 
+#[inline(always)] +pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) { +    buf.push_str(data); +} + +/// append `data` to `buf`, assuming `data` is less than 32 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. +#[inline(always)] +pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) { +    buf.push_str(data); +} diff --git a/src/display/display_sink/imp_x86.rs b/src/display/display_sink/imp_x86.rs new file mode 100644 index 0000000..902ea69 --- /dev/null +++ b/src/display/display_sink/imp_x86.rs @@ -0,0 +1,187 @@ +//! `imp_x86` has specialized copies to append short strings to strings. buffer sizing must be +//! handled by callers, in all cases. +//! +//! the structure of all implementations here is, essentially, to take the size of the data to +//! append and execute a copy for each bit set in that size, from highest to lowest. some bits are +//! simply never checked if the input is promised to never be that large - if a string to append is +//! only 0..7 bytes long, it is sufficient to only look at the low three bits to copy all bytes. +//! +//! in this way, it is slightly more efficient to right-size which append function is used, if the +//! maximum size of input strings can be bounded well. if the maximum size of input strings cannot +//! be bounded, you shouldn't be using these functions. + +/// append `data` to `buf`, assuming `data` is less than 8 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. 
+#[inline(always)] +pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) { +    // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +    // be valid utf8 +    let buf = unsafe { buf.as_mut_vec() }; +    let new_bytes = data.as_bytes(); + +    unsafe { +        let dest = buf.as_mut_ptr().offset(buf.len() as isize); +        let src = new_bytes.as_ptr(); + +        let rem = new_bytes.len() as isize; + +        // set_len early because there is no way to avoid the following asm!() writing that +        // same number of bytes into buf +        buf.set_len(buf.len() + new_bytes.len()); + +        core::arch::asm!( +            "8:", +            "cmp {rem:e}, 4", +            "jb 9f", +            "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +            "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +            "sub {rem:e}, 4", +            "jz 11f", +            "9:", +            "cmp {rem:e}, 2", +            "jb 10f", +            "mov {buf:x}, word ptr [{src} + {rem} - 2]", +            "mov word ptr [{dest} + {rem} - 2], {buf:x}", +            "sub {rem:e}, 2", +            "jz 11f", +            "10:", +            "cmp {rem:e}, 1", +            "jb 11f", +            "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +            "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +            "11:", +            src = in(reg) src, +            dest = in(reg) dest, +            rem = inout(reg) rem => _, +            buf = out(reg) _, +            options(nostack), +        ); +    } +} + +/// append `data` to `buf`, assuming `data` is less than 16 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. 
+#[inline(always)] +pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) { +    // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +    // be valid utf8 +    let buf = unsafe { buf.as_mut_vec() }; +    let new_bytes = data.as_bytes(); + +    unsafe { +        let dest = buf.as_mut_ptr().offset(buf.len() as isize); +        let src = new_bytes.as_ptr(); + +        let rem = new_bytes.len() as isize; + +        // set_len early because there is no way to avoid the following asm!() writing that +        // same number of bytes into buf +        buf.set_len(buf.len() + new_bytes.len()); + +        core::arch::asm!( +            "7:", +            "cmp {rem:e}, 8", +            "jb 8f", +            "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +            "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +            "sub {rem:e}, 8", +            "jz 11f", +            "8:", +            "cmp {rem:e}, 4", +            "jb 9f", +            "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +            "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +            "sub {rem:e}, 4", +            "jz 11f", +            "9:", +            "cmp {rem:e}, 2", +            "jb 10f", +            "mov {buf:x}, word ptr [{src} + {rem} - 2]", +            "mov word ptr [{dest} + {rem} - 2], {buf:x}", +            "sub {rem:e}, 2", +            "jz 11f", +            "10:", +            "cmp {rem:e}, 1", +            "jb 11f", +            "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +            "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +            "11:", +            src = in(reg) src, +            dest = in(reg) dest, +            rem = inout(reg) rem => _, +            buf = out(reg) _, +            options(nostack), +        ); +    } +} + +/// append `data` to `buf`, assuming `data` is less than 32 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. 
+/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. +#[inline(always)] +pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) { +    // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to +    // be valid utf8 +    let buf = unsafe { buf.as_mut_vec() }; +    let new_bytes = data.as_bytes(); + +    unsafe { +        let dest = buf.as_mut_ptr().offset(buf.len() as isize); +        let src = new_bytes.as_ptr(); + +        let rem = new_bytes.len() as isize; + +        // set_len early because there is no way to avoid the following asm!() writing that +        // same number of bytes into buf +        buf.set_len(buf.len() + new_bytes.len()); + +        core::arch::asm!( +            "6:", +            "cmp {rem:e}, 16", +            "jb 7f", +            "mov {buf:r}, qword ptr [{src} + {rem} - 16]", +            "mov qword ptr [{dest} + {rem} - 16], {buf:r}", +            "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +            "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +            "sub {rem:e}, 16", +            "jz 11f", +            "7:", +            "cmp {rem:e}, 8", +            "jb 8f", +            "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +            "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +            "sub {rem:e}, 8", +            "jz 11f", +            "8:", +            "cmp {rem:e}, 4", +            "jb 9f", +            "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +            "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +            "sub {rem:e}, 4", +            "jz 11f", +            "9:", +            "cmp {rem:e}, 2", +            "jb 10f", +            "mov {buf:x}, word ptr [{src} + {rem} - 2]", +            "mov word ptr [{dest} + {rem} - 2], {buf:x}", +            "sub {rem:e}, 2", +            "jz 11f", +            "10:", +            "cmp {rem:e}, 1", +            "jb 11f", +            "mov {buf:l}, byte ptr [{src} + 
{rem} - 1]", +            "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +            "11:", +            src = in(reg) src, +            dest = in(reg) dest, +            rem = inout(reg) rem => _, +            buf = out(reg) _, +            options(nostack), +        ); +    } +} @@ -1,12 +1,14 @@  #![no_std]  #![doc = include_str!("../README.md")] +#[cfg(feature = "alloc")] +extern crate alloc; +  use core::fmt::{self, Debug, Display};  use core::hash::Hash;  #[cfg(feature="use-serde")]  #[macro_use] extern crate serde_derive; -  #[cfg(feature="use-serde")]  use serde::{Serialize, Deserialize}; @@ -18,19 +20,25 @@ pub use address::AddrParse;  pub mod annotation; +#[deprecated(since="0.3.0", note="yaxpeax_arch::color conflates output mechanism and styling, leaving it brittle and overly-restrictive. see `yaxpeax_arch::color_new`, which will replace `color` in a future version.")]  mod color; +#[allow(deprecated)] // allow exporting the deprecated items here to not break downstreams even further...  pub use color::{Colorize, NoColors, YaxColors}; - -#[cfg(feature="colors")] -pub use color::ColorSettings; +#[cfg(feature="color-new")] +pub mod color_new;  pub mod display; +  mod reader;  pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be}; +pub mod safer_unchecked; + +pub mod testkit; +  /// the minimum set of errors a `yaxpeax-arch` disassembler may produce.  /// -/// it is permissible for an implementor of `DecodeError` to have items that return `false` for +/// it is permissible for an implementer of `DecodeError` to have items that return `false` for  /// all these functions; decoders are permitted to error in way that `yaxpeax-arch` does not know  /// about.  pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static { @@ -42,12 +50,12 @@ pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static {      /// generally indicate an issue with the instruction itself. 
this is in contrast to one      /// specific operand being invalid for the instruction, or some other issue to do with decoding      /// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy -    /// and left as best-effort for decoder implementors. +    /// and left as best-effort for decoder implementers.      fn bad_opcode(&self) -> bool;      /// did the decoder error because an operand of the instruction to decode is invalid?      ///      /// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on -    /// the part of implementors. +    /// the part of implementers.      fn bad_operand(&self) -> bool;      /// a human-friendly description of this decode error.      fn description(&self) -> &'static str; @@ -127,6 +135,7 @@ impl DecodeError for StandardPartialDecoderError {      }  } +/*  #[derive(Copy, Clone)]  struct NoDescription {} @@ -135,6 +144,7 @@ impl fmt::Display for NoDescription {          Ok(())      }  } +*/  /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are  /// the architecture-defined [`DecodeError`] implemention. @@ -152,7 +162,7 @@ pub trait Decoder<A: Arch + ?Sized> {      /// SAFETY:      ///      /// while `inst` MUST be left in a state that does not violate Rust's safety guarantees, -    /// implementors are NOT obligated to leave `inst` in a semantically meaningful state if +    /// implementers are NOT obligated to leave `inst` in a semantically meaningful state if      /// decoding fails. if `decode_into` returns an error, callers may find contradictory and      /// useless information in `inst`, as well as *stale data* from whatever was passed in.      
fn decode_into<T: Reader<A::Address, A::Word>>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>; @@ -227,6 +237,8 @@ pub trait Instruction {      fn well_defined(&self) -> bool;  } +#[allow(deprecated)] +#[deprecated(since="0.3.0", note="ShowContextual ties YaxColors and fmt::Write in a way that only sometimes composes. simultaneously, it is too generic on Ctx, making it difficult to implement and use. it will be revisited in the future.")]  pub trait ShowContextual<Addr, Ctx: ?Sized, T: fmt::Write, Y: YaxColors> {      fn contextualize(&self, colors: &Y, address: Addr, context: Option<&Ctx>, out: &mut T) -> fmt::Result;  } diff --git a/src/reader.rs b/src/reader.rs index 028d835..8b68486 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -24,8 +24,9 @@ pub enum ReadError {  /// isn't a multiple of 8 bits, `U8Reader` won't be sufficient.  pub trait Reader<Address, Item> {      fn next(&mut self) -> Result<Item, ReadError>; -    /// read `buf`-many items from this reader in bulk. if `Reader` cannot read `buf`-many items, -    /// return `ReadError::ExhaustedInput`. +    /// read `buf`-many items from this reader in bulk. +    /// +    /// if `Reader` cannot read `buf`-many items, return `ReadError::ExhaustedInput`.      fn next_n(&mut self, buf: &mut [Item]) -> Result<(), ReadError>;      /// mark the current position as where to measure `offset` against.      fn mark(&mut self); diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs new file mode 100644 index 0000000..b556a6f --- /dev/null +++ b/src/safer_unchecked.rs @@ -0,0 +1,40 @@ +//! tools to help validate correct use of `unchecked` functions. +//! +//! these `kinda_unchecked` functions will use equivalent implementations that panic when +//! invariants are violated when the `debug_assertions` config is present, but use the +//! corresponding `*_unchecked` otherwise. +//! +//! for example, `GetSaferUnchecked` uses a normal index when debug assertions are enabled, but +//! 
`.get_unchecked()` otherwise. this means that tests and even fuzzing can be made to exercise +//! panic-on-error cases as desired. + +use core::slice::SliceIndex; + +pub trait GetSaferUnchecked<T> { +    unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output +    where +        I: SliceIndex<[T]>; +} + +impl<T> GetSaferUnchecked<T> for [T] { +    #[inline(always)] +    unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output +    where +        I: SliceIndex<[T]>, +    { +        if cfg!(debug_assertions) { +            &self[index] +        } else { +            self.get_unchecked(index) +        } +    } +} + +#[inline(always)] +pub unsafe fn unreachable_kinda_unchecked() -> ! { +    if cfg!(debug_assertions) { +        panic!("UB: Unreachable unchecked was executed") +    } else { +        core::hint::unreachable_unchecked() +    } +} diff --git a/src/testkit.rs b/src/testkit.rs new file mode 100644 index 0000000..215a062 --- /dev/null +++ b/src/testkit.rs @@ -0,0 +1,10 @@ +//! utilities to validate that implementations of traits in `yaxpeax-arch` uphold requirements +//! described in this crate. +//! +//! currently, this only includes tools to validate correct use of +//! [`crate::display::DisplaySink`], but may grow in the future. + +#[cfg(feature="alloc")] +mod display; +#[cfg(feature="alloc")] +pub use display::{DisplaySinkValidator, DisplaySinkWriteComparator}; diff --git a/src/testkit/display.rs b/src/testkit/display.rs new file mode 100644 index 0000000..3cef59c --- /dev/null +++ b/src/testkit/display.rs @@ -0,0 +1,192 @@ +//! tools to test the correctness of `yaxpeax-arch` trait implementations. + +use core::fmt; +use core::fmt::Write; + +use crate::display::DisplaySink; + +/// `DisplaySinkValidator` is a `DisplaySink` that panics if invariants required of +/// `DisplaySink`-writing functions are not upheld. +/// +/// there are two categories of invariants that `DisplaySinkValidator` validates. 
+/// +/// first, this panics if spans are not `span_end_*`-ed in first-in-last-out order with +/// corresponding `span_start_*. second, this panics if `write_lt_*` functions are ever provided +/// inputs longer than the corresponding maximum length. +/// +/// functions that write to a `DisplaySink` are strongly encouraged to come with fuzzing that for +/// all inputs `DisplaySinkValidator` does not panic. +pub struct DisplaySinkValidator { +    spans: alloc::vec::Vec<&'static str>, +} + +impl DisplaySinkValidator { +    pub fn new() -> Self { +        Self { spans: alloc::vec::Vec::new() } +    } +} + +impl core::ops::Drop for DisplaySinkValidator { +    fn drop(&mut self) { +        if self.spans.len() != 0 { +            panic!("DisplaySinkValidator dropped with open spans"); +        } +    } +} + +impl fmt::Write for DisplaySinkValidator { +    fn write_str(&mut self, _s: &str) -> Result<(), fmt::Error> { +        Ok(()) +    } +    fn write_char(&mut self, _c: char) -> Result<(), fmt::Error> { +        Ok(()) +    } +} + +impl DisplaySink for DisplaySinkValidator { +    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { +        if s.len() >= 32 { +            panic!("DisplaySinkValidator::write_lt_32 was given a string longer than the maximum permitted length"); +        } + +        self.write_str(s) +    } +    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { +        if s.len() >= 16 { +            panic!("DisplaySinkValidator::write_lt_16 was given a string longer than the maximum permitted length"); +        } + +        self.write_str(s) +    } +    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { +        if s.len() >= 8 { +            panic!("DisplaySinkValidator::write_lt_8 was given a string longer than the maximum permitted length"); +        } + +        self.write_str(s) +    } + +    fn span_start_immediate(&mut self) { +        self.spans.push("immediate"); +    } + +    fn 
span_end_immediate(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "immediate"); +    } + +    fn span_start_register(&mut self) { +        self.spans.push("register"); +    } + +    fn span_end_register(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "register"); +    } + +    fn span_start_opcode(&mut self) { +        self.spans.push("opcode"); +    } + +    fn span_end_opcode(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "opcode"); +    } + +    fn span_start_program_counter(&mut self) { +        self.spans.push("program counter"); +    } + +    fn span_end_program_counter(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "program counter"); +    } + +    fn span_start_number(&mut self) { +        self.spans.push("number"); +    } + +    fn span_end_number(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "number"); +    } + +    fn span_start_address(&mut self) { +        self.spans.push("address"); +    } + +    fn span_end_address(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "address"); +    } + +    fn span_start_function_expr(&mut self) { +        self.spans.push("function expr"); +    } + +    fn span_end_function_expr(&mut self) { +        let last = self.spans.pop().expect("item to pop"); +        assert_eq!(last, "function expr"); +    } +} + +/// `DisplaySinkWriteComparator` helps test that two `DisplaySink` implementations which should +/// produce the same output actually do. +/// +/// this is most useful for cases like testing specialized `write_lt_*` functions, which ought to +/// behave the same as if `write_str()` were called instead and so can be used as a very simple +/// oracle. 
+/// +/// this is somewhat less useful when the sinks are expected to produce unequal text, such as when +/// one sink writes ANSI color sequences and the other does not. +pub struct DisplaySinkWriteComparator<'sinks, T: DisplaySink, U: DisplaySink> { +    sink1: &'sinks mut T, +    sink1_check: fn(&T) -> &str, +    sink2: &'sinks mut U, +    sink2_check: fn(&U) -> &str, +} + +impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySinkWriteComparator<'sinks, T, U> { +    pub fn new( +        t: &'sinks mut T, t_check: fn(&T) -> &str, +        u: &'sinks mut U, u_check: fn(&U) -> &str +    ) -> Self { +        Self { +            sink1: t, +            sink1_check: t_check, +            sink2: u, +            sink2_check: u_check, +        } +    } + +    fn compare_sinks(&self) { +        let sink1_text = (self.sink1_check)(self.sink1); +        let sink2_text = (self.sink2_check)(self.sink2); + +        if sink1_text != sink2_text { +            panic!("sinks produced different output: {} != {}", sink1_text, sink2_text); +        } +    } +} + +impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySink for DisplaySinkWriteComparator<'sinks, T, U> { +    fn write_u8(&mut self, v: u8) -> Result<(), fmt::Error> { +        self.sink1.write_u8(v).expect("write to sink1 succeeds"); +        self.sink2.write_u8(v).expect("write to sink2 succeeds"); +        self.compare_sinks(); +        Ok(()) +    } +} + +impl<'sinks, T: DisplaySink, U: DisplaySink> fmt::Write for DisplaySinkWriteComparator<'sinks, T, U> { +    fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.sink1.write_str(s).expect("write to sink1 succeeds"); +        self.sink2.write_str(s).expect("write to sink2 succeeds"); +        Ok(()) +    } +    fn write_char(&mut self, c: char) -> Result<(), fmt::Error> { +        self.sink1.write_char(c).expect("write to sink1 succeeds"); +        self.sink2.write_char(c).expect("write to sink2 succeeds"); +        Ok(()) +    } +} diff --git 
a/tests/display.rs b/tests/display.rs new file mode 100644 index 0000000..8826303 --- /dev/null +++ b/tests/display.rs @@ -0,0 +1,143 @@ + +// this was something of a misfeature for these formatters.. +#[test] +#[allow(deprecated)] +fn formatters_are_not_feature_gated() { +    use yaxpeax_arch::display::{ +        u8_hex, u16_hex, u32_hex, u64_hex, +        signed_i8_hex, signed_i16_hex, signed_i32_hex, signed_i64_hex +    }; +    let _ = u8_hex(10); +    let _ = u16_hex(10); +    let _ = u32_hex(10); +    let _ = u64_hex(10); +    let _ = signed_i8_hex(10); +    let _ = signed_i16_hex(10); +    let _ = signed_i32_hex(10); +    let _ = signed_i64_hex(10); +} + +#[cfg(feature="alloc")] +#[test] +fn instruction_text_sink_write_char_requires_ascii() { +    use core::fmt::Write; + +    let mut text = String::with_capacity(512); +    let mut sink = unsafe { +        yaxpeax_arch::display::InstructionTextSink::new(&mut text) +    }; +    let expected = "`1234567890-=+_)(*&^%$#@!~\\][poiuytrewq	|}{POIUYTREWQ';lkjhgfdsa\":LKJHGFDSA/.,mnbvcxz?><MNBVCXZ \r\n"; +    for c in expected.as_bytes().iter() { +        sink.write_char(*c as char).expect("write works"); +    } +    assert_eq!(text, expected); +} + +#[cfg(feature="alloc")] +#[test] +#[should_panic] +fn instruction_text_sink_write_char_rejects_not_ascii() { +    use core::fmt::Write; + +    let mut text = String::with_capacity(512); +    let mut sink = unsafe { +        yaxpeax_arch::display::InstructionTextSink::new(&mut text) +    }; +    sink.write_char('\u{80}').expect("write works"); +} + +#[cfg(feature="alloc")] +#[test] +fn display_sink_write_hex_helpers() { +    use yaxpeax_arch::display::{DisplaySink}; + +    // for u8/i8/u16/i16 we can exhaustively test. we'll leave the rest for fuzzers. 
+    let mut buf = String::new(); +    for i in 0..=u8::MAX { +        buf.clear(); +        buf.write_u8(i).expect("write succeeds"); +        assert_eq!(buf, format!("{:x}", i)); + +        buf.clear(); +        buf.write_prefixed_u8(i).expect("write succeeds"); +        assert_eq!(buf, format!("0x{:x}", i)); + +        let expected = if (i as i8) < 0 { +            format!("-0x{:x}", (i as i8).unsigned_abs()) +        } else { +            format!("0x{:x}", i) +        }; + +        buf.clear(); +        buf.write_prefixed_i8(i as i8).expect("write succeeds"); +        assert_eq!(buf, expected); +    } + +    for i in 0..=u16::MAX { +        buf.clear(); +        buf.write_u16(i).expect("write succeeds"); +        assert_eq!(buf, format!("{:x}", i)); + +        buf.clear(); +        buf.write_prefixed_u16(i).expect("write succeeds"); +        assert_eq!(buf, format!("0x{:x}", i)); + +        let expected = if (i as i16) < 0 { +            format!("-0x{:x}", (i as i16).unsigned_abs()) +        } else { +            format!("0x{:x}", i) +        }; + +        buf.clear(); +        buf.write_prefixed_i16(i as i16).expect("write succeeds"); +        assert_eq!(buf, expected); +    } +} + +#[cfg(feature="alloc")] +#[test] +fn sinks_are_equivalent() { +    use yaxpeax_arch::display::{DisplaySink, FmtSink}; +    use yaxpeax_arch::testkit::DisplaySinkWriteComparator; + +    let mut bare = String::new(); +    let mut through_sink = String::new(); +    for i in 0..u16::MAX { +        bare.clear(); +        through_sink.clear(); +        let mut out = FmtSink::new(&mut through_sink); +        let mut comparator = DisplaySinkWriteComparator::new( +            &mut out, +            |sink| { sink.inner_ref().as_str() }, +            &mut bare, +            |sink| { sink.as_str() }, +        ); +        comparator.write_u16(i).expect("write succeeds"); +        comparator.write_prefixed_u16(i).expect("write succeeds"); +        comparator.write_prefixed_i16(i as 
i16).expect("write succeeds"); +    } +} + +#[cfg(all(feature="alloc", feature="color-new"))] +#[test] +fn ansi_sink_works() { +    use yaxpeax_arch::color_new::ansi::AnsiDisplaySink; +    use yaxpeax_arch::display::DisplaySink; + +    let mut buf = String::new(); + +    let mut ansi_sink = AnsiDisplaySink::new(&mut buf, yaxpeax_arch::color_new::DefaultColors); + +    ansi_sink.span_start_immediate(); +    ansi_sink.write_prefixed_u8(0x80).expect("write succeeds"); +    ansi_sink.span_end_immediate(); +    ansi_sink.write_fixed_size("(").expect("write succeeds"); +    ansi_sink.span_start_register(); +    ansi_sink.write_fixed_size("rbp").expect("write succeeds"); +    ansi_sink.span_end_register(); +    ansi_sink.write_fixed_size(")").expect("write succeeds"); + +    drop(ansi_sink); + +    assert_eq!(buf, "\x1b[37m0x80\x1b[39m(\x1b[38;5;6mrbp\x1b[39m)"); +} diff --git a/tests/lib.rs b/tests/lib.rs index 1d5e964..9dc1449 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -12,6 +12,7 @@ fn test_u16() {  }  #[test] +#[cfg(std)]  fn generic_error_can_bail() {      use yaxpeax_arch::{Arch, Decoder, Reader}; @@ -23,6 +24,7 @@ fn generic_error_can_bail() {      }  }  #[test] +#[cfg(std)]  fn error_can_bail() {      use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};      struct TestIsa {} @@ -76,3 +78,51 @@ fn error_can_bail() {      assert_eq!(exercise_eq(), Err(Error::TestDecode(StandardDecodeError::ExhaustedInput)));  } + +#[test] +fn example_arch_impl() { +    use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader}; +    struct TestIsa {} +    #[derive(Debug, Default)] +    struct TestInst {} +    impl Arch for TestIsa { +        type Word = u8; +        type Address = u64; +        type Instruction = TestInst; +        type Decoder = TestIsaDecoder; +        type DecodeError = StandardDecodeError; +        type Operand = (); +    } + +   
 impl Instruction for TestInst { +        fn well_defined(&self) -> bool { true } +    } + +    impl LengthedInstruction for TestInst { +        type Unit = AddressDiff<u64>; +        fn len(&self) -> Self::Unit { AddressDiff::from_const(1) } +        fn min_size() -> Self::Unit { AddressDiff::from_const(1) } +    } + +    struct TestIsaDecoder {} + +    impl Default for TestIsaDecoder { +        fn default() -> Self { +            TestIsaDecoder {} +        } +    } + +    impl Decoder<TestIsa> for TestIsaDecoder { +        fn decode_into<T: Reader<u64, u8>>(&self, _inst: &mut TestInst, _words: &mut T) -> Result<(), StandardDecodeError> { +            Err(StandardDecodeError::ExhaustedInput) +        } +    } + +    fn exercise_eq() -> Result<(), StandardDecodeError> { +        let mut reader = U8Reader::new(&[]); +        TestIsaDecoder::default().decode(&mut reader)?; +        Ok(()) +    } + +    assert_eq!(exercise_eq(), Err(StandardDecodeError::ExhaustedInput)); +} | 
