diff options
-rw-r--r-- | CHANGELOG | 84 | ||||
-rw-r--r-- | Cargo.toml | 16 | ||||
-rw-r--r-- | Makefile | 19 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | fuzz/.gitignore | 3 | ||||
-rw-r--r-- | fuzz/Cargo.toml | 25 | ||||
-rw-r--r-- | fuzz/fuzz_targets/write_helpers_are_correct.rs | 96 | ||||
-rw-r--r-- | goodfile | 40 | ||||
-rw-r--r-- | src/annotation/mod.rs | 43 | ||||
-rw-r--r-- | src/color_new.rs | 281 | ||||
-rw-r--r-- | src/display.rs | 58 | ||||
-rw-r--r-- | src/display/display_sink.rs | 1017 | ||||
-rw-r--r-- | src/display/display_sink/imp_generic.rs | 26 | ||||
-rw-r--r-- | src/display/display_sink/imp_x86.rs | 187 | ||||
-rw-r--r-- | src/lib.rs | 28 | ||||
-rw-r--r-- | src/reader.rs | 5 | ||||
-rw-r--r-- | src/safer_unchecked.rs | 40 | ||||
-rw-r--r-- | src/testkit.rs | 10 | ||||
-rw-r--r-- | src/testkit/display.rs | 192 | ||||
-rw-r--r-- | tests/display.rs | 143 | ||||
-rw-r--r-- | tests/lib.rs | 50 |
21 files changed, 2312 insertions, 54 deletions
@@ -1,8 +1,88 @@ -## 0.3.0 +## TODO -TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted +~~TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted~~ +* a reader's `.offset()` should reflect the amount of items that were consumed, if any. if a reader can quickly determine + there is not enough input, should it return Incomplete(0) or ExhaustedInput? Incomplete(0) vs ExhaustedInput may still + imply that some state was changed (an access mode, for example). this needs more thought. TODO: Reader::offset should return an AddressDiff<Address>, not a bare Address +* quick look seems reasonable enough, should be changed in concert with + yaxpeax-core though and that's more than i'm signing up for today TODO: impls of `fn one` and `fn zero` so downstream users don't have to import num_traits directly +* seems nice at first but this means that there are conflicting functions when Zero or One are in scope + ... assuming that the idea at the time was to add `fn one` and `fn zero` to `AddressBase`. +TODO: 0.4.0 or later: + * remove `mod colors`, crossterm dependency, related feature flags + +## 0.3.2 + +fix yaxpeax-arch not building for non-x86 targets when alloc is not enabled + +## 0.3.1 + +fix InstructionTextSink::write_char to not panic in debug builds + +## 0.3.0 + +added a new crate feature flag, `alloc`. + this flag is for any features that do not require std, but do require + containers from `liballoc`. good examples are `alloc::string::String` or + `alloc::vec::Vec`. + +added `yaxpeax_arch::display::DisplaySink` after revisiting output colorization. + `DisplaySink` is better suited for general markup, rather than being focused + specifically on ANSI/console text coloring. 
`YaxColors` also simply does not + style text in some unfortunate circumstances, such as when the console that + needs to be styled is only written to after intermediate buffering. + + `DisplaySink` also includes specializable functions for writing text to an + output, and the implementation for `alloc::string::String` takes advantage of + this: writing through `impl DisplaySink for String` will often be substantially + more performant than writing through `fmt::Write`. + +added `mod color_new`: + this includes an alternate vision for `YaxColors` and better fits with the + new `DisplaySink` machinery; ANSI-style text markup can be done through the + new `yaxpeax_arch::color_new::ansi::AnsiDisplaySink`. + + this provides more flexibility than i'd initially expected! yours truly will + be using this to render instructions with HTML spans (rather than ANSI + sequences) to colorize dis.yaxpeax.net. + + in the future, `mod colored` will be removed, `mod color_new` will be renamed + to `mod color`. + +deprecated `mod colored`: + generally, colorization of text is a presentation issue; `trait Colorize` + mixed formatting of data to text with how that text is presented, but that is + at odds with the same text being presented in different ways for which + colorization is not generic. for example, rendering an instruction as marked + up HTML involves coloring in an entirely different way than rendering an + instruction with ANSI sequences for a VT100-like terminal. + +added `yaxpeax_arch::safer_unchecked` to aid in testing use of unchecked methods + these were originally added to improve yaxpeax-x86 testing: + https://github.com/iximeow/yaxpeax-x86/pull/17, but are being pulled into + yaxpeax-arch as they're generally applicable and overall wonderful tools. + thank you again 522! + +added `mod testkit`: + this module contains tools to validate the correctness of crates implementing + `yaxpeax-arch` traits. 
these initial tools are focused on validating the + correctness of functions that write to `DisplaySink`, especially that span + management is correct. + + `yaxpeax-x86`, for example, will imminently have fuzz targets to use these + types for its own validation. + +made VecSink's `records` private. instead of extracting records from the struct + by accessing this field directly, call `VecSink::into_inner()`. + +made VecSink available through the `alloc` feature flag as well as `std`. + +meta: the major omission in this release is an architecture-agnostic way to +format an instruction into a `DisplaySink`. i haven't been able to figure out +quite the right shape for that! it is fully expected in the future, and will +probably end up somehow referenced through `yaxpeax_arch::Arch`. ## 0.2.8 @@ -7,7 +7,7 @@ keywords = ["disassembly", "disassembler"] license = "0BSD" name = "yaxpeax-arch" repository = "https://git.iximeow.net/yaxpeax-arch/" -version = "0.2.8" +version = "0.3.2" [dependencies] "num-traits" = { version = "0.2", default-features = false } @@ -23,14 +23,24 @@ thiserror = "1.0.26" lto = true [features] -default = ["std", "use-serde", "colors", "address-parse"] +default = ["std", "alloc", "use-serde", "color-new", "address-parse"] -std = [] +std = ["alloc"] + +alloc = [] # enables the (optional) use of Serde for bounds on # Arch and Arch::Address use-serde = ["serde", "serde_derive"] +# feature flag for the existing but misfeature'd initial support for output +# coloring. the module this gates will be removed in 0.4.0, which includes +# removing `trait Colorize`, and requires a major version bump for any +# dependency that moves forward. colors = ["crossterm"] +# feature flag for revised output colorizing support, which will replace the +# existing `colors` feature in 0.4.0. 
+color-new = [] + address-parse = [] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..57c8615 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +test: build-smoketest test-std test-no-std test-serde-no-std test-colors-no-std test-color-new-no-std test-alloc-no-std + +build-smoketest: + cargo build + cargo build --no-default-features + cargo build --no-default-features --target wasm32-wasi + +test-std: + cargo test +test-no-std: + cargo test --no-default-features +test-serde-no-std: + cargo test --no-default-features --features "serde" +test-colors-no-std: + cargo test --no-default-features --features "colors" +test-color-new-no-std: + cargo test --no-default-features --features "color-new" +test-alloc-no-std: + cargo test --no-default-features --features "alloc" @@ -9,7 +9,8 @@ typically this crate is only interesting if you're writing code to operate on mu `yaxpeax-arch` has several crate features, which implementers are encouraged to also support: * `std`: opt-in for `std`-specific support - in this crate, `std` enables a [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html) requirement on `DecodeError`, allowing users to `?`-unwrap decode results. -* `colors`: enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled. +* `color-new`: enables traits and structs to stylize formatted instructions, including ANSI colorization. +* ~`colors`~: DEPRECATED. enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled. * `address-parse`: enable a requirement that `yaxpeax_arch::Address` be parsable from `&str`. 
this is useful for use cases that, for example, read addresses from humans. * `use-serde`: enable [`serde`](https://docs.rs/serde/latest/serde/) serialization and deserialization bounds for types like `Address`. diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..a092511 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..67ffa43 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "yaxpeax-arch-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.yaxpeax-arch] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "write_helpers_are_correct" +path = "fuzz_targets/write_helpers_are_correct.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/write_helpers_are_correct.rs b/fuzz/fuzz_targets/write_helpers_are_correct.rs new file mode 100644 index 0000000..41e27bd --- /dev/null +++ b/fuzz/fuzz_targets/write_helpers_are_correct.rs @@ -0,0 +1,96 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use yaxpeax_arch::display::DisplaySink; + +use std::convert::TryInto; + +fuzz_target!(|data: &[u8]| { + let mut buf = String::new(); + match data.len() { + 1 => { + let i = data[0]; + + buf.clear(); + buf.write_u8(i).expect("write succeeds"); + assert_eq!(buf, format!("{:x}", i)); + + buf.clear(); + buf.write_prefixed_u8(i).expect("write succeeds"); + assert_eq!(buf, format!("0x{:x}", i)); + + let expected = if (i as i8) < 0 { + format!("-0x{:x}", (i as i8).unsigned_abs()) + } else { + format!("0x{:x}", i) + }; + + buf.clear(); + buf.write_prefixed_i8(i as i8).expect("write succeeds"); + assert_eq!(buf, expected); + }, + 2 => { + let i: u16 = u16::from_le_bytes(data.try_into().expect("checked the size is right")); 
+ + buf.clear(); + buf.write_u16(i).expect("write succeeds"); + assert_eq!(buf, format!("{:x}", i)); + + buf.clear(); + buf.write_prefixed_u16(i).expect("write succeeds"); + assert_eq!(buf, format!("0x{:x}", i)); + + let expected = if (i as i16) < 0 { + format!("-0x{:x}", (i as i16).unsigned_abs()) + } else { + format!("0x{:x}", i) + }; + + buf.clear(); + buf.write_prefixed_i16(i as i16).expect("write succeeds"); + assert_eq!(buf, expected); + } + 4 => { + let i: u32 = u32::from_le_bytes(data.try_into().expect("checked the size is right")); + + buf.clear(); + buf.write_u32(i).expect("write succeeds"); + assert_eq!(buf, format!("{:x}", i)); + + buf.clear(); + buf.write_prefixed_u32(i).expect("write succeeds"); + assert_eq!(buf, format!("0x{:x}", i)); + + let expected = if (i as i32) < 0 { + format!("-0x{:x}", (i as i32).unsigned_abs()) + } else { + format!("0x{:x}", i) + }; + + buf.clear(); + buf.write_prefixed_i32(i as i32).expect("write succeeds"); + assert_eq!(buf, expected); + }, + 8 => { + let i: u64 = u64::from_le_bytes(data.try_into().expect("checked the size is right")); + + buf.clear(); + buf.write_u64(i).expect("write succeeds"); + assert_eq!(buf, format!("{:x}", i)); + + buf.clear(); + buf.write_prefixed_u64(i).expect("write succeeds"); + assert_eq!(buf, format!("0x{:x}", i)); + + let expected = if (i as i64) < 0 { + format!("-0x{:x}", (i as i64).unsigned_abs()) + } else { + format!("0x{:x}", i) + }; + + buf.clear(); + buf.write_prefixed_i64(i as i64).expect("write succeeds"); + assert_eq!(buf, expected); + }, + _ => {} + } +}); @@ -1,19 +1,35 @@ -Build.dependencies({"git", "make", "rustc", "cargo"}) +Build.dependencies({"git", "make", "rustc", "cargo", "rustup"}) Step.start("crate") Step.push("build") Build.run({"cargo", "build"}) +-- and now that some code is conditional on target arch, at least try to build +-- for other architectures even if we might not be able to run on them. 
+Build.run({"rustup", "target", "add", "wasm32-wasi"}) +Build.run({"cargo", "build", "--no-default-features", "--target", "wasm32-wasi"}) Step.advance("test") +-- TODO: set `-D warnings` here and below... Build.run({"cargo", "test"}, {name="test default features"}) -Build.run({"cargo", "test", "--no-default-features"}, {name="test no features"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std"}, {name="test std only"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "colors"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "use-serde"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,colors"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,use-serde"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,colors,address-parse"}, {name="test feature combinations"}) -Build.run({"cargo", "test", "--no-default-features", "--features", "std,use-serde,colors"}, {name="test feature combinations"}) + +-- `cargo test` ends up running doc tests. great! but yaxpeax-arch's docs reference items in std only. +-- so for other feature combinations, skip doc tests. do this by passing `--tests` explicitly, +-- which disables the automagic "run everything" settings. 
+Build.run({"cargo", "test", "--no-default-features", "--tests"}, {name="test no features"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std"}, {name="test std only"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "colors"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse,alloc"}, {name="test feature combinations"}) 
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors,alloc"}, {name="test feature combinations"}) +Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new,alloc"}, {name="test feature combinations"}) diff --git a/src/annotation/mod.rs b/src/annotation/mod.rs index 0248b94..af8b4bf 100644 --- a/src/annotation/mod.rs +++ b/src/annotation/mod.rs @@ -19,6 +19,8 @@ //! in a generic setting, there isn't much to do with a `FieldDescription` other than display it. a //! typical use might look something like: //! ``` +//! #[cfg(feature="std")] +//! # { //! use core::fmt; //! //! use yaxpeax_arch::annotation::{AnnotatingDecoder, VecSink}; @@ -40,6 +42,7 @@ //! println!(" bits [{}, {}]: {}", start, end, desc); //! } //! } +//! # } //! ``` //! //! note that the range `[start, end]` for a reported span is _inclusive_. the `end`-th bit of a @@ -73,7 +76,7 @@ use crate::{Arch, Reader}; use core::fmt::Display; -/// implementors of `DescriptionSink` receive descriptions of an instruction's disassembly process +/// implementers of `DescriptionSink` receive descriptions of an instruction's disassembly process /// and relevant offsets in the bitstream being decoded. descriptions are architecture-specific, and /// architectures are expected to be able to turn the bit-level `start` and `width` values into a /// meaningful description of bits in the original instruction stream. 
@@ -91,24 +94,34 @@ impl<T> DescriptionSink<T> for NullSink { fn record(&mut self, _start: u32, _end: u32, _description: T) { } } -#[cfg(feature = "std")] -pub struct VecSink<T: Clone + Display> { - pub records: std::vec::Vec<(u32, u32, T)> -} +#[cfg(feature = "alloc")] +mod vec_sink { + use alloc::vec::Vec; + use core::fmt::Display; + use crate::annotation::DescriptionSink; -#[cfg(feature = "std")] -impl<T: Clone + Display> VecSink<T> { - pub fn new() -> Self { - VecSink { records: std::vec::Vec::new() } + pub struct VecSink<T: Clone + Display> { + pub records: Vec<(u32, u32, T)> + } + + impl<T: Clone + Display> VecSink<T> { + pub fn new() -> Self { + VecSink { records: Vec::new() } + } + + pub fn into_inner(self) -> Vec<(u32, u32, T)> { + self.records + } } -} -#[cfg(feature = "std")] -impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> { - fn record(&mut self, start: u32, end: u32, description: T) { - self.records.push((start, end, description)); + impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> { + fn record(&mut self, start: u32, end: u32, description: T) { + self.records.push((start, end, description)); + } } } +#[cfg(feature = "alloc")] +pub use vec_sink::VecSink; pub trait FieldDescription { fn id(&self) -> u32; @@ -118,7 +131,7 @@ pub trait FieldDescription { /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the /// decoder able to report descriptions of bits or fields in the instruction to a sink implementing /// [`DescriptionSink`]. the sink may be [`NullSink`] to discard provided data. decoding with a -/// `NullSink` should behave identically to `Decoder::decode_into`. implementors are recommended to +/// `NullSink` should behave identically to `Decoder::decode_into`. implementers are recommended to /// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_annotation` if /// implementing both traits. 
pub trait AnnotatingDecoder<A: Arch + ?Sized> { diff --git a/src/color_new.rs b/src/color_new.rs new file mode 100644 index 0000000..1d3e358 --- /dev/null +++ b/src/color_new.rs @@ -0,0 +1,281 @@ +#[non_exhaustive] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub enum Color { + Black, + DarkGrey, + Red, + DarkRed, + Green, + DarkGreen, + Yellow, + DarkYellow, + Blue, + DarkBlue, + Magenta, + DarkMagenta, + Cyan, + DarkCyan, + White, + Grey, +} + +pub trait YaxColors { + fn arithmetic_op(&self) -> Color; + fn stack_op(&self) -> Color; + fn nop_op(&self) -> Color; + fn stop_op(&self) -> Color; + fn control_flow_op(&self) -> Color; + fn data_op(&self) -> Color; + fn comparison_op(&self) -> Color; + fn invalid_op(&self) -> Color; + fn platform_op(&self) -> Color; + fn misc_op(&self) -> Color; + + fn register(&self) -> Color; + fn program_counter(&self) -> Color; + fn number(&self) -> Color; + fn zero(&self) -> Color; + fn one(&self) -> Color; + fn minus_one(&self) -> Color; + fn address(&self) -> Color; + fn symbol(&self) -> Color; + fn function(&self) -> Color; +} + +/// support for colorizing text with ANSI control sequences. +/// +/// the most useful item in this module is [`ansi::AnsiDisplaySink`], which interprets span entry +/// and exit as points at which ANSI sequences may need to be written into the output it wraps - +/// that output may be any type implementing [`crate::display::DisplaySink`], including +/// [`crate::display::FmtSink`] to adapt any implementer of `fmt::Write` such as standard out. 
+/// +/// ## example +/// +/// to write colored text to standard out: +/// +/// ``` +/// # #[cfg(feature="alloc")] +/// # { +/// # extern crate alloc; +/// # use alloc::string::String; +/// use yaxpeax_arch::color_new::DefaultColors; +/// use yaxpeax_arch::color_new::ansi::AnsiDisplaySink; +/// use yaxpeax_arch::display::FmtSink; +/// +/// let mut s = String::new(); +/// let mut s_sink = FmtSink::new(&mut s); +/// +/// let mut writer = AnsiDisplaySink::new(&mut s_sink, DefaultColors); +/// +/// // this might be a yaxpeax crate's `display_into`, or other library implementation code +/// mod fake_yaxpeax_crate { +/// use yaxpeax_arch::display::DisplaySink; +/// +/// pub fn format_memory_operand<T: DisplaySink>(out: &mut T) -> core::fmt::Result { +/// out.span_start_immediate(); +/// out.write_prefixed_u8(0x80)?; +/// out.span_end_immediate(); +/// out.write_fixed_size("(")?; +/// out.span_start_register(); +/// out.write_fixed_size("rbp")?; +/// out.span_end_register(); +/// out.write_fixed_size(")")?; +/// Ok(()) +/// } +/// } +/// +/// // this might be how a user uses `AnsiDisplaySink`, which will write ANSI-ful text to `s` and +/// // print it. +/// +/// fake_yaxpeax_crate::format_memory_operand(&mut writer).expect("write succeeds"); +/// +/// println!("{}", s); +/// # } +/// ``` +pub mod ansi { + use crate::color_new::Color; + + // color sequences as described by ECMA-48 and, apparently, `man 4 console_codes` + /// translate [`yaxpeax_arch::color_new::Color`] to an ANSI control code that changes the + /// foreground color to match. + #[allow(dead_code)] // allowing this to be dead code because if colors are enabled and alloc is not, there will not be an AnsiDisplaySink, which is the sole user of this function. + fn color2ansi(color: Color) -> &'static str { + // for most of these, in 256 color space the darker color can be picked by the same color + // index as the brighter form (from the 8 color command set). 
dark grey is an outlier, + // where 38;5;0 and 30 both are black. there is no "grey" in the shorter command set to + // map to. but it turns out that 38;5;8m is exactly the darker grey to use. + match color { + Color::Black => "\x1b[30m", + Color::DarkGrey => "\x1b[38;5;8m", + Color::Red => "\x1b[31m", + Color::DarkRed => "\x1b[38;5;1m", + Color::Green => "\x1b[32m", + Color::DarkGreen => "\x1b[38;5;2m", + Color::Yellow => "\x1b[33m", + Color::DarkYellow => "\x1b[38;5;3m", + Color::Blue => "\x1b[34m", + Color::DarkBlue => "\x1b[38;5;4m", + Color::Magenta => "\x1b[35m", + Color::DarkMagenta => "\x1b[38;5;5m", + Color::Cyan => "\x1b[36m", + Color::DarkCyan => "\x1b[38;5;6m", + Color::White => "\x1b[37m", + Color::Grey => "\x1b[38;5;7m", + } + } + + // could reasonably be always present, but only used if feature="alloc" + #[cfg(feature="alloc")] + const DEFAULT_FG: &'static str = "\x1b[39m"; + + #[cfg(feature="alloc")] + mod ansi_display_sink { + use crate::color_new::{Color, YaxColors}; + use crate::display::DisplaySink; + + /// adapter to insert ANSI color command sequences in formatted text to style printed + /// instructions. + /// + /// this enables similar behavior as the deprecated [`crate::Colorize`] trait, + /// for outputs that can process ANSI color commands. + /// + /// `AnsiDisplaySink` will silently ignore errors from writes to the underlying `T: + /// DisplaySink`. when writing to a string or other growable buffer, errors are likely + /// inseparable from `abort()`. when writing to stdout or stderr, write failures likely + /// mean output is piped to a process which has closed the pipe but are otherwise harmless. + /// `span_start_*` and `span_end_*` don't have error reporting mechanisms in their return + /// type, so the only available error mechanism would be to also `abort()`. + /// + /// if this turns out to be a bad decision, it'll have to be rethought! 
+ pub struct AnsiDisplaySink<'sink, T: DisplaySink, Y: YaxColors> { + out: &'sink mut T, + span_stack: alloc::vec::Vec<Color>, + colors: Y + } + + impl<'sink, T: DisplaySink, Y: YaxColors> AnsiDisplaySink<'sink, T, Y> { + pub fn new(out: &'sink mut T, colors: Y) -> Self { + Self { + out, + span_stack: alloc::vec::Vec::new(), + colors, + } + } + + fn push_color(&mut self, color: Color) { + self.span_stack.push(color); + let _ = self.out.write_fixed_size(super::color2ansi(color)); + } + + fn restore_prev_color(&mut self) { + let _ = self.span_stack.pop(); + if let Some(prev_color) = self.span_stack.last() { + let _ = self.out.write_fixed_size(super::color2ansi(*prev_color)); + } else { + let _ = self.out.write_fixed_size(super::DEFAULT_FG); + }; + } + } + + impl<'sink, T: DisplaySink, Y: YaxColors> core::fmt::Write for AnsiDisplaySink<'sink, T, Y> { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.out.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + self.out.write_char(c) + } + } + + impl<'sink, T: DisplaySink, Y: YaxColors> DisplaySink for AnsiDisplaySink<'sink, T, Y> { + fn span_start_immediate(&mut self) { self.push_color(self.colors.number()); } + fn span_end_immediate(&mut self) { self.restore_prev_color() } + + fn span_start_register(&mut self) { self.push_color(self.colors.register()); } + fn span_end_register(&mut self) { self.restore_prev_color() } + + // ah.. the right way, currently, to colorize opcodes would be to collect text while in the + // opcode span, and request some kind of user-provided decoder ring to translate mnemonics + // into the right color. that's very unfortunate. maybe there should be another span for + // `opcode_kind(u8)` for impls to report what kind of opcode they'll be emitting.. 
+ fn span_start_opcode(&mut self) { self.push_color(self.colors.misc_op()); } + fn span_end_opcode(&mut self) { self.restore_prev_color() } + + fn span_start_program_counter(&mut self) { self.push_color(self.colors.program_counter()); } + fn span_end_program_counter(&mut self) { self.restore_prev_color() } + + fn span_start_number(&mut self) { self.push_color(self.colors.number()); } + fn span_end_number(&mut self) { self.restore_prev_color() } + + fn span_start_address(&mut self) { self.push_color(self.colors.address()); } + fn span_end_address(&mut self) { self.restore_prev_color() } + + fn span_start_function_expr(&mut self) { self.push_color(self.colors.function()); } + fn span_end_function_expr(&mut self) { self.restore_prev_color() } + } + } + #[cfg(feature="alloc")] + pub use ansi_display_sink::AnsiDisplaySink; +} + +pub struct DefaultColors; + +impl YaxColors for DefaultColors { + fn arithmetic_op(&self) -> Color { + Color::Yellow + } + fn stack_op(&self) -> Color { + Color::DarkMagenta + } + fn nop_op(&self) -> Color { + Color::DarkBlue + } + fn stop_op(&self) -> Color { + Color::Red + } + fn control_flow_op(&self) -> Color { + Color::DarkGreen + } + fn data_op(&self) -> Color { + Color::Magenta + } + fn comparison_op(&self) -> Color { + Color::DarkYellow + } + fn invalid_op(&self) -> Color { + Color::DarkRed + } + fn misc_op(&self) -> Color { + Color::Cyan + } + fn platform_op(&self) -> Color { + Color::DarkCyan + } + + fn register(&self) -> Color { + Color::DarkCyan + } + fn program_counter(&self) -> Color { + Color::DarkRed + } + fn number(&self) -> Color { + Color::White + } + fn zero(&self) -> Color { + Color::White + } + fn one(&self) -> Color { + Color::White + } + fn minus_one(&self) -> Color { + Color::White + } + fn address(&self) -> Color { + Color::DarkGreen + } + fn symbol(&self) -> Color { + Color::Green + } + fn function(&self) -> Color { + Color::Green + } +} diff --git a/src/display.rs b/src/display.rs index 789919e..754d3e6 100644 --- 
a/src/display.rs +++ b/src/display.rs @@ -1,9 +1,35 @@ +// allow use of deprecated items in this module since some functions using `SignedHexDisplay` still +// exist here +#![allow(deprecated)] + use crate::YaxColors; use core::fmt; use core::num::Wrapping; use core::ops::Neg; +mod display_sink; + +pub use display_sink::{DisplaySink, FmtSink}; +#[cfg(feature = "alloc")] +pub use display_sink::InstructionTextSink; + +/// translate a byte in range `[0, 15]` to a lowercase base-16 digit. +/// +/// if `c` is in range, the output is always valid as the sole byte in a utf-8 string. if `c` is out +/// of range, the returned character might not be a valid single-byte utf-8 codepoint. +#[cfg(feature = "alloc")] // this function is of course not directly related to alloc, but it's only needed by impls that themselves are only present with alloc. +fn u8_to_hex(c: u8) -> u8 { + // this conditional branch is faster than a lookup for... most architectures (especially x86 + // with cmov) + if c < 10 { + b'0' + c + } else { + b'a' + c - 10 + } +} + +#[deprecated(since="0.3.0", note="format_number_i32 does not optimize as expected and will be removed in the future. see DisplaySink instead.")] pub enum NumberStyleHint { Signed, HexSigned, @@ -17,36 +43,37 @@ pub enum NumberStyleHint { HexUnsignedWithSign } -pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result { +#[deprecated(since="0.3.0", note="format_number_i32 is both slow and incorrect: YaxColors may not result in correct styling when writing anywhere other than a terminal, and both stylin and formatting does not inline as well as initially expected. 
see DisplaySink instead.")] +pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(_colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result { match hint { NumberStyleHint::Signed => { - write!(f, "{}", colors.number(i)) + write!(f, "{}", (i)) }, NumberStyleHint::HexSigned => { - write!(f, "{}", colors.number(signed_i32_hex(i))) + write!(f, "{}", signed_i32_hex(i)) }, NumberStyleHint::Unsigned => { - write!(f, "{}", colors.number(i as u32)) + write!(f, "{}", i as u32) }, NumberStyleHint::HexUnsigned => { - write!(f, "{}", colors.number(u32_hex(i as u32))) + write!(f, "{}", u32_hex(i as u32)) }, NumberStyleHint::SignedWithSignSplit => { if i == core::i32::MIN { - write!(f, "- {}", colors.number("2147483647")) + write!(f, "- {}", "2147483647") } else if i < 0 { - write!(f, "- {}", colors.number(-Wrapping(i))) + write!(f, "- {}", -Wrapping(i)) } else { - write!(f, "+ {}", colors.number(i)) + write!(f, "+ {}", i) } } NumberStyleHint::HexSignedWithSignSplit => { if i == core::i32::MIN { - write!(f, "- {}", colors.number("0x7fffffff")) + write!(f, "- {}", ("0x7fffffff")) } else if i < 0 { - write!(f, "- {}", colors.number(u32_hex((-Wrapping(i)).0 as u32))) + write!(f, "- {}", u32_hex((-Wrapping(i)).0 as u32)) } else { - write!(f, "+ {}", colors.number(u32_hex(i as u32))) + write!(f, "+ {}", u32_hex(i as u32)) } }, NumberStyleHint::HexSignedWithSign => { @@ -64,6 +91,7 @@ pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i: } } +#[deprecated(since="0.3.0", note="SignedHexDisplay does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub struct SignedHexDisplay<T: core::fmt::LowerHex + Neg> { value: T, negative: bool @@ -79,6 +107,7 @@ impl<T: fmt::LowerHex + Neg + Copy> fmt::Display for SignedHexDisplay<T> where W } } +#[deprecated(since="0.3.0", note="u8_hex does not optimize like expected and will be removed in the future. 
see DisplaySink instead.")] pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> { SignedHexDisplay { value: value as i8, @@ -86,6 +115,7 @@ pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> { } } +#[deprecated(since="0.3.0", note="signed_i8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> { SignedHexDisplay { value: imm, @@ -93,6 +123,7 @@ pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> { } } +#[deprecated(since="0.3.0", note="u16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> { SignedHexDisplay { value: value as i16, @@ -100,6 +131,7 @@ pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> { } } +#[deprecated(since="0.3.0", note="signed_i16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> { SignedHexDisplay { value: imm, @@ -107,6 +139,7 @@ pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> { } } +#[deprecated(since="0.3.0", note="u32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> { SignedHexDisplay { value: value as i32, @@ -114,6 +147,7 @@ pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> { } } +#[deprecated(since="0.3.0", note="signed_i32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> { SignedHexDisplay { value: imm, @@ -121,6 +155,7 @@ pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> { } } +#[deprecated(since="0.3.0", note="u64_hex does not optimize like expected and will be removed in the future. 
see DisplaySink instead.")] pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> { SignedHexDisplay { value: value as i64, @@ -128,6 +163,7 @@ pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> { } } +#[deprecated(since="0.3.0", note="signed_i64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")] pub fn signed_i64_hex(imm: i64) -> SignedHexDisplay<i64> { SignedHexDisplay { value: imm, diff --git a/src/display/display_sink.rs b/src/display/display_sink.rs new file mode 100644 index 0000000..9aa3c85 --- /dev/null +++ b/src/display/display_sink.rs @@ -0,0 +1,1017 @@ +use core::fmt; + +// `imp_x86.rs` has `asm!()` macros, and so is not portable at all. +#[cfg(all(feature="alloc", target_arch = "x86_64"))] +#[path="./display_sink/imp_x86.rs"] +mod imp; + +// for other architectures, fall back on possibly-slower portable functions. +#[cfg(all(feature="alloc", not(target_arch = "x86_64")))] +#[path="./display_sink/imp_generic.rs"] +mod imp; + + +/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used +/// in formatting instructions for two reasons: +/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at +/// the same time as collecting output itself. +/// * `DisplaySink` implementations provide specialized functions for writing strings in +/// circumstances where a simple "use `core::fmt`" might incur unwanted overhead. +/// +/// ## spans +/// +/// spans are out-of-band indicators for the meaning of data written to this sink. when a +/// `span_start_<foo>` function is called, data written until a matching `span_end_<foo>` can be +/// considered the text corresponding to `<foo>`. +/// +/// spans are entered and exited in a FILO manner. implementations of `DisplaySink` are explicitly +/// allowed to depend on this fact. functions writing to a `DisplaySink` must exit spans in reverse +/// order to when they are entered. 
a function that has a call sequence like +/// ```text +/// sink.span_start_operand(); +/// sink.span_start_immediate(); +/// sink.span_end_operand(); +/// ``` +/// is in error. +/// +/// spans are reported through the `span_start_*` and `span_end_*` families of functions to avoid +/// constraining implementations into tracking current output offset (which may not be knowable) or +/// span size (which may be knowable, but incur additional overhead to compute or track). if the +/// task for a span is to simply emit VT100 color codes, for example, implementations avoid the +/// overhead of tracking offsets. +/// +/// default implementations of the `span_start_*` and `span_end_*` functions are to do nothing. a +/// no-op `span_start_*` or `span_end_*` allows rustc to eliminate such calls at compile time for +/// `DisplaySink` that are uninterested in the corresponding span type. +/// +/// ## write helpers (`write_*`) +/// +/// the `write_*` helpers on `DisplaySink` may be able to take advantage of constraints described in +/// documentation here to better support writing some kinds of inputs than a fully-general solution +/// (such as `core::fmt`) might be able to yield. +/// +/// currently there are two motivating factors for `write_*` helpers: +/// +/// instruction formatting often involves writing small but variable-size strings, such as register +/// names, which is something of a pathological case for string appending as Rust currently exists: +/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an +/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: +/// <https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232> +/// +/// there are similar papercuts around formatting integers as base-16 numbers, such as +/// <https://github.com/rust-lang/rust/pull/122770>. in isolation and in most applications these are +/// not a significant source of overhead. 
but for programs bounded on decoding and printing +/// instructions, these can add up to significant overhead - on the order of 10-20% of total +/// runtime. +/// +/// ## example +/// +/// a simple call sequence to `DisplaySink` might look something like: +/// ```compile_fail +/// sink.span_start_operand() +/// sink.write_char('[') +/// sink.span_start_register() +/// sink.write_fixed_size("rbp") +/// sink.span_end_register() +/// sink.write_char(']') +/// sink.span_end_operand() +/// ``` +/// which writes the text `[rbp]`, telling sinks that the operand begins at `[`, ends after `]`, +/// and `rbp` is a register in that operand. +/// +/// ## extensibility +/// +/// additional `span_{start,end}_*` helpers may be added over time - in the above example, one +/// future addition might be to add a new `effective_address` span that is started before +/// `register` and ended after `register`. for an operand like `\[rbp\]` the effective address span +/// would exactly match a corresponding register span, but in more complicated scenarios like +/// `[rsp + rdi * 4 + 0x50]` the effective address would be all of `rsp + rdi * 4 + 0x50`. +/// +/// additional spans are expected to be added as needed. it is not immediately clear how +/// more architecture-specific concepts (such as itanium predicate registers) would be +/// supported yet, and so architecture-specific concepts may be expressed on `DisplaySink` if the +/// need arises. +/// +/// new `span_{start,end}_*` helpers will be defaulted as no-op. additions to this trait will be +/// minor version bumps, so users should take care to not add custom functions starting with +/// `span_start_` or `span_end_` to structs implementing `DisplaySink`. +pub trait DisplaySink: fmt::Write { + #[inline(always)] + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + + /// write a string to this sink that is less than 32 bytes. 
this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than + /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + /// write a string to this sink that is less than 16 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than + /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + /// write a string to this sink that is less than 8 bytes. this is provided for optimization + /// opportunities when writing a variable-length string with known max size. + /// + /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than + /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a + /// utf-8 string. this may corrupt Rust strings. + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.write_str(s) + } + + /// write a u8 to the output as a base-16 integer. + /// + /// this corresponds to the Rust format specifier `{:x}` - see [`std::fmt::LowerHex`] for more. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u8 to the output as a base-16 integer with leading `0x`. + /// + /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { + self.write_fixed_size("0x")?; + self.write_u8(v) + } + /// write an i8 to the output as a base-16 integer with leading `0x`, and leading `-` if the + /// value is negative. + /// + /// there is no matching `std` formatter, so some examples here: + /// ```text + /// sink.write_prefixed_i8(-0x60); // writes `-0x60` to the sink + /// sink.write_prefixed_i8(127); // writes `0x7f` to the sink + /// sink.write_prefixed_i8(-128); // writes `-0x80` to the sink + /// ``` + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_i8(&mut self, v: i8) -> Result<(), core::fmt::Error> { + let v = if v < 0 { + self.write_char('-')?; + v.unsigned_abs() + } else { + v as u8 + }; + self.write_prefixed_u8(v) + } + /// write a u16 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u16 to the output as a base-16 integer with leading `0x`. + /// + /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { + self.write_fixed_size("0x")?; + self.write_u16(v) + } + /// write an i16 to the output as a base-16 integer with leading `0x`, and leading `-` if the + /// value is negative. + /// + /// there is no matching `std` formatter, so some examples here: + /// ```text + /// sink.write_prefixed_i16(-0x60); // writes `-0x60` to the sink + /// sink.write_prefixed_i16(127); // writes `0x7f` to the sink + /// sink.write_prefixed_i16(-128); // writes `-0x80` to the sink + /// ``` + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_i16(&mut self, v: i16) -> Result<(), core::fmt::Error> { + let v = if v < 0 { + self.write_char('-')?; + v.unsigned_abs() + } else { + v as u16 + }; + self.write_prefixed_u16(v) + } + /// write a u32 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u32 to the output as a base-16 integer with leading `0x`. + /// + /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { + self.write_fixed_size("0x")?; + self.write_u32(v) + } + /// write an i32 to the output as a base-16 integer with leading `0x`, and leading `-` if the + /// value is negative. + /// + /// there is no matching `std` formatter, so some examples here: + /// ```text + /// sink.write_prefixed_i32(-0x60); // writes `-0x60` to the sink + /// sink.write_prefixed_i32(127); // writes `0x7f` to the sink + /// sink.write_prefixed_i32(-128); // writes `-0x80` to the sink + /// ``` + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_i32(&mut self, v: i32) -> Result<(), core::fmt::Error> { + let v = if v < 0 { + self.write_char('-')?; + v.unsigned_abs() + } else { + v as u32 + }; + self.write_prefixed_u32(v) + } + /// write a u64 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { + write!(self, "{:x}", v) + } + /// write a u64 to the output as a base-16 integer with leading `0x`. + /// + /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { + self.write_fixed_size("0x")?; + self.write_u64(v) + } + /// write an i64 to the output as a base-16 integer with leading `0x`, and leading `-` if the + /// value is negative. + /// + /// there is no matching `std` formatter, so some examples here: + /// ```text + /// sink.write_prefixed_i64(-0x60); // writes `-0x60` to the sink + /// sink.write_prefixed_i64(127); // writes `0x7f` to the sink + /// sink.write_prefixed_i64(-128); // writes `-0x80` to the sink + /// ``` + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + fn write_prefixed_i64(&mut self, v: i64) -> Result<(), core::fmt::Error> { + let v = if v < 0 { + self.write_char('-')?; + v.unsigned_abs() + } else { + v as u64 + }; + self.write_prefixed_u64(v) + } + + /// enter a region inside which output corresponds to an immediate. + fn span_start_immediate(&mut self) { } + /// end a region where an immediate was written. see docs on [`DisplaySink`] for more. + fn span_end_immediate(&mut self) { } + + /// enter a region inside which output corresponds to a register. 
+ fn span_start_register(&mut self) { } + /// end a region where a register was written. see docs on [`DisplaySink`] for more. + fn span_end_register(&mut self) { } + + /// enter a region inside which output corresponds to an opcode. + fn span_start_opcode(&mut self) { } + /// end a region where an opcode was written. see docs on [`DisplaySink`] for more. + fn span_end_opcode(&mut self) { } + + /// enter a region inside which output corresponds to the program counter. + fn span_start_program_counter(&mut self) { } + /// end a region where the program counter was written. see docs on [`DisplaySink`] for more. + fn span_end_program_counter(&mut self) { } + + /// enter a region inside which output corresponds to a number, such as a memory offset or + /// immediate. + fn span_start_number(&mut self) { } + /// end a region where a number was written. see docs on [`DisplaySink`] for more. + fn span_end_number(&mut self) { } + + /// enter a region inside which output corresponds to an address. this is a best guess; + /// instructions like x86's `lea` may involve an "address" that is not, and arithmetic + /// instructions may operate on addresses held in registers. + /// + /// where possible, the presence of this span will be informed by ISA semantics - if an + /// instruction has a memory operand, the effective address calculation of that operand should + /// be in an address span. + fn span_start_address(&mut self) { } + /// end a region where an address was written. the specifics of an "address" are ambiguous and + /// best-effort; see [`DisplaySink::span_start_address`] for more about this. otherwise, see + /// docs on [`DisplaySink`] for more about spans. + fn span_end_address(&mut self) { } + + /// enter a region inside which output corresponds to a function address, or expression + /// evaluating to a function address. 
this is a best guess; instructions like `call` may call + /// to a non-function address, `jmp` may jump to a function (as with tail calls), function + /// addresses may be computed via table lookup without semantic hints. + /// + /// where possible, the presence of this span will be informed by ISA semantics - if an + /// instruction is like a "call", an address operand should be a `function` span. if other + /// instructions can be expected to handle subroutine starting addresses purely from ISA + /// semantics, address operand(s) should be in a `function` span. + fn span_start_function_expr(&mut self) { } + /// end a region where function address expression was written. the specifics of a "function + /// address" are ambiguous and best-effort; see [`DisplaySink::span_start_function_expr`] for more + /// about this. otherwise, see docs on [`DisplaySink`] for more about spans. + fn span_end_function_expr(&mut self) { } +} + +/// `FmtSink` can be used to adapt any `fmt::Write`-implementing type into a `DisplaySink` to +/// format an instruction while discarding all span information at zero cost. +pub struct FmtSink<'a, T: fmt::Write> { + out: &'a mut T, +} + +impl<'a, T: fmt::Write> FmtSink<'a, T> { + pub fn new(f: &'a mut T) -> Self { + Self { out: f } + } + + pub fn inner_ref(&self) -> &T { + &self.out + } +} + +/// blanket impl that discards all span information, forwards writes to the underlying `fmt::Write` +/// type. 
+impl<'a, T: fmt::Write> DisplaySink for FmtSink<'a, T> { } + +impl<'a, T: fmt::Write> fmt::Write for FmtSink<'a, T> { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.out.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + self.out.write_char(c) + } + fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { + self.out.write_fmt(f) + } +} + +#[cfg(feature = "alloc")] +mod instruction_text_sink { + use core::fmt; + + use super::{DisplaySink, u8_to_hex}; + + /// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the + /// disassemblers, do not use this struct. do not depend on this struct existing. this struct is + /// not stable. this struct is not safe for general use. if you use this struct you and your + /// program will be eaten by gremlins. + /// + /// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct + /// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. + /// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are + /// not bounds-checked, and the buffer is never grown. + /// + /// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is + /// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be + /// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* + /// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would + /// be written to without being rewound first. + /// + /// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect + /// uses of this struct will be hard to debug in general. 
`InstructionFormatter` is probably at the + /// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of + /// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. + /// + /// this is intended to be hidden in docs. if you see this in docs, it's a bug. + #[doc(hidden)] + pub struct InstructionTextSink<'buf> { + buf: &'buf mut alloc::string::String + } + + impl<'buf> InstructionTextSink<'buf> { + /// create an `InstructionTextSink` using the provided buffer for storage. + /// + /// SAFETY: callers must ensure that this sink will never have more content written than + /// this buffer can hold. while the buffer may appear growable, `write_*` methods here may + /// *bypass bounds checks* and so will never trigger the buffer to grow. writing more data + /// than the buffer's size when provided to `new` will cause out-of-bounds writes and + /// memory corruption. + pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { + Self { buf } + } + } + + impl<'buf> fmt::Write for InstructionTextSink<'buf> { + fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.buf.write_str(s) + } + fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + 1 { + panic!("InstructionTextSink::write_char would overflow output"); + } + } + + // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` + // is valid for writing, but may be uninitialized. + // + // this function is essentially equivalent to `Vec::push` specialized for the case that + // `len < buf.capacity()`: + // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 + unsafe { + let underlying = self.buf.as_mut_vec(); + // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to + // write single ASCII characters. 
this is wrong in the general case, but `write_char` + // here is not going to be used in the general case. + if cfg!(debug_assertions) { + if c > '\x7f' { + panic!("InstructionTextSink::write_char would truncate output"); + } + } + let to_push = c as u8; + // `ptr::write` here because `underlying.add(underlying.len())` may not point to an + // initialized value, which would mean that turning that pointer into a `&mut u8` to + // store through would be UB. `ptr::write` avoids taking the mut ref. + underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); + // we have initialized all (one) bytes that `set_len` is increasing the length to + // include. + underlying.set_len(underlying.len() + 1); + } + Ok(()) + } + } + + impl<'buf> DisplaySink for InstructionTextSink<'buf> { + #[inline(always)] + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_fixed_size would overflow output"); + } + } + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.buf.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + if new_bytes.len() == 0 { + return Ok(()); + } + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + + // this used to be enough to bamboozle llvm away from + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped + // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` + // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this + // unrolls into some kind of appropriate series of `mov`. 
+ dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + dest.offset(i as isize).write(new_bytes[i]); + } + + buf.set_len(buf.len() + new_bytes.len()); + } + + Ok(()) + } + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_lt_32 would overflow output"); + } + } + + // Safety: `new` requires callers promise there is enough space to hold `s`. + unsafe { + super::imp::append_string_lt_32_unchecked(&mut self.buf, s); + } + + Ok(()) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_lt_16 would overflow output"); + } + } + + // Safety: `new` requires callers promise there is enough space to hold `s`. + unsafe { + super::imp::append_string_lt_16_unchecked(&mut self.buf, s); + } + + Ok(()) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + s.len() { + panic!("InstructionTextSink::write_lt_8 would overflow output"); + } + } + + // Safety: `new` requires callers promise there is enough space to hold `s`. + unsafe { + super::imp::append_string_lt_8_unchecked(&mut self.buf, s); + } + + Ok(()) + } + /// write a u8 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u8 would overflow output"); + } + } + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: `new()` requires callers promise there is space through to `new_len` + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u16 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u16 would overflow output"); + } + } + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: `new()` requires callers promise there is space through to `new_len` + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u32 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u32 would overflow output"); + } + } + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: `new()` requires callers promise there is space through to `new_len` + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u64 to the output as a base-16 integer. + /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. 
+ let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + + if cfg!(debug_assertions) { + if self.buf.capacity() < self.buf.len() + printed_size { + panic!("InstructionTextSink::write_u64 would overflow output"); + } + } + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.buf.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: `new()` requires callers promise there is space through to `new_len` + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + } +} +#[cfg(feature = "alloc")] +pub use instruction_text_sink::InstructionTextSink; + + +#[cfg(feature = "alloc")] +use crate::display::u8_to_hex; + +/// this [`DisplaySink`] impl exists to support somewhat more performant buffering of the kinds of +/// strings `yaxpeax-x86` uses in formatting instructions. +/// +/// span information is discarded at zero cost. +#[cfg(feature = "alloc")] +impl DisplaySink for alloc::string::String { + #[inline(always)] + fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { + self.reserve(s.len()); + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.as_mut_vec() }; + let new_bytes = s.as_bytes(); + + if new_bytes.len() == 0 { + return Ok(()); + } + + // Safety: we have reserved space for all `buf` bytes, above. 
+ unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + + // this used to be enough to bamboozle llvm away from + // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 + // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped + // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` + // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this + // unrolls into some kind of appropriate series of `mov`. + dest.offset(0 as isize).write(new_bytes[0]); + for i in 1..new_bytes.len() { + dest.offset(i as isize).write(new_bytes[i]); + } + + // Safety: we have initialized all bytes from where `self` initially ended, through to + // all `new_bytes` additional elements. + buf.set_len(buf.len() + new_bytes.len()); + } + + Ok(()) + } + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // Safety: we have reserved enough space for `s`. + unsafe { + imp::append_string_lt_32_unchecked(self, s); + } + + Ok(()) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // Safety: we have reserved enough space for `s`. + unsafe { + imp::append_string_lt_16_unchecked(self, s); + } + + Ok(()) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + self.reserve(s.len()); + + // Safety: we have reserved enough space for `s`. + unsafe { + imp::append_string_lt_8_unchecked(self, s); + } + + Ok(()) + } + /// write a u8 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: we have reserved space through to `new_len` by calling `reserve` above. + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u16 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: we have reserved space through to `new_len` by calling `reserve` above. + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u32 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: we have reserved space through to `new_len` by calling `reserve` above. + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } + /// write a u64 to the output as a base-16 integer. 
+ /// + /// this is provided for optimization opportunities when the formatted integer can be written + /// directly to the sink (rather than formatted to an intermediate buffer and output as a + /// followup step) + #[inline(always)] + fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { + if v == 0 { + return self.write_fixed_size("0"); + } + // we can fairly easily predict the size of a formatted string here with lzcnt, which also + // means we can write directly into the correct offsets of the output string. + let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + + self.reserve(printed_size); + + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { self.as_mut_vec() }; + let new_len = buf.len() + printed_size; + + // Safety: there is no way to exit this function without initializing all bytes up to + // `new_len` + unsafe { + buf.set_len(new_len); + } + // Safety: we have reserved space through to `new_len` by calling `reserve` above. + let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + + loop { + let digit = v % 16; + let c = u8_to_hex(digit as u8); + // Safety: `p` will not move before `buf`'s length at function entry, so `p` points + // to a location valid for writing. + unsafe { + p = p.offset(-1); + p.write(c); + } + v = v / 16; + if v == 0 { + break; + } + } + + Ok(()) + } +} diff --git a/src/display/display_sink/imp_generic.rs b/src/display/display_sink/imp_generic.rs new file mode 100644 index 0000000..8819243 --- /dev/null +++ b/src/display/display_sink/imp_generic.rs @@ -0,0 +1,26 @@ +/// append `data` to `buf`, assuming `data` is less than 8 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. 
+#[inline(always)] +pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) { + buf.push_str(data); +} + +/// append `data` to `buf`, assuming `data` is less than 16 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. +#[inline(always)] +pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) { + buf.push_str(data); +} + +/// append `data` to `buf`, assuming `data` is less than 32 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. +#[inline(always)] +pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) { + buf.push_str(data); +} diff --git a/src/display/display_sink/imp_x86.rs b/src/display/display_sink/imp_x86.rs new file mode 100644 index 0000000..902ea69 --- /dev/null +++ b/src/display/display_sink/imp_x86.rs @@ -0,0 +1,187 @@ +//! `imp_x86` has specialized copies to append short strings to strings. buffer sizing must be +//! handled by callers, in all cases. +//! +//! the structure of all implementations here is, essentially, to take the size of the data to +//! append and execute a copy for each bit set in that size, from highest to lowest. some bits are +//! simply never checked if the input is promised to never be that large - if a string to append is +//! only 0..7 bytes long, it is sufficient to only look at the low three bits to copy all bytes. +//! +//! in this way, it is slightly more efficient to right-size which append function is used, if the +//! maximum size of input strings can be bounded well. if the maximum size of input strings cannot +//! be bounded, you shouldn't be using these functions. 
+ +/// append `data` to `buf`, assuming `data` is less than 8 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. +#[inline(always)] +pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) { + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { buf.as_mut_vec() }; + let new_bytes = data.as_bytes(); + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + core::arch::asm!( + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } +} + +/// append `data` to `buf`, assuming `data` is less than 16 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. 
+#[inline(always)] +pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) { + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { buf.as_mut_vec() }; + let new_bytes = data.as_bytes(); + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + core::arch::asm!( + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } +} + +/// append `data` to `buf`, assuming `data` is less than 32 bytes and that `buf` has enough space +/// remaining to hold all bytes in `data`. +/// +/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`. 
+#[inline(always)] +pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) { + // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to + // be valid utf8 + let buf = unsafe { buf.as_mut_vec() }; + let new_bytes = data.as_bytes(); + + unsafe { + let dest = buf.as_mut_ptr().offset(buf.len() as isize); + let src = new_bytes.as_ptr(); + + let rem = new_bytes.len() as isize; + + // set_len early because there is no way to avoid the following asm!() writing that + // same number of bytes into buf + buf.set_len(buf.len() + new_bytes.len()); + + core::arch::asm!( + "6:", + "cmp {rem:e}, 16", + "jb 7f", + "mov {buf:r}, qword ptr [{src} + {rem} - 16]", + "mov qword ptr [{dest} + {rem} - 16], {buf:r}", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 16", + "jz 11f", + "7:", + "cmp {rem:e}, 8", + "jb 8f", + "mov {buf:r}, qword ptr [{src} + {rem} - 8]", + "mov qword ptr [{dest} + {rem} - 8], {buf:r}", + "sub {rem:e}, 8", + "jz 11f", + "8:", + "cmp {rem:e}, 4", + "jb 9f", + "mov {buf:e}, dword ptr [{src} + {rem} - 4]", + "mov dword ptr [{dest} + {rem} - 4], {buf:e}", + "sub {rem:e}, 4", + "jz 11f", + "9:", + "cmp {rem:e}, 2", + "jb 10f", + "mov {buf:x}, word ptr [{src} + {rem} - 2]", + "mov word ptr [{dest} + {rem} - 2], {buf:x}", + "sub {rem:e}, 2", + "jz 11f", + "10:", + "cmp {rem:e}, 1", + "jb 11f", + "mov {buf:l}, byte ptr [{src} + {rem} - 1]", + "mov byte ptr [{dest} + {rem} - 1], {buf:l}", + "11:", + src = in(reg) src, + dest = in(reg) dest, + rem = inout(reg) rem => _, + buf = out(reg) _, + options(nostack), + ); + } +} @@ -1,12 +1,14 @@ #![no_std] #![doc = include_str!("../README.md")] +#[cfg(feature = "alloc")] +extern crate alloc; + use core::fmt::{self, Debug, Display}; use core::hash::Hash; #[cfg(feature="use-serde")] #[macro_use] extern crate serde_derive; - #[cfg(feature="use-serde")] use serde::{Serialize, Deserialize}; @@ -18,19 +20,25 
@@ pub use address::AddrParse; pub mod annotation; +#[deprecated(since="0.3.0", note="yaxpeax_arch::color conflates output mechanism and styling, leaving it brittle and overly-restrictive. see `yaxpeax_arch::color_new`, which will replace `color` in a future version.")] mod color; +#[allow(deprecated)] // allow exporting the deprecated items here to not break downstreams even further... pub use color::{Colorize, NoColors, YaxColors}; - -#[cfg(feature="colors")] -pub use color::ColorSettings; +#[cfg(feature="color-new")] +pub mod color_new; pub mod display; + mod reader; pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be}; +pub mod safer_unchecked; + +pub mod testkit; + /// the minimum set of errors a `yaxpeax-arch` disassembler may produce. /// -/// it is permissible for an implementor of `DecodeError` to have items that return `false` for +/// it is permissible for an implementer of `DecodeError` to have items that return `false` for /// all these functions; decoders are permitted to error in way that `yaxpeax-arch` does not know /// about. pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static { @@ -42,12 +50,12 @@ pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static { /// generally indicate an issue with the instruction itself. this is in contrast to one /// specific operand being invalid for the instruction, or some other issue to do with decoding /// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy - /// and left as best-effort for decoder implementors. + /// and left as best-effort for decoder implementers. fn bad_opcode(&self) -> bool; /// did the decoder error because an operand of the instruction to decode is invalid? /// /// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on - /// the part of implementors. + /// the part of implementers. 
fn bad_operand(&self) -> bool; /// a human-friendly description of this decode error. fn description(&self) -> &'static str; @@ -127,6 +135,7 @@ impl DecodeError for StandardPartialDecoderError { } } +/* #[derive(Copy, Clone)] struct NoDescription {} @@ -135,6 +144,7 @@ impl fmt::Display for NoDescription { Ok(()) } } +*/ /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are /// the architecture-defined [`DecodeError`] implemention. @@ -152,7 +162,7 @@ pub trait Decoder<A: Arch + ?Sized> { /// SAFETY: /// /// while `inst` MUST be left in a state that does not violate Rust's safety guarantees, - /// implementors are NOT obligated to leave `inst` in a semantically meaningful state if + /// implementers are NOT obligated to leave `inst` in a semantically meaningful state if /// decoding fails. if `decode_into` returns an error, callers may find contradictory and /// useless information in `inst`, as well as *stale data* from whatever was passed in. fn decode_into<T: Reader<A::Address, A::Word>>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>; @@ -227,6 +237,8 @@ pub trait Instruction { fn well_defined(&self) -> bool; } +#[allow(deprecated)] +#[deprecated(since="0.3.0", note="ShowContextual ties YaxColors and fmt::Write in a way that only sometimes composes. simultaneously, it is too generic on Ctx, making it difficult to implement and use. it will be revisited in the future.")] pub trait ShowContextual<Addr, Ctx: ?Sized, T: fmt::Write, Y: YaxColors> { fn contextualize(&self, colors: &Y, address: Addr, context: Option<&Ctx>, out: &mut T) -> fmt::Result; } diff --git a/src/reader.rs b/src/reader.rs index 028d835..8b68486 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -24,8 +24,9 @@ pub enum ReadError { /// isn't a multiple of 8 bits, `U8Reader` won't be sufficient. 
pub trait Reader<Address, Item> { fn next(&mut self) -> Result<Item, ReadError>; - /// read `buf`-many items from this reader in bulk. if `Reader` cannot read `buf`-many items, - /// return `ReadError::ExhaustedInput`. + /// read `buf`-many items from this reader in bulk. + /// + /// if `Reader` cannot read `buf`-many items, return `ReadError::ExhaustedInput`. fn next_n(&mut self, buf: &mut [Item]) -> Result<(), ReadError>; /// mark the current position as where to measure `offset` against. fn mark(&mut self); diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs new file mode 100644 index 0000000..b556a6f --- /dev/null +++ b/src/safer_unchecked.rs @@ -0,0 +1,40 @@ +//! tools to help validate correct use of `unchecked` functions. +//! +//! these `kinda_unchecked` functions will use equivalent implementations that panic when +//! invariants are violated when the `debug_assertions` config is present, but use the +//! corresponding `*_unchecked` otherwise. +//! +//! for example, `GetSaferUnchecked` uses a normal index when debug assertions are enabled, but +//! `.get_unchecked()` otherwise. this means that tests and even fuzzing can be made to exercise +//! panic-on-error cases as desired. + +use core::slice::SliceIndex; + +pub trait GetSaferUnchecked<T> { + unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output + where + I: SliceIndex<[T]>; +} + +impl<T> GetSaferUnchecked<T> for [T] { + #[inline(always)] + unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output + where + I: SliceIndex<[T]>, + { + if cfg!(debug_assertions) { + &self[index] + } else { + self.get_unchecked(index) + } + } +} + +#[inline(always)] +pub unsafe fn unreachable_kinda_unchecked() -> ! 
{ + if cfg!(debug_assertions) { + panic!("UB: Unreachable unchecked was executed") + } else { + core::hint::unreachable_unchecked() + } +} diff --git a/src/testkit.rs b/src/testkit.rs new file mode 100644 index 0000000..215a062 --- /dev/null +++ b/src/testkit.rs @@ -0,0 +1,10 @@ +//! utilities to validate that implementations of traits in `yaxpeax-arch` uphold requirements +//! described in this crate. +//! +//! currently, this only includes tools to validate correct use of +//! [`crate::display::DisplaySink`], but may grow in the future. + +#[cfg(feature="alloc")] +mod display; +#[cfg(feature="alloc")] +pub use display::{DisplaySinkValidator, DisplaySinkWriteComparator}; diff --git a/src/testkit/display.rs b/src/testkit/display.rs new file mode 100644 index 0000000..3cef59c --- /dev/null +++ b/src/testkit/display.rs @@ -0,0 +1,192 @@ +//! tools to test the correctness of `yaxpeax-arch` trait implementations. + +use core::fmt; +use core::fmt::Write; + +use crate::display::DisplaySink; + +/// `DisplaySinkValidator` is a `DisplaySink` that panics if invariants required of +/// `DisplaySink`-writing functions are not upheld. +/// +/// there are two categories of invariants that `DisplaySinkValidator` validates. +/// +/// first, this panics if spans are not `span_end_*`-ed in first-in-last-out order with +/// corresponding `span_start_*. second, this panics if `write_lt_*` functions are ever provided +/// inputs longer than the corresponding maximum length. +/// +/// functions that write to a `DisplaySink` are strongly encouraged to come with fuzzing that for +/// all inputs `DisplaySinkValidator` does not panic. 
+pub struct DisplaySinkValidator { + spans: alloc::vec::Vec<&'static str>, +} + +impl DisplaySinkValidator { + pub fn new() -> Self { + Self { spans: alloc::vec::Vec::new() } + } +} + +impl core::ops::Drop for DisplaySinkValidator { + fn drop(&mut self) { + if self.spans.len() != 0 { + panic!("DisplaySinkValidator dropped with open spans"); + } + } +} + +impl fmt::Write for DisplaySinkValidator { + fn write_str(&mut self, _s: &str) -> Result<(), fmt::Error> { + Ok(()) + } + fn write_char(&mut self, _c: char) -> Result<(), fmt::Error> { + Ok(()) + } +} + +impl DisplaySink for DisplaySinkValidator { + unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { + if s.len() >= 32 { + panic!("DisplaySinkValidator::write_lt_32 was given a string longer than the maximum permitted length"); + } + + self.write_str(s) + } + unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { + if s.len() >= 16 { + panic!("DisplaySinkValidator::write_lt_16 was given a string longer than the maximum permitted length"); + } + + self.write_str(s) + } + unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { + if s.len() >= 8 { + panic!("DisplaySinkValidator::write_lt_8 was given a string longer than the maximum permitted length"); + } + + self.write_str(s) + } + + fn span_start_immediate(&mut self) { + self.spans.push("immediate"); + } + + fn span_end_immediate(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "immediate"); + } + + fn span_start_register(&mut self) { + self.spans.push("register"); + } + + fn span_end_register(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "register"); + } + + fn span_start_opcode(&mut self) { + self.spans.push("opcode"); + } + + fn span_end_opcode(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "opcode"); + } + + fn span_start_program_counter(&mut self) { + self.spans.push("program counter"); + } + + fn 
span_end_program_counter(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "program counter"); + } + + fn span_start_number(&mut self) { + self.spans.push("number"); + } + + fn span_end_number(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "number"); + } + + fn span_start_address(&mut self) { + self.spans.push("address"); + } + + fn span_end_address(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "address"); + } + + fn span_start_function_expr(&mut self) { + self.spans.push("function expr"); + } + + fn span_end_function_expr(&mut self) { + let last = self.spans.pop().expect("item to pop"); + assert_eq!(last, "function expr"); + } +} + +/// `DisplaySinkWriteComparator` helps test that two `DisplaySink` implementations which should +/// produce the same output actually do. +/// +/// this is most useful for cases like testing specialized `write_lt_*` functions, which ought to +/// behave the same as if `write_str()` were called instead and so can be used as a very simple +/// oracle. +/// +/// this is somewhat less useful when the sinks are expected to produce unequal text, such as when +/// one sink writes ANSI color sequences and the other does not. 
+pub struct DisplaySinkWriteComparator<'sinks, T: DisplaySink, U: DisplaySink> { + sink1: &'sinks mut T, + sink1_check: fn(&T) -> &str, + sink2: &'sinks mut U, + sink2_check: fn(&U) -> &str, +} + +impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySinkWriteComparator<'sinks, T, U> { + pub fn new( + t: &'sinks mut T, t_check: fn(&T) -> &str, + u: &'sinks mut U, u_check: fn(&U) -> &str + ) -> Self { + Self { + sink1: t, + sink1_check: t_check, + sink2: u, + sink2_check: u_check, + } + } + + fn compare_sinks(&self) { + let sink1_text = (self.sink1_check)(self.sink1); + let sink2_text = (self.sink2_check)(self.sink2); + + if sink1_text != sink2_text { + panic!("sinks produced different output: {} != {}", sink1_text, sink2_text); + } + } +} + +impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySink for DisplaySinkWriteComparator<'sinks, T, U> { + fn write_u8(&mut self, v: u8) -> Result<(), fmt::Error> { + self.sink1.write_u8(v).expect("write to sink1 succeeds"); + self.sink2.write_u8(v).expect("write to sink2 succeeds"); + self.compare_sinks(); + Ok(()) + } +} + +impl<'sinks, T: DisplaySink, U: DisplaySink> fmt::Write for DisplaySinkWriteComparator<'sinks, T, U> { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + self.sink1.write_str(s).expect("write to sink1 succeeds"); + self.sink2.write_str(s).expect("write to sink2 succeeds"); + Ok(()) + } + fn write_char(&mut self, c: char) -> Result<(), fmt::Error> { + self.sink1.write_char(c).expect("write to sink1 succeeds"); + self.sink2.write_char(c).expect("write to sink2 succeeds"); + Ok(()) + } +} diff --git a/tests/display.rs b/tests/display.rs new file mode 100644 index 0000000..8826303 --- /dev/null +++ b/tests/display.rs @@ -0,0 +1,143 @@ + +// this was something of a misfeature for these formatters.. 
+#[test]
+#[allow(deprecated)] // NOTE(review): presumably the `*_hex` helpers are deprecated upstream - confirm
+fn formatters_are_not_feature_gated() { // no `cfg` gate on this test: it must build under every feature set
+    use yaxpeax_arch::display::{
+        u8_hex, u16_hex, u32_hex, u64_hex,
+        signed_i8_hex, signed_i16_hex, signed_i32_hex, signed_i64_hex
+    };
+    let _ = u8_hex(10);
+    let _ = u16_hex(10);
+    let _ = u32_hex(10);
+    let _ = u64_hex(10);
+    let _ = signed_i8_hex(10);
+    let _ = signed_i16_hex(10);
+    let _ = signed_i32_hex(10);
+    let _ = signed_i64_hex(10);
+}
+
+#[cfg(feature="alloc")]
+#[test]
+fn instruction_text_sink_write_char_requires_ascii() {
+    use core::fmt::Write;
+    // ASCII bytes pushed one at a time through `write_char` must reach `text` unchanged.
+    let mut text = String::with_capacity(512); // far more capacity than the bytes written below
+    let mut sink = unsafe { // NOTE(review): contract of `new` is not visible here; presumably it relies on that spare capacity - confirm
+        yaxpeax_arch::display::InstructionTextSink::new(&mut text)
+    };
+    let expected = "`1234567890-=+_)(*&^%$#@!~\\][poiuytrewq |}{POIUYTREWQ';lkjhgfdsa\":LKJHGFDSA/.,mnbvcxz?><MNBVCXZ \r\n";
+    for c in expected.as_bytes().iter() {
+        sink.write_char(*c as char).expect("write works");
+    }
+    assert_eq!(text, expected);
+}
+
+#[cfg(feature="alloc")]
+#[test]
+#[should_panic]
+fn instruction_text_sink_write_char_rejects_not_ascii() {
+    use core::fmt::Write;
+    // U+0080 is the first non-ASCII scalar; any panic (inside `write_char` or from `expect` on an `Err`) passes.
+    let mut text = String::with_capacity(512);
+    let mut sink = unsafe { // NOTE(review): same unverified `unsafe` contract as in the ASCII test - confirm
+        yaxpeax_arch::display::InstructionTextSink::new(&mut text)
+    };
+    sink.write_char('\u{80}').expect("write works");
+}
+
+#[cfg(feature="alloc")]
+#[test]
+fn display_sink_write_hex_helpers() {
+    use yaxpeax_arch::display::{DisplaySink};
+    // each helper writes straight into a `String` and is compared with the equivalent `format!` output.
+    // for u8/i8/u16/i16 we can exhaustively test. we'll leave the rest for fuzzers.
+    let mut buf = String::new();
+    for i in 0..=u8::MAX {
+        buf.clear();
+        buf.write_u8(i).expect("write succeeds");
+        assert_eq!(buf, format!("{:x}", i));
+
+        buf.clear();
+        buf.write_prefixed_u8(i).expect("write succeeds");
+        assert_eq!(buf, format!("0x{:x}", i));
+
+        let expected = if (i as i8) < 0 { // same bits read as signed: negatives print as "-0x" + magnitude
+            format!("-0x{:x}", (i as i8).unsigned_abs())
+        } else {
+            format!("0x{:x}", i)
+        };
+
+        buf.clear();
+        buf.write_prefixed_i8(i as i8).expect("write succeeds");
+        assert_eq!(buf, expected);
+    }
+
+    for i in 0..=u16::MAX {
+        buf.clear();
+        buf.write_u16(i).expect("write succeeds");
+        assert_eq!(buf, format!("{:x}", i));
+
+        buf.clear();
+        buf.write_prefixed_u16(i).expect("write succeeds");
+        assert_eq!(buf, format!("0x{:x}", i));
+
+        let expected = if (i as i16) < 0 { // same bits read as signed: negatives print as "-0x" + magnitude
+            format!("-0x{:x}", (i as i16).unsigned_abs())
+        } else {
+            format!("0x{:x}", i)
+        };
+
+        buf.clear();
+        buf.write_prefixed_i16(i as i16).expect("write succeeds");
+        assert_eq!(buf, expected);
+    }
+}
+
+#[cfg(feature="alloc")]
+#[test]
+fn sinks_are_equivalent() {
+    use yaxpeax_arch::display::{DisplaySink, FmtSink};
+    use yaxpeax_arch::testkit::DisplaySinkWriteComparator;
+    // NOTE(review): presumably the comparator replays every write on both sinks and checks they agree - confirm.
+    let mut bare = String::new();
+    let mut through_sink = String::new();
+    for i in 0..=u16::MAX { // inclusive, so 0xffff (-1 as i16) is covered, matching the exhaustive u16 loop above
+        bare.clear();
+        through_sink.clear();
+        let mut out = FmtSink::new(&mut through_sink);
+        let mut comparator = DisplaySinkWriteComparator::new(
+            &mut out,
+            |sink| { sink.inner_ref().as_str() },
+            &mut bare,
+            |sink| { sink.as_str() },
+        );
+        comparator.write_u16(i).expect("write succeeds");
+        comparator.write_prefixed_u16(i).expect("write succeeds");
+        comparator.write_prefixed_i16(i as i16).expect("write succeeds");
+    }
+}
+
+#[cfg(all(feature="alloc", feature="color-new"))]
+#[test]
+fn ansi_sink_works() {
+    use yaxpeax_arch::color_new::ansi::AnsiDisplaySink;
+    use yaxpeax_arch::display::DisplaySink;
+    // text inside a span must come out wrapped in ANSI SGR sequences; text outside a span stays plain.
+    let mut buf = String::new();
+
+    let mut ansi_sink = AnsiDisplaySink::new(&mut buf, yaxpeax_arch::color_new::DefaultColors);
+
+    ansi_sink.span_start_immediate();
+    ansi_sink.write_prefixed_u8(0x80).expect("write succeeds");
+    ansi_sink.span_end_immediate();
+    ansi_sink.write_fixed_size("(").expect("write succeeds");
+    ansi_sink.span_start_register();
+    ansi_sink.write_fixed_size("rbp").expect("write succeeds");
+    ansi_sink.span_end_register();
+    ansi_sink.write_fixed_size(")").expect("write succeeds");
+
+    drop(ansi_sink); // release the sink (and its `&mut buf`) before inspecting the output
+
+    assert_eq!(buf, "\x1b[37m0x80\x1b[39m(\x1b[38;5;6mrbp\x1b[39m)");
+}
diff --git a/tests/lib.rs b/tests/lib.rs
index 1d5e964..9dc1449 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -12,6 +12,7 @@ fn test_u16() {
 }
 
 #[test]
+#[cfg(feature="std")] // cargo exposes features as `cfg(feature="...")`; it never sets a bare `std` cfg, so `cfg(std)` compiled this test out everywhere
 fn generic_error_can_bail() {
     use yaxpeax_arch::{Arch, Decoder, Reader};
 
@@ -23,6 +24,7 @@ fn generic_error_can_bail() {
     }
 }
 #[test]
+#[cfg(feature="std")] // cargo exposes features as `cfg(feature="...")`; it never sets a bare `std` cfg, so `cfg(std)` compiled this test out everywhere
 fn error_can_bail() {
     use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};
     struct TestIsa {}
@@ -76,3 +78,51 @@ fn error_can_bail() {
 
     assert_eq!(exercise_eq(), Err(Error::TestDecode(StandardDecodeError::ExhaustedInput)));
 }
+
+#[test] // no `cfg` gate: a minimal `Arch` implementation that must build under every feature set
+fn example_arch_impl() {
+    use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};
+    struct TestIsa {}
+    #[derive(Debug, Default)]
+    struct TestInst {}
+    impl Arch for TestIsa {
+        type Word = u8;
+        type Address = u64;
+        type Instruction = TestInst;
+        type Decoder = TestIsaDecoder;
+        type DecodeError = StandardDecodeError;
+        type Operand = ();
+    }
+
+    impl Instruction for TestInst {
+        fn well_defined(&self) -> bool { true }
+    }
+
+    impl LengthedInstruction for TestInst {
+        type Unit = AddressDiff<u64>;
+        fn len(&self) -> Self::Unit { AddressDiff::from_const(1) }
+        fn min_size() -> Self::Unit { AddressDiff::from_const(1) }
+    }
+
+    struct TestIsaDecoder {}
+
+    impl Default for TestIsaDecoder {
+        fn default() -> Self {
+            TestIsaDecoder {}
+        }
+    }
+
+    impl Decoder<TestIsa> for TestIsaDecoder {
+        fn decode_into<T: Reader<u64, u8>>(&self, _inst: &mut TestInst, _words: &mut T) -> Result<(), StandardDecodeError> {
+            Err(StandardDecodeError::ExhaustedInput) // this decoder never reads input and always fails
+        }
+    }
+
+    fn exercise_eq() -> Result<(), StandardDecodeError> {
+        let mut reader = U8Reader::new(&[]);
+        TestIsaDecoder::default().decode(&mut reader)?; // `?` forwards the decoder's error unchanged
+        Ok(())
+    }
+
+    assert_eq!(exercise_eq(), Err(StandardDecodeError::ExhaustedInput));
+}