aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG84
-rw-r--r--Cargo.toml16
-rw-r--r--Makefile19
-rw-r--r--README.md3
-rw-r--r--fuzz/.gitignore3
-rw-r--r--fuzz/Cargo.toml25
-rw-r--r--fuzz/fuzz_targets/write_helpers_are_correct.rs96
-rw-r--r--goodfile40
-rw-r--r--src/annotation/mod.rs43
-rw-r--r--src/color_new.rs281
-rw-r--r--src/display.rs58
-rw-r--r--src/display/display_sink.rs1017
-rw-r--r--src/display/display_sink/imp_generic.rs26
-rw-r--r--src/display/display_sink/imp_x86.rs187
-rw-r--r--src/lib.rs28
-rw-r--r--src/reader.rs5
-rw-r--r--src/safer_unchecked.rs40
-rw-r--r--src/testkit.rs10
-rw-r--r--src/testkit/display.rs192
-rw-r--r--tests/display.rs143
-rw-r--r--tests/lib.rs50
21 files changed, 2312 insertions, 54 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 4fb39ab..8cde9b8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,8 +1,88 @@
-## 0.3.0
+## TODO
-TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted
+~~TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted~~
+* a reader's `.offset()` should reflect the amount of items that were consumed, if any. if a reader can quickly determine
+ there is not enough input, should it return Incomplete(0) or ExhaustedInput? Incomplete(0) vs ExhaustedInput may still
+ imply that some state was changed (an access mode, for example). this needs more thought.
TODO: Reader::offset should return an AddressDiff<Address>, not a bare Address
+* quick look seems reasonable enough, should be changed in concert with
+ yaxpeax-core though and that's more than i'm signing up for today
TODO: impls of `fn one` and `fn zero` so downstream users don't have to import num_traits directly
+* seems nice at first but this means that there are conflicting functions when Zero or One are in scope
+ ... assuming that the idea at the time was to add `fn one` and `fn zero` to `AddressBase`.
+TODO: 0.4.0 or later:
+ * remove `mod colors`, crossterm dependency, related feature flags
+
+## 0.3.2
+
+fix yaxpeax-arch not building for non-x86 targets when alloc is not enabled
+
+## 0.3.1
+
+fix InstructionTextSink::write_char to not panic in debug builds
+
+## 0.3.0
+
+added a new crate feature flag, `alloc`.
+ this flag is for any features that do not require std, but do require
+ containers from `liballoc`. good examples are `alloc::string::String` or
+ `alloc::vec::Vec`.
+
+added `yaxpeax_arch::display::DisplaySink` after revisiting output colorization.
+ `DisplaySink` is better suited for general markup, rather than being focused
+ specifically on ANSI/console text coloring. `YaxColors` also simply does not
+ style text in some unfortunate circumstances, such as when the console that
+ needs to be styled is only written to after intermediate buffering.
+
+ `DisplaySink` also includes specializable functions for writing text to an
+ output, and the implementation for `alloc::string::String` takes advantage of
+ this: writing through `impl DisplaySink for String` will often be substantially
+ more performant than writing through `fmt::Write`.
+
+added `mod color_new`:
+ this includes an alternate vision for `YaxColors` and better fits with the
+ new `DisplaySink` machinery; ANSI-style text markup can be done through the
+ new `yaxpeax_arch::color_new::ansi::AnsiDisplaySink`.
+
+ this provides more flexibility than i'd initially expected! yours truly will
+ be using this to render instructions with HTML spans (rather than ANSI
+ sequences) to colorize dis.yaxpeax.net.
+
+ in the future, `mod colored` will be removed, `mod color_new` will be renamed
+ to `mod color`.
+
+deprecated `mod colored`:
+ generally, colorization of text is a presentation issue; `trait Colorize`
+ mixed formatting of data to text with how that text is presented, but that is
+ at odds with the same text being presented in different ways for which
+ colorization is not generic. for example, rendering an instruction as marked
+ up HTML involves coloring in an entirely different way than rendering an
+ instruction with ANSI sequences for a VT100-like terminal.
+
+added `yaxpeax_arch::safer_unchecked` to aid in testing use of unchecked methods
+ these were originally added to improve yaxpeax-x86 testing:
+ https://github.com/iximeow/yaxpeax-x86/pull/17, but are being pulled into
+ yaxpeax-arch as they're generally applicable and overall wonderful tools.
+ thank you again 522!
+
+added `mod testkit`:
+ this module contains tools to validate the correctness of crates implementing
+ `yaxpeax-arch` traits. these initial tools are focused on validating the
+ correctness of functions that write to `DisplaySink`, especially that span
+ management is correct.
+
+ `yaxpeax-x86`, for example, will imminently have fuzz targets to use these
+ types for its own validation.
+
+made VecSink's `records` private. instead of extracting records from the struct
+ by accessing this field directly, call `VecSink::into_inner()`.
+
+made VecSink available through the `alloc` feature flag as well as `std`.
+
+meta: the major omission in this release is an architecture-agnostic way to
+format an instruction into a `DisplaySink`. i haven't been able to figure out
+quite the right shape for that! it is fully expected in the future, and will
+probably end up somehow referenced through `yaxpeax_arch::Arch`.
## 0.2.8
diff --git a/Cargo.toml b/Cargo.toml
index c6d3cbf..ca47d5b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ keywords = ["disassembly", "disassembler"]
license = "0BSD"
name = "yaxpeax-arch"
repository = "https://git.iximeow.net/yaxpeax-arch/"
-version = "0.2.8"
+version = "0.3.2"
[dependencies]
"num-traits" = { version = "0.2", default-features = false }
@@ -23,14 +23,24 @@ thiserror = "1.0.26"
lto = true
[features]
-default = ["std", "use-serde", "colors", "address-parse"]
+default = ["std", "alloc", "use-serde", "color-new", "address-parse"]
-std = []
+std = ["alloc"]
+
+alloc = []
# enables the (optional) use of Serde for bounds on
# Arch and Arch::Address
use-serde = ["serde", "serde_derive"]
+# feature flag for the existing but misfeature'd initial support for output
+# coloring. the module this gates will be removed in 0.4.0, which includes
+# removing `trait Colorize`, and requires a major version bump for any
+# dependency that moves forward.
colors = ["crossterm"]
+# feature flag for revised output colorizing support, which will replace the
+# existing `colors` feature in 0.4.0.
+color-new = []
+
address-parse = []
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..57c8615
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+test: build-smoketest test-std test-no-std test-serde-no-std test-colors-no-std test-color-new-no-std test-alloc-no-std
+
+build-smoketest:
+ cargo build
+ cargo build --no-default-features
+ cargo build --no-default-features --target wasm32-wasi
+
+test-std:
+ cargo test
+test-no-std:
+ cargo test --no-default-features
+test-serde-no-std:
+ cargo test --no-default-features --features "serde"
+test-colors-no-std:
+ cargo test --no-default-features --features "colors"
+test-color-new-no-std:
+ cargo test --no-default-features --features "color-new"
+test-alloc-no-std:
+ cargo test --no-default-features --features "alloc"
diff --git a/README.md b/README.md
index 0304607..919e684 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,8 @@ typically this crate is only interesting if you're writing code to operate on mu
`yaxpeax-arch` has several crate features, which implementers are encouraged to also support:
* `std`: opt-in for `std`-specific support - in this crate, `std` enables a [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html) requirement on `DecodeError`, allowing users to `?`-unwrap decode results.
-* `colors`: enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled.
+* `color-new`: enables traits and structs to stylize formatted instructions, including ANSI colorization.
+* ~`colors`~: DEPRECATED. enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled.
* `address-parse`: enable a requirement that `yaxpeax_arch::Address` be parsable from `&str`. this is useful for use cases that, for example, read addresses from humans.
* `use-serde`: enable [`serde`](https://docs.rs/serde/latest/serde/) serialization and deserialization bounds for types like `Address`.
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 0000000..a092511
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,3 @@
+target
+corpus
+artifacts
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 0000000..67ffa43
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "yaxpeax-arch-fuzz"
+version = "0.0.0"
+authors = ["Automatically generated"]
+publish = false
+edition = "2018"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+[dependencies.yaxpeax-arch]
+path = ".."
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "write_helpers_are_correct"
+path = "fuzz_targets/write_helpers_are_correct.rs"
+test = false
+doc = false
diff --git a/fuzz/fuzz_targets/write_helpers_are_correct.rs b/fuzz/fuzz_targets/write_helpers_are_correct.rs
new file mode 100644
index 0000000..41e27bd
--- /dev/null
+++ b/fuzz/fuzz_targets/write_helpers_are_correct.rs
@@ -0,0 +1,96 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+use yaxpeax_arch::display::DisplaySink;
+
+use std::convert::TryInto;
+
+fuzz_target!(|data: &[u8]| {
+ let mut buf = String::new();
+ match data.len() {
+ 1 => {
+ let i = data[0];
+
+ buf.clear();
+ buf.write_u8(i).expect("write succeeds");
+ assert_eq!(buf, format!("{:x}", i));
+
+ buf.clear();
+ buf.write_prefixed_u8(i).expect("write succeeds");
+ assert_eq!(buf, format!("0x{:x}", i));
+
+ let expected = if (i as i8) < 0 {
+ format!("-0x{:x}", (i as i8).unsigned_abs())
+ } else {
+ format!("0x{:x}", i)
+ };
+
+ buf.clear();
+ buf.write_prefixed_i8(i as i8).expect("write succeeds");
+ assert_eq!(buf, expected);
+ },
+ 2 => {
+ let i: u16 = u16::from_le_bytes(data.try_into().expect("checked the size is right"));
+
+ buf.clear();
+ buf.write_u16(i).expect("write succeeds");
+ assert_eq!(buf, format!("{:x}", i));
+
+ buf.clear();
+ buf.write_prefixed_u16(i).expect("write succeeds");
+ assert_eq!(buf, format!("0x{:x}", i));
+
+ let expected = if (i as i16) < 0 {
+ format!("-0x{:x}", (i as i16).unsigned_abs())
+ } else {
+ format!("0x{:x}", i)
+ };
+
+ buf.clear();
+ buf.write_prefixed_i16(i as i16).expect("write succeeds");
+ assert_eq!(buf, expected);
+ }
+ 4 => {
+ let i: u32 = u32::from_le_bytes(data.try_into().expect("checked the size is right"));
+
+ buf.clear();
+ buf.write_u32(i).expect("write succeeds");
+ assert_eq!(buf, format!("{:x}", i));
+
+ buf.clear();
+ buf.write_prefixed_u32(i).expect("write succeeds");
+ assert_eq!(buf, format!("0x{:x}", i));
+
+ let expected = if (i as i32) < 0 {
+ format!("-0x{:x}", (i as i32).unsigned_abs())
+ } else {
+ format!("0x{:x}", i)
+ };
+
+ buf.clear();
+ buf.write_prefixed_i32(i as i32).expect("write succeeds");
+ assert_eq!(buf, expected);
+ },
+ 8 => {
+ let i: u64 = u64::from_le_bytes(data.try_into().expect("checked the size is right"));
+
+ buf.clear();
+ buf.write_u64(i).expect("write succeeds");
+ assert_eq!(buf, format!("{:x}", i));
+
+ buf.clear();
+ buf.write_prefixed_u64(i).expect("write succeeds");
+ assert_eq!(buf, format!("0x{:x}", i));
+
+ let expected = if (i as i64) < 0 {
+ format!("-0x{:x}", (i as i64).unsigned_abs())
+ } else {
+ format!("0x{:x}", i)
+ };
+
+ buf.clear();
+ buf.write_prefixed_i64(i as i64).expect("write succeeds");
+ assert_eq!(buf, expected);
+ },
+ _ => {}
+ }
+});
diff --git a/goodfile b/goodfile
index 4bdc992..92ebdf0 100644
--- a/goodfile
+++ b/goodfile
@@ -1,19 +1,35 @@
-Build.dependencies({"git", "make", "rustc", "cargo"})
+Build.dependencies({"git", "make", "rustc", "cargo", "rustup"})
Step.start("crate")
Step.push("build")
Build.run({"cargo", "build"})
+-- and now that some code is conditional on target arch, at least try to build
+-- for other architectures even if we might not be able to run on them.
+Build.run({"rustup", "target", "add", "wasm32-wasi"})
+Build.run({"cargo", "build", "--no-default-features", "--target", "wasm32-wasi"})
Step.advance("test")
+-- TODO: set `-D warnings` here and below...
Build.run({"cargo", "test"}, {name="test default features"})
-Build.run({"cargo", "test", "--no-default-features"}, {name="test no features"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "std"}, {name="test std only"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "colors"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "use-serde"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "address-parse"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "std,colors"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "std,use-serde"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "std,address-parse"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "std,colors,address-parse"}, {name="test feature combinations"})
-Build.run({"cargo", "test", "--no-default-features", "--features", "std,use-serde,colors"}, {name="test feature combinations"})
+
+-- `cargo test` ends up running doc tests. great! but yaxpeax-arch's docs reference items in std only.
+-- so for other feature combinations, skip doc tests. do this by passing `--tests` explicitly,
+-- which disables the automagic "run everything" settings.
+Build.run({"cargo", "test", "--no-default-features", "--tests"}, {name="test no features"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std"}, {name="test std only"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "colors"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "address-parse"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "alloc"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,address-parse,alloc"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "use-serde,colors,address-parse,alloc"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,colors,address-parse,alloc"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "std,use-serde,colors,alloc"}, {name="test feature combinations"})
+Build.run({"cargo", "test", "--no-default-features", "--tests", "--features", "color-new,alloc"}, {name="test feature combinations"})
diff --git a/src/annotation/mod.rs b/src/annotation/mod.rs
index 0248b94..af8b4bf 100644
--- a/src/annotation/mod.rs
+++ b/src/annotation/mod.rs
@@ -19,6 +19,8 @@
//! in a generic setting, there isn't much to do with a `FieldDescription` other than display it. a
//! typical use might look something like:
//! ```
+//! #[cfg(feature="std")]
+//! # {
//! use core::fmt;
//!
//! use yaxpeax_arch::annotation::{AnnotatingDecoder, VecSink};
@@ -40,6 +42,7 @@
//! println!(" bits [{}, {}]: {}", start, end, desc);
//! }
//! }
+//! # }
//! ```
//!
//! note that the range `[start, end]` for a reported span is _inclusive_. the `end`-th bit of a
@@ -73,7 +76,7 @@ use crate::{Arch, Reader};
use core::fmt::Display;
-/// implementors of `DescriptionSink` receive descriptions of an instruction's disassembly process
+/// implementers of `DescriptionSink` receive descriptions of an instruction's disassembly process
/// and relevant offsets in the bitstream being decoded. descriptions are archtecture-specific, and
/// architectures are expected to be able to turn the bit-level `start` and `width` values into a
/// meaningful description of bits in the original instruction stream.
@@ -91,24 +94,34 @@ impl<T> DescriptionSink<T> for NullSink {
fn record(&mut self, _start: u32, _end: u32, _description: T) { }
}
-#[cfg(feature = "std")]
-pub struct VecSink<T: Clone + Display> {
- pub records: std::vec::Vec<(u32, u32, T)>
-}
+#[cfg(feature = "alloc")]
+mod vec_sink {
+ use alloc::vec::Vec;
+ use core::fmt::Display;
+ use crate::annotation::DescriptionSink;
-#[cfg(feature = "std")]
-impl<T: Clone + Display> VecSink<T> {
- pub fn new() -> Self {
- VecSink { records: std::vec::Vec::new() }
+ pub struct VecSink<T: Clone + Display> {
+ pub records: Vec<(u32, u32, T)>
+ }
+
+ impl<T: Clone + Display> VecSink<T> {
+ pub fn new() -> Self {
+ VecSink { records: Vec::new() }
+ }
+
+ pub fn into_inner(self) -> Vec<(u32, u32, T)> {
+ self.records
+ }
}
-}
-#[cfg(feature = "std")]
-impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> {
- fn record(&mut self, start: u32, end: u32, description: T) {
- self.records.push((start, end, description));
+ impl<T: Clone + Display> DescriptionSink<T> for VecSink<T> {
+ fn record(&mut self, start: u32, end: u32, description: T) {
+ self.records.push((start, end, description));
+ }
}
}
+#[cfg(feature = "alloc")]
+pub use vec_sink::VecSink;
pub trait FieldDescription {
fn id(&self) -> u32;
@@ -118,7 +131,7 @@ pub trait FieldDescription {
/// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the
/// decoder able to report descriptions of bits or fields in the instruction to a sink implementing
/// [`DescriptionSink`]. the sink may be [`NullSink`] to discard provided data. decoding with a
-/// `NullSink` should behave identically to `Decoder::decode_into`. implementors are recommended to
+/// `NullSink` should behave identically to `Decoder::decode_into`. implementers are recommended to
/// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_annotation` if
/// implementing both traits.
pub trait AnnotatingDecoder<A: Arch + ?Sized> {
diff --git a/src/color_new.rs b/src/color_new.rs
new file mode 100644
index 0000000..1d3e358
--- /dev/null
+++ b/src/color_new.rs
@@ -0,0 +1,281 @@
+#[non_exhaustive]
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub enum Color {
+ Black,
+ DarkGrey,
+ Red,
+ DarkRed,
+ Green,
+ DarkGreen,
+ Yellow,
+ DarkYellow,
+ Blue,
+ DarkBlue,
+ Magenta,
+ DarkMagenta,
+ Cyan,
+ DarkCyan,
+ White,
+ Grey,
+}
+
+pub trait YaxColors {
+ fn arithmetic_op(&self) -> Color;
+ fn stack_op(&self) -> Color;
+ fn nop_op(&self) -> Color;
+ fn stop_op(&self) -> Color;
+ fn control_flow_op(&self) -> Color;
+ fn data_op(&self) -> Color;
+ fn comparison_op(&self) -> Color;
+ fn invalid_op(&self) -> Color;
+ fn platform_op(&self) -> Color;
+ fn misc_op(&self) -> Color;
+
+ fn register(&self) -> Color;
+ fn program_counter(&self) -> Color;
+ fn number(&self) -> Color;
+ fn zero(&self) -> Color;
+ fn one(&self) -> Color;
+ fn minus_one(&self) -> Color;
+ fn address(&self) -> Color;
+ fn symbol(&self) -> Color;
+ fn function(&self) -> Color;
+}
+
+/// support for colorizing text with ANSI control sequences.
+///
+/// the most useful item in this module is [`ansi::AnsiDisplaySink`], which interprets span entry
+/// and exit as points at which ANSI sequences may need to be written into the output it wraps -
+/// that output may be any type implementing [`crate::display::DisplaySink`], including
+/// [`crate::display::FmtSink`] to adapt any implementer of `fmt::Write` such as standard out.
+///
+/// ## example
+///
+/// to write colored text to standard out:
+///
+/// ```
+/// # #[cfg(feature="alloc")]
+/// # {
+/// # extern crate alloc;
+/// # use alloc::string::String;
+/// use yaxpeax_arch::color_new::DefaultColors;
+/// use yaxpeax_arch::color_new::ansi::AnsiDisplaySink;
+/// use yaxpeax_arch::display::FmtSink;
+///
+/// let mut s = String::new();
+/// let mut s_sink = FmtSink::new(&mut s);
+///
+/// let mut writer = AnsiDisplaySink::new(&mut s_sink, DefaultColors);
+///
+/// // this might be a yaxpeax crate's `display_into`, or other library implementation code
+/// mod fake_yaxpeax_crate {
+/// use yaxpeax_arch::display::DisplaySink;
+///
+/// pub fn format_memory_operand<T: DisplaySink>(out: &mut T) -> core::fmt::Result {
+/// out.span_start_immediate();
+/// out.write_prefixed_u8(0x80)?;
+/// out.span_end_immediate();
+/// out.write_fixed_size("(")?;
+/// out.span_start_register();
+/// out.write_fixed_size("rbp")?;
+/// out.span_end_register();
+/// out.write_fixed_size(")")?;
+/// Ok(())
+/// }
+/// }
+///
+/// // this might be how a user uses `AnsiDisplaySink`, which will write ANSI-ful text to `s` and
+/// // print it.
+///
+/// fake_yaxpeax_crate::format_memory_operand(&mut writer).expect("write succeeds");
+///
+/// println!("{}", s);
+/// # }
+/// ```
+pub mod ansi {
+ use crate::color_new::Color;
+
+ // color sequences as described by ECMA-48 and, apparently, `man 4 console_codes`
+ /// translate [`yaxpeax_arch::color_new::Color`] to an ANSI control code that changes the
+ /// foreground color to match.
+ #[allow(dead_code)] // allowing this to be dead code because if colors are enabled and alloc is not, there will not be an AnsiDisplaySink, which is the sole user of this function.
+ fn color2ansi(color: Color) -> &'static str {
+ // for most of these, in 256 color space the darker color can be picked by the same color
+ // index as the brighter form (from the 8 color command set). dark grey is an outlier,
+ // where 38;5;0 and 30 both are black. there is no "grey" in the shorter command set to
+ // map to. but it turns out that 38;5;8 is exactly the darker grey to use.
+ match color {
+ Color::Black => "\x1b[30m",
+ Color::DarkGrey => "\x1b[38;5;8m",
+ Color::Red => "\x1b[31m",
+ Color::DarkRed => "\x1b[38;5;1m",
+ Color::Green => "\x1b[32m",
+ Color::DarkGreen => "\x1b[38;5;2m",
+ Color::Yellow => "\x1b[33m",
+ Color::DarkYellow => "\x1b[38;5;3m",
+ Color::Blue => "\x1b[34m",
+ Color::DarkBlue => "\x1b[38;5;4m",
+ Color::Magenta => "\x1b[35m",
+ Color::DarkMagenta => "\x1b[38;5;5m",
+ Color::Cyan => "\x1b[36m",
+ Color::DarkCyan => "\x1b[38;5;6m",
+ Color::White => "\x1b[37m",
+ Color::Grey => "\x1b[38;5;7m",
+ }
+ }
+
+ // could reasonably be always present, but only used if feature="alloc"
+ #[cfg(feature="alloc")]
+ const DEFAULT_FG: &'static str = "\x1b[39m";
+
+ #[cfg(feature="alloc")]
+ mod ansi_display_sink {
+ use crate::color_new::{Color, YaxColors};
+ use crate::display::DisplaySink;
+
+ /// adapter to insert ANSI color command sequences in formatted text to style printed
+ /// instructions.
+ ///
+ /// this enables similar behavior as the deprecated [`crate::Colorize`] trait,
+ /// for outputs that can process ANSI color commands.
+ ///
+ /// `AnsiDisplaySink` will silently ignore errors from writes to the underlying `T:
+ /// DisplaySink`. when writing to a string or other growable buffer, errors are likely
+ /// inseparable from `abort()`. when writing to stdout or stderr, write failures likely
+ /// mean output is piped to a process which has closed the pipe but are otherwise harmless.
+ /// `span_start_*` and `span_end_*` don't have error reporting mechanisms in their return
+ /// type, so the only available error mechanism would be to also `abort()`.
+ ///
+ /// if this turns out to be a bad decision, it'll have to be rethought!
+ pub struct AnsiDisplaySink<'sink, T: DisplaySink, Y: YaxColors> {
+ out: &'sink mut T,
+ span_stack: alloc::vec::Vec<Color>,
+ colors: Y
+ }
+
+ impl<'sink, T: DisplaySink, Y: YaxColors> AnsiDisplaySink<'sink, T, Y> {
+ pub fn new(out: &'sink mut T, colors: Y) -> Self {
+ Self {
+ out,
+ span_stack: alloc::vec::Vec::new(),
+ colors,
+ }
+ }
+
+ fn push_color(&mut self, color: Color) {
+ self.span_stack.push(color);
+ let _ = self.out.write_fixed_size(super::color2ansi(color));
+ }
+
+ fn restore_prev_color(&mut self) {
+ let _ = self.span_stack.pop();
+ if let Some(prev_color) = self.span_stack.last() {
+ let _ = self.out.write_fixed_size(super::color2ansi(*prev_color));
+ } else {
+ let _ = self.out.write_fixed_size(super::DEFAULT_FG);
+ };
+ }
+ }
+
+ impl<'sink, T: DisplaySink, Y: YaxColors> core::fmt::Write for AnsiDisplaySink<'sink, T, Y> {
+ fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.out.write_str(s)
+ }
+ fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
+ self.out.write_char(c)
+ }
+ }
+
+ impl<'sink, T: DisplaySink, Y: YaxColors> DisplaySink for AnsiDisplaySink<'sink, T, Y> {
+ fn span_start_immediate(&mut self) { self.push_color(self.colors.number()); }
+ fn span_end_immediate(&mut self) { self.restore_prev_color() }
+
+ fn span_start_register(&mut self) { self.push_color(self.colors.register()); }
+ fn span_end_register(&mut self) { self.restore_prev_color() }
+
+ // ah.. the right way, currently, to colorize opcodes would be to collect text while in the
+ // opcode span, and request some kind of user-provided decoder ring to translate mnemonics
+ // into the right color. that's very unfortunate. maybe there should be another span for
+ // `opcode_kind(u8)` for impls to report what kind of opcode they'll be emitting..
+ fn span_start_opcode(&mut self) { self.push_color(self.colors.misc_op()); }
+ fn span_end_opcode(&mut self) { self.restore_prev_color() }
+
+ fn span_start_program_counter(&mut self) { self.push_color(self.colors.program_counter()); }
+ fn span_end_program_counter(&mut self) { self.restore_prev_color() }
+
+ fn span_start_number(&mut self) { self.push_color(self.colors.number()); }
+ fn span_end_number(&mut self) { self.restore_prev_color() }
+
+ fn span_start_address(&mut self) { self.push_color(self.colors.address()); }
+ fn span_end_address(&mut self) { self.restore_prev_color() }
+
+ fn span_start_function_expr(&mut self) { self.push_color(self.colors.function()); }
+ fn span_end_function_expr(&mut self) { self.restore_prev_color() }
+ }
+ }
+ #[cfg(feature="alloc")]
+ pub use ansi_display_sink::AnsiDisplaySink;
+}
+
+pub struct DefaultColors;
+
+impl YaxColors for DefaultColors {
+ fn arithmetic_op(&self) -> Color {
+ Color::Yellow
+ }
+ fn stack_op(&self) -> Color {
+ Color::DarkMagenta
+ }
+ fn nop_op(&self) -> Color {
+ Color::DarkBlue
+ }
+ fn stop_op(&self) -> Color {
+ Color::Red
+ }
+ fn control_flow_op(&self) -> Color {
+ Color::DarkGreen
+ }
+ fn data_op(&self) -> Color {
+ Color::Magenta
+ }
+ fn comparison_op(&self) -> Color {
+ Color::DarkYellow
+ }
+ fn invalid_op(&self) -> Color {
+ Color::DarkRed
+ }
+ fn misc_op(&self) -> Color {
+ Color::Cyan
+ }
+ fn platform_op(&self) -> Color {
+ Color::DarkCyan
+ }
+
+ fn register(&self) -> Color {
+ Color::DarkCyan
+ }
+ fn program_counter(&self) -> Color {
+ Color::DarkRed
+ }
+ fn number(&self) -> Color {
+ Color::White
+ }
+ fn zero(&self) -> Color {
+ Color::White
+ }
+ fn one(&self) -> Color {
+ Color::White
+ }
+ fn minus_one(&self) -> Color {
+ Color::White
+ }
+ fn address(&self) -> Color {
+ Color::DarkGreen
+ }
+ fn symbol(&self) -> Color {
+ Color::Green
+ }
+ fn function(&self) -> Color {
+ Color::Green
+ }
+}
diff --git a/src/display.rs b/src/display.rs
index 789919e..754d3e6 100644
--- a/src/display.rs
+++ b/src/display.rs
@@ -1,9 +1,35 @@
+// allow use of deprecated items in this module since some functions using `SignedHexDisplay` still
+// exist here
+#![allow(deprecated)]
+
use crate::YaxColors;
use core::fmt;
use core::num::Wrapping;
use core::ops::Neg;
+mod display_sink;
+
+pub use display_sink::{DisplaySink, FmtSink};
+#[cfg(feature = "alloc")]
+pub use display_sink::InstructionTextSink;
+
+/// translate a byte in range `[0, 15]` to a lowercase base-16 digit.
+///
+/// if `c` is in range, the output is always valid as the sole byte in a utf-8 string. if `c` is out
+/// of range, the returned character might not be a valid single-byte utf-8 codepoint.
+#[cfg(feature = "alloc")] // this function is of course not directly related to alloc, but it's only needed by impls that themselves are only present with alloc.
+fn u8_to_hex(c: u8) -> u8 {
+ // this conditional branch is faster than a lookup for... most architectures (especially x86
+ // with cmov)
+ if c < 10 {
+ b'0' + c
+ } else {
+ b'a' + c - 10
+ }
+}
+
+#[deprecated(since="0.3.0", note="format_number_i32 does not optimize as expected and will be removed in the future. see DisplaySink instead.")]
pub enum NumberStyleHint {
Signed,
HexSigned,
@@ -17,36 +43,37 @@ pub enum NumberStyleHint {
HexUnsignedWithSign
}
-pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result {
+#[deprecated(since="0.3.0", note="format_number_i32 is both slow and incorrect: YaxColors may not result in correct styling when writing anywhere other than a terminal, and both styling and formatting do not inline as well as initially expected. see DisplaySink instead.")]
+pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(_colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result {
match hint {
NumberStyleHint::Signed => {
- write!(f, "{}", colors.number(i))
+ write!(f, "{}", (i))
},
NumberStyleHint::HexSigned => {
- write!(f, "{}", colors.number(signed_i32_hex(i)))
+ write!(f, "{}", signed_i32_hex(i))
},
NumberStyleHint::Unsigned => {
- write!(f, "{}", colors.number(i as u32))
+ write!(f, "{}", i as u32)
},
NumberStyleHint::HexUnsigned => {
- write!(f, "{}", colors.number(u32_hex(i as u32)))
+ write!(f, "{}", u32_hex(i as u32))
},
NumberStyleHint::SignedWithSignSplit => {
if i == core::i32::MIN {
- write!(f, "- {}", colors.number("2147483647"))
+ write!(f, "- {}", "2147483647")
} else if i < 0 {
- write!(f, "- {}", colors.number(-Wrapping(i)))
+ write!(f, "- {}", -Wrapping(i))
} else {
- write!(f, "+ {}", colors.number(i))
+ write!(f, "+ {}", i)
}
}
NumberStyleHint::HexSignedWithSignSplit => {
if i == core::i32::MIN {
- write!(f, "- {}", colors.number("0x7fffffff"))
+ write!(f, "- {}", ("0x7fffffff"))
} else if i < 0 {
- write!(f, "- {}", colors.number(u32_hex((-Wrapping(i)).0 as u32)))
+ write!(f, "- {}", u32_hex((-Wrapping(i)).0 as u32))
} else {
- write!(f, "+ {}", colors.number(u32_hex(i as u32)))
+ write!(f, "+ {}", u32_hex(i as u32))
}
},
NumberStyleHint::HexSignedWithSign => {
@@ -64,6 +91,7 @@ pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i:
}
}
+#[deprecated(since="0.3.0", note="SignedHexDisplay does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub struct SignedHexDisplay<T: core::fmt::LowerHex + Neg> {
value: T,
negative: bool
@@ -79,6 +107,7 @@ impl<T: fmt::LowerHex + Neg + Copy> fmt::Display for SignedHexDisplay<T> where W
}
}
+#[deprecated(since="0.3.0", note="u8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> {
SignedHexDisplay {
value: value as i8,
@@ -86,6 +115,7 @@ pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> {
}
}
+#[deprecated(since="0.3.0", note="signed_i8_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> {
SignedHexDisplay {
value: imm,
@@ -93,6 +123,7 @@ pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> {
}
}
+#[deprecated(since="0.3.0", note="u16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> {
SignedHexDisplay {
value: value as i16,
@@ -100,6 +131,7 @@ pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> {
}
}
+#[deprecated(since="0.3.0", note="signed_i16_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> {
SignedHexDisplay {
value: imm,
@@ -107,6 +139,7 @@ pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> {
}
}
+#[deprecated(since="0.3.0", note="u32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> {
SignedHexDisplay {
value: value as i32,
@@ -114,6 +147,7 @@ pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> {
}
}
+#[deprecated(since="0.3.0", note="signed_i32_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> {
SignedHexDisplay {
value: imm,
@@ -121,6 +155,7 @@ pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> {
}
}
+#[deprecated(since="0.3.0", note="u64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> {
SignedHexDisplay {
value: value as i64,
@@ -128,6 +163,7 @@ pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> {
}
}
+#[deprecated(since="0.3.0", note="signed_i64_hex does not optimize like expected and will be removed in the future. see DisplaySink instead.")]
pub fn signed_i64_hex(imm: i64) -> SignedHexDisplay<i64> {
SignedHexDisplay {
value: imm,
diff --git a/src/display/display_sink.rs b/src/display/display_sink.rs
new file mode 100644
index 0000000..9aa3c85
--- /dev/null
+++ b/src/display/display_sink.rs
@@ -0,0 +1,1017 @@
+use core::fmt;
+
+// `imp_x86.rs` has `asm!()` macros, and so is not portable at all.
+#[cfg(all(feature="alloc", target_arch = "x86_64"))]
+#[path="./display_sink/imp_x86.rs"]
+mod imp;
+
+// for other architectures, fall back on possibly-slower portable functions.
+#[cfg(all(feature="alloc", not(target_arch = "x86_64")))]
+#[path="./display_sink/imp_generic.rs"]
+mod imp;
+
+
+/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used
+/// in formatting instructions for two reasons:
+/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at
+/// the same time as collecting output itself.
+/// * `DisplaySink` implementations provide specialized functions for writing strings in
+/// circumstances where a simple "use `core::fmt`" might incur unwanted overhead.
+///
+/// ## spans
+///
+/// spans are out-of-band indicators for the meaning of data written to this sink. when a
+/// `span_start_<foo>` function is called, data written until a matching `span_end_<foo>` can be
+/// considered the text corresponding to `<foo>`.
+///
+/// spans are entered and exited in a FILO manner. implementations of `DisplaySink` are explicitly
+/// allowed to depend on this fact. functions writing to a `DisplaySink` must exit spans in reverse
+/// order to when they are entered. a function that has a call sequence like
+/// ```text
+/// sink.span_start_operand();
+/// sink.span_start_immediate();
+/// sink.span_end_operand();
+/// ```
+/// is in error.
+///
+/// spans are reported through the `span_start_*` and `span_end_*` families of functions to avoid
+/// constraining implementations into tracking current output offset (which may not be knowable) or
+/// span size (which may be knowable, but incur additional overhead to compute or track). if the
+/// task for a span is to simply emit VT100 color codes, for example, implementations avoid the
+/// overhead of tracking offsets.
+///
+/// default implementations of the `span_start_*` and `span_end_*` functions do nothing. a
+/// no-op `span_start_*` or `span_end_*` allows rustc to eliminate such calls at compile time for
+/// `DisplaySink` implementations that are uninterested in the corresponding span type.
+///
+/// # write helpers (`write_*`)
+///
+/// the `write_*` helpers on `DisplaySink` may be able to take advantage of constraints described in
+/// documentation here to better support writing some kinds of inputs than a fully-general solution
+/// (such as `core::fmt`) might be able to yield.
+///
+/// currently there are two motivating factors for `write_*` helpers:
+///
+/// instruction formatting often involves writing small but variable-size strings, such as register
+/// names, which is something of a pathological case for string appending as Rust currently exists:
+/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an
+/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference:
+/// <https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232>
+///
+/// there are similar papercuts around formatting integers as base-16 numbers, such as
+/// <https://github.com/rust-lang/rust/pull/122770>. in isolation and in most applications these are
+/// not a significant source of overhead. but for programs bounded on decoding and printing
+/// instructions, these can add up to significant overhead - on the order of 10-20% of total
+/// runtime.
+///
+/// ## example
+///
+/// a simple call sequence to `DisplaySink` might look something like:
+/// ```compile_fail
+/// sink.span_start_operand()
+/// sink.write_char('[')
+/// sink.span_start_register()
+/// sink.write_fixed_size("rbp")
+/// sink.span_end_register()
+/// sink.write_char(']')
+/// sink.span_end_operand()
+/// ```
+/// which writes the text `[rbp]`, telling sinks that the operand begins at `[`, ends after `]`,
+/// and `rbp` is a register in that operand.
+///
+/// ## extensibility
+///
+/// additional `span_{start,end}_*` helpers may be added over time - in the above example, one
+/// future addition might be to add a new `effective_address` span that is started before
+/// `register` and ended after `register`. for an operand like `[rbp]` the effective address span
+/// would exactly match a corresponding register span, but in more complicated scenarios like
+/// `[rsp + rdi * 4 + 0x50]` the effective address would be all of `rsp + rdi * 4 + 0x50`.
+///
+/// additional spans are expected to be added as needed. it is not yet clear how support for
+/// more architecture-specific concepts (such as itanium predicate registers) would be added,
+/// and so architecture-specific concepts may be expressed on `DisplaySink` if the
+/// need arises.
+///
+/// new `span_{start,end}_*` helpers will be defaulted as no-op. additions to this trait will be
+/// minor version bumps, so users should take care to not add custom functions starting with
+/// `span_start_` or `span_end_` to structs implementing `DisplaySink`.
+pub trait DisplaySink: fmt::Write {
+ #[inline(always)]
+ fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.write_str(s)
+ }
+
+ /// write a string to this sink that is less than 32 bytes. this is provided for optimization
+ /// opportunities when writing a variable-length string with known max size.
+ ///
+ /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than
+ /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
+ /// utf-8 string. this may corrupt Rust strings.
+ unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.write_str(s)
+ }
+ /// write a string to this sink that is less than 16 bytes. this is provided for optimization
+ /// opportunities when writing a variable-length string with known max size.
+ ///
+ /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than
+ /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
+ /// utf-8 string. this may corrupt Rust strings.
+ unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.write_str(s)
+ }
+ /// write a string to this sink that is less than 8 bytes. this is provided for optimization
+ /// opportunities when writing a variable-length string with known max size.
+ ///
+ /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than
+ /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
+ /// utf-8 string. this may corrupt Rust strings.
+ unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.write_str(s)
+ }
+
+ /// write a u8 to the output as a base-16 integer.
+ ///
+ /// this corresponds to the Rust format specifier `{:x}` - see [`std::fmt::LowerHex`] for more.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> {
+ write!(self, "{:x}", v)
+ }
+ /// write a u8 to the output as a base-16 integer with leading `0x`.
+ ///
+ /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> {
+ self.write_fixed_size("0x")?;
+ self.write_u8(v)
+ }
+ /// write an i8 to the output as a base-16 integer with leading `0x`, and leading `-` if the
+ /// value is negative.
+ ///
+ /// there is no matching `std` formatter, so some examples here:
+ /// ```text
+ /// sink.write_prefixed_i8(-0x60); // writes `-0x60` to the sink
+ /// sink.write_prefixed_i8(127); // writes `0x7f` to the sink
+ /// sink.write_prefixed_i8(-128); // writes `-0x80` to the sink
+ /// ```
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_i8(&mut self, v: i8) -> Result<(), core::fmt::Error> {
+ let v = if v < 0 {
+ self.write_char('-')?;
+ v.unsigned_abs()
+ } else {
+ v as u8
+ };
+ self.write_prefixed_u8(v)
+ }
+ /// write a u16 to the output as a base-16 integer.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> {
+ write!(self, "{:x}", v)
+ }
+ /// write a u16 to the output as a base-16 integer with leading `0x`.
+ ///
+ /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> {
+ self.write_fixed_size("0x")?;
+ self.write_u16(v)
+ }
+ /// write an i16 to the output as a base-16 integer with leading `0x`, and leading `-` if the
+ /// value is negative.
+ ///
+ /// there is no matching `std` formatter, so some examples here:
+ /// ```text
+ /// sink.write_prefixed_i16(-0x60); // writes `-0x60` to the sink
+ /// sink.write_prefixed_i16(127); // writes `0x7f` to the sink
+ /// sink.write_prefixed_i16(-128); // writes `-0x80` to the sink
+ /// ```
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_i16(&mut self, v: i16) -> Result<(), core::fmt::Error> {
+ let v = if v < 0 {
+ self.write_char('-')?;
+ v.unsigned_abs()
+ } else {
+ v as u16
+ };
+ self.write_prefixed_u16(v)
+ }
+ /// write a u32 to the output as a base-16 integer.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> {
+ write!(self, "{:x}", v)
+ }
+ /// write a u32 to the output as a base-16 integer with leading `0x`.
+ ///
+ /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> {
+ self.write_fixed_size("0x")?;
+ self.write_u32(v)
+ }
+ /// write an i32 to the output as a base-16 integer with leading `0x`, and leading `-` if the
+ /// value is negative.
+ ///
+ /// there is no matching `std` formatter, so some examples here:
+ /// ```text
+ /// sink.write_prefixed_i32(-0x60); // writes `-0x60` to the sink
+ /// sink.write_prefixed_i32(127); // writes `0x7f` to the sink
+ /// sink.write_prefixed_i32(-128); // writes `-0x80` to the sink
+ /// ```
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_i32(&mut self, v: i32) -> Result<(), core::fmt::Error> {
+ let v = if v < 0 {
+ self.write_char('-')?;
+ v.unsigned_abs()
+ } else {
+ v as u32
+ };
+ self.write_prefixed_u32(v)
+ }
+ /// write a u64 to the output as a base-16 integer.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> {
+ write!(self, "{:x}", v)
+ }
+ /// write a u64 to the output as a base-16 integer with leading `0x`.
+ ///
+ /// this corresponds to the Rust format specifier `{:#x}` - see [`std::fmt::LowerHex`] for more.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> {
+ self.write_fixed_size("0x")?;
+ self.write_u64(v)
+ }
+ /// write an i64 to the output as a base-16 integer with leading `0x`, and leading `-` if the
+ /// value is negative.
+ ///
+ /// there is no matching `std` formatter, so some examples here:
+ /// ```text
+ /// sink.write_prefixed_i64(-0x60); // writes `-0x60` to the sink
+ /// sink.write_prefixed_i64(127); // writes `0x7f` to the sink
+ /// sink.write_prefixed_i64(-128); // writes `-0x80` to the sink
+ /// ```
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ fn write_prefixed_i64(&mut self, v: i64) -> Result<(), core::fmt::Error> {
+ let v = if v < 0 {
+ self.write_char('-')?;
+ v.unsigned_abs()
+ } else {
+ v as u64
+ };
+ self.write_prefixed_u64(v)
+ }
+
+ /// enter a region inside which output corresponds to an immediate.
+ fn span_start_immediate(&mut self) { }
+ /// end a region where an immediate was written. see docs on [`DisplaySink`] for more.
+ fn span_end_immediate(&mut self) { }
+
+ /// enter a region inside which output corresponds to a register.
+ fn span_start_register(&mut self) { }
+ /// end a region where a register was written. see docs on [`DisplaySink`] for more.
+ fn span_end_register(&mut self) { }
+
+ /// enter a region inside which output corresponds to an opcode.
+ fn span_start_opcode(&mut self) { }
+ /// end a region where an opcode was written. see docs on [`DisplaySink`] for more.
+ fn span_end_opcode(&mut self) { }
+
+ /// enter a region inside which output corresponds to the program counter.
+ fn span_start_program_counter(&mut self) { }
+ /// end a region where the program counter was written. see docs on [`DisplaySink`] for more.
+ fn span_end_program_counter(&mut self) { }
+
+ /// enter a region inside which output corresponds to a number, such as a memory offset or
+ /// immediate.
+ fn span_start_number(&mut self) { }
+ /// end a region where a number was written. see docs on [`DisplaySink`] for more.
+ fn span_end_number(&mut self) { }
+
+ /// enter a region inside which output corresponds to an address. this is a best guess;
+ /// instructions like x86's `lea` may involve an "address" that is not, and arithmetic
+ /// instructions may operate on addresses held in registers.
+ ///
+ /// where possible, the presence of this span will be informed by ISA semantics - if an
+ /// instruction has a memory operand, the effective address calculation of that operand should
+ /// be in an address span.
+ fn span_start_address(&mut self) { }
+ /// end a region where an address was written. the specifics of an "address" are ambiguous and
+ /// best-effort; see [`DisplaySink::span_start_address`] for more about this. otherwise, see
+ /// docs on [`DisplaySink`] for more about spans.
+ fn span_end_address(&mut self) { }
+
+ /// enter a region inside which output corresponds to a function address, or expression
+ /// evaluating to a function address. this is a best guess; instructions like `call` may call
+ /// to a non-function address, `jmp` may jump to a function (as with tail calls), function
+ /// addresses may be computed via table lookup without semantic hints.
+ ///
+ /// where possible, the presence of this span will be informed by ISA semantics - if an
+ /// instruction is like a "call", an address operand should be a `function` span. if other
+ /// instructions can be expected to handle subroutine starting addresses purely from ISA
+ /// semantics, address operand(s) should be in a `function` span.
+ fn span_start_function_expr(&mut self) { }
+ /// end a region where a function address expression was written. the specifics of a "function
+ /// address" are ambiguous and best-effort; see [`DisplaySink::span_start_function_expr`] for more
+ /// about this. otherwise, see docs on [`DisplaySink`] for more about spans.
+ fn span_end_function_expr(&mut self) { }
+}
+
+/// `FmtSink` can be used to adapt any `fmt::Write`-implementing type into a `DisplaySink` to
+/// format an instruction while discarding all span information at zero cost.
+pub struct FmtSink<'a, T: fmt::Write> {
+ out: &'a mut T,
+}
+
+impl<'a, T: fmt::Write> FmtSink<'a, T> {
+ pub fn new(f: &'a mut T) -> Self {
+ Self { out: f }
+ }
+
+ pub fn inner_ref(&self) -> &T {
+ &self.out
+ }
+}
+
+/// blanket impl that discards all span information, forwards writes to the underlying `fmt::Write`
+/// type.
+impl<'a, T: fmt::Write> DisplaySink for FmtSink<'a, T> { }
+
+impl<'a, T: fmt::Write> fmt::Write for FmtSink<'a, T> {
+ fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.out.write_str(s)
+ }
+ fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
+ self.out.write_char(c)
+ }
+ fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> {
+ self.out.write_fmt(f)
+ }
+}
+
+#[cfg(feature = "alloc")]
+mod instruction_text_sink {
+ use core::fmt;
+
+ use super::{DisplaySink, u8_to_hex};
+
+ /// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the
+ /// disassemblers, do not use this struct. do not depend on this struct existing. this struct is
+ /// not stable. this struct is not safe for general use. if you use this struct you and your
+ /// program will be eaten by gremlins.
+ ///
+ /// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct
+ /// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction.
+ /// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are
+ /// not bounds-checked, and the buffer is never grown.
+ ///
+ /// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is
+ /// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be
+ /// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and*
+ /// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would
+ /// be written to without being rewound first.
+ ///
+ /// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect
+ /// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the
+ /// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of
+ /// ensuring that instruction formatting impls this buffer is passed to are appropriately sized.
+ ///
+ /// this is intended to be hidden in docs. if you see this in docs, it's a bug.
+ #[doc(hidden)]
+ pub struct InstructionTextSink<'buf> {
+ buf: &'buf mut alloc::string::String
+ }
+
+ impl<'buf> InstructionTextSink<'buf> {
+ /// create an `InstructionTextSink` using the provided buffer for storage.
+ ///
+ /// SAFETY: callers must ensure that this sink will never have more content written than
+ /// this buffer can hold. while the buffer may appear growable, `write_*` methods here may
+ /// *bypass bounds checks* and so will never trigger the buffer to grow. writing more data
+ /// than the buffer's size when provided to `new` will cause out-of-bounds writes and
+ /// memory corruption.
+ pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self {
+ Self { buf }
+ }
+ }
+
+ impl<'buf> fmt::Write for InstructionTextSink<'buf> {
+ fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ self.buf.write_str(s)
+ }
+ fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + 1 {
+ panic!("InstructionTextSink::write_char would overflow output");
+ }
+ }
+
+ // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()`
+ // is valid for writing, but may be uninitialized.
+ //
+ // this function is essentially equivalent to `Vec::push` specialized for the case that
+ // `len < buf.capacity()`:
+ // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006
+ unsafe {
+ let underlying = self.buf.as_mut_vec();
+ // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to
+ // write single ASCII characters. this is wrong in the general case, but `write_char`
+ // here is not going to be used in the general case.
+ if cfg!(debug_assertions) {
+ if c > '\x7f' {
+ panic!("InstructionTextSink::write_char would truncate output");
+ }
+ }
+ let to_push = c as u8;
+ // `ptr::write` here because `underlying.add(underlying.len())` may not point to an
+ // initialized value, which would mean that turning that pointer into a `&mut u8` to
+ // store through would be UB. `ptr::write` avoids taking the mut ref.
+ underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push);
+ // we have initialized all (one) bytes that `set_len` is increasing the length to
+ // include.
+ underlying.set_len(underlying.len() + 1);
+ }
+ Ok(())
+ }
+ }
+
+ impl<'buf> DisplaySink for InstructionTextSink<'buf> {
+ #[inline(always)]
+ fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + s.len() {
+ panic!("InstructionTextSink::write_fixed_size would overflow output");
+ }
+ }
+
+ // Safety: we are appending only valid utf8 strings to `self.buf`, as `s` is known to
+ // be valid utf8
+ let buf = unsafe { self.buf.as_mut_vec() };
+ let new_bytes = s.as_bytes();
+
+ if new_bytes.len() == 0 {
+ return Ok(());
+ }
+
+ unsafe {
+ let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+
+ // this used to be enough to bamboozle llvm away from
+ // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
+ // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped
+ // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`
+ // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this
+ // unrolls into some kind of appropriate series of `mov`.
+ dest.offset(0 as isize).write(new_bytes[0]);
+ for i in 1..new_bytes.len() {
+ dest.offset(i as isize).write(new_bytes[i]);
+ }
+
+ buf.set_len(buf.len() + new_bytes.len());
+ }
+
+ Ok(())
+ }
+ unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + s.len() {
+ panic!("InstructionTextSink::write_lt_32 would overflow output");
+ }
+ }
+
+ // Safety: `new` requires callers promise there is enough space to hold `s`.
+ unsafe {
+ super::imp::append_string_lt_32_unchecked(&mut self.buf, s);
+ }
+
+ Ok(())
+ }
+ unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + s.len() {
+ panic!("InstructionTextSink::write_lt_16 would overflow output");
+ }
+ }
+
+ // Safety: `new` requires callers promise there is enough space to hold `s`.
+ unsafe {
+ super::imp::append_string_lt_16_unchecked(&mut self.buf, s);
+ }
+
+ Ok(())
+ }
+ unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + s.len() {
+ panic!("InstructionTextSink::write_lt_8 would overflow output");
+ }
+ }
+
+ // Safety: `new` requires callers promise there is enough space to hold `s`.
+ unsafe {
+ super::imp::append_string_lt_8_unchecked(&mut self.buf, s);
+ }
+
+ Ok(())
+ }
+ /// write a u8 to the output as a base-16 integer.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ #[inline(always)]
+ fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> {
+ if v == 0 {
+ return self.write_fixed_size("0");
+ }
+ // we can fairly easily predict the size of a formatted string here with lzcnt, which also
+ // means we can write directly into the correct offsets of the output string.
+ let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize;
+
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + printed_size {
+ panic!("InstructionTextSink::write_u8 would overflow output");
+ }
+ }
+
+ // Safety: the only bytes appended to `self.buf` below come from `u8_to_hex`, which is
+ // expected to yield ASCII hex digits, so the string remains valid utf8
+ let buf = unsafe { self.buf.as_mut_vec() };
+ let new_len = buf.len() + printed_size;
+
+ // Safety: there is no way to exit this function without initializing all bytes up to
+ // `new_len`
+ unsafe {
+ buf.set_len(new_len);
+ }
+ // Safety: `new()` requires callers promise there is space through to `new_len`
+ let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
+
+ loop {
+ let digit = v % 16;
+ let c = u8_to_hex(digit as u8);
+ // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
+ // to a location valid for writing.
+ unsafe {
+ p = p.offset(-1);
+ p.write(c);
+ }
+ v = v / 16;
+ if v == 0 {
+ break;
+ }
+ }
+
+ Ok(())
+ }
+ /// write a u16 to the output as a base-16 integer.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ #[inline(always)]
+ fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> {
+ if v == 0 {
+ return self.write_fixed_size("0");
+ }
+
+ // we can fairly easily predict the size of a formatted string here with lzcnt, which also
+ // means we can write directly into the correct offsets of the output string.
+ let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize;
+
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + printed_size {
+ panic!("InstructionTextSink::write_u16 would overflow output");
+ }
+ }
+
+ // Safety: the only bytes appended to `self.buf` below come from `u8_to_hex`, which is
+ // expected to yield ASCII hex digits, so the string remains valid utf8
+ let buf = unsafe { self.buf.as_mut_vec() };
+ let new_len = buf.len() + printed_size;
+
+ // Safety: there is no way to exit this function without initializing all bytes up to
+ // `new_len`
+ unsafe {
+ buf.set_len(new_len);
+ }
+ // Safety: `new()` requires callers promise there is space through to `new_len`
+ let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
+
+ loop {
+ let digit = v % 16;
+ let c = u8_to_hex(digit as u8);
+ // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
+ // to a location valid for writing.
+ unsafe {
+ p = p.offset(-1);
+ p.write(c);
+ }
+ v = v / 16;
+ if v == 0 {
+ break;
+ }
+ }
+
+ Ok(())
+ }
+ /// write a u32 to the output as a base-16 integer.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ #[inline(always)]
+ fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> {
+ if v == 0 {
+ return self.write_fixed_size("0");
+ }
+
+ // we can fairly easily predict the size of a formatted string here with lzcnt, which also
+ // means we can write directly into the correct offsets of the output string.
+ let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize;
+
+ if cfg!(debug_assertions) {
+ if self.buf.capacity() < self.buf.len() + printed_size {
+ panic!("InstructionTextSink::write_u32 would overflow output");
+ }
+ }
+
+ // Safety: the only bytes appended to `self.buf` below come from `u8_to_hex`, which is
+ // expected to yield ASCII hex digits, so the string remains valid utf8
+ let buf = unsafe { self.buf.as_mut_vec() };
+ let new_len = buf.len() + printed_size;
+
+ // Safety: there is no way to exit this function without initializing all bytes up to
+ // `new_len`
+ unsafe {
+ buf.set_len(new_len);
+ }
+ // Safety: `new()` requires callers promise there is space through to `new_len`
+ let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
+
+ loop {
+ let digit = v % 16;
+ let c = u8_to_hex(digit as u8);
+ // Safety: `p` will not move before `buf`'s length at function entry, so `p` points
+ // to a location valid for writing.
+ unsafe {
+ p = p.offset(-1);
+ p.write(c);
+ }
+ v = v / 16;
+ if v == 0 {
+ break;
+ }
+ }
+
+ Ok(())
+ }
+ /// write a u64 to the output as a base-16 integer.
+ ///
+ /// digits are stored most-significant first and without leading zeros; zero is emitted as the
+ /// single digit `0`.
+ ///
+ /// this is provided for optimization opportunities when the formatted integer can be written
+ /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+ /// followup step)
+ #[inline(always)]
+ fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> {
+     if v == 0 {
+         return self.write_fixed_size("0");
+     }
+
+     // one hex digit covers four bits, so the digit count is the number of significant bits
+     // (found via lzcnt) divided by four, rounded up. knowing it up front lets every digit be
+     // stored directly at its final offset in the output.
+     let significant_bits = 64 - v.leading_zeros();
+     let printed_size = ((significant_bits + 3) / 4) as usize;
+
+     if cfg!(debug_assertions) && self.buf.capacity() < self.buf.len() + printed_size {
+         panic!("InstructionTextSink::write_u64 would overflow output");
+     }
+
+     // Safety: the only bytes appended are those returned by `u8_to_hex`, which are relied on
+     // to keep `self.buf` valid utf8.
+     let bytes = unsafe { self.buf.as_mut_vec() };
+     let end = bytes.len() + printed_size;
+
+     // Safety: every byte between the old length and `end` is written by the loop below
+     // before this function can return.
+     unsafe {
+         bytes.set_len(end);
+     }
+     // Safety: `new()` requires callers promise there is space through to `end`.
+     let mut cursor = unsafe { bytes.as_mut_ptr().add(end) };
+
+     // `v` is nonzero on entry, so this runs once per digit, least significant digit first,
+     // filling the reserved region from its end back toward the old length.
+     while v != 0 {
+         let c = u8_to_hex((v & 0xf) as u8);
+         // Safety: `cursor` steps back exactly `printed_size` times in total, so it never
+         // moves before the buffer's length at function entry.
+         unsafe {
+             cursor = cursor.sub(1);
+             cursor.write(c);
+         }
+         v >>= 4;
+     }
+
+     Ok(())
+ }
+ }
+}
+#[cfg(feature = "alloc")]
+pub use instruction_text_sink::InstructionTextSink;
+
+
+#[cfg(feature = "alloc")]
+use crate::display::u8_to_hex;
+
+/// a [`DisplaySink`] implementation for plain `String`s, which exists to support somewhat more
+/// performant buffering of the kinds of strings `yaxpeax-x86` uses in formatting instructions.
+///
+/// unlike a sink with preallocated storage, every method here grows the string as needed before
+/// writing into it.
+///
+/// span information is discarded at zero cost.
+#[cfg(feature = "alloc")]
+impl DisplaySink for alloc::string::String {
+    /// append `s`, which should be a string whose size is fixed at the call site.
+    #[inline(always)]
+    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
+        self.reserve(s.len());
+
+        let new_bytes = s.as_bytes();
+        if new_bytes.is_empty() {
+            return Ok(());
+        }
+
+        // Safety: the only bytes appended are those of `s`, which is known to be valid utf8.
+        let buf = unsafe { self.as_mut_vec() };
+        let old_len = buf.len();
+
+        // Safety: space for all of `new_bytes` was reserved above.
+        unsafe {
+            let dest = buf.as_mut_ptr().add(old_len);
+
+            // this used to be enough to bamboozle llvm away from
+            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
+            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped
+            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`
+            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this
+            // unrolls into some kind of appropriate series of `mov`.
+            dest.offset(0 as isize).write(new_bytes[0]);
+            for i in 1..new_bytes.len() {
+                dest.offset(i as isize).write(new_bytes[i]);
+            }
+
+            // Safety: all bytes from where `self` initially ended through the end of
+            // `new_bytes` have been initialized.
+            buf.set_len(old_len + new_bytes.len());
+        }
+
+        Ok(())
+    }
+    /// append `s` through the copy routine specialized for strings shorter than 32 bytes.
+    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
+        // growing first is what satisfies the capacity requirement of the unchecked append.
+        self.reserve(s.len());
+
+        // Safety: enough space for `s` was reserved just above.
+        unsafe { imp::append_string_lt_32_unchecked(self, s) };
+
+        Ok(())
+    }
+    /// append `s` through the copy routine specialized for strings shorter than 16 bytes.
+    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
+        // growing first is what satisfies the capacity requirement of the unchecked append.
+        self.reserve(s.len());
+
+        // Safety: enough space for `s` was reserved just above.
+        unsafe { imp::append_string_lt_16_unchecked(self, s) };
+
+        Ok(())
+    }
+    /// append `s` through the copy routine specialized for strings shorter than 8 bytes.
+    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
+        // growing first is what satisfies the capacity requirement of the unchecked append.
+        self.reserve(s.len());
+
+        // Safety: enough space for `s` was reserved just above.
+        unsafe { imp::append_string_lt_8_unchecked(self, s) };
+
+        Ok(())
+    }
+    /// write a u8 to the output as a base-16 integer.
+    ///
+    /// this is provided for optimization opportunities when the formatted integer can be written
+    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+    /// followup step)
+    #[inline(always)]
+    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> {
+        if v == 0 {
+            return self.write_fixed_size("0");
+        }
+
+        // one hex digit covers four bits, so the digit count is the number of significant bits
+        // (found via lzcnt) divided by four, rounded up. knowing it up front lets every digit
+        // be stored directly at its final offset in the output.
+        let significant_bits = 8 - v.leading_zeros();
+        let printed_size = ((significant_bits + 3) / 4) as usize;
+
+        self.reserve(printed_size);
+
+        // Safety: the only bytes appended are those returned by `u8_to_hex`, which are relied
+        // on to keep the string valid utf8.
+        let bytes = unsafe { self.as_mut_vec() };
+        let end = bytes.len() + printed_size;
+
+        // Safety: every byte between the old length and `end` is written by the loop below
+        // before this function can return.
+        unsafe {
+            bytes.set_len(end);
+        }
+        // Safety: space through to `end` was reserved by the `reserve` call above.
+        let mut cursor = unsafe { bytes.as_mut_ptr().add(end) };
+
+        // `v` is nonzero on entry, so this runs once per digit, least significant digit first,
+        // filling the reserved region from its end back toward the old length.
+        while v != 0 {
+            let c = u8_to_hex(v & 0xf);
+            // Safety: `cursor` steps back exactly `printed_size` times in total, so it never
+            // moves before the string's length at function entry.
+            unsafe {
+                cursor = cursor.sub(1);
+                cursor.write(c);
+            }
+            v >>= 4;
+        }
+
+        Ok(())
+    }
+    /// write a u16 to the output as a base-16 integer.
+    ///
+    /// this is provided for optimization opportunities when the formatted integer can be written
+    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+    /// followup step)
+    #[inline(always)]
+    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> {
+        if v == 0 {
+            return self.write_fixed_size("0");
+        }
+
+        // digit count: significant bits divided by four, rounded up.
+        let significant_bits = 16 - v.leading_zeros();
+        let printed_size = ((significant_bits + 3) / 4) as usize;
+
+        self.reserve(printed_size);
+
+        // Safety: the only bytes appended are those returned by `u8_to_hex`, which are relied
+        // on to keep the string valid utf8.
+        let bytes = unsafe { self.as_mut_vec() };
+        let end = bytes.len() + printed_size;
+
+        // Safety: every byte between the old length and `end` is written by the loop below
+        // before this function can return.
+        unsafe {
+            bytes.set_len(end);
+        }
+        // Safety: space through to `end` was reserved by the `reserve` call above.
+        let mut cursor = unsafe { bytes.as_mut_ptr().add(end) };
+
+        // `v` is nonzero on entry, so this runs once per digit, least significant digit first.
+        while v != 0 {
+            let c = u8_to_hex((v & 0xf) as u8);
+            // Safety: `cursor` steps back exactly `printed_size` times in total, so it never
+            // moves before the string's length at function entry.
+            unsafe {
+                cursor = cursor.sub(1);
+                cursor.write(c);
+            }
+            v >>= 4;
+        }
+
+        Ok(())
+    }
+    /// write a u32 to the output as a base-16 integer.
+    ///
+    /// this is provided for optimization opportunities when the formatted integer can be written
+    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+    /// followup step)
+    #[inline(always)]
+    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> {
+        if v == 0 {
+            return self.write_fixed_size("0");
+        }
+
+        // digit count: significant bits divided by four, rounded up.
+        let significant_bits = 32 - v.leading_zeros();
+        let printed_size = ((significant_bits + 3) / 4) as usize;
+
+        self.reserve(printed_size);
+
+        // Safety: the only bytes appended are those returned by `u8_to_hex`, which are relied
+        // on to keep the string valid utf8.
+        let bytes = unsafe { self.as_mut_vec() };
+        let end = bytes.len() + printed_size;
+
+        // Safety: every byte between the old length and `end` is written by the loop below
+        // before this function can return.
+        unsafe {
+            bytes.set_len(end);
+        }
+        // Safety: space through to `end` was reserved by the `reserve` call above.
+        let mut cursor = unsafe { bytes.as_mut_ptr().add(end) };
+
+        // `v` is nonzero on entry, so this runs once per digit, least significant digit first.
+        while v != 0 {
+            let c = u8_to_hex((v & 0xf) as u8);
+            // Safety: `cursor` steps back exactly `printed_size` times in total, so it never
+            // moves before the string's length at function entry.
+            unsafe {
+                cursor = cursor.sub(1);
+                cursor.write(c);
+            }
+            v >>= 4;
+        }
+
+        Ok(())
+    }
+    /// write a u64 to the output as a base-16 integer.
+    ///
+    /// this is provided for optimization opportunities when the formatted integer can be written
+    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
+    /// followup step)
+    #[inline(always)]
+    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> {
+        if v == 0 {
+            return self.write_fixed_size("0");
+        }
+
+        // digit count: significant bits divided by four, rounded up.
+        let significant_bits = 64 - v.leading_zeros();
+        let printed_size = ((significant_bits + 3) / 4) as usize;
+
+        self.reserve(printed_size);
+
+        // Safety: the only bytes appended are those returned by `u8_to_hex`, which are relied
+        // on to keep the string valid utf8.
+        let bytes = unsafe { self.as_mut_vec() };
+        let end = bytes.len() + printed_size;
+
+        // Safety: every byte between the old length and `end` is written by the loop below
+        // before this function can return.
+        unsafe {
+            bytes.set_len(end);
+        }
+        // Safety: space through to `end` was reserved by the `reserve` call above.
+        let mut cursor = unsafe { bytes.as_mut_ptr().add(end) };
+
+        // `v` is nonzero on entry, so this runs once per digit, least significant digit first.
+        while v != 0 {
+            let c = u8_to_hex((v & 0xf) as u8);
+            // Safety: `cursor` steps back exactly `printed_size` times in total, so it never
+            // moves before the string's length at function entry.
+            unsafe {
+                cursor = cursor.sub(1);
+                cursor.write(c);
+            }
+            v >>= 4;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/display/display_sink/imp_generic.rs b/src/display/display_sink/imp_generic.rs
new file mode 100644
index 0000000..8819243
--- /dev/null
+++ b/src/display/display_sink/imp_generic.rs
@@ -0,0 +1,26 @@
+/// portable fallback for appending a string of fewer than 8 bytes to `buf`. it has the same
+/// contract as the architecture-specific versions, but simply defers to `String`'s own append.
+///
+/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`.
+#[inline(always)]
+pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) {
+    *buf += data;
+}
+
+/// portable fallback for appending a string of fewer than 16 bytes to `buf`. it has the same
+/// contract as the architecture-specific versions, but simply defers to `String`'s own append.
+///
+/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`.
+#[inline(always)]
+pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) {
+    *buf += data;
+}
+
+/// portable fallback for appending a string of fewer than 32 bytes to `buf`. it has the same
+/// contract as the architecture-specific versions, but simply defers to `String`'s own append.
+///
+/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()`.
+#[inline(always)]
+pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) {
+    *buf += data;
+}
diff --git a/src/display/display_sink/imp_x86.rs b/src/display/display_sink/imp_x86.rs
new file mode 100644
index 0000000..902ea69
--- /dev/null
+++ b/src/display/display_sink/imp_x86.rs
@@ -0,0 +1,187 @@
+//! `imp_x86` has specialized copies to append short strings to strings. buffer sizing must be
+//! handled by callers, in all cases.
+//!
+//! the structure of all implementations here is, essentially, to take the size of the data to
+//! append and execute a copy for each bit set in that size, from highest to lowest. some bits are
+//! simply never checked if the input is promised to never be that large - if a string to append is
+//! only 0..7 bytes long, it is sufficient to only look at the low three bits to copy all bytes.
+//!
+//! in this way, it is slightly more efficient to right-size which append function is used, if the
+//! maximum size of input strings can be bounded well. if the maximum size of input strings cannot
+//! be bounded, you shouldn't be using these functions.
+
+/// append `data` to `buf`, assuming `data` is less than 8 bytes and that `buf` has enough space
+/// remaining to hold all bytes in `data`.
+///
+/// the copy runs back to front: a 4-byte move if at least four bytes remain, then a 2-byte
+/// move, then a 1-byte move, each taking the last not-yet-copied bytes of `data`.
+///
+/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()` and that
+/// `data.len() < 8`. `buf`'s length is advanced by `data.len()` before copying, and at most
+/// 4 + 2 + 1 bytes are copied, so longer input would leave bytes of `buf` unwritten.
+#[inline(always)]
+pub unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) {
+    // Safety: the only bytes appended to `buf` are those of `data`, which is known to be valid
+    // utf8.
+    let buf = unsafe { buf.as_mut_vec() };
+    let new_bytes = data.as_bytes();
+
+    unsafe {
+        let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+        let src = new_bytes.as_ptr();
+
+        let rem = new_bytes.len() as isize;
+
+        // set_len early because there is no way to avoid the following asm!() writing that
+        // same number of bytes into buf
+        buf.set_len(buf.len() + new_bytes.len());
+
+        // local labels are numbered 4..=7 on purpose: on x86 with intel syntax, numeric labels
+        // made up only of the digits 0 and 1 (such as `10` or `11`) can be misparsed as binary
+        // literals, so they must not be used.
+        core::arch::asm!(
+            // four or more bytes remain: move the last four and drop them from `rem`.
+            "4:",
+            "cmp {rem:e}, 4",
+            "jb 5f",
+            "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
+            "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
+            "sub {rem:e}, 4",
+            "jz 7f",
+            // two or more bytes remain: move the last two and drop them from `rem`.
+            "5:",
+            "cmp {rem:e}, 2",
+            "jb 6f",
+            "mov {buf:x}, word ptr [{src} + {rem} - 2]",
+            "mov word ptr [{dest} + {rem} - 2], {buf:x}",
+            "sub {rem:e}, 2",
+            "jz 7f",
+            // at most one byte remains: move it if present.
+            "6:",
+            "cmp {rem:e}, 1",
+            "jb 7f",
+            "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
+            "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
+            // done.
+            "7:",
+            src = in(reg) src,
+            dest = in(reg) dest,
+            rem = inout(reg) rem => _,
+            buf = out(reg) _,
+            options(nostack),
+        );
+    }
+}
+
+/// append `data` to `buf`, assuming `data` is less than 16 bytes and that `buf` has enough space
+/// remaining to hold all bytes in `data`.
+///
+/// the copy runs back to front: an 8-byte move if at least eight bytes remain, then a 4-byte
+/// move, a 2-byte move, and a 1-byte move, each taking the last not-yet-copied bytes of `data`.
+///
+/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()` and that
+/// `data.len() < 16`. `buf`'s length is advanced by `data.len()` before copying, and at most
+/// 8 + 4 + 2 + 1 bytes are copied, so longer input would leave bytes of `buf` unwritten.
+#[inline(always)]
+pub unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) {
+    // Safety: the only bytes appended to `buf` are those of `data`, which is known to be valid
+    // utf8.
+    let buf = unsafe { buf.as_mut_vec() };
+    let new_bytes = data.as_bytes();
+
+    unsafe {
+        let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+        let src = new_bytes.as_ptr();
+
+        let rem = new_bytes.len() as isize;
+
+        // set_len early because there is no way to avoid the following asm!() writing that
+        // same number of bytes into buf
+        buf.set_len(buf.len() + new_bytes.len());
+
+        // local labels are numbered 3..=7 on purpose: on x86 with intel syntax, numeric labels
+        // made up only of the digits 0 and 1 (such as `10` or `11`) can be misparsed as binary
+        // literals, so they must not be used.
+        core::arch::asm!(
+            // eight or more bytes remain: move the last eight and drop them from `rem`.
+            "3:",
+            "cmp {rem:e}, 8",
+            "jb 4f",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
+            "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
+            "sub {rem:e}, 8",
+            "jz 7f",
+            // four or more bytes remain: move the last four and drop them from `rem`.
+            "4:",
+            "cmp {rem:e}, 4",
+            "jb 5f",
+            "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
+            "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
+            "sub {rem:e}, 4",
+            "jz 7f",
+            // two or more bytes remain: move the last two and drop them from `rem`.
+            "5:",
+            "cmp {rem:e}, 2",
+            "jb 6f",
+            "mov {buf:x}, word ptr [{src} + {rem} - 2]",
+            "mov word ptr [{dest} + {rem} - 2], {buf:x}",
+            "sub {rem:e}, 2",
+            "jz 7f",
+            // at most one byte remains: move it if present.
+            "6:",
+            "cmp {rem:e}, 1",
+            "jb 7f",
+            "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
+            "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
+            // done.
+            "7:",
+            src = in(reg) src,
+            dest = in(reg) dest,
+            rem = inout(reg) rem => _,
+            buf = out(reg) _,
+            options(nostack),
+        );
+    }
+}
+
+/// append `data` to `buf`, assuming `data` is less than 32 bytes and that `buf` has enough space
+/// remaining to hold all bytes in `data`.
+///
+/// the copy runs back to front: a 16-byte move (as two 8-byte moves) if at least sixteen bytes
+/// remain, then an 8-byte move, a 4-byte move, a 2-byte move, and a 1-byte move, each taking
+/// the last not-yet-copied bytes of `data`.
+///
+/// Safety: callers must ensure that `buf.capacity() - buf.len() >= data.len()` and that
+/// `data.len() < 32`. `buf`'s length is advanced by `data.len()` before copying, and at most
+/// 16 + 8 + 4 + 2 + 1 bytes are copied, so longer input would leave bytes of `buf` unwritten.
+#[inline(always)]
+pub unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) {
+    // Safety: the only bytes appended to `buf` are those of `data`, which is known to be valid
+    // utf8.
+    let buf = unsafe { buf.as_mut_vec() };
+    let new_bytes = data.as_bytes();
+
+    unsafe {
+        let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+        let src = new_bytes.as_ptr();
+
+        let rem = new_bytes.len() as isize;
+
+        // set_len early because there is no way to avoid the following asm!() writing that
+        // same number of bytes into buf
+        buf.set_len(buf.len() + new_bytes.len());
+
+        // local labels are numbered 2..=7 on purpose: on x86 with intel syntax, numeric labels
+        // made up only of the digits 0 and 1 (such as `10` or `11`) can be misparsed as binary
+        // literals, so they must not be used.
+        core::arch::asm!(
+            // sixteen or more bytes remain: move the last sixteen as two qwords and drop them
+            // from `rem`.
+            "2:",
+            "cmp {rem:e}, 16",
+            "jb 3f",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
+            "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
+            "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
+            "sub {rem:e}, 16",
+            "jz 7f",
+            // eight or more bytes remain: move the last eight and drop them from `rem`.
+            "3:",
+            "cmp {rem:e}, 8",
+            "jb 4f",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
+            "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
+            "sub {rem:e}, 8",
+            "jz 7f",
+            // four or more bytes remain: move the last four and drop them from `rem`.
+            "4:",
+            "cmp {rem:e}, 4",
+            "jb 5f",
+            "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
+            "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
+            "sub {rem:e}, 4",
+            "jz 7f",
+            // two or more bytes remain: move the last two and drop them from `rem`.
+            "5:",
+            "cmp {rem:e}, 2",
+            "jb 6f",
+            "mov {buf:x}, word ptr [{src} + {rem} - 2]",
+            "mov word ptr [{dest} + {rem} - 2], {buf:x}",
+            "sub {rem:e}, 2",
+            "jz 7f",
+            // at most one byte remains: move it if present.
+            "6:",
+            "cmp {rem:e}, 1",
+            "jb 7f",
+            "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
+            "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
+            // done.
+            "7:",
+            src = in(reg) src,
+            dest = in(reg) dest,
+            rem = inout(reg) rem => _,
+            buf = out(reg) _,
+            options(nostack),
+        );
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7aaba21..a0c237b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,12 +1,14 @@
#![no_std]
#![doc = include_str!("../README.md")]
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
use core::fmt::{self, Debug, Display};
use core::hash::Hash;
#[cfg(feature="use-serde")]
#[macro_use] extern crate serde_derive;
-
#[cfg(feature="use-serde")]
use serde::{Serialize, Deserialize};
@@ -18,19 +20,25 @@ pub use address::AddrParse;
pub mod annotation;
+#[deprecated(since="0.3.0", note="yaxpeax_arch::color conflates output mechanism and styling, leaving it brittle and overly-restrictive. see `yaxpeax_arch::color_new`, which will replace `color` in a future version.")]
mod color;
+#[allow(deprecated)] // allow exporting the deprecated items here to not break downstreams even further...
pub use color::{Colorize, NoColors, YaxColors};
-
-#[cfg(feature="colors")]
-pub use color::ColorSettings;
+#[cfg(feature="color-new")]
+pub mod color_new;
pub mod display;
+
mod reader;
pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be};
+pub mod safer_unchecked;
+
+pub mod testkit;
+
/// the minimum set of errors a `yaxpeax-arch` disassembler may produce.
///
-/// it is permissible for an implementor of `DecodeError` to have items that return `false` for
+/// it is permissible for an implementer of `DecodeError` to have items that return `false` for
/// all these functions; decoders are permitted to error in way that `yaxpeax-arch` does not know
/// about.
pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static {
@@ -42,12 +50,12 @@ pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static {
/// generally indicate an issue with the instruction itself. this is in contrast to one
/// specific operand being invalid for the instruction, or some other issue to do with decoding
/// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy
- /// and left as best-effort for decoder implementors.
+ /// and left as best-effort for decoder implementers.
fn bad_opcode(&self) -> bool;
/// did the decoder error because an operand of the instruction to decode is invalid?
///
/// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on
- /// the part of implementors.
+ /// the part of implementers.
fn bad_operand(&self) -> bool;
/// a human-friendly description of this decode error.
fn description(&self) -> &'static str;
@@ -127,6 +135,7 @@ impl DecodeError for StandardPartialDecoderError {
}
}
+/*
#[derive(Copy, Clone)]
struct NoDescription {}
@@ -135,6 +144,7 @@ impl fmt::Display for NoDescription {
Ok(())
}
}
+*/
/// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are
 /// the architecture-defined [`DecodeError`] implementation.
@@ -152,7 +162,7 @@ pub trait Decoder<A: Arch + ?Sized> {
/// SAFETY:
///
/// while `inst` MUST be left in a state that does not violate Rust's safety guarantees,
- /// implementors are NOT obligated to leave `inst` in a semantically meaningful state if
+ /// implementers are NOT obligated to leave `inst` in a semantically meaningful state if
/// decoding fails. if `decode_into` returns an error, callers may find contradictory and
/// useless information in `inst`, as well as *stale data* from whatever was passed in.
fn decode_into<T: Reader<A::Address, A::Word>>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>;
@@ -227,6 +237,8 @@ pub trait Instruction {
fn well_defined(&self) -> bool;
}
+#[allow(deprecated)]
+#[deprecated(since="0.3.0", note="ShowContextual ties YaxColors and fmt::Write in a way that only sometimes composes. simultaneously, it is too generic on Ctx, making it difficult to implement and use. it will be revisited in the future.")]
pub trait ShowContextual<Addr, Ctx: ?Sized, T: fmt::Write, Y: YaxColors> {
fn contextualize(&self, colors: &Y, address: Addr, context: Option<&Ctx>, out: &mut T) -> fmt::Result;
}
diff --git a/src/reader.rs b/src/reader.rs
index 028d835..8b68486 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -24,8 +24,9 @@ pub enum ReadError {
/// isn't a multiple of 8 bits, `U8Reader` won't be sufficient.
pub trait Reader<Address, Item> {
fn next(&mut self) -> Result<Item, ReadError>;
- /// read `buf`-many items from this reader in bulk. if `Reader` cannot read `buf`-many items,
- /// return `ReadError::ExhaustedInput`.
+ /// read `buf`-many items from this reader in bulk.
+ ///
+ /// if `Reader` cannot read `buf`-many items, return `ReadError::ExhaustedInput`.
fn next_n(&mut self, buf: &mut [Item]) -> Result<(), ReadError>;
/// mark the current position as where to measure `offset` against.
fn mark(&mut self);
diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs
new file mode 100644
index 0000000..b556a6f
--- /dev/null
+++ b/src/safer_unchecked.rs
@@ -0,0 +1,40 @@
+//! tools to help validate correct use of `unchecked` functions.
+//!
+//! these `kinda_unchecked` functions will use equivalent implementations that panic when
+//! invariants are violated when the `debug_assertions` config is present, but use the
+//! corresponding `*_unchecked` otherwise.
+//!
+//! for example, `GetSaferUnchecked` uses a normal index when debug assertions are enabled, but
+//! `.get_unchecked()` otherwise. this means that tests and even fuzzing can be made to exercise
+//! panic-on-error cases as desired.
+
+use core::slice::SliceIndex;
+
+/// indexing for `[T]`-like containers that is intended to be bounds-checked only when debug
+/// assertions are enabled.
+pub trait GetSaferUnchecked<T> {
+ /// get the element or subslice selected by `index`.
+ ///
+ /// Safety: this may be implemented with an unchecked access, so callers must treat it like
+ /// `get_unchecked` and only pass an `index` that is in bounds.
+ unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
+ where
+ I: SliceIndex<[T]>;
+}
+
+impl<T> GetSaferUnchecked<T> for [T] {
+    /// index `self` with `index`: a normal, panicking index when debug assertions are enabled,
+    /// and `get_unchecked` otherwise.
+    ///
+    /// Safety: `index` must be in bounds for `self`; without debug assertions nothing checks it.
+    #[inline(always)]
+    unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
+    where
+        I: SliceIndex<[T]>,
+    {
+        match cfg!(debug_assertions) {
+            true => &self[index],
+            false => self.get_unchecked(index),
+        }
+    }
+}
+
+/// mark a code path as unreachable: panics when debug assertions are enabled, and is
+/// `core::hint::unreachable_unchecked()` otherwise.
+///
+/// Safety: callers must guarantee this is never actually executed; without debug assertions,
+/// reaching it is undefined behavior.
+#[inline(always)]
+pub unsafe fn unreachable_kinda_unchecked() -> ! {
+    if !cfg!(debug_assertions) {
+        core::hint::unreachable_unchecked()
+    }
+    panic!("UB: Unreachable unchecked was executed")
+}
diff --git a/src/testkit.rs b/src/testkit.rs
new file mode 100644
index 0000000..215a062
--- /dev/null
+++ b/src/testkit.rs
@@ -0,0 +1,10 @@
+//! utilities to validate that implementations of traits in `yaxpeax-arch` uphold requirements
+//! described in this crate.
+//!
+//! currently, this only includes tools to validate correct use of
+//! [`crate::display::DisplaySink`], but may grow in the future.
+
+#[cfg(feature="alloc")]
+mod display;
+#[cfg(feature="alloc")]
+pub use display::{DisplaySinkValidator, DisplaySinkWriteComparator};
diff --git a/src/testkit/display.rs b/src/testkit/display.rs
new file mode 100644
index 0000000..3cef59c
--- /dev/null
+++ b/src/testkit/display.rs
@@ -0,0 +1,192 @@
+//! tools to test the correctness of `yaxpeax-arch` trait implementations.
+
+use core::fmt;
+use core::fmt::Write;
+
+use crate::display::DisplaySink;
+
+/// `DisplaySinkValidator` is a `DisplaySink` that panics if invariants required of
+/// `DisplaySink`-writing functions are not upheld.
+///
+/// there are two categories of invariants that `DisplaySinkValidator` validates.
+///
+/// first, this panics if spans are not `span_end_*`-ed in first-in-last-out order with
+/// corresponding `span_start_*`. second, this panics if `write_lt_*` functions are ever provided
+/// inputs longer than the corresponding maximum length.
+///
+/// functions that write to a `DisplaySink` are strongly encouraged to come with fuzzing that for
+/// all inputs `DisplaySinkValidator` does not panic.
+pub struct DisplaySinkValidator {
+ // names of the spans that are currently open, innermost last: `span_start_*` pushes a name
+ // and `span_end_*` pops one and checks it.
+ spans: alloc::vec::Vec<&'static str>,
+}
+
+impl DisplaySinkValidator {
+    /// create a validator that has no spans open.
+    pub fn new() -> Self {
+        let spans = alloc::vec::Vec::new();
+        Self { spans }
+    }
+}
+
+impl core::ops::Drop for DisplaySinkValidator {
+    /// panic if any span was started but never ended by the time the validator goes away.
+    fn drop(&mut self) {
+        if !self.spans.is_empty() {
+            panic!("DisplaySinkValidator dropped with open spans");
+        }
+    }
+}
+
+// the validator does not keep any text: it only checks how it is called, so plain writes are
+// accepted and discarded.
+impl fmt::Write for DisplaySinkValidator {
+ // accepts and discards `_s`; always succeeds.
+ fn write_str(&mut self, _s: &str) -> Result<(), fmt::Error> {
+ Ok(())
+ }
+ // accepts and discards `_c`; always succeeds.
+ fn write_char(&mut self, _c: char) -> Result<(), fmt::Error> {
+ Ok(())
+ }
+}
+
+impl DisplaySink for DisplaySinkValidator {
+    /// panics unless `s` is shorter than 32 bytes; the text itself is discarded.
+    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
+        assert!(
+            s.len() < 32,
+            "DisplaySinkValidator::write_lt_32 was given a string longer than the maximum permitted length"
+        );
+
+        self.write_str(s)
+    }
+    /// panics unless `s` is shorter than 16 bytes; the text itself is discarded.
+    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
+        assert!(
+            s.len() < 16,
+            "DisplaySinkValidator::write_lt_16 was given a string longer than the maximum permitted length"
+        );
+
+        self.write_str(s)
+    }
+    /// panics unless `s` is shorter than 8 bytes; the text itself is discarded.
+    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
+        assert!(
+            s.len() < 8,
+            "DisplaySinkValidator::write_lt_8 was given a string longer than the maximum permitted length"
+        );
+
+        self.write_str(s)
+    }
+
+    // each `span_start_*` records the span's name as the innermost open span, and each
+    // `span_end_*` requires that the innermost open span is the one being ended.
+
+    fn span_start_immediate(&mut self) {
+        self.spans.push("immediate");
+    }
+
+    fn span_end_immediate(&mut self) {
+        self.end_span("immediate");
+    }
+
+    fn span_start_register(&mut self) {
+        self.spans.push("register");
+    }
+
+    fn span_end_register(&mut self) {
+        self.end_span("register");
+    }
+
+    fn span_start_opcode(&mut self) {
+        self.spans.push("opcode");
+    }
+
+    fn span_end_opcode(&mut self) {
+        self.end_span("opcode");
+    }
+
+    fn span_start_program_counter(&mut self) {
+        self.spans.push("program counter");
+    }
+
+    fn span_end_program_counter(&mut self) {
+        self.end_span("program counter");
+    }
+
+    fn span_start_number(&mut self) {
+        self.spans.push("number");
+    }
+
+    fn span_end_number(&mut self) {
+        self.end_span("number");
+    }
+
+    fn span_start_address(&mut self) {
+        self.spans.push("address");
+    }
+
+    fn span_end_address(&mut self) {
+        self.end_span("address");
+    }
+
+    fn span_start_function_expr(&mut self) {
+        self.spans.push("function expr");
+    }
+
+    fn span_end_function_expr(&mut self) {
+        self.end_span("function expr");
+    }
+}
+
+impl DisplaySinkValidator {
+    /// close the innermost open span, panicking if no span is open or if the innermost open
+    /// span is not `expected`.
+    fn end_span(&mut self, expected: &'static str) {
+        let last = self.spans.pop().expect("item to pop");
+        assert_eq!(last, expected);
+    }
+}
+
+/// `DisplaySinkWriteComparator` helps test that two `DisplaySink` implementations which should
+/// produce the same output actually do.
+///
+/// this is most useful for cases like testing specialized `write_lt_*` functions, which ought to
+/// behave the same as if `write_str()` were called instead and so can be used as a very simple
+/// oracle.
+///
+/// this is somewhat less useful when the sinks are expected to produce unequal text, such as when
+/// one sink writes ANSI color sequences and the other does not.
+pub struct DisplaySinkWriteComparator<'sinks, T: DisplaySink, U: DisplaySink> {
+ // first sink that every write is forwarded to.
+ sink1: &'sinks mut T,
+ // returns the text `sink1` currently holds, for comparison against `sink2`.
+ sink1_check: fn(&T) -> &str,
+ // second sink that every write is forwarded to.
+ sink2: &'sinks mut U,
+ // returns the text `sink2` currently holds, for comparison against `sink1`.
+ sink2_check: fn(&U) -> &str,
+}
+
+impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySinkWriteComparator<'sinks, T, U> {
+    /// build a comparator over sinks `t` and `u`. `t_check` and `u_check` are used to read back
+    /// the text of the corresponding sink whenever the two are compared.
+    pub fn new(
+        t: &'sinks mut T, t_check: fn(&T) -> &str,
+        u: &'sinks mut U, u_check: fn(&U) -> &str
+    ) -> Self {
+        Self { sink1: t, sink1_check: t_check, sink2: u, sink2_check: u_check }
+    }
+
+    /// read back both sinks' text and panic, reporting both, if they differ.
+    fn compare_sinks(&self) {
+        let read_sink1 = self.sink1_check;
+        let read_sink2 = self.sink2_check;
+        let sink1_text = read_sink1(self.sink1);
+        let sink2_text = read_sink2(self.sink2);
+
+        assert!(
+            sink1_text == sink2_text,
+            "sinks produced different output: {} != {}", sink1_text, sink2_text
+        );
+    }
+}
+
+// every specialized `DisplaySink` helper is forwarded to the same helper on both sinks, and the
+// sinks' text is compared after each one. without a forwarding method here, a helper would fall
+// back to the trait's default implementation and the sinks' own specialized versions would never
+// be exercised or compared.
+impl<'sinks, T: DisplaySink, U: DisplaySink> DisplaySink for DisplaySinkWriteComparator<'sinks, T, U> {
+    /// forward to both sinks' `write_fixed_size`, then panic if their text differs.
+    fn write_fixed_size(&mut self, s: &str) -> Result<(), fmt::Error> {
+        self.sink1.write_fixed_size(s).expect("write to sink1 succeeds");
+        self.sink2.write_fixed_size(s).expect("write to sink2 succeeds");
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_lt_32`, then panic if their text differs.
+    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
+        // Safety: the caller's obligations for `write_lt_32` are passed through unchanged.
+        unsafe {
+            self.sink1.write_lt_32(s).expect("write to sink1 succeeds");
+            self.sink2.write_lt_32(s).expect("write to sink2 succeeds");
+        }
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_lt_16`, then panic if their text differs.
+    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
+        // Safety: the caller's obligations for `write_lt_16` are passed through unchanged.
+        unsafe {
+            self.sink1.write_lt_16(s).expect("write to sink1 succeeds");
+            self.sink2.write_lt_16(s).expect("write to sink2 succeeds");
+        }
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_lt_8`, then panic if their text differs.
+    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
+        // Safety: the caller's obligations for `write_lt_8` are passed through unchanged.
+        unsafe {
+            self.sink1.write_lt_8(s).expect("write to sink1 succeeds");
+            self.sink2.write_lt_8(s).expect("write to sink2 succeeds");
+        }
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_u8`, then panic if their text differs.
+    fn write_u8(&mut self, v: u8) -> Result<(), fmt::Error> {
+        self.sink1.write_u8(v).expect("write to sink1 succeeds");
+        self.sink2.write_u8(v).expect("write to sink2 succeeds");
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_u16`, then panic if their text differs.
+    fn write_u16(&mut self, v: u16) -> Result<(), fmt::Error> {
+        self.sink1.write_u16(v).expect("write to sink1 succeeds");
+        self.sink2.write_u16(v).expect("write to sink2 succeeds");
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_u32`, then panic if their text differs.
+    fn write_u32(&mut self, v: u32) -> Result<(), fmt::Error> {
+        self.sink1.write_u32(v).expect("write to sink1 succeeds");
+        self.sink2.write_u32(v).expect("write to sink2 succeeds");
+        self.compare_sinks();
+        Ok(())
+    }
+    /// forward to both sinks' `write_u64`, then panic if their text differs.
+    fn write_u64(&mut self, v: u64) -> Result<(), fmt::Error> {
+        self.sink1.write_u64(v).expect("write to sink1 succeeds");
+        self.sink2.write_u64(v).expect("write to sink2 succeeds");
+        self.compare_sinks();
+        Ok(())
+    }
+}
+
+// plain `fmt::Write` writes are forwarded to both sinks. a failed write on either sink panics
+// rather than being returned as an error.
+// NOTE(review): unlike `write_u8`, the sinks are not compared after these writes - confirm
+// that is intended.
+impl<'sinks, T: DisplaySink, U: DisplaySink> fmt::Write for DisplaySinkWriteComparator<'sinks, T, U> {
+ // write `s` to both sinks.
+ fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
+ self.sink1.write_str(s).expect("write to sink1 succeeds");
+ self.sink2.write_str(s).expect("write to sink2 succeeds");
+ Ok(())
+ }
+ // write `c` to both sinks.
+ fn write_char(&mut self, c: char) -> Result<(), fmt::Error> {
+ self.sink1.write_char(c).expect("write to sink1 succeeds");
+ self.sink2.write_char(c).expect("write to sink2 succeeds");
+ Ok(())
+ }
+}
diff --git a/tests/display.rs b/tests/display.rs
new file mode 100644
index 0000000..8826303
--- /dev/null
+++ b/tests/display.rs
@@ -0,0 +1,143 @@
+
+// the hex formatters must resolve regardless of which crate features are enabled: gating them
+// was something of a misfeature for these formatters..
+#[test]
+#[allow(deprecated)]
+fn formatters_are_not_feature_gated() {
+    use yaxpeax_arch::display;
+
+    // only whether these names resolve matters here, so the results are discarded.
+    let _ = display::u8_hex(10);
+    let _ = display::u16_hex(10);
+    let _ = display::u32_hex(10);
+    let _ = display::u64_hex(10);
+    let _ = display::signed_i8_hex(10);
+    let _ = display::signed_i16_hex(10);
+    let _ = display::signed_i32_hex(10);
+    let _ = display::signed_i64_hex(10);
+}
+
+#[cfg(feature="alloc")]
+#[test]
+fn instruction_text_sink_write_char_requires_ascii() {
+    use core::fmt::Write;
+
+    // every character below is ASCII, so each `write_char` call is expected to succeed.
+    let expected = "`1234567890-=+_)(*&^%$#@!~\\][poiuytrewq |}{POIUYTREWQ';lkjhgfdsa\":LKJHGFDSA/.,mnbvcxz?><MNBVCXZ \r\n";
+
+    let mut text = String::with_capacity(512);
+    // NOTE(review): `InstructionTextSink::new` is unsafe. this test presumably relies on the
+    // capacity reserved above being enough for everything written - confirm against its docs.
+    let mut sink = unsafe {
+        yaxpeax_arch::display::InstructionTextSink::new(&mut text)
+    };
+
+    for c in expected.chars() {
+        sink.write_char(c).expect("write works");
+    }
+
+    // whatever went through the sink must have landed in the backing string unchanged.
+    assert_eq!(text, expected);
+}
+
+#[cfg(feature="alloc")]
+#[test]
+#[should_panic]
+fn instruction_text_sink_write_char_rejects_not_ascii() {
+    use core::fmt::Write;
+
+    let mut text = String::with_capacity(512);
+    // NOTE(review): `InstructionTextSink::new` is unsafe. this test presumably relies on the
+    // capacity reserved above - confirm against its docs.
+    let mut sink = unsafe {
+        yaxpeax_arch::display::InstructionTextSink::new(&mut text)
+    };
+
+    // U+0080 is the first code point past the ASCII range. the test passes when this write
+    // panics, whether inside `write_char` itself or via `expect` on a returned error.
+    let first_non_ascii = '\u{80}';
+    sink.write_char(first_non_ascii).expect("write works");
+}
+
+#[cfg(feature="alloc")]
+#[test]
+fn display_sink_write_hex_helpers() {
+    use yaxpeax_arch::display::DisplaySink;
+
+    // u8/i8/u16/i16 are small enough to test exhaustively. wider types are left for fuzzers.
+    let mut out = String::new();
+
+    for unsigned in u8::MIN..=u8::MAX {
+        // the same bit pattern, reinterpreted as a signed value.
+        let signed = unsigned as i8;
+
+        out.clear();
+        out.write_u8(unsigned).expect("write succeeds");
+        assert_eq!(out, format!("{:x}", unsigned));
+
+        out.clear();
+        out.write_prefixed_u8(unsigned).expect("write succeeds");
+        assert_eq!(out, format!("0x{:x}", unsigned));
+
+        // negative values are expected as a sign followed by the magnitude.
+        let signed_text = if signed.is_negative() {
+            format!("-0x{:x}", signed.unsigned_abs())
+        } else {
+            format!("0x{:x}", unsigned)
+        };
+
+        out.clear();
+        out.write_prefixed_i8(signed).expect("write succeeds");
+        assert_eq!(out, signed_text);
+    }
+
+    for unsigned in u16::MIN..=u16::MAX {
+        // the same bit pattern, reinterpreted as a signed value.
+        let signed = unsigned as i16;
+
+        out.clear();
+        out.write_u16(unsigned).expect("write succeeds");
+        assert_eq!(out, format!("{:x}", unsigned));
+
+        out.clear();
+        out.write_prefixed_u16(unsigned).expect("write succeeds");
+        assert_eq!(out, format!("0x{:x}", unsigned));
+
+        // negative values are expected as a sign followed by the magnitude.
+        let signed_text = if signed.is_negative() {
+            format!("-0x{:x}", signed.unsigned_abs())
+        } else {
+            format!("0x{:x}", unsigned)
+        };
+
+        out.clear();
+        out.write_prefixed_i16(signed).expect("write succeeds");
+        assert_eq!(out, signed_text);
+    }
+}
+
+#[cfg(feature="alloc")]
+#[test]
+fn sinks_are_equivalent() {
+    use yaxpeax_arch::display::{DisplaySink, FmtSink};
+    use yaxpeax_arch::testkit::DisplaySinkWriteComparator;
+
+    // write the same values to a bare `String` and to a `String` wrapped in a `FmtSink`. the
+    // comparator panics if the two ever disagree.
+    let mut bare = String::new();
+    let mut through_sink = String::new();
+    // the range is inclusive so that 0xffff (-1 as an i16) is exercised too. an exclusive range
+    // would silently skip the last value.
+    for i in 0..=u16::MAX {
+        // start each value from empty buffers so a mismatch is attributable to this `i` alone.
+        bare.clear();
+        through_sink.clear();
+        let mut out = FmtSink::new(&mut through_sink);
+        let mut comparator = DisplaySinkWriteComparator::new(
+            &mut out,
+            |sink| { sink.inner_ref().as_str() },
+            &mut bare,
+            |sink| { sink.as_str() },
+        );
+        comparator.write_u16(i).expect("write succeeds");
+        comparator.write_prefixed_u16(i).expect("write succeeds");
+        comparator.write_prefixed_i16(i as i16).expect("write succeeds");
+    }
+}
+
+#[cfg(all(feature="alloc", feature="color-new"))]
+#[test]
+fn ansi_sink_works() {
+    use yaxpeax_arch::color_new::ansi::AnsiDisplaySink;
+    use yaxpeax_arch::color_new::DefaultColors;
+    use yaxpeax_arch::display::DisplaySink;
+
+    let mut buf = String::new();
+
+    // the sink lives in its own scope so it is dropped before `buf` is inspected.
+    {
+        let mut sink = AnsiDisplaySink::new(&mut buf, DefaultColors);
+
+        // an immediate, then a register wrapped in parentheses: `0x80(rbp)`.
+        sink.span_start_immediate();
+        sink.write_prefixed_u8(0x80).expect("write succeeds");
+        sink.span_end_immediate();
+        sink.write_fixed_size("(").expect("write succeeds");
+        sink.span_start_register();
+        sink.write_fixed_size("rbp").expect("write succeeds");
+        sink.span_end_register();
+        sink.write_fixed_size(")").expect("write succeeds");
+    }
+
+    // each span is expected to be bracketed by a color sequence and a reset (`\x1b[39m`), while
+    // the parentheses written outside any span stay uncolored.
+    assert_eq!(buf, "\x1b[37m0x80\x1b[39m(\x1b[38;5;6mrbp\x1b[39m)");
+}
diff --git a/tests/lib.rs b/tests/lib.rs
index 1d5e964..9dc1449 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -12,6 +12,7 @@ fn test_u16() {
}
#[test]
+#[cfg(std)]
fn generic_error_can_bail() {
use yaxpeax_arch::{Arch, Decoder, Reader};
@@ -23,6 +24,7 @@ fn generic_error_can_bail() {
}
}
#[test]
+#[cfg(std)]
fn error_can_bail() {
use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};
struct TestIsa {}
@@ -76,3 +78,51 @@ fn error_can_bail() {
assert_eq!(exercise_eq(), Err(Error::TestDecode(StandardDecodeError::ExhaustedInput)));
}
+
+#[test]
+fn example_arch_impl() {
+    use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, StandardDecodeError, U8Reader};
+
+    // a minimal architecture: byte-sized words, 64-bit addresses, and the crate's standard
+    // decode error used directly as the error type.
+    struct TestIsa {}
+
+    impl Arch for TestIsa {
+        type Word = u8;
+        type Address = u64;
+        type Instruction = TestInst;
+        type Decoder = TestIsaDecoder;
+        type DecodeError = StandardDecodeError;
+        type Operand = ();
+    }
+
+    #[derive(Debug, Default)]
+    struct TestInst {}
+
+    impl Instruction for TestInst {
+        fn well_defined(&self) -> bool { true }
+    }
+
+    impl LengthedInstruction for TestInst {
+        type Unit = AddressDiff<u64>;
+        fn len(&self) -> Self::Unit { AddressDiff::from_const(1) }
+        fn min_size() -> Self::Unit { AddressDiff::from_const(1) }
+    }
+
+    #[derive(Default)]
+    struct TestIsaDecoder {}
+
+    impl Decoder<TestIsa> for TestIsaDecoder {
+        // this decoder never reads any words; it unconditionally reports exhausted input.
+        fn decode_into<T: Reader<u64, u8>>(&self, _inst: &mut TestInst, _words: &mut T) -> Result<(), StandardDecodeError> {
+            Err(StandardDecodeError::ExhaustedInput)
+        }
+    }
+
+    // decode from an empty buffer, discarding any decoded instruction and keeping only the error.
+    fn decode_from_empty() -> Result<(), StandardDecodeError> {
+        let mut reader = U8Reader::new(&[]);
+        TestIsaDecoder::default().decode(&mut reader).map(|_inst| ())
+    }
+
+    assert_eq!(decode_from_empty(), Err(StandardDecodeError::ExhaustedInput));
+}