diff options
| author | iximeow <me@iximeow.net> | 2024-06-22 12:26:04 -0700 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2024-06-22 12:26:04 -0700 | 
| commit | 2bbeeec8cf26c1b165cdc5e6548b28bbc3c1d6a3 (patch) | |
| tree | e96d808768bf10f6a5c9f7358ff7e8fa8dca8809 /src | |
| parent | 0c69ee37f98ce616df5237cbe74952c2a28cc5cb (diff) | |
be more careful about what does and doesn't need alloc
Diffstat (limited to 'src')
| -rw-r--r-- | src/display.rs | 5 | ||||
| -rw-r--r-- | src/display/display_sink.rs | 896 | ||||
| -rw-r--r-- | src/lib.rs | 1 | 
3 files changed, 459 insertions, 443 deletions
| diff --git a/src/display.rs b/src/display.rs index 77f6ba9..3965bdc 100644 --- a/src/display.rs +++ b/src/display.rs @@ -10,12 +10,15 @@ use core::ops::Neg;  mod display_sink; -pub use display_sink::{DisplaySink, FmtSink, InstructionTextSink}; +pub use display_sink::{DisplaySink, FmtSink}; +#[cfg(feature = "alloc")] +pub use display_sink::InstructionTextSink;  /// translate a byte in range `[0, 15]` to a lowercase base-16 digit.  ///  /// if `c` is in range, the output is always valid as the sole byte in a utf-8 string. if `c` is out  /// of range, the returned character might not be a valid single-byte utf-8 codepoint. +#[cfg(feature = "alloc")] // this function is of course not directly related to alloc, but it's only needed by impls that themselves are only present with alloc.  fn u8_to_hex(c: u8) -> u8 {      // this conditional branch is faster than a lookup for... most architectures (especially x86      // with cmov) diff --git a/src/display/display_sink.rs b/src/display/display_sink.rs index 418b6aa..1fb8837 100644 --- a/src/display/display_sink.rs +++ b/src/display/display_sink.rs @@ -1,9 +1,5 @@  use core::fmt; -use crate::display::u8_to_hex; - -use crate::safer_unchecked::unreachable_kinda_unchecked; -  /// `DisplaySink` allows client code to collect output and minimal markup. this is currently used  /// in formatting instructions for two reasons:  /// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at @@ -372,469 +368,518 @@ impl<'a, T: fmt::Write> fmt::Write for FmtSink<'a, T> {      }  } -/// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the -/// disassemblers, do not use this struct. do not depend on this struct existing. this struct is -/// not stable. this struct is not safe for general use. if you use this struct you and your -/// program will be eaten by gremlins. 
-/// -/// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct -/// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. -/// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are -/// not bounds-checked, and the buffer is never grown. -/// -/// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is -/// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be -/// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* -/// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would -/// be written to without being rewound first. -/// -/// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect -/// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the -/// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of -/// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. -/// -/// this is intended to be hidden in docs. if you see this in docs, it's a bug. 
-#[doc(hidden)] -pub struct InstructionTextSink<'buf> { -    buf: &'buf mut alloc::string::String -} +#[cfg(feature = "alloc")] +mod instruction_text_sink { +    use core::fmt; -impl<'buf> InstructionTextSink<'buf> { -    // TODO: safety -    pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { -        Self { buf } -    } -} +    use super::{DisplaySink, u8_to_hex}; +    use crate::safer_unchecked::unreachable_kinda_unchecked; -impl<'buf> fmt::Write for InstructionTextSink<'buf> { -    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.buf.write_str(s) -    } -    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + 1 { -                panic!("InstructionTextSink::write_char would overflow output"); -            } -        } -        // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` -        // is valid for writing, but may be uninitialized. -        // -        // this function is essentially equivalent to `Vec::push` specialized for the case that -        // `len < buf.capacity()`: -        // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 -        unsafe { -            let underlying = self.buf.as_mut_vec(); -            // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to -            // write single ASCII characters. this is wrong in the general case, but `write_char` -            // here is not going to be used in the general case. 
-            if cfg!(debug_asertions) { -                panic!("InstructionTextSink::write_char would truncate output"); -            } -            let to_push = c as u8; -            // `ptr::write` here because `underlying.add(underlying.len())` may not point to an -            // initialized value, which would mean that turning that pointer into a `&mut u8` to -            // store through would be UB. `ptr::write` avoids taking the mut ref. -            underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); -            // we have initialized all (one) bytes that `set_len` is increasing the length to -            // include. -            underlying.set_len(underlying.len() + 1); -        } -        Ok(()) +    /// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the +    /// disassemblers, do not use this struct. do not depend on this struct existing. this struct is +    /// not stable. this struct is not safe for general use. if you use this struct you and your +    /// program will be eaten by gremlins. +    /// +    /// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct +    /// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. +    /// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are +    /// not bounds-checked, and the buffer is never grown. +    /// +    /// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is +    /// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be +    /// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* +    /// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would +    /// be written to without being rewound first. 
+    /// +    /// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect +    /// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the +    /// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of +    /// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. +    /// +    /// this is intended to be hidden in docs. if you see this in docs, it's a bug. +#[doc(hidden)] +    pub struct InstructionTextSink<'buf> { +        buf: &'buf mut alloc::string::String      } -} -/// this [`DisplaySink`] impl exists to support somewhat more performant buffering of the kinds of -/// strings `yaxpeax-x86` uses in formatting instructions. -/// -/// span information is discarded at zero cost. -impl DisplaySink for alloc::string::String { -    #[inline(always)] -    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.reserve(s.len()); -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        if new_bytes.len() == 0 { -            unsafe { unreachable_kinda_unchecked() } +    impl<'buf> InstructionTextSink<'buf> { +        // TODO: safety +        pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { +            Self { buf }          } +    } -        if new_bytes.len() >= 16 { -            unsafe { unreachable_kinda_unchecked() } +    impl<'buf> fmt::Write for InstructionTextSink<'buf> { +        fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { +            self.buf.write_str(s)          } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); - -            // this used to be enough to bamboozle llvm away from -            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 -            // if `s` is not fixed size. 
somewhere between Rust 1.68 and Rust 1.74 this stopped -            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` -            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this -            // unrolls into some kind of appropriate series of `mov`. -            dest.offset(0 as isize).write(new_bytes[0]); -            for i in 1..new_bytes.len() { -                dest.offset(i as isize).write(new_bytes[i]); +        fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + 1 { +                    panic!("InstructionTextSink::write_char would overflow output"); +                }              } - -            buf.set_len(buf.len() + new_bytes.len()); +            // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` +            // is valid for writing, but may be uninitialized. +            // +            // this function is essentially equivalent to `Vec::push` specialized for the case that +            // `len < buf.capacity()`: +            // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 +            unsafe { +                let underlying = self.buf.as_mut_vec(); +                // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to +                // write single ASCII characters. this is wrong in the general case, but `write_char` +                // here is not going to be used in the general case. 
+                if cfg!(debug_asertions) { +                    panic!("InstructionTextSink::write_char would truncate output"); +                } +                let to_push = c as u8; +                // `ptr::write` here because `underlying.add(underlying.len())` may not point to an +                // initialized value, which would mean that turning that pointer into a `&mut u8` to +                // store through would be UB. `ptr::write` avoids taking the mut ref. +                underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); +                // we have initialized all (one) bytes that `set_len` is increasing the length to +                // include. +                underlying.set_len(underlying.len() + 1); +            } +            Ok(())          } - -        Ok(())      } -    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { -        self.reserve(s.len()); -        // SAFETY: todo -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 32 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); +    impl<'buf> DisplaySink for InstructionTextSink<'buf> { +        #[inline(always)] +        fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_fixed_size would overflow output"); +                } +            } -            let rem = new_bytes.len() as isize; +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_bytes = s.as_bytes(); -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of 
bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "6:", -                "cmp {rem:e}, 16", -                "jb 7f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 16]", -                "mov qword ptr [{dest} + {rem} - 16], {buf:r}", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 16", -                "jz 11f", -                "7:", -                "cmp {rem:e}, 8", -                "jb 8f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 8", -                "jz 11f", -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } +            if new_bytes.len() == 0 { +                return Ok(()); +            } -        Ok(()) -    } -    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { -        self.reserve(s.len()); +        
    if new_bytes.len() >= 16 { +                unsafe { unreachable_kinda_unchecked() } +            } -        // SAFETY: todo -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); +            unsafe { +                let dest = buf.as_mut_ptr().offset(buf.len() as isize); + +                // this used to be enough to bamboozle llvm away from +                // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +                // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped +                // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` +                // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this +                // unrolls into some kind of appropriate series of `mov`. +                dest.offset(0 as isize).write(new_bytes[0]); +                for i in 1..new_bytes.len() { +                    dest.offset(i as isize).write(new_bytes[i]); +                } + +                buf.set_len(buf.len() + new_bytes.len()); +            } -        // should get DCE -        if new_bytes.len() >= 16 { -            unsafe { core::hint::unreachable_unchecked() } +            Ok(())          } +        unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_lt_32 would overflow output"); +                } +            } -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            
buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "7:", -                "cmp {rem:e}, 8", -                "jb 8f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 8", -                "jz 11f", -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } +            // SAFETY: todo +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_bytes = s.as_bytes(); -        Ok(()) -    } -    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { -        self.reserve(s.len()); +            // should get DCE +            if new_bytes.len() >= 32 { +                unsafe { core::hint::unreachable_unchecked() } +            } -        // SAFETY: todo -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); +            unsafe { +                let dest = buf.as_mut_ptr().offset(buf.len() as isize); +                let src = 
new_bytes.as_ptr(); + +                let rem = new_bytes.len() as isize; + +                // set_len early because there is no way to avoid the following asm!() writing that +                // same number of bytes into buf +                buf.set_len(buf.len() + new_bytes.len()); + +                core::arch::asm!( +                    "6:", +                    "cmp {rem:e}, 16", +                    "jb 7f", +                    "mov {buf:r}, qword ptr [{src} + {rem} - 16]", +                    "mov qword ptr [{dest} + {rem} - 16], {buf:r}", +                    "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                    "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                    "sub {rem:e}, 16", +                    "jz 11f", +                    "7:", +                    "cmp {rem:e}, 8", +                    "jb 8f", +                    "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                    "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                    "sub {rem:e}, 8", +                    "jz 11f", +                    "8:", +                    "cmp {rem:e}, 4", +                    "jb 9f", +                    "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                    "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                    "sub {rem:e}, 4", +                    "jz 11f", +                    "9:", +                    "cmp {rem:e}, 2", +                    "jb 10f", +                    "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                    "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                    "sub {rem:e}, 2", +                    "jz 11f", +                    "10:", +                    "cmp {rem:e}, 1", +                    "jb 11f", +                    "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                    "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                    "11:", +                    src = in(reg) src, +                    dest = 
in(reg) dest, +                    rem = inout(reg) rem => _, +                    buf = out(reg) _, +                    options(nostack), +                ); +            } -        // should get DCE -        if new_bytes.len() >= 8 { -            unsafe { core::hint::unreachable_unchecked() } +            Ok(())          } +        unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_lt_16 would overflow output"); +                } +            } -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); +            // SAFETY: todo +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_bytes = s.as_bytes(); -            let rem = new_bytes.len() as isize; +            // should get DCE +            if new_bytes.len() >= 16 { +                unsafe { core::hint::unreachable_unchecked() } +            } -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); +            unsafe { +                let dest = buf.as_mut_ptr().offset(buf.len() as isize); +                let src = new_bytes.as_ptr(); + +                let rem = new_bytes.len() as isize; + +                // set_len early because there is no way to avoid the following asm!() writing that +                // same number of bytes into buf +                buf.set_len(buf.len() + new_bytes.len()); + +                core::arch::asm!( +                    "7:", +                    "cmp {rem:e}, 8", +                    "jb 8f", +                    "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                    "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                    "sub 
{rem:e}, 8", +                    "jz 11f", +                    "8:", +                    "cmp {rem:e}, 4", +                    "jb 9f", +                    "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                    "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                    "sub {rem:e}, 4", +                    "jz 11f", +                    "9:", +                    "cmp {rem:e}, 2", +                    "jb 10f", +                    "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                    "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                    "sub {rem:e}, 2", +                    "jz 11f", +                    "10:", +                    "cmp {rem:e}, 1", +                    "jb 11f", +                    "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                    "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                    "11:", +                    src = in(reg) src, +                    dest = in(reg) dest, +                    rem = inout(reg) rem => _, +                    buf = out(reg) _, +                    options(nostack), +                ); +            } -            core::arch::asm!( -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -     
           dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); +            Ok(())          } +        unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + s.len() { +                    panic!("InstructionTextSink::write_lt_8 would overflow output"); +                } +            } -        Ok(()) -    } -    /// write a u8 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. 
-        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; +            // SAFETY: todo +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_bytes = s.as_bytes(); -        self.reserve(printed_size); +            // should get DCE +            if new_bytes.len() >= 8 { +                unsafe { core::hint::unreachable_unchecked() } +            } -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; +            unsafe { +                let dest = buf.as_mut_ptr().offset(buf.len() as isize); +                let src = new_bytes.as_ptr(); + +                let rem = new_bytes.len() as isize; + +                // set_len early because there is no way to avoid the following asm!() writing that +                // same number of bytes into buf +                buf.set_len(buf.len() + new_bytes.len()); + +                core::arch::asm!( +                    "8:", +                    "cmp {rem:e}, 4", +                    "jb 9f", +                    "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                    "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                    "sub {rem:e}, 4", +                    "jz 11f", +                    "9:", +                    "cmp {rem:e}, 2", +                    "jb 10f", +                    "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                    "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                    "sub {rem:e}, 2", +                    "jz 11f", +                    "10:", +                    "cmp {rem:e}, 1", +                    "jb 11f", +                    "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                    "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                    "11:", +                    src = in(reg) src, +                    dest = in(reg) dest, +                    rem = inout(reg) rem => _, +                    buf = out(reg) _, +                    options(nostack), 
+                ); +            } -        unsafe { -            buf.set_len(new_len); +            Ok(())          } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; +        /// write a u8 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. +            let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u8 would overflow output"); +                } +            } + +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; -        loop { -            let digit = v % 16; -            let c = u8_to_hex(digit as u8);              unsafe { -                p = p.offset(-1); -                p.write(c); +                buf.set_len(new_len);              } -            v = v / 16; -            if v == 0 { -                break; +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                 
   break; +                }              } -        } -        Ok(()) -    } -    /// write a u16 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); +            Ok(())          } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; +        /// write a u16 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } -        self.reserve(printed_size); +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. 
+            let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u16 would overflow output"); +                } +            } -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; -        loop { -            let digit = v % 16; -            let c = u8_to_hex(digit as u8);              unsafe { -                p = p.offset(-1); -                p.write(c); +                buf.set_len(new_len);              } -            v = v / 16; -            if v == 0 { -                break; +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                }              } -        } -        Ok(()) -    } -    /// write a u32 to the output as a base-16 integer. 
-    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); +            Ok(())          } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; +        /// write a u32 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } -        self.reserve(printed_size); +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. 
+            let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u32 would overflow output"); +                } +            } -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; -        loop { -            let digit = v % 16; -            let c = u8_to_hex(digit as u8);              unsafe { -                p = p.offset(-1); -                p.write(c); +                buf.set_len(new_len);              } -            v = v / 16; -            if v == 0 { -                break; +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                }              } -        } -        Ok(()) -    } -    /// write a u64 to the output as a base-16 integer. 
-    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); +            Ok(())          } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; +        /// write a u64 to the output as a base-16 integer. +        /// +        /// this is provided for optimization opportunities when the formatted integer can be written +        /// directly to the sink (rather than formatted to an intermediate buffer and output as a +        /// followup step) +        #[inline(always)] +        fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { +            if v == 0 { +                return self.write_fixed_size("0"); +            } -        self.reserve(printed_size); +            // we can fairly easily predict the size of a formatted string here with lzcnt, which also +            // means we can write directly into the correct offsets of the output string. 
+            let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; +            if cfg!(debug_assertions) { +                if self.buf.capacity() < self.buf.len() + printed_size { +                    panic!("InstructionTextSink::write_u64 would overflow output"); +                } +            } -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; +            let buf = unsafe { self.buf.as_mut_vec() }; +            let new_len = buf.len() + printed_size; -        loop { -            let digit = v % 16; -            let c = u8_to_hex(digit as u8);              unsafe { -                p = p.offset(-1); -                p.write(c); +                buf.set_len(new_len);              } -            v = v / 16; -            if v == 0 { -                break; +            let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +            loop { +                let digit = v % 16; +                let c = u8_to_hex(digit as u8); +                unsafe { +                    p = p.offset(-1); +                    p.write(c); +                } +                v = v / 16; +                if v == 0 { +                    break; +                }              } -        } -        Ok(()) +            Ok(()) +        }      }  } +#[cfg(feature = "alloc")] +pub use instruction_text_sink::InstructionTextSink; + + +#[cfg(feature = "alloc")] +use crate::display::u8_to_hex; + +#[cfg(feature = "alloc")] +use crate::safer_unchecked::unreachable_kinda_unchecked; -impl<'buf> DisplaySink for InstructionTextSink<'buf> { +/// this [`DisplaySink`] impl exists to support somewhat more performant buffering of the kinds of +/// strings `yaxpeax-x86` uses in formatting instructions. +/// +/// span information is discarded at zero cost. 
+#[cfg(feature = "alloc")] +impl DisplaySink for alloc::string::String {      #[inline(always)]      fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + s.len() { -                panic!("InstructionTextSink::write_fixed_size would overflow output"); -            } -        } - -        let buf = unsafe { self.buf.as_mut_vec() }; +        self.reserve(s.len()); +        let buf = unsafe { self.as_mut_vec() };          let new_bytes = s.as_bytes();          if new_bytes.len() == 0 { -            return Ok(()); +            unsafe { unreachable_kinda_unchecked() }          }          if new_bytes.len() >= 16 { @@ -845,7 +890,7 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {              let dest = buf.as_mut_ptr().offset(buf.len() as isize);              // this used to be enough to bamboozle llvm away from -            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232              // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped              // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`              // (otherwise this is a libc memcpy call in disguise). 
for fixed-size strings this @@ -861,14 +906,10 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          Ok(())      }      unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + s.len() { -                panic!("InstructionTextSink::write_lt_32 would overflow output"); -            } -        } +        self.reserve(s.len());          // SAFETY: todo -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_bytes = s.as_bytes();          // should get DCE @@ -934,14 +975,10 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          Ok(())      }      unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + s.len() { -                panic!("InstructionTextSink::write_lt_16 would overflow output"); -            } -        } +        self.reserve(s.len());          // SAFETY: todo -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_bytes = s.as_bytes();          // should get DCE @@ -998,14 +1035,10 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          Ok(())      }      unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + s.len() { -                panic!("InstructionTextSink::write_lt_8 would overflow output"); -            } -        } +        self.reserve(s.len());          // SAFETY: todo -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_bytes = s.as_bytes();          // should get DCE @@ -1068,13 +1101,9 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          // means we can write directly into the correct 
offsets of the output string.          let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + printed_size { -                panic!("InstructionTextSink::write_u8 would overflow output"); -            } -        } +        self.reserve(printed_size); -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_len = buf.len() + printed_size;          unsafe { @@ -1107,18 +1136,13 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          if v == 0 {              return self.write_fixed_size("0");          } -          // we can fairly easily predict the size of a formatted string here with lzcnt, which also          // means we can write directly into the correct offsets of the output string.          let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + printed_size { -                panic!("InstructionTextSink::write_u16 would overflow output"); -            } -        } +        self.reserve(printed_size); -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_len = buf.len() + printed_size;          unsafe { @@ -1151,18 +1175,13 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          if v == 0 {              return self.write_fixed_size("0");          } -          // we can fairly easily predict the size of a formatted string here with lzcnt, which also          // means we can write directly into the correct offsets of the output string.          
let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + printed_size { -                panic!("InstructionTextSink::write_u32 would overflow output"); -            } -        } +        self.reserve(printed_size); -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_len = buf.len() + printed_size;          unsafe { @@ -1195,18 +1214,13 @@ impl<'buf> DisplaySink for InstructionTextSink<'buf> {          if v == 0 {              return self.write_fixed_size("0");          } -          // we can fairly easily predict the size of a formatted string here with lzcnt, which also          // means we can write directly into the correct offsets of the output string.          let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; -        if cfg!(debug_assertions) { -            if self.buf.capacity() < self.buf.len() + printed_size { -                panic!("InstructionTextSink::write_u64 would overflow output"); -            } -        } +        self.reserve(printed_size); -        let buf = unsafe { self.buf.as_mut_vec() }; +        let buf = unsafe { self.as_mut_vec() };          let new_len = buf.len() + printed_size;          unsafe { @@ -27,7 +27,6 @@ pub use color::ColorSettings;  #[cfg(feature = "alloc")]  extern crate alloc; -#[cfg(feature = "alloc")]  pub mod display;  pub mod testkit; | 
