diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/display.rs | 1020 | ||||
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/long_mode/display.rs | 1192 | ||||
| -rw-r--r-- | src/long_mode/mod.rs | 2 | 
4 files changed, 1122 insertions, 1094 deletions
| diff --git a/src/display.rs b/src/display.rs new file mode 100644 index 0000000..e495aee --- /dev/null +++ b/src/display.rs @@ -0,0 +1,1020 @@ +use core::fmt; + +use crate::safer_unchecked::unreachable_kinda_unchecked; + +extern crate alloc; + +// TODO: find a better place to put this.... +fn c_to_hex(c: u8) -> u8 { +    /* +    static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; +    CHARSET[c as usize] +    */ +    // the conditional branch below is faster than a lookup, yes +    if c < 10 { +        b'0' + c +    } else { +        b'a' + c - 10 +    } +} + +pub enum TokenType { +    Mnemonic, +    Operand, +    Immediate, +    Register, +    Offset, +} + +/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used +/// in formatting instructions for two reasons: +/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at +///   the same time as collecting output itself. +/// * `DisplaySink` implementations provides specialized functions for writing strings in +///   circumstances where a simple "use `core::fmt`" might incur unwanted overhead. +/// +/// spans are reported through `span_start` and `span_exit` to avoid constraining implementations +/// into tracking current output offset (which may not be knowable) or span size (which may be +/// knowable, but incur additional overhead to compute or track). +/// +/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must +/// exit spans in reverse order to when they are entered. a function sequence like +/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_exit(Operand)` is in error. +/// +/// the `write_*` helpers on `DisplaySink` may be able to take advantage of contraints described in +/// documentation here to better support writing some kinds of inputs than a fully-general solution +/// (such as `core::fmt`) might be able to yield. +/// +/// currently there are two motivating factors for `write_*` helpers: +/// +/// instruction formatting often involves writing small but variable-size strings, such as register +/// names, which is something of a pathological case for string appending as Rust currently exists: +/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an +/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: +/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +/// +/// there are similar papercuts around formatting integers as base-16 numbers, such as +/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are +/// not a significant source of overhead. but for programs bounded on decoding and printing +/// instructions, these can add up to significant overhead - on the order of 10-20% of total +/// runtime. +/// +/// `DisplaySink` +pub trait DisplaySink: fmt::Write { +    #[inline(always)] +    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } + +    /// write a string to this sink that is less than 32 bytes. this is provided for optimization +    /// opportunities when writing a variable-length string with known max size. +    /// +    /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than +    /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a +    /// utf-8 string. this may corrupt Rust strings. +    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } +    /// write a string to this sink that is less than 16 bytes. this is provided for optimization +    /// opportunities when writing a variable-length string with known max size. +    /// +    /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than +    /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a +    /// utf-8 string. this may corrupt Rust strings. +    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } +    /// write a string to this sink that is less than 8 bytes. this is provided for optimization +    /// opportunities when writing a variable-length string with known max size. +    /// +    /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than +    /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a +    /// utf-8 string. this may corrupt Rust strings. +    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.write_str(s) +    } + +    /// write a u8 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u16 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u32 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// write a u64 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { +        write!(self, "{:x}", v) +    } +    /// enter a region inside which output corresponds to a `ty`. +    /// +    /// the default implementation of these functions is as a no-op. this way, providing span +    /// information to a `DisplaySink` that does not want it is eliminated at compile time. +    /// +    /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must +    /// end spans in reverse order to when they are entered. a function sequence like +    /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. +    /// +    /// a simple use of `span_start`/`span_end` might look something like: +    /// ```compile_fail +    /// sink.span_start(Operand) +    /// sink.write_char('[') +    /// sink.span_start(Register) +    /// sink.write_fixed_size("rbp") +    /// sink.span_end(Register) +    /// sink.write_char(']') +    /// sink.span_end(Operand) +    /// ``` +    /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins, +    /// as well as the start and end of a register name. +    fn span_start(&mut self, _ty: TokenType) { } +    /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more. +    fn span_end(&mut self, _ty: TokenType) { } +} + +pub struct NoColorsSink<'a, T: fmt::Write> { +    pub out: &'a mut T, +} + +impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { +    fn span_start(&mut self, _ty: TokenType) { } +    fn span_end(&mut self, _ty: TokenType) { } +} + +impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { +    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.out.write_str(s) +    } +    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { +        self.out.write_char(c) +    } +    fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { +        self.out.write_fmt(f) +    } +} + +/// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the +/// disassemblers, do not use this struct. do not depend on this struct existing. this struct is +/// not stable. this struct is not safe for general use. if you use this struct you and your +/// program will be eaten by gremlins. +/// +/// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct +/// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction. +/// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are +/// not bounds-checked, and the buffer is never grown. +/// +/// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is +/// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be +/// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and* +/// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would +/// be written to without being rewound first. +/// +/// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect +/// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the +/// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of +/// ensuring that instruction formatting impls this buffer is passed to are appropriately sized. +/// +/// this is intended to be hidden in docs. if you see this in docs, it's a bug. +#[doc(hidden)] +pub(crate) struct InstructionTextSink<'buf> { +    buf: &'buf mut alloc::string::String +} + +impl<'buf> InstructionTextSink<'buf> { +    pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self { +        Self { buf } +    } +} + +impl<'buf> fmt::Write for InstructionTextSink<'buf> { +    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.buf.write_str(s) +    } +    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { +        // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` +        // is valid for writing, but may be uninitialized. +        // +        // this function is essentially equivalent to `Vec::push` specialized for the case that +        // `len < buf.capacity()`: +        // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 +        unsafe { +            let underlying = self.buf.as_mut_vec(); +            // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to +            // write single ASCII characters. this is wrong in the general case, but `write_char` +            // here is not going to be used in the general case. +            if cfg!(debug_asertions) { +                panic!("InstructionTextSink::write_char would truncate output"); +            } +            let to_push = c as u8; +            // `ptr::write` here because `underlying.add(underlying.len())` may not point to an +            // initialized value, which would mean that turning that pointer into a `&mut u8` to +            // store through would be UB. `ptr::write` avoids taking the mut ref. +            underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); +            // we have initialized all (one) bytes that `set_len` is increasing the length to +            // include. +            underlying.set_len(underlying.len() + 1); +        } +        Ok(()) +    } +} + +/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of +/// strings `yaxpeax-x86` uses in formatting instructions. +impl DisplaySink for alloc::string::String { +    #[inline(always)] +    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        self.reserve(s.len()); +        let buf = unsafe { self.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        if new_bytes.len() == 0 { +            unsafe { unreachable_kinda_unchecked() } +        } + +        if new_bytes.len() >= 16 { +            unsafe { unreachable_kinda_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); + +            // this used to be enough to bamboozle llvm away from +            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped +            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` +            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this +            // unrolls into some kind of appropriate series of `mov`. +            dest.offset(0 as isize).write(new_bytes[0]); +            for i in 1..new_bytes.len() { +                dest.offset(i as isize).write(new_bytes[i]); +            } + +            buf.set_len(buf.len() + new_bytes.len()); +        } + +        Ok(()) +    } +    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.reserve(s.len()); + +        // SAFETY: todo +        let buf = unsafe { self.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        // should get DCE +        if new_bytes.len() >= 32 { +            unsafe { core::hint::unreachable_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); +            let src = new_bytes.as_ptr(); + +            let rem = new_bytes.len() as isize; + +            // set_len early because there is no way to avoid the following asm!() writing that +            // same number of bytes into buf +            buf.set_len(buf.len() + new_bytes.len()); + +            core::arch::asm!( +                "6:", +                "cmp {rem:e}, 16", +                "jb 7f", +                "mov {buf:r}, qword ptr [{src} + {rem} - 16]", +                "mov qword ptr [{dest} + {rem} - 16], {buf:r}", +                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                "sub {rem:e}, 16", +                "jz 11f", +                "7:", +                "cmp {rem:e}, 8", +                "jb 8f", +                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                "sub {rem:e}, 8", +                "jz 11f", +                "8:", +                "cmp {rem:e}, 4", +                "jb 9f", +                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                "sub {rem:e}, 4", +                "jz 11f", +                "9:", +                "cmp {rem:e}, 2", +                "jb 10f", +                "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                "sub {rem:e}, 2", +                "jz 11f", +                "10:", +                "cmp {rem:e}, 1", +                "jb 11f", +                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                "11:", +                src = in(reg) src, +                dest = in(reg) dest, +                rem = inout(reg) rem => _, +                buf = out(reg) _, +                options(nostack), +            ); +        } +        /* +        for i in 0..new_bytes.len() { +            unsafe { +                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); +            } +        } +        */ + +        Ok(()) +    } +    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.reserve(s.len()); + +        // SAFETY: todo +        let buf = unsafe { self.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        // should get DCE +        if new_bytes.len() >= 16 { +            unsafe { core::hint::unreachable_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); +            let src = new_bytes.as_ptr(); + +            let rem = new_bytes.len() as isize; + +            // set_len early because there is no way to avoid the following asm!() writing that +            // same number of bytes into buf +            buf.set_len(buf.len() + new_bytes.len()); + +            core::arch::asm!( +                "7:", +                "cmp {rem:e}, 8", +                "jb 8f", +                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                "sub {rem:e}, 8", +                "jz 11f", +                "8:", +                "cmp {rem:e}, 4", +                "jb 9f", +                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                "sub {rem:e}, 4", +                "jz 11f", +                "9:", +                "cmp {rem:e}, 2", +                "jb 10f", +                "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                "sub {rem:e}, 2", +                "jz 11f", +                "10:", +                "cmp {rem:e}, 1", +                "jb 11f", +                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                "11:", +                src = in(reg) src, +                dest = in(reg) dest, +                rem = inout(reg) rem => _, +                buf = out(reg) _, +                options(nostack), +            ); +        } +        /* +        for i in 0..new_bytes.len() { +            unsafe { +                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); +            } +        } +        */ + +        Ok(()) +    } +    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { +        self.reserve(s.len()); + +        // SAFETY: todo +        let buf = unsafe { self.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        // should get DCE +        if new_bytes.len() >= 8 { +            unsafe { core::hint::unreachable_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); +            let src = new_bytes.as_ptr(); + +            let rem = new_bytes.len() as isize; + +            // set_len early because there is no way to avoid the following asm!() writing that +            // same number of bytes into buf +            buf.set_len(buf.len() + new_bytes.len()); + +            core::arch::asm!( +                "8:", +                "cmp {rem:e}, 4", +                "jb 9f", +                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                "sub {rem:e}, 4", +                "jz 11f", +                "9:", +                "cmp {rem:e}, 2", +                "jb 10f", +                "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                "sub {rem:e}, 2", +                "jz 11f", +                "10:", +                "cmp {rem:e}, 1", +                "jb 11f", +                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                "11:", +                src = in(reg) src, +                dest = in(reg) dest, +                rem = inout(reg) rem => _, +                buf = out(reg) _, +                options(nostack), +            ); +        } +        /* +        for i in 0..new_bytes.len() { +            unsafe { +                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); +            } +        } +        */ + +        Ok(()) +    } +    /// write a u8 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u16 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u32 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u64 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + +        self.reserve(printed_size); + +        let buf = unsafe { self.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    fn span_start(&mut self, _ty: TokenType) {} +    fn span_end(&mut self, _ty: TokenType) {} +} + +impl<'buf> DisplaySink for InstructionTextSink<'buf> { +    #[inline(always)] +    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        if new_bytes.len() == 0 { +            return Ok(()); +        } + +        if new_bytes.len() >= 16 { +            unsafe { unreachable_kinda_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); + +            // this used to be enough to bamboozle llvm away from +            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 +            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped +            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` +            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this +            // unrolls into some kind of appropriate series of `mov`. +            dest.offset(0 as isize).write(new_bytes[0]); +            for i in 1..new_bytes.len() { +                dest.offset(i as isize).write(new_bytes[i]); +            } + +            buf.set_len(buf.len() + new_bytes.len()); +        } + +        Ok(()) +    } +    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { +        // SAFETY: todo +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        // should get DCE +        if new_bytes.len() >= 32 { +            unsafe { core::hint::unreachable_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); +            let src = new_bytes.as_ptr(); + +            let rem = new_bytes.len() as isize; + +            // set_len early because there is no way to avoid the following asm!() writing that +            // same number of bytes into buf +            buf.set_len(buf.len() + new_bytes.len()); + +            core::arch::asm!( +                "6:", +                "cmp {rem:e}, 16", +                "jb 7f", +                "mov {buf:r}, qword ptr [{src} + {rem} - 16]", +                "mov qword ptr [{dest} + {rem} - 16], {buf:r}", +                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                "sub {rem:e}, 16", +                "jz 11f", +                "7:", +                "cmp {rem:e}, 8", +                "jb 8f", +                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                "sub {rem:e}, 8", +                "jz 11f", +                "8:", +                "cmp {rem:e}, 4", +                "jb 9f", +                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                "sub {rem:e}, 4", +                "jz 11f", +                "9:", +                "cmp {rem:e}, 2", +                "jb 10f", +                "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                "sub {rem:e}, 2", +                "jz 11f", +                "10:", +                "cmp {rem:e}, 1", +                "jb 11f", +                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                "11:", +                src = in(reg) src, +                dest = in(reg) dest, +                rem = inout(reg) rem => _, +                buf = out(reg) _, +                options(nostack), +            ); +        } +        /* +        for i in 0..new_bytes.len() { +            unsafe { +                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); +            } +        } +        */ + +        Ok(()) +    } +    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { +        // SAFETY: todo +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        // should get DCE +        if new_bytes.len() >= 16 { +            unsafe { core::hint::unreachable_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); +            let src = new_bytes.as_ptr(); + +            let rem = new_bytes.len() as isize; + +            // set_len early because there is no way to avoid the following asm!() writing that +            // same number of bytes into buf +            buf.set_len(buf.len() + new_bytes.len()); + +            core::arch::asm!( +                "7:", +                "cmp {rem:e}, 8", +                "jb 8f", +                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", +                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", +                "sub {rem:e}, 8", +                "jz 11f", +                "8:", +                "cmp {rem:e}, 4", +                "jb 9f", +                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                "sub {rem:e}, 4", +                "jz 11f", +                "9:", +                "cmp {rem:e}, 2", +                "jb 10f", +                "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                "sub {rem:e}, 2", +                "jz 11f", +                "10:", +                "cmp {rem:e}, 1", +                "jb 11f", +                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                "11:", +                src = in(reg) src, +                dest = in(reg) dest, +                rem = inout(reg) rem => _, +                buf = out(reg) _, +                options(nostack), +            ); +        } +        /* +        for i in 0..new_bytes.len() { +            unsafe { +                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); +            } +        } +        */ + +        Ok(()) +    } +    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { +        // SAFETY: todo +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_bytes = s.as_bytes(); + +        // should get DCE +        if new_bytes.len() >= 8 { +            unsafe { core::hint::unreachable_unchecked() } +        } + +        unsafe { +            let dest = buf.as_mut_ptr().offset(buf.len() as isize); +            let src = new_bytes.as_ptr(); + +            let rem = new_bytes.len() as isize; + +            // set_len early because there is no way to avoid the following asm!() writing that +            // same number of bytes into buf +            buf.set_len(buf.len() + new_bytes.len()); + +            core::arch::asm!( +                "8:", +                "cmp {rem:e}, 4", +                "jb 9f", +                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", +                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", +                "sub {rem:e}, 4", +                "jz 11f", +                "9:", +                "cmp {rem:e}, 2", +                "jb 10f", +                "mov {buf:x}, word ptr [{src} + {rem} - 2]", +                "mov word ptr [{dest} + {rem} - 2], {buf:x}", +                "sub {rem:e}, 2", +                "jz 11f", +                "10:", +                "cmp {rem:e}, 1", +                "jb 11f", +                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", +                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", +                "11:", +                src = in(reg) src, +                dest = in(reg) dest, +                rem = inout(reg) rem => _, +                buf = out(reg) _, +                options(nostack), +            ); +        } +        /* +        for i in 0..new_bytes.len() { +            unsafe { +                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); +            } +        } +        */ + +        Ok(()) +    } +    /// write a u8 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; + +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u16 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; + +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u32 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; + +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    /// write a u64 to the output as a base-16 integer. +    /// +    /// this is provided for optimization opportunities when the formatted integer can be written +    /// directly to the sink (rather than formatted to an intermediate buffer and output as a +    /// followup step) +    #[inline(always)] +    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { +        if v == 0 { +            return self.write_fixed_size("0"); +        } +        // we can fairly easily predict the size of a formatted string here with lzcnt, which also +        // means we can write directly into the correct offsets of the output string. +        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; + +        let buf = unsafe { self.buf.as_mut_vec() }; +        let new_len = buf.len() + printed_size; + +        unsafe { +            buf.set_len(new_len); +        } +        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; + +        loop { +            let digit = v % 16; +            let c = c_to_hex(digit as u8); +            unsafe { +                p = p.offset(-1); +                p.write(c); +            } +            v = v / 16; +            if v == 0 { +                break; +            } +        } + +        Ok(()) +    } +    fn span_start(&mut self, _ty: TokenType) {} +    fn span_end(&mut self, _ty: TokenType) {} +} @@ -139,6 +139,8 @@ pub mod real_mode;  pub use real_mode::Arch as x86_16;  mod safer_unchecked; +#[cfg(feature = "fmt")] +mod display;  const MEM_SIZE_STRINGS: [&'static str; 65] = [      "BUG", diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index ca5e580..d9799e1 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -4,10 +4,12 @@ use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors};  use yaxpeax_arch::display::*;  use crate::safer_unchecked::GetSaferUnchecked as _; -use crate::safer_unchecked::unreachable_kinda_unchecked;  use crate::MEM_SIZE_STRINGS;  use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex, OperandSpec}; +use crate::display::DisplaySink; +use crate::display::TokenType; +  impl fmt::Display for InstDecoder {      fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {          if self == &InstDecoder::default() { @@ -363,1069 +365,6 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Operand {      }  } -pub enum TokenType { -    Mnemonic, -    Operand, -    Immediate, -    Register, -    Offset, -} - -/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used -/// in formatting instructions for two reasons: -/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at -///   the same time as collecting output itself. -/// * `DisplaySink` implementations provides specialized functions for writing strings in -///   circumstances where a simple "use `core::fmt`" might incur unwanted overhead. -/// -/// spans are reported through `span_start` and `span_exit` to avoid constraining implementations -/// into tracking current output offset (which may not be knowable) or span size (which may be -/// knowable, but incur additional overhead to compute or track). -/// -/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must -/// exit spans in reverse order to when they are entered. a function sequence like -/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_exit(Operand)` is in error. -/// -/// the `write_*` helpers on `DisplaySink` may be able to take advantage of contraints described in -/// documentation here to better support writing some kinds of inputs than a fully-general solution -/// (such as `core::fmt`) might be able to yield. -/// -/// currently there are two motivating factors for `write_*` helpers: -/// -/// instruction formatting often involves writing small but variable-size strings, such as register -/// names, which is something of a pathological case for string appending as Rust currently exists: -/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an -/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference: -/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 -/// -/// there are similar papercuts around formatting integers as base-16 numbers, such as -/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are -/// not a significant source of overhead. but for programs bounded on decoding and printing -/// instructions, these can add up to significant overhead - on the order of 10-20% of total -/// runtime. -/// -/// `DisplaySink` -pub trait DisplaySink: fmt::Write { -    #[inline(always)] -    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.write_str(s) -    } - -    /// write a string to this sink that is less than 32 bytes. this is provided for optimization -    /// opportunities when writing a variable-length string with known max size. -    /// -    /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than -    /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a -    /// utf-8 string. this may corrupt Rust strings. -    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.write_str(s) -    } -    /// write a string to this sink that is less than 16 bytes. this is provided for optimization -    /// opportunities when writing a variable-length string with known max size. -    /// -    /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than -    /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a -    /// utf-8 string. this may corrupt Rust strings. -    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.write_str(s) -    } -    /// write a string to this sink that is less than 8 bytes. this is provided for optimization -    /// opportunities when writing a variable-length string with known max size. -    /// -    /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than -    /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a -    /// utf-8 string. this may corrupt Rust strings. -    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.write_str(s) -    } - -    /// write a u8 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> { -        write!(self, "{:x}", v) -    } -    /// write a u16 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> { -        write!(self, "{:x}", v) -    } -    /// write a u32 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> { -        write!(self, "{:x}", v) -    } -    /// write a u64 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> { -        write!(self, "{:x}", v) -    } -    /// enter a region inside which output corresponds to a `ty`. -    /// -    /// the default implementation of these functions is as a no-op. this way, providing span -    /// information to a `DisplaySink` that does not want it is eliminated at compile time. -    /// -    /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must -    /// end spans in reverse order to when they are entered. a function sequence like -    /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error. -    /// -    /// a simple use of `span_start`/`span_end` might look something like: -    /// ```compile_fail -    /// sink.span_start(Operand) -    /// sink.write_char('[') -    /// sink.span_start(Register) -    /// sink.write_fixed_size("rbp") -    /// sink.span_end(Register) -    /// sink.write_char(']') -    /// sink.span_end(Operand) -    /// ``` -    /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins, -    /// as well as the start and end of a register name. -    fn span_start(&mut self, _ty: TokenType) { } -    /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more. -    fn span_end(&mut self, _ty: TokenType) { } -} - -pub struct NoColorsSink<'a, T: fmt::Write> { -    pub out: &'a mut T, -} - -impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> { -    fn span_start(&mut self, _ty: TokenType) { } -    fn span_end(&mut self, _ty: TokenType) { } -} - -impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> { -    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.out.write_str(s) -    } -    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { -        self.out.write_char(c) -    } -    fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> { -        self.out.write_fmt(f) -    } -} - -/// helper to format `amd64` instructions with highest throughupt and least configuration. -/// -/// ### when to use this over `fmt::Display`? -/// -/// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionFormatter` may -/// support formatting options that may be difficult to configure for a `Display` impl. -/// additionally, `InstructionFormatter` may be able to specialize more effectively where -/// `fmt::Display`, writing to a generic `fmt::Write`, may not. -/// -/// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and -/// formatting instructions, [`InstructionFormatter::format_inst`] has been measured as up to 11% -/// faster than an equivalent `write!(buf, "{}", inst)`. -/// -/// `InstructionFormatter` involves internal allocations; if your use case for `yaxpeax-x86` -/// requires allocations never occurring, it is not an appropriate tool. -/// -/// ### example -/// -/// ``` -/// use yaxpeax_x86::long_mode::InstDecoder; -/// use yaxpeax_x86::long_mode::InstructionFormatter; -/// -/// let bytes = &[0x33, 0xc0]; -/// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); -/// let mut formatter = InstructionFormatter::new(); -/// assert_eq!( -///     formatter.format_inst(&inst).expect("can format"), -///     "xor eax, eax" -/// ); -/// -/// // or, getting the formatted instruction with `text_str`: -/// assert_eq!( -///     formatter.text_str(), -///     "xor eax, eax" -/// ); -/// ``` -pub struct InstructionFormatter { -    content: alloc::string::String, -} - -impl InstructionFormatter { -    /// create an `InstructionFormatter` with default settings. `InstructionFormatter`'s default -    /// settings format instructions identically to their corresponding `fmt::Display`. -    pub fn new() -> Self { -        let mut buf = alloc::string::String::new(); -        // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and -        // fuzzing and ..) -        buf.reserve(512); -        Self { -            content: buf, -        } -    } - -    /// format `inst` through this formatter, storing the formatted text in this formatter's -    /// internal buffer. returns a borrow of that same internal buffer for convenience. -    /// -    /// this clears and reuses an internal buffer; if an instruction had been previously formatted -    /// through this formatter, it will be overwritten. -    pub fn format_inst<'formatter>(&'formatter mut self, inst: &Instruction) -> Result<&'formatter str, fmt::Error> { -        let mut handle = self.write_handle(); - -        inst.write_to(&mut handle)?; - -        Ok(self.text_str()) -    } - -    /// return a borrow of this formatter's buffer. if an instruction has been formatted, the -    /// returned `&str` contains that formatted instruction's text. -    pub fn text_str(&self) -> &str { -        self.content.as_str() -    } - -    fn write_handle(&mut self) -> InstructionTextSink { -        self.content.clear(); -        InstructionTextSink { -            buf: &mut self.content -        } -    } -} - -/// this private struct is guaranteed to contain a string that is long enough to hold a -/// fully-formatted x86 instruction. -/// -/// this is wildly dangerous in general use, but because of the constrained lifecycle of -/// `InstructionTextSink` in the context of `InstructionFormatter`, it's OK to use here. it is -/// wildly dangerous because writing to this sink does not bounds check and assumes the contained -/// `buf` is large enough for any input. as an example: if `buf` did not have enough space -/// available from its current position, the `write_*` methods would write into whatever happens to -/// be after `buf` in memory. -/// -/// don't make this pub. if this is pub in docs, it's a bug. -struct InstructionTextSink<'buf> { -    buf: &'buf mut alloc::string::String -} - -impl<'buf> fmt::Write for InstructionTextSink<'buf> { -    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.buf.write_str(s) -    } -    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> { -        // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()` -        // is valid for writing, but may be uninitialized. -        // -        // this function is essentially equivalent to `Vec::push` specialized for the case that -        // `len < buf.capacity()`: -        // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006 -        unsafe { -            let underlying = self.buf.as_mut_vec(); -            // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to -            // write single ASCII characters. this is wrong in the general case, but `write_char` -            // here is not going to be used in the general case. -            if cfg!(debug_asertions) { -                panic!("InstructionTextSink::write_char would truncate output"); -            } -            let to_push = c as u8; -            // `ptr::write` here because `underlying.add(underlying.len())` may not point to an -            // initialized value, which would mean that turning that pointer into a `&mut u8` to -            // store through would be UB. `ptr::write` avoids taking the mut ref. -            underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push); -            // we have initialized all (one) bytes that `set_len` is increasing the length to -            // include. -            underlying.set_len(underlying.len() + 1); -        } -        Ok(()) -    } -} - -/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of -/// strings `yaxpeax-x86` uses in formatting instructions. -impl DisplaySink for alloc::string::String { -    #[inline(always)] -    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        self.reserve(s.len()); -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        if new_bytes.len() == 0 { -            unsafe { unreachable_kinda_unchecked() } -        } - -        if new_bytes.len() >= 16 { -            unsafe { unreachable_kinda_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); - -            // this used to be enough to bamboozle llvm away from -            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 -            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped -            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` -            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this -            // unrolls into some kind of appropriate series of `mov`. -            dest.offset(0 as isize).write(new_bytes[0]); -            for i in 1..new_bytes.len() { -                dest.offset(i as isize).write(new_bytes[i]); -            } - -            buf.set_len(buf.len() + new_bytes.len()); -        } - -        Ok(()) -    } -    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { -        self.reserve(s.len()); - -        // SAFETY: todo -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 32 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "6:", -                "cmp {rem:e}, 16", -                "jb 7f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 16]", -                "mov qword ptr [{dest} + {rem} - 16], {buf:r}", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 16", -                "jz 11f", -                "7:", -                "cmp {rem:e}, 8", -                "jb 8f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 8", -                "jz 11f", -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } -        /* -        for i in 0..new_bytes.len() { -            unsafe { -                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); -            } -        } -        */ - -        Ok(()) -    } -    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { -        self.reserve(s.len()); - -        // SAFETY: todo -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 16 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "7:", -                "cmp {rem:e}, 8", -                "jb 8f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 8", -                "jz 11f", -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } -        /* -        for i in 0..new_bytes.len() { -            unsafe { -                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); -            } -        } -        */ - -        Ok(()) -    } -    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { -        self.reserve(s.len()); - -        // SAFETY: todo -        let buf = unsafe { self.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 8 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } -        /* -        for i in 0..new_bytes.len() { -            unsafe { -                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); -            } -        } -        */ - -        Ok(()) -    } -    /// write a u8 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - -        self.reserve(printed_size); - -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    /// write a u16 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - -        self.reserve(printed_size); - -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    /// write a u32 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - -        self.reserve(printed_size); - -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    /// write a u64 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - -        self.reserve(printed_size); - -        let buf = unsafe { self.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    fn span_start(&mut self, _ty: TokenType) {} -    fn span_end(&mut self, _ty: TokenType) {} -} - -impl<'buf> DisplaySink for InstructionTextSink<'buf> { -    #[inline(always)] -    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> { -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        if new_bytes.len() == 0 { -            return Ok(()); -        } - -        if new_bytes.len() >= 16 { -            unsafe { unreachable_kinda_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); - -            // this used to be enough to bamboozle llvm away from -            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232 -            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped -            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s` -            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this -            // unrolls into some kind of appropriate series of `mov`. -            dest.offset(0 as isize).write(new_bytes[0]); -            for i in 1..new_bytes.len() { -                dest.offset(i as isize).write(new_bytes[i]); -            } - -            buf.set_len(buf.len() + new_bytes.len()); -        } - -        Ok(()) -    } -    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> { -        // SAFETY: todo -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 32 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "6:", -                "cmp {rem:e}, 16", -                "jb 7f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 16]", -                "mov qword ptr [{dest} + {rem} - 16], {buf:r}", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 16", -                "jz 11f", -                "7:", -                "cmp {rem:e}, 8", -                "jb 8f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 8", -                "jz 11f", -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } -        /* -        for i in 0..new_bytes.len() { -            unsafe { -                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); -            } -        } -        */ - -        Ok(()) -    } -    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> { -        // SAFETY: todo -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 16 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "7:", -                "cmp {rem:e}, 8", -                "jb 8f", -                "mov {buf:r}, qword ptr [{src} + {rem} - 8]", -                "mov qword ptr [{dest} + {rem} - 8], {buf:r}", -                "sub {rem:e}, 8", -                "jz 11f", -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } -        /* -        for i in 0..new_bytes.len() { -            unsafe { -                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); -            } -        } -        */ - -        Ok(()) -    } -    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> { -        // SAFETY: todo -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_bytes = s.as_bytes(); - -        // should get DCE -        if new_bytes.len() >= 8 { -            unsafe { core::hint::unreachable_unchecked() } -        } - -        unsafe { -            let dest = buf.as_mut_ptr().offset(buf.len() as isize); -            let src = new_bytes.as_ptr(); - -            let rem = new_bytes.len() as isize; - -            // set_len early because there is no way to avoid the following asm!() writing that -            // same number of bytes into buf -            buf.set_len(buf.len() + new_bytes.len()); - -            core::arch::asm!( -                "8:", -                "cmp {rem:e}, 4", -                "jb 9f", -                "mov {buf:e}, dword ptr [{src} + {rem} - 4]", -                "mov dword ptr [{dest} + {rem} - 4], {buf:e}", -                "sub {rem:e}, 4", -                "jz 11f", -                "9:", -                "cmp {rem:e}, 2", -                "jb 10f", -                "mov {buf:x}, word ptr [{src} + {rem} - 2]", -                "mov word ptr [{dest} + {rem} - 2], {buf:x}", -                "sub {rem:e}, 2", -                "jz 11f", -                "10:", -                "cmp {rem:e}, 1", -                "jb 11f", -                "mov {buf:l}, byte ptr [{src} + {rem} - 1]", -                "mov byte ptr [{dest} + {rem} - 1], {buf:l}", -                "11:", -                src = in(reg) src, -                dest = in(reg) dest, -                rem = inout(reg) rem => _, -                buf = out(reg) _, -                options(nostack), -            ); -        } -        /* -        for i in 0..new_bytes.len() { -            unsafe { -                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]); -            } -        } -        */ - -        Ok(()) -    } -    /// write a u8 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize; - -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    /// write a u16 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize; - -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    /// write a u32 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize; - -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    /// write a u64 to the output as a base-16 integer. -    /// -    /// this is provided for optimization opportunities when the formatted integer can be written -    /// directly to the sink (rather than formatted to an intermediate buffer and output as a -    /// followup step) -    #[inline(always)] -    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> { -        if v == 0 { -            return self.write_fixed_size("0"); -        } -        // we can fairly easily predict the size of a formatted string here with lzcnt, which also -        // means we can write directly into the correct offsets of the output string. -        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize; - -        let buf = unsafe { self.buf.as_mut_vec() }; -        let new_len = buf.len() + printed_size; - -        unsafe { -            buf.set_len(new_len); -        } -        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) }; - -        loop { -            let digit = v % 16; -            let c = c_to_hex(digit as u8); -            unsafe { -                p = p.offset(-1); -                p.write(c); -            } -            v = v / 16; -            if v == 0 { -                break; -            } -        } - -        Ok(()) -    } -    fn span_start(&mut self, _ty: TokenType) {} -    fn span_end(&mut self, _ty: TokenType) {} -} -  struct ColorizingOperandVisitor<'a, T> {      f: &'a mut T,  } @@ -4772,6 +3711,9 @@ pub struct InstructionDisplayer<'instr> {   *   * so write to some Write thing i guess. bite me. i really just want to   * stop thinking about how to support printing instructions... + * + * UPDATE: really wish i thought of DisplaySink back then, really wish this was bounded as T: + * DisplaySink.   */  impl <'instr, T: fmt::Write, Y: YaxColors> Colorize<T, Y> for InstructionDisplayer<'instr> {      fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { @@ -4784,32 +3726,15 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize<T, Y> for InstructionDisplay  /// No per-operand context when contextualizing an instruction!  struct NoContext; -extern crate alloc; - -// TODO: find a better place to put this.... -fn c_to_hex(c: u8) -> u8 { -    /* -    static CHARSET: &'static [u8; 16] = b"0123456789abcdef"; -    CHARSET[c as usize] -    */ -    // the conditional branch below is faster than a lookup, yes -    if c < 10 { -        b'0' + c -    } else { -        b'a' + c - 10 -    } -} -  impl Instruction {      #[cfg_attr(feature="profiling", inline(never))]      pub fn write_to<T: DisplaySink>(&self, out: &mut T) -> fmt::Result {          contextualize_intel(self, out) -//        self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out)      }  }  #[cfg_attr(feature="profiling", inline(never))] -fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt::Result {      if instr.xacquire() {          out.write_fixed_size("xacquire ")?;      } @@ -4951,7 +3876,7 @@ fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt:      Ok(())  } -fn contextualize_c<T: fmt::Write>(instr: &Instruction, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize_c<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt::Result {      let mut brace_count = 0;      let mut prefixed = false; @@ -5286,23 +4211,22 @@ fn contextualize_c<T: fmt::Write>(instr: &Instruction, _address: u64, _context:  }  impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual<u64, NoContext, T, Y> for InstructionDisplayer<'instr> { -    fn contextualize(&self, _colors: &Y, address: u64, context: Option<&NoContext>, out: &mut T) -> fmt::Result { +    fn contextualize(&self, _colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result {          let InstructionDisplayer {              instr,              style,          } = self; +        let mut out = crate::display::NoColorsSink { +            out: out, +        }; +          match style {              DisplayStyle::Intel => { -                let mut out = NoColorsSink { -                    out, -                }; -                let out = &mut out; - -                contextualize_intel(instr, out) +                contextualize_intel(instr, &mut out)              }              DisplayStyle::C => { -                contextualize_c(instr, address, context, out) +                contextualize_c(instr, &mut out)              }          }      } @@ -5311,7 +4235,7 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual<u64, NoContext, T, Y>  #[cfg(feature="std")]  impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::String>], T, Y> for Instruction {      fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option<alloc::string::String>]>, out: &mut T) -> fmt::Result { -        let mut out = NoColorsSink { +        let mut out = crate::display::NoColorsSink {              out,          };          let out = &mut out; @@ -5528,3 +4452,87 @@ impl<'a, F: DisplaySink> crate::long_mode::OperandVisitor for RelativeBranchPrin      }  } +/// helper to format `amd64` instructions with highest throughput and least configuration. this is +/// functionally a buffer for one x86 instruction's text. +/// +/// ### when to use this over `fmt::Display`? +/// +/// `fmt::Display` is a fair choice in most cases. in some cases, `InstructionTextBuffer` may +/// support formatting options that may be difficult to configure for a `Display` impl. +/// additionally, `InstructionTextBuffer` may be able to specialize more effectively where +/// `fmt::Display`, writing to a generic `fmt::Write`, may not. +/// +/// if your use case for `yaxpeax-x86` involves being bounded on the speed of disassembling and +/// formatting instructions, [`InstructionTextBuffer::format_inst`] has been measured as up to 11% +/// faster than an equivalent `write!(buf, "{}", inst)`. +/// +/// `InstructionTextBuffer` involves internal allocations; if your use case for `yaxpeax-x86` +/// requires allocations never occurring, it is not an appropriate tool. +/// +/// ### example +/// +/// ``` +/// use yaxpeax_x86::long_mode::InstDecoder; +/// use yaxpeax_x86::long_mode::InstructionTextBuffer; +/// +/// let bytes = &[0x33, 0xc0]; +/// let inst = InstDecoder::default().decode_slice(bytes).expect("can decode"); +/// let mut text_buf = InstructionTextBuffer::new(); +/// assert_eq!( +///     text_buf.format_inst(&inst).expect("can format"), +///     "xor eax, eax" +/// ); +/// +/// // or, getting the formatted instruction with `text_str`: +/// assert_eq!( +///     text_buf.text_str(), +///     "xor eax, eax" +/// ); +/// ``` +pub struct InstructionTextBuffer { +    content: alloc::string::String, +} + +impl InstructionTextBuffer { +    /// create an `InstructionTextBuffer` with default settings. `InstructionTextBuffer`'s default +    /// settings format instructions identically to their corresponding `fmt::Display`. +    pub fn new() -> Self { +        let mut buf = alloc::string::String::new(); +        // TODO: move 512 out to a MAX_INSTRUCTION_LEN const and appropriate justification (and +        // fuzzing and ..) +        buf.reserve(512); +        Self { +            content: buf, +        } +    } + +    /// format `inst` into this buffer. returns a borrow of that same internal buffer for convenience. +    /// +    /// this clears and reuses an internal buffer; if an instruction had been previously formatted +    /// through this buffer, it will be overwritten. +    pub fn format_inst<'buf, 'instr>(&'buf mut self, display: &InstructionDisplayer<'instr>) -> Result<&'buf str, fmt::Error> { +        let mut handle = self.write_handle(); + +        match display.style { +            DisplayStyle::Intel => { +                contextualize_intel(&display.instr, &mut handle)?; +            } +            DisplayStyle::C => { +                contextualize_c(&display.instr, &mut handle)?; +            } +        } + +        Ok(self.text_str()) +    } + +    /// return a borrow of the internal buffer. if an instruction has been formatted, the +    /// returned `&str` contains that instruction's buffered text. +    pub fn text_str(&self) -> &str { +        self.content.as_str() +    } + +    fn write_handle(&mut self) -> crate::display::InstructionTextSink { +        self.content.clear(); +        crate::display::InstructionTextSink::new(&mut self.content) +    } +} diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 44ed89f..418d57f 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -8,8 +8,6 @@ pub use crate::MemoryAccessSize;  #[cfg(feature = "fmt")]  pub use self::display::{DisplayStyle, InstructionDisplayer}; -#[cfg(feature = "fmt")] -pub use self::display::{InstructionFormatter, NoColorsSink, DisplaySink, TokenType};  use core::cmp::PartialEq;  use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; | 
