From 347042c45ced56b37a665a2c4b042b7f7aae8e03 Mon Sep 17 00:00:00 2001
From: iximeow <me@iximeow.net>
Date: Sat, 22 Jun 2024 00:25:01 -0700
Subject: extract reusable display bits into yaxpeax-arch, add a visitor fn to
 Operand

comes with deleting the body of impl Colorize for Operand, because we can reuse the normal operand formatting code
---
 src/display.rs | 1076 --------------------------------------------------------
 1 file changed, 1076 deletions(-)

(limited to 'src/display.rs')

diff --git a/src/display.rs b/src/display.rs
index 9b72cb3..e69de29 100644
--- a/src/display.rs
+++ b/src/display.rs
@@ -1,1076 +0,0 @@
-use core::fmt;
-
-use crate::safer_unchecked::unreachable_kinda_unchecked;
-
-extern crate alloc;
-
-// TODO: find a better place to put this....
-fn c_to_hex(c: u8) -> u8 {
-    /*
-    static CHARSET: &'static [u8; 16] = b"0123456789abcdef";
-    CHARSET[c as usize]
-    */
-    // the conditional branch below is faster than a lookup, yes
-    if c < 10 {
-        b'0' + c
-    } else {
-        b'a' + c - 10
-    }
-}
-
-pub enum TokenType {
-    Mnemonic,
-    Operand,
-    Immediate,
-    Register,
-    Offset,
-}
-
-/// `DisplaySink` allows client code to collect output and minimal markup. this is currently used
-/// in formatting instructions for two reasons:
-/// * `DisplaySink` implementations have the opportunity to collect starts and ends of tokens at
-///   the same time as collecting output itself.
-/// * `DisplaySink` implementations provides specialized functions for writing strings in
-///   circumstances where a simple "use `core::fmt`" might incur unwanted overhead.
-///
-/// spans are reported through `span_start` and `span_exit` to avoid constraining implementations
-/// into tracking current output offset (which may not be knowable) or span size (which may be
-/// knowable, but incur additional overhead to compute or track).
-///
-/// spans are entered and exited in a FILO manner: a function writing to some `DisplaySink` must
-/// exit spans in reverse order to when they are entered. a function sequence like
-/// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_exit(Operand)` is in error.
-///
-/// the `write_*` helpers on `DisplaySink` may be able to take advantage of contraints described in
-/// documentation here to better support writing some kinds of inputs than a fully-general solution
-/// (such as `core::fmt`) might be able to yield.
-///
-/// currently there are two motivating factors for `write_*` helpers:
-///
-/// instruction formatting often involves writing small but variable-size strings, such as register
-/// names, which is something of a pathological case for string appending as Rust currently exists:
-/// this often becomes `memcpy` and specifically a call to the platform's `memcpy` (rather than an
-/// inlined `rep movsb`) just to move 3-5 bytes. one relevant Rust issue for reference:
-/// https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
-///
-/// there are similar papercuts around formatting integers as base-16 numbers, such as
-/// https://github.com/rust-lang/rust/pull/122770 . in isolation and in most applications these are
-/// not a significant source of overhead. but for programs bounded on decoding and printing
-/// instructions, these can add up to significant overhead - on the order of 10-20% of total
-/// runtime.
-///
-/// `DisplaySink`
-pub trait DisplaySink: fmt::Write {
-    #[inline(always)]
-    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.write_str(s)
-    }
-
-    /// write a string to this sink that is less than 32 bytes. this is provided for optimization
-    /// opportunities when writing a variable-length string with known max size.
-    ///
-    /// SAFETY: the provided `s` must be less than 32 bytes. if the provided string is longer than
-    /// 31 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
-    /// utf-8 string. this may corrupt Rust strings.
-    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.write_str(s)
-    }
-    /// write a string to this sink that is less than 16 bytes. this is provided for optimization
-    /// opportunities when writing a variable-length string with known max size.
-    ///
-    /// SAFETY: the provided `s` must be less than 16 bytes. if the provided string is longer than
-    /// 15 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
-    /// utf-8 string. this may corrupt Rust strings.
-    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.write_str(s)
-    }
-    /// write a string to this sink that is less than 8 bytes. this is provided for optimization
-    /// opportunities when writing a variable-length string with known max size.
-    ///
-    /// SAFETY: the provided `s` must be less than 8 bytes. if the provided string is longer than
-    /// 7 bytes, implementations may only copy part of a multi-byte codepoint while writing to a
-    /// utf-8 string. this may corrupt Rust strings.
-    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.write_str(s)
-    }
-
-    /// write a u8 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    fn write_u8(&mut self, v: u8) -> Result<(), core::fmt::Error> {
-        write!(self, "{:x}", v)
-    }
-    /// write a u16 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    fn write_u16(&mut self, v: u16) -> Result<(), core::fmt::Error> {
-        write!(self, "{:x}", v)
-    }
-    /// write a u32 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    fn write_u32(&mut self, v: u32) -> Result<(), core::fmt::Error> {
-        write!(self, "{:x}", v)
-    }
-    /// write a u64 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    fn write_u64(&mut self, v: u64) -> Result<(), core::fmt::Error> {
-        write!(self, "{:x}", v)
-    }
-    /// enter a region inside which output corresponds to a `ty`.
-    ///
-    /// the default implementation of these functions is as a no-op. this way, providing span
-    /// information to a `DisplaySink` that does not want it is eliminated at compile time.
-    ///
-    /// spans are entered and ended in a FILO manner: a function writing to some `DisplaySink` must
-    /// end spans in reverse order to when they are entered. a function sequence like
-    /// `sink.span_start(Operand); sink.span_start(Immediate); sink.span_end(Operand)` is in error.
-    ///
-    /// a simple use of `span_start`/`span_end` might look something like:
-    /// ```compile_fail
-    /// sink.span_start(Operand)
-    /// sink.write_char('[')
-    /// sink.span_start(Register)
-    /// sink.write_fixed_size("rbp")
-    /// sink.span_end(Register)
-    /// sink.write_char(']')
-    /// sink.span_end(Operand)
-    /// ```
-    /// which writes the text `[rbp]`, with span indicators where the operand (`[ ... ]`) begins,
-    /// as well as the start and end of a register name.
-    fn span_start(&mut self, _ty: TokenType) { }
-    /// end a region where a `ty` was written. see docs on [`DisplaySink::span_start`] for more.
-    fn span_end(&mut self, _ty: TokenType) { }
-}
-
-pub struct NoColorsSink<'a, T: fmt::Write> {
-    pub out: &'a mut T,
-}
-
-impl<'a, T: fmt::Write> DisplaySink for NoColorsSink<'a, T> {
-    fn span_start(&mut self, _ty: TokenType) { }
-    fn span_end(&mut self, _ty: TokenType) { }
-}
-
-impl<'a, T: fmt::Write> fmt::Write for NoColorsSink<'a, T> {
-    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.out.write_str(s)
-    }
-    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
-        self.out.write_char(c)
-    }
-    fn write_fmt(&mut self, f: fmt::Arguments) -> Result<(), core::fmt::Error> {
-        self.out.write_fmt(f)
-    }
-}
-
-/// this is an implementation detail of yaxpeax-arch and related crates. if you are a user of the
-/// disassemblers, do not use this struct. do not depend on this struct existing. this struct is
-/// not stable. this struct is not safe for general use. if you use this struct you and your
-/// program will be eaten by gremlins.
-///
-/// if you are implementing an instruction formatter for the yaxpeax family of crates: this struct
-/// is guaranteed to contain a string that is long enough to hold a fully-formatted instruction.
-/// because the buffer is guaranteed to be long enough, writes through `InstructionTextSink` are
-/// not bounds-checked, and the buffer is never grown.
-///
-/// this is wildly dangerous in general use. the public constructor of `InstructionTextSink` is
-/// unsafe as a result. as used in `InstructionFormatter`, the buffer is guaranteed to be
-/// `clear()`ed before use, `InstructionFormatter` ensures the buffer is large enough, *and*
-/// `InstructionFormatter` never allows `InstructionTextSink` to exist in a context where it would
-/// be written to without being rewound first.
-///
-/// because this opens a very large hole through which `fmt::Write` can become unsafe, incorrect
-/// uses of this struct will be hard to debug in general. `InstructionFormatter` is probably at the
-/// limit of easily-reasoned-about lifecycle of the buffer, which "only" leaves the problem of
-/// ensuring that instruction formatting impls this buffer is passed to are appropriately sized.
-///
-/// this is intended to be hidden in docs. if you see this in docs, it's a bug.
-#[doc(hidden)]
-pub(crate) struct InstructionTextSink<'buf> {
-    buf: &'buf mut alloc::string::String
-}
-
-impl<'buf> InstructionTextSink<'buf> {
-    pub unsafe fn new(buf: &'buf mut alloc::string::String) -> Self {
-        Self { buf }
-    }
-}
-
-impl<'buf> fmt::Write for InstructionTextSink<'buf> {
-    fn write_str(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.buf.write_str(s)
-    }
-    fn write_char(&mut self, c: char) -> Result<(), core::fmt::Error> {
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + 1 {
-                panic!("InstructionTextSink::write_char would overflow output");
-            }
-        }
-        // SAFETY: `buf` is assumed to be long enough to hold all input, `buf` at `underlying.len()`
-        // is valid for writing, but may be uninitialized.
-        //
-        // this function is essentially equivalent to `Vec::push` specialized for the case that
-        // `len < buf.capacity()`:
-        // https://github.com/rust-lang/rust/blob/be9e27e/library/alloc/src/vec/mod.rs#L1993-L2006
-        unsafe {
-            let underlying = self.buf.as_mut_vec();
-            // `InstructionTextSink::write_char` is only used by yaxpeax-x86, and is only used to
-            // write single ASCII characters. this is wrong in the general case, but `write_char`
-            // here is not going to be used in the general case.
-            if cfg!(debug_asertions) {
-                panic!("InstructionTextSink::write_char would truncate output");
-            }
-            let to_push = c as u8;
-            // `ptr::write` here because `underlying.add(underlying.len())` may not point to an
-            // initialized value, which would mean that turning that pointer into a `&mut u8` to
-            // store through would be UB. `ptr::write` avoids taking the mut ref.
-            underlying.as_mut_ptr().offset(underlying.len() as isize).write(to_push);
-            // we have initialized all (one) bytes that `set_len` is increasing the length to
-            // include.
-            underlying.set_len(underlying.len() + 1);
-        }
-        Ok(())
-    }
-}
-
-/// this DisplaySink impl exists to support somewhat more performant buffering of the kinds of
-/// strings `yaxpeax-x86` uses in formatting instructions.
-impl DisplaySink for alloc::string::String {
-    #[inline(always)]
-    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        self.reserve(s.len());
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        if new_bytes.len() == 0 {
-            unsafe { unreachable_kinda_unchecked() }
-        }
-
-        if new_bytes.len() >= 16 {
-            unsafe { unreachable_kinda_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-
-            // this used to be enough to bamboozle llvm away from
-            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
-            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped
-            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`
-            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this
-            // unrolls into some kind of appropriate series of `mov`.
-            dest.offset(0 as isize).write(new_bytes[0]);
-            for i in 1..new_bytes.len() {
-                dest.offset(i as isize).write(new_bytes[i]);
-            }
-
-            buf.set_len(buf.len() + new_bytes.len());
-        }
-
-        Ok(())
-    }
-    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
-        self.reserve(s.len());
-
-        // SAFETY: todo
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 32 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "6:",
-                "cmp {rem:e}, 16",
-                "jb 7f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
-                "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 16",
-                "jz 11f",
-                "7:",
-                "cmp {rem:e}, 8",
-                "jb 8f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 8",
-                "jz 11f",
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
-        }
-        /*
-        for i in 0..new_bytes.len() {
-            unsafe {
-                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]);
-            }
-        }
-        */
-
-        Ok(())
-    }
-    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
-        self.reserve(s.len());
-
-        // SAFETY: todo
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 16 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "7:",
-                "cmp {rem:e}, 8",
-                "jb 8f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 8",
-                "jz 11f",
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
-        }
-        /*
-        for i in 0..new_bytes.len() {
-            unsafe {
-                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]);
-            }
-        }
-        */
-
-        Ok(())
-    }
-    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
-        self.reserve(s.len());
-
-        // SAFETY: todo
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 8 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
-        }
-        /*
-        for i in 0..new_bytes.len() {
-            unsafe {
-                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]);
-            }
-        }
-        */
-
-        Ok(())
-    }
-    /// write a u8 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize;
-
-        self.reserve(printed_size);
-
-        let buf = unsafe { self.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    /// write a u16 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize;
-
-        self.reserve(printed_size);
-
-        let buf = unsafe { self.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    /// write a u32 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize;
-
-        self.reserve(printed_size);
-
-        let buf = unsafe { self.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    /// write a u64 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize;
-
-        self.reserve(printed_size);
-
-        let buf = unsafe { self.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    fn span_start(&mut self, _ty: TokenType) {}
-    fn span_end(&mut self, _ty: TokenType) {}
-}
-
-impl<'buf> DisplaySink for InstructionTextSink<'buf> {
-    #[inline(always)]
-    fn write_fixed_size(&mut self, s: &str) -> Result<(), core::fmt::Error> {
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + s.len() {
-                panic!("InstructionTextSink::write_fixed_size would overflow output");
-            }
-        }
-
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        if new_bytes.len() == 0 {
-            return Ok(());
-        }
-
-        if new_bytes.len() >= 16 {
-            unsafe { unreachable_kinda_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-
-            // this used to be enough to bamboozle llvm away from
-            // https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232https://github.com/rust-lang/rust/issues/92993#issuecomment-2028915232
-            // if `s` is not fixed size. somewhere between Rust 1.68 and Rust 1.74 this stopped
-            // being sufficient, so `write_fixed_size` truly should only be used for fixed size `s`
-            // (otherwise this is a libc memcpy call in disguise). for fixed-size strings this
-            // unrolls into some kind of appropriate series of `mov`.
-            dest.offset(0 as isize).write(new_bytes[0]);
-            for i in 1..new_bytes.len() {
-                dest.offset(i as isize).write(new_bytes[i]);
-            }
-
-            buf.set_len(buf.len() + new_bytes.len());
-        }
-
-        Ok(())
-    }
-    unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + s.len() {
-                panic!("InstructionTextSink::write_lt_32 would overflow output");
-            }
-        }
-
-        // SAFETY: todo
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 32 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "6:",
-                "cmp {rem:e}, 16",
-                "jb 7f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
-                "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 16",
-                "jz 11f",
-                "7:",
-                "cmp {rem:e}, 8",
-                "jb 8f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 8",
-                "jz 11f",
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
-        }
-        /*
-        for i in 0..new_bytes.len() {
-            unsafe {
-                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]);
-            }
-        }
-        */
-
-        Ok(())
-    }
-    unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + s.len() {
-                panic!("InstructionTextSink::write_lt_16 would overflow output");
-            }
-        }
-
-        // SAFETY: todo
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 16 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "7:",
-                "cmp {rem:e}, 8",
-                "jb 8f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 8",
-                "jz 11f",
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
-        }
-        /*
-        for i in 0..new_bytes.len() {
-            unsafe {
-                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]);
-            }
-        }
-        */
-
-        Ok(())
-    }
-    unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + s.len() {
-                panic!("InstructionTextSink::write_lt_8 would overflow output");
-            }
-        }
-
-        // SAFETY: todo
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 8 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
-        unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
-        }
-        /*
-        for i in 0..new_bytes.len() {
-            unsafe {
-                buf.as_mut_ptr().offset(buf.len() as isize).offset(i as isize).write_volatile(new_bytes[i]);
-            }
-        }
-        */
-
-        Ok(())
-    }
-    /// write a u8 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u8(&mut self, mut v: u8) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((8 - v.leading_zeros() + 3) >> 2) as usize;
-
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + printed_size {
-                panic!("InstructionTextSink::write_u8 would overflow output");
-            }
-        }
-
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    /// write a u16 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u16(&mut self, mut v: u16) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((16 - v.leading_zeros() + 3) >> 2) as usize;
-
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + printed_size {
-                panic!("InstructionTextSink::write_u16 would overflow output");
-            }
-        }
-
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    /// write a u32 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u32(&mut self, mut v: u32) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((32 - v.leading_zeros() + 3) >> 2) as usize;
-
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + printed_size {
-                panic!("InstructionTextSink::write_u32 would overflow output");
-            }
-        }
-
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    /// write a u64 to the output as a base-16 integer.
-    ///
-    /// this is provided for optimization opportunities when the formatted integer can be written
-    /// directly to the sink (rather than formatted to an intermediate buffer and output as a
-    /// followup step)
-    #[inline(always)]
-    fn write_u64(&mut self, mut v: u64) -> Result<(), core::fmt::Error> {
-        if v == 0 {
-            return self.write_fixed_size("0");
-        }
-
-        // we can fairly easily predict the size of a formatted string here with lzcnt, which also
-        // means we can write directly into the correct offsets of the output string.
-        let printed_size = ((64 - v.leading_zeros() + 3) >> 2) as usize;
-
-        if cfg!(debug_assertions) {
-            if self.buf.capacity() < self.buf.len() + printed_size {
-                panic!("InstructionTextSink::write_u64 would overflow output");
-            }
-        }
-
-        let buf = unsafe { self.buf.as_mut_vec() };
-        let new_len = buf.len() + printed_size;
-
-        unsafe {
-            buf.set_len(new_len);
-        }
-        let mut p = unsafe { buf.as_mut_ptr().offset(new_len as isize) };
-
-        loop {
-            let digit = v % 16;
-            let c = c_to_hex(digit as u8);
-            unsafe {
-                p = p.offset(-1);
-                p.write(c);
-            }
-            v = v / 16;
-            if v == 0 {
-                break;
-            }
-        }
-
-        Ok(())
-    }
-    fn span_start(&mut self, _ty: TokenType) {}
-    fn span_end(&mut self, _ty: TokenType) {}
-}
-- 
cgit v1.1