From 0357471015bbbe54023a6b729d4d13361d8835b8 Mon Sep 17 00:00:00 2001
From: iximeow <me@iximeow.net>
Date: Sun, 23 Jun 2024 01:58:08 -0700
Subject: deduplicate write_lt_* impls

---
 src/display/display_sink.rs | 511 +++++++++++++++++---------------------------
 1 file changed, 194 insertions(+), 317 deletions(-)

diff --git a/src/display/display_sink.rs b/src/display/display_sink.rs
index 1fb8837..87f5609 100644
--- a/src/display/display_sink.rs
+++ b/src/display/display_sink.rs
@@ -491,69 +491,8 @@ mod instruction_text_sink {
                     panic!("InstructionTextSink::write_lt_32 would overflow output");
                 }
             }
-
-            // SAFETY: todo
-            let buf = unsafe { self.buf.as_mut_vec() };
-            let new_bytes = s.as_bytes();
-
-            // should get DCE
-            if new_bytes.len() >= 32 {
-                unsafe { core::hint::unreachable_unchecked() }
-            }
-
             unsafe {
-                let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-                let src = new_bytes.as_ptr();
-
-                let rem = new_bytes.len() as isize;
-
-                // set_len early because there is no way to avoid the following asm!() writing that
-                // same number of bytes into buf
-                buf.set_len(buf.len() + new_bytes.len());
-
-                core::arch::asm!(
-                    "6:",
-                    "cmp {rem:e}, 16",
-                    "jb 7f",
-                    "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
-                    "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
-                    "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                    "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                    "sub {rem:e}, 16",
-                    "jz 11f",
-                    "7:",
-                    "cmp {rem:e}, 8",
-                    "jb 8f",
-                    "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                    "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                    "sub {rem:e}, 8",
-                    "jz 11f",
-                    "8:",
-                    "cmp {rem:e}, 4",
-                    "jb 9f",
-                    "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                    "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                    "sub {rem:e}, 4",
-                    "jz 11f",
-                    "9:",
-                    "cmp {rem:e}, 2",
-                    "jb 10f",
-                    "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                    "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                    "sub {rem:e}, 2",
-                    "jz 11f",
-                    "10:",
-                    "cmp {rem:e}, 1",
-                    "jb 11f",
-                    "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                    "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                    "11:",
-                    src = in(reg) src,
-                    dest = in(reg) dest,
-                    rem = inout(reg) rem => _,
-                    buf = out(reg) _,
-                    options(nostack),
-                );
+                crate::display::display_sink::append_string_lt_32_unchecked(&mut self.buf, s);
             }
 
             Ok(())
@@ -565,59 +504,8 @@ mod instruction_text_sink {
                 }
             }
 
-            // SAFETY: todo
-            let buf = unsafe { self.buf.as_mut_vec() };
-            let new_bytes = s.as_bytes();
-
-            // should get DCE
-            if new_bytes.len() >= 16 {
-                unsafe { core::hint::unreachable_unchecked() }
-            }
-
             unsafe {
-                let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-                let src = new_bytes.as_ptr();
-
-                let rem = new_bytes.len() as isize;
-
-                // set_len early because there is no way to avoid the following asm!() writing that
-                // same number of bytes into buf
-                buf.set_len(buf.len() + new_bytes.len());
-
-                core::arch::asm!(
-                    "7:",
-                    "cmp {rem:e}, 8",
-                    "jb 8f",
-                    "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                    "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                    "sub {rem:e}, 8",
-                    "jz 11f",
-                    "8:",
-                    "cmp {rem:e}, 4",
-                    "jb 9f",
-                    "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                    "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                    "sub {rem:e}, 4",
-                    "jz 11f",
-                    "9:",
-                    "cmp {rem:e}, 2",
-                    "jb 10f",
-                    "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                    "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                    "sub {rem:e}, 2",
-                    "jz 11f",
-                    "10:",
-                    "cmp {rem:e}, 1",
-                    "jb 11f",
-                    "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                    "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                    "11:",
-                    src = in(reg) src,
-                    dest = in(reg) dest,
-                    rem = inout(reg) rem => _,
-                    buf = out(reg) _,
-                    options(nostack),
-                );
+                crate::display::display_sink::append_string_lt_16_unchecked(&mut self.buf, s);
             }
 
             Ok(())
@@ -629,52 +517,8 @@ mod instruction_text_sink {
                 }
             }
 
-            // SAFETY: todo
-            let buf = unsafe { self.buf.as_mut_vec() };
-            let new_bytes = s.as_bytes();
-
-            // should get DCE
-            if new_bytes.len() >= 8 {
-                unsafe { core::hint::unreachable_unchecked() }
-            }
-
             unsafe {
-                let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-                let src = new_bytes.as_ptr();
-
-                let rem = new_bytes.len() as isize;
-
-                // set_len early because there is no way to avoid the following asm!() writing that
-                // same number of bytes into buf
-                buf.set_len(buf.len() + new_bytes.len());
-
-                core::arch::asm!(
-                    "8:",
-                    "cmp {rem:e}, 4",
-                    "jb 9f",
-                    "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                    "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                    "sub {rem:e}, 4",
-                    "jz 11f",
-                    "9:",
-                    "cmp {rem:e}, 2",
-                    "jb 10f",
-                    "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                    "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                    "sub {rem:e}, 2",
-                    "jz 11f",
-                    "10:",
-                    "cmp {rem:e}, 1",
-                    "jb 11f",
-                    "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                    "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                    "11:",
-                    src = in(reg) src,
-                    dest = in(reg) dest,
-                    rem = inout(reg) rem => _,
-                    buf = out(reg) _,
-                    options(nostack),
-                );
+                crate::display::display_sink::append_string_lt_8_unchecked(&mut self.buf, s);
             }
 
             Ok(())
@@ -908,68 +752,8 @@ impl DisplaySink for alloc::string::String {
     unsafe fn write_lt_32(&mut self, s: &str) -> Result<(), fmt::Error> {
         self.reserve(s.len());
 
-        // SAFETY: todo
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 32 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
         unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "6:",
-                "cmp {rem:e}, 16",
-                "jb 7f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
-                "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 16",
-                "jz 11f",
-                "7:",
-                "cmp {rem:e}, 8",
-                "jb 8f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 8",
-                "jz 11f",
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
+            append_string_lt_32_unchecked(self, s);
         }
 
         Ok(())
@@ -977,59 +761,8 @@ impl DisplaySink for alloc::string::String {
     unsafe fn write_lt_16(&mut self, s: &str) -> Result<(), fmt::Error> {
         self.reserve(s.len());
 
-        // SAFETY: todo
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 16 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
         unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "7:",
-                "cmp {rem:e}, 8",
-                "jb 8f",
-                "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
-                "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
-                "sub {rem:e}, 8",
-                "jz 11f",
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
+            append_string_lt_16_unchecked(self, s);
         }
 
         Ok(())
@@ -1037,52 +770,8 @@ impl DisplaySink for alloc::string::String {
     unsafe fn write_lt_8(&mut self, s: &str) -> Result<(), fmt::Error> {
         self.reserve(s.len());
 
-        // SAFETY: todo
-        let buf = unsafe { self.as_mut_vec() };
-        let new_bytes = s.as_bytes();
-
-        // should get DCE
-        if new_bytes.len() >= 8 {
-            unsafe { core::hint::unreachable_unchecked() }
-        }
-
         unsafe {
-            let dest = buf.as_mut_ptr().offset(buf.len() as isize);
-            let src = new_bytes.as_ptr();
-
-            let rem = new_bytes.len() as isize;
-
-            // set_len early because there is no way to avoid the following asm!() writing that
-            // same number of bytes into buf
-            buf.set_len(buf.len() + new_bytes.len());
-
-            core::arch::asm!(
-                "8:",
-                "cmp {rem:e}, 4",
-                "jb 9f",
-                "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
-                "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
-                "sub {rem:e}, 4",
-                "jz 11f",
-                "9:",
-                "cmp {rem:e}, 2",
-                "jb 10f",
-                "mov {buf:x}, word ptr [{src} + {rem} - 2]",
-                "mov word ptr [{dest} + {rem} - 2], {buf:x}",
-                "sub {rem:e}, 2",
-                "jz 11f",
-                "10:",
-                "cmp {rem:e}, 1",
-                "jb 11f",
-                "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
-                "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
-                "11:",
-                src = in(reg) src,
-                dest = in(reg) dest,
-                rem = inout(reg) rem => _,
-                buf = out(reg) _,
-                options(nostack),
-            );
+            append_string_lt_8_unchecked(self, s);
         }
 
         Ok(())
@@ -1244,3 +933,191 @@ impl DisplaySink for alloc::string::String {
         Ok(())
     }
 }
+
+/// append `data` to `buf`, assuming `data` is shorter than 8 bytes and that `buf` has enough
+/// spare capacity to hold all bytes in `data`. the copy is done with inline x86 assembly.
+///
+/// Safety: callers must ensure `data.len() < 8` and `buf.capacity() - buf.len() >= data.len()`.
+#[inline(always)]
+unsafe fn append_string_lt_8_unchecked(buf: &mut alloc::string::String, data: &str) {
+    // SAFETY: only bytes of `data`, itself valid UTF-8, are appended, so `buf` remains UTF-8.
+    let buf = unsafe { buf.as_mut_vec() };
+    let new_bytes = data.as_bytes();
+
+    // tell the optimizer `data.len() >= 8` cannot happen; the check itself should get DCE'd
+    if new_bytes.len() >= 8 {
+        unsafe { core::hint::unreachable_unchecked() }
+    }
+
+    unsafe {
+        let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+        let src = new_bytes.as_ptr();
+
+        let rem = new_bytes.len() as isize;
+
+        // set_len before copying: the asm!() below always writes exactly `new_bytes.len()`
+        // bytes starting at `dest`, so the new length is accurate once the asm has run
+        buf.set_len(buf.len() + new_bytes.len());
+
+        core::arch::asm!(
+            "8:", // if 4 or more bytes remain, copy the last 4 of them and shrink `rem` by 4
+            "cmp {rem:e}, 4",
+            "jb 9f",
+            "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
+            "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
+            "sub {rem:e}, 4",
+            "jz 11f",
+            "9:", // if 2 or more bytes remain, copy the last 2 of them and shrink `rem` by 2
+            "cmp {rem:e}, 2",
+            "jb 10f",
+            "mov {buf:x}, word ptr [{src} + {rem} - 2]",
+            "mov word ptr [{dest} + {rem} - 2], {buf:x}",
+            "sub {rem:e}, 2",
+            "jz 11f",
+            "10:", // if one byte remains, copy it
+            "cmp {rem:e}, 1",
+            "jb 11f",
+            "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
+            "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
+            "11:", // done: `rem` bytes were copied from the tail of `data` towards its start
+            src = in(reg) src,
+            dest = in(reg) dest,
+            rem = inout(reg) rem => _,
+            buf = out(reg) _,
+            options(nostack),
+        );
+    }
+}
+
+/// append `data` to `buf`, assuming `data` is shorter than 16 bytes and that `buf` has enough
+/// spare capacity to hold all bytes in `data`. the copy is done with inline x86 assembly.
+///
+/// Safety: callers must ensure `data.len() < 16` and `buf.capacity() - buf.len() >= data.len()`.
+#[inline(always)]
+unsafe fn append_string_lt_16_unchecked(buf: &mut alloc::string::String, data: &str) {
+    // SAFETY: only bytes of `data`, itself valid UTF-8, are appended, so `buf` remains UTF-8.
+    let buf = unsafe { buf.as_mut_vec() };
+    let new_bytes = data.as_bytes();
+
+    // tell the optimizer `data.len() >= 16` cannot happen; the check itself should get DCE'd
+    if new_bytes.len() >= 16 {
+        unsafe { core::hint::unreachable_unchecked() }
+    }
+
+    unsafe {
+        let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+        let src = new_bytes.as_ptr();
+
+        let rem = new_bytes.len() as isize;
+
+        // set_len before copying: the asm!() below always writes exactly `new_bytes.len()`
+        // bytes starting at `dest`, so the new length is accurate once the asm has run
+        buf.set_len(buf.len() + new_bytes.len());
+
+        core::arch::asm!(
+            "7:", // if 8 or more bytes remain, copy the last 8 of them and shrink `rem` by 8
+            "cmp {rem:e}, 8",
+            "jb 8f",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
+            "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
+            "sub {rem:e}, 8",
+            "jz 11f",
+            "8:", // if 4 or more bytes remain, copy the last 4 of them and shrink `rem` by 4
+            "cmp {rem:e}, 4",
+            "jb 9f",
+            "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
+            "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
+            "sub {rem:e}, 4",
+            "jz 11f",
+            "9:", // if 2 or more bytes remain, copy the last 2 of them and shrink `rem` by 2
+            "cmp {rem:e}, 2",
+            "jb 10f",
+            "mov {buf:x}, word ptr [{src} + {rem} - 2]",
+            "mov word ptr [{dest} + {rem} - 2], {buf:x}",
+            "sub {rem:e}, 2",
+            "jz 11f",
+            "10:", // if one byte remains, copy it
+            "cmp {rem:e}, 1",
+            "jb 11f",
+            "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
+            "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
+            "11:", // done: `rem` bytes were copied from the tail of `data` towards its start
+            src = in(reg) src,
+            dest = in(reg) dest,
+            rem = inout(reg) rem => _,
+            buf = out(reg) _,
+            options(nostack),
+        );
+    }
+}
+
+/// append `data` to `buf`, assuming `data` is shorter than 32 bytes and that `buf` has enough
+/// spare capacity to hold all bytes in `data`. the copy is done with inline x86 assembly.
+///
+/// Safety: callers must ensure `data.len() < 32` and `buf.capacity() - buf.len() >= data.len()`.
+#[inline(always)]
+unsafe fn append_string_lt_32_unchecked(buf: &mut alloc::string::String, data: &str) {
+    // SAFETY: only bytes of `data`, itself valid UTF-8, are appended, so `buf` remains UTF-8.
+    let buf = unsafe { buf.as_mut_vec() };
+    let new_bytes = data.as_bytes();
+
+    // tell the optimizer `data.len() >= 32` cannot happen; the check itself should get DCE'd
+    if new_bytes.len() >= 32 {
+        unsafe { core::hint::unreachable_unchecked() }
+    }
+
+    unsafe {
+        let dest = buf.as_mut_ptr().offset(buf.len() as isize);
+        let src = new_bytes.as_ptr();
+
+        let rem = new_bytes.len() as isize;
+
+        // set_len before copying: the asm!() below always writes exactly `new_bytes.len()`
+        // bytes starting at `dest`, so the new length is accurate once the asm has run
+        buf.set_len(buf.len() + new_bytes.len());
+
+        core::arch::asm!(
+            "6:", // if 16 or more bytes remain, copy the last 16 as two 8-byte moves
+            "cmp {rem:e}, 16",
+            "jb 7f",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 16]",
+            "mov qword ptr [{dest} + {rem} - 16], {buf:r}",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
+            "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
+            "sub {rem:e}, 16",
+            "jz 11f",
+            "7:", // if 8 or more bytes remain, copy the last 8 of them and shrink `rem` by 8
+            "cmp {rem:e}, 8",
+            "jb 8f",
+            "mov {buf:r}, qword ptr [{src} + {rem} - 8]",
+            "mov qword ptr [{dest} + {rem} - 8], {buf:r}",
+            "sub {rem:e}, 8",
+            "jz 11f",
+            "8:", // if 4 or more bytes remain, copy the last 4 of them and shrink `rem` by 4
+            "cmp {rem:e}, 4",
+            "jb 9f",
+            "mov {buf:e}, dword ptr [{src} + {rem} - 4]",
+            "mov dword ptr [{dest} + {rem} - 4], {buf:e}",
+            "sub {rem:e}, 4",
+            "jz 11f",
+            "9:", // if 2 or more bytes remain, copy the last 2 of them and shrink `rem` by 2
+            "cmp {rem:e}, 2",
+            "jb 10f",
+            "mov {buf:x}, word ptr [{src} + {rem} - 2]",
+            "mov word ptr [{dest} + {rem} - 2], {buf:x}",
+            "sub {rem:e}, 2",
+            "jz 11f",
+            "10:", // if one byte remains, copy it
+            "cmp {rem:e}, 1",
+            "jb 11f",
+            "mov {buf:l}, byte ptr [{src} + {rem} - 1]",
+            "mov byte ptr [{dest} + {rem} - 1], {buf:l}",
+            "11:", // done: `rem` bytes were copied from the tail of `data` towards its start
+            src = in(reg) src,
+            dest = in(reg) dest,
+            rem = inout(reg) rem => _,
+            buf = out(reg) _,
+            options(nostack),
+        );
+    }
+}
-- 
cgit v1.1