From 3df0790c898d480eda6a906cbc9a3d3d6749a140 Mon Sep 17 00:00:00 2001
From: iximeow
Date: Sun, 2 Jan 2022 14:36:15 -0800
Subject: explicit inline annotations for kinda_uncheckeds

unfortunately, something about the wrapper functions adjusts codegen even
when the wrapper functions themselves are just calls to inner functions.
the in-tree benchmark (known to not be comprehensive, but enough to spot a
difference) showed a ~3.5% regression in throughput with the prior commit,
even though it doesn't change behavior at all.

explicit #[inline(always)] gets things to a state where the wrapper
functions do not penalize performance. for an example of the differences
in codegen, see below.

before:
```
< 141d4: 48 39 fa cmp %rdi,%rdx
< 141d7: 0f 84 0b 4d 00 00 je 18ee8 <_ZN5bench16do_decode_swathe17h694154735739ce4cE+0x4e58>
< 141dd: 0f b6 0f movzbl (%rdi),%ecx
< 141e0: 48 83 c7 01 add $0x1,%rdi
< 141e4: 48 89 7c 24 38 mov %rdi,0x38(%rsp)
... snip ...
```

after:
```
> 141d4: 48 39 ea cmp %rbp,%rdx
> 141d7: 0f 84 97 4c 00 00 je 18e74 <_ZN5bench16do_decode_swathe17h694154735739ce4cE+0x4de4>
> 141dd: 0f b6 4d 00 movzbl 0x0(%rbp),%ecx
> 141e1: 48 83 c5 01 add $0x1,%rbp
> 141e5: 48 89 6c 24 38 mov %rbp,0x38(%rsp)
... snip ...
```

there are several spans of code with this kind of change involved; there
are no explicit calls to `get_kinda_unchecked` or
`unreachable_kinda_unchecked`, but clearly a difference did make it through
to the benchmark's code.

while the choice of `rbp` instead of `rdi` wouldn't seem very interesting,
the instructions themselves are more substantially different: `0fb60f` vs
`0fb64d00`. to encode `[rbp + 0]`, the instruction requires a displacement,
and is one byte longer as a result. there are several instructions so
impacted, and i suspect the increased code size is what ended up changing
benchmark behavior.

after adding these `#[inline(always)]` annotations, there is no difference
in generated code with or without the `kinda_unchecked` helpers!
---
 src/safer_unchecked.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/safer_unchecked.rs b/src/safer_unchecked.rs
index afd0355..34216bc 100644
--- a/src/safer_unchecked.rs
+++ b/src/safer_unchecked.rs
@@ -7,6 +7,7 @@ pub trait GetSaferUnchecked<T> {
 }
 
 impl<T> GetSaferUnchecked<T> for [T] {
+    #[inline(always)]
     unsafe fn get_kinda_unchecked<I>(&self, index: I) -> &<I as SliceIndex<[T]>>::Output
     where
         I: SliceIndex<[T]>,
@@ -19,6 +20,7 @@ impl<T> GetSaferUnchecked<T> for [T] {
     }
 }
 
+#[inline(always)]
 pub unsafe fn unreachable_kinda_unchecked() -> ! {
     if cfg!(debug_assertions) {
         panic!("UB: Unreachable unchecked was executed")
-- 
cgit v1.1