From 276172a5a888165f82075eba48bd6f79246c2dcc Mon Sep 17 00:00:00 2001
From: iximeow
Date: Mon, 2 Jun 2025 08:03:12 +0000
Subject: some armv7 decode helpers are trivial functions but didn't inline

both from_u8 and the build function here compiled to truly trivial code:
four instructions (mov rdi, rax; cmp 0xlim, rax; jae panic; ret) in the
hot path, and constrained register choice on the caller side. inlining
these makes for a *smaller* armv7 decoder, on the order of 5kb down from
5.5kb. in the process it also gets about 45% faster (400mb/s to 560mb/s)

inlining decode_into, then, really just helps the standalone decoder
benchmark case. this moves decode throughput from 560mb/s to 724mb/s.
---
 src/armv7.rs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/armv7.rs')

diff --git a/src/armv7.rs b/src/armv7.rs
index e025267..b8f33d3 100644
--- a/src/armv7.rs
+++ b/src/armv7.rs
@@ -1291,6 +1291,7 @@ impl Reg {
     /// create a new `Reg` with the specified number.
     ///
     /// panics if `bits` is out of range (16 or above).
+    #[inline]
     pub fn from_u8(bits: u8) -> Reg {
         if bits > 0b1111 {
             panic!("register number out of range");
@@ -1322,6 +1323,7 @@ impl CReg {
     /// create a new `CReg` with the specified number.
     ///
     /// panics if `bits` is out of range (16 or above).
+    #[inline]
     pub fn from_u8(bits: u8) -> CReg {
         if bits > 0b1111 {
             panic!("register number out of range");
@@ -1936,6 +1938,7 @@ impl Display for ConditionCode {
 }

 impl ConditionCode {
+    #[inline]
     fn build(value: u8) -> ConditionCode {
         match value {
             0b0000 => ConditionCode::EQ,
@@ -2204,6 +2207,7 @@ impl InstDecoder {

 #[allow(non_snake_case)]
 impl Decoder for InstDecoder {
+    #[inline]
     fn decode_into::Address, ::Word>>(&self, inst: &mut Instruction, words: &mut T) -> Result<(), ::DecodeError> {
         inst.set_w(false);
         inst.set_wide(false);
--
cgit v1.1