From d8083b08dc987adeda73fb13298383c6cf519596 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 15 Jan 2021 18:15:04 -0800 Subject: small perf tweaks. clearing reg_rrr and reg_mmm more efficiently is an extremely small win, but a win. read_imm_signed generally should inline well but runs afoul of some heuristic. inlining gets about 8% improved throughput on the (unrealistic) in-repo benchmark. it would be great to be able to avoid bounds checks somehow; it looks like they alone are another ~10% of decode time. i'm not sure how to pull that off while retaining the generic iterator parameter. might just not be possible. --- src/long_mode/mod.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/long_mode/mod.rs') diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index f15e2a1..f9be9ab 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -5835,8 +5835,10 @@ fn read_instr>(decoder: &InstDecoder, mut bytes_iter: T, in // use core::intrinsics::unlikely; let mut prefixes = Prefixes::new(0); - instruction.modrm_mmm.bank = RegisterBank::Q; - instruction.sib_index.bank = RegisterBank::Q; + // ever so slightly faster than just setting .bank: this allows the two assignments to merge + // into one `mov 0, dword [instruction + modrm_mmm_offset]` + instruction.modrm_mmm = RegSpec::rax(); + instruction.sib_index = RegSpec::rax(); fn escapes_are_prefixes_actually(prefixes: &mut Prefixes, opc_map: &mut Option) { match opc_map { @@ -8881,7 +8883,7 @@ fn read_imm_ivq>(bytes: &mut T, width: u8, length: &mut u8) } } -#[inline] +#[inline(always)] fn read_imm_signed>(bytes: &mut T, num_width: u8, length: &mut u8) -> Result { if num_width == 1 { *length += 1; -- cgit v1.1