reorder prefix checks, extract vex/evex prefix handling

sharing vex/evex invalid prefix checks improves codegen a bit, but ordering prefix checks by likeliest prefix first reduces time spent falling through prefix handling arms. both together are a notable improvement in throughput on typical x86 code. also bundled in here is some code motion changing where `mem_size = 0` and `operand_count = 2` are executed; this is because, at least on zen2 and cascade lake parts, bunching all stores to the instruction together caused small stalls getting into the decoder. spreading out the stores seems to mix these assignments with parts of the code that were not using memory anyway, and pipelines better.
author: iximeow <me@iximeow.net> 2022-04-21 02:31:40 -0700
committer: iximeow <me@iximeow.net> 2022-05-30 11:16:52 -0700
commit: f338c74656f6eef8b3080fa9f249b1cb733fd1a9 (patch)
tree: a7aaa075893b66516f4a10935a81a3e6d0b7556b /src
parent: e7f4950985ab9976e9d00599c9225327c64f6439 (diff)
1 files changed, 166 insertions, 171 deletions
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index 2cdb731..d426b2f 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -6422,53 +6422,29 @@ fn read_with_annotations<
     words.mark();
     let mut nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
     let mut next_rec = OPCODES[nextb as usize];
-    let mut prefixes = Prefixes::new(0);
+    instruction.prefixes = Prefixes::new(0);
 
     // default x86_64 registers to `[rax; 4]`
     instruction.regs = unsafe { core::mem::transmute(0u64) };
-    instruction.mem_size = 0;
     // default operands to [RegRRR, Nothing, Nothing, Nothing]
     instruction.operands = unsafe { core::mem::transmute(0x00_00_00_01) };
-    instruction.operand_count = 2;
 
-    let record: OpcodeRecord = loop {
-        let record = next_rec;
-        if nextb >= 0x40 && nextb < 0x50 {
-            let b = nextb;
-            sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
-                desc: InnerDescription::RexPrefix(b),
-                id: words.offset() as u32 * 8 - 8,
-            });
-            if words.offset() >= 15 {
-                return Err(DecodeError::TooLong);
-            }
-            nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
-            next_rec = unsafe {
-                core::ptr::read_volatile(&OPCODES[nextb as usize])
-            };
-            prefixes.rex_from(b);
-        } else if let Interpretation::Instruction(opc) = record.0 {
-            if words.offset() > 1 {
-                sink.record(
-                    words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1,
-                    InnerDescription::Boundary("prefixes end")
-                        .with_id(words.offset() as u32 * 8 - 9)
-                );
-            }
-            if opc != Opcode::Invalid {
+    let record: OperandCode = {
+        let prefixes = &mut instruction.prefixes;
+        let record = loop {
+            let record = next_rec;
+            if nextb >= 0x40 && nextb < 0x50 {
+                let b = nextb;
                 sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
-                    desc: InnerDescription::Opcode(opc),
+                    desc: InnerDescription::RexPrefix(b),
                     id: words.offset() as u32 * 8 - 8,
                 });
-            }
-            sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
-                desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),
-                id: words.offset() as u32 * 8 - 8 + 1,
-            });
-            break record;
-        } else {
-            let b = nextb;
-            if b == 0x0f {
+                nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
+                next_rec = unsafe {
+                    core::ptr::read_volatile(&OPCODES[nextb as usize])
+                };
+                prefixes.rex_from(b);
+            } else if let Interpretation::Instruction(opc) = record.0 {
                 if words.offset() > 1 {
                     sink.record(
                         words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1,
@@ -6476,126 +6452,49 @@ fn read_with_annotations<
                             .with_id(words.offset() as u32 * 8 - 9)
                     );
                 }
-                let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
-                if b == 0x38 {
-                    let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
-                    break read_0f38_opcode(b, &mut prefixes);
-                } else if b == 0x3a {
-                    let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
-                    break read_0f3a_opcode(b, &mut prefixes);
-                } else {
-                    break read_0f_opcode(b, &mut prefixes);
-                }
-            }
-            // some prefix seen after we saw rex, but before the 0f escape or an actual
-            // opcode. so we must forget the rex prefix!
-            // this is to handle sequences like 41660f21cf
-            // where if 660f21 were a valid opcode, 41 would apply a rex.b
-            // prefix, but since 660f21 is not valid, the opcode is interpreted
-            // as 0f21, where 66 is a prefix, which makes 41 not the last
-            // prefix before the opcode, and it's discarded.
-
-            // 2.3.2
-            // Any VEX-encoded instruction with a LOCK prefix preceding VEX will #UD.
-            // 2.3.3
-            // Any VEX-encoded instruction with a 66H, F2H, or F3H prefix preceding VEX
-            // will #UD.
-            // 2.3.4
-            // Any VEX-encoded instruction with a REX prefix proceeding VEX will #UD. 
-            if b == 0xc5 {
-                if prefixes.rex_unchecked().present() || prefixes.lock() || prefixes.operand_size() || prefixes.rep() || prefixes.repnz() {
-                    // rex and then vex is invalid! reject it.
-                    return Err(DecodeError::InvalidPrefixes);
-                } else {
-                    instruction.prefixes = prefixes;
-                    sink.record(
-                        words.offset() as u32 * 8 - 8,
-                        words.offset() as u32 * 8 - 1,
-                        InnerDescription::Misc("two-byte vex prefix (0xc5)")
-                            .with_id(words.offset() as u32 * 8 - 8)
-                    );
-                    vex::two_byte_vex(words, instruction, sink)?;
-                    return Ok(());
-                }
-            } else if b == 0xc4 {
-                if prefixes.rex_unchecked().present() || prefixes.lock() || prefixes.operand_size() || prefixes.rep() || prefixes.repnz() {
-                    // rex and then vex is invalid! reject it.
-                    return Err(DecodeError::InvalidPrefixes);
-                } else {
-                    instruction.prefixes = prefixes;
-                    sink.record(
-                        words.offset() as u32 * 8 - 8,
-                        words.offset() as u32 * 8 - 1,
-                        InnerDescription::Misc("three-byte vex prefix (0xc4)")
-                            .with_id(words.offset() as u32 * 8 - 8)
-                    );
-                    vex::three_byte_vex(words, instruction, sink)?;
-                    return Ok(());
-                }
-            } else if b == 0x62 {
-                if prefixes.rex_unchecked().present() || prefixes.lock() || prefixes.operand_size() || prefixes.rep() || prefixes.repnz() {
-                    // rex and then evex is invalid! reject it.
-                    return Err(DecodeError::InvalidPrefixes);
-                } else {
-                    instruction.prefixes = prefixes;
-                    sink.record(
-                        words.offset() as u32 * 8 - 8,
-                        words.offset() as u32 * 8 - 1,
-                        InnerDescription::Misc("evex prefix (0x62)")
-                            .with_id(words.offset() as u32 * 8 - 8)
-                    );
-                    evex::read_evex(words, instruction, None, sink)?;
-                    return Ok(());
+                if opc != Opcode::Invalid {
+                    sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
+                        desc: InnerDescription::Opcode(opc),
+                        id: words.offset() as u32 * 8 - 8,
+                    });
                 }
-            }
-
-            nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
-            next_rec = unsafe {
-                core::ptr::read_volatile(&OPCODES[nextb as usize])
-            };
-            if words.offset() >= 15 {
-                return Err(DecodeError::TooLong);
-            }
-            if prefixes.rex.bits != 0 {
-                sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
-                    desc: InnerDescription::Misc("invalidates prior rex prefix"),
-                    id: (words.offset() as u32 * 8 - 16) + 1,
+                sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription {
+                    desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }),
+                    id: words.offset() as u32 * 8 - 8 + 1,
                 });
-            }
-            prefixes.rex_from(0);
-            match b {
-                0x26 |
-                0x2e |
-                0x36 |
-                0x3e => {
-                    /* no-op in amd64 */
-                    sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
-                        desc: InnerDescription::Misc("ignored prefix in 64-bit mode"),
-                        id: words.offset() as u32 * 8 - 16,
-                    });
-                },
-                0x64 => {
-                    sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
-                        desc: InnerDescription::SegmentPrefix(Segment::FS),
-                        id: words.offset() as u32 * 8 - 16,
-                    });
-                    prefixes.set_fs();
-                },
-                0x65 => {
-                    sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
-                        desc: InnerDescription::SegmentPrefix(Segment::GS),
-                        id: words.offset() as u32 * 8 - 16,
-                    });
-                    prefixes.set_gs();
-                },
-                0x66 => {
+                instruction.mem_size = 0;
+                instruction.operand_count = 2;
+                break record;
+            } else {
+                let b = nextb;
+                if b == 0x0f {
+                    if words.offset() > 1 {
+                        sink.record(
+                            words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1,
+                            InnerDescription::Boundary("prefixes end")
+                                .with_id(words.offset() as u32 * 8 - 9)
+                        );
+                    }
+                    let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
+                    instruction.mem_size = 0;
+                    instruction.operand_count = 2;
+                    if b == 0x38 {
+                        let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
+                        break read_0f38_opcode(b, prefixes);
+                    } else if b == 0x3a {
+                        let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
+                        break read_0f3a_opcode(b, prefixes);
+                    } else {
+                        break read_0f_opcode(b, prefixes);
+                    }
+                }
+                if b == 0x66 {
                     sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
                         desc: InnerDescription::Misc("operand size override (to 16 bits)"),
                         id: words.offset() as u32 * 8 - 16,
                     });
                     prefixes.set_operand_size();
-                },
-                0x67 => {
+                } else if b == 0x67 {
                     sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
                         desc: InnerDescription::Misc("address size override (to 32 bits)"),
                         id: words.offset() as u32 * 8 - 16,
@@ -6603,39 +6502,80 @@ fn read_with_annotations<
                     prefixes.set_address_size();
                     instruction.regs[1].bank = RegisterBank::D;
                     instruction.regs[2].bank = RegisterBank::D;
-                },
-                0xf0 => {
-                    sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
-                        desc: InnerDescription::Misc("lock prefix"),
-                        id: words.offset() as u32 * 8 - 16,
-                    });
-                    prefixes.set_lock();
-                },
-                0xf2 => {
+                } else if b == 0xf2 {
                     sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
                         desc: InnerDescription::Misc("repnz prefix"),
                         id: words.offset() as u32 * 8 - 16,
                     });
                     prefixes.set_repnz();
-                },
-                0xf3 => {
+                } else if b == 0xf3 {
                     sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
                         desc: InnerDescription::Misc("rep prefix"),
                         id: words.offset() as u32 * 8 - 16,
                     });
                     prefixes.set_rep();
-                },
-                _ => { unsafe { unreachable_unchecked(); } }
+                } else {
+                    match b {
+                        0x26 |
+                        0x2e |
+                        0x36 |
+                        0x3e => {
+                            /* no-op in amd64 */
+                            sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
+                                desc: InnerDescription::Misc("ignored prefix in 64-bit mode"),
+                                id: words.offset() as u32 * 8 - 16,
+                            });
+                        },
+                        0x64 => {
+                            sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
+                                desc: InnerDescription::SegmentPrefix(Segment::FS),
+                                id: words.offset() as u32 * 8 - 16,
+                            });
+                            prefixes.set_fs();
+                        },
+                        0x65 => {
+                            sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
+                                desc: InnerDescription::SegmentPrefix(Segment::GS),
+                                id: words.offset() as u32 * 8 - 16,
+                            });
+                            prefixes.set_gs();
+                        },
+                        0xf0 => {
+                            sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
+                                desc: InnerDescription::Misc("lock prefix"),
+                                id: words.offset() as u32 * 8 - 16,
+                            });
+                            prefixes.set_lock();
+                        },
+                        _ => { return read_avx_prefixed(b, words, instruction, sink); }
+                    }
+                }
+                nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
+                next_rec = unsafe {
+                    core::ptr::read_volatile(&OPCODES[nextb as usize])
+                };
+                if prefixes.rex.bits != 0 {
+                    sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription {
+                        desc: InnerDescription::Misc("invalidates prior rex prefix"),
+                        id: (words.offset() as u32 * 8 - 16) + 1,
+                    });
+                }
+                prefixes.rex_from(0);
+            }
+            if words.offset() >= 15 {
+                return Err(DecodeError::TooLong);
             }
+        };
+
+        if let Interpretation::Instruction(opcode) = record.0 {
+            instruction.opcode = opcode;
+        } else {
+            unsafe { unreachable_unchecked(); }
         }
+
+        record.1
     };
-    if let Interpretation::Instruction(opcode) = record.0 {
-        instruction.opcode = opcode;
-    } else {
-        unsafe { unreachable_unchecked(); }
-    }
-    instruction.prefixes = prefixes;
-    read_operands(decoder, words, instruction, record.1, sink)?;
+    read_operands(decoder, words, instruction, record, sink)?;
 
     if instruction.prefixes.lock() {
         if !LOCKABLE_INSTRUCTIONS.contains(&instruction.opcode) || !instruction.operands[0].is_memory() {
@@ -6645,6 +6585,61 @@ fn read_with_annotations<
 
     Ok(())
 }
+#[inline(never)]
+fn read_avx_prefixed<
+    T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>,
+    S: DescriptionSink<FieldDescription>,
+>(b: u8, words: &mut T, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> {
+    if instruction.prefixes.rex_unchecked().present() || instruction.prefixes.lock() || instruction.prefixes.operand_size() || instruction.prefixes.rep() || instruction.prefixes.repnz() {
+        // rex and then vex is invalid! reject it.
+        return Err(DecodeError::InvalidPrefixes);
+    }
+    instruction.mem_size = 0;
+    instruction.operand_count = 2;
+
+    // some prefix seen after we saw rex, but before the 0f escape or an actual
+    // opcode. so we must forget the rex prefix!
+    // this is to handle sequences like 41660f21cf
+    // where if 660f21 were a valid opcode, 41 would apply a rex.b
+    // prefix, but since 660f21 is not valid, the opcode is interpreted
+    // as 0f21, where 66 is a prefix, which makes 41 not the last
+    // prefix before the opcode, and it's discarded.
+
+    // 2.3.2
+    // Any VEX-encoded instruction with a LOCK prefix preceding VEX will #UD.
+    // 2.3.3
+    // Any VEX-encoded instruction with a 66H, F2H, or F3H prefix preceding VEX
+    // will #UD.
+    // 2.3.4
+    // Any VEX-encoded instruction with a REX prefix proceeding VEX will #UD.
+    if b == 0xc5 {
+        sink.record(
+            words.offset() as u32 * 8 - 8,
+            words.offset() as u32 * 8 - 1,
+            InnerDescription::Misc("two-byte vex prefix (0xc5)")
+                .with_id(words.offset() as u32 * 8 - 8)
+        );
+        vex::two_byte_vex(words, instruction, sink)?;
+    } else if b == 0xc4 {
+        sink.record(
+            words.offset() as u32 * 8 - 8,
+            words.offset() as u32 * 8 - 1,
+            InnerDescription::Misc("three-byte vex prefix (0xc4)")
+                .with_id(words.offset() as u32 * 8 - 8)
+        );
+        vex::three_byte_vex(words, instruction, sink)?;
+    } else if b == 0x62 {
+        sink.record(
+            words.offset() as u32 * 8 - 8,
+            words.offset() as u32 * 8 - 1,
+            InnerDescription::Misc("evex prefix (0x62)")
+                .with_id(words.offset() as u32 * 8 - 8)
+        );
+        evex::read_evex(words, instruction, None, sink)?;
+    }
+    return Ok(());
+}
+
 /* likely cases
         OperandCode::Eb_R0 => 0
         _op @ OperandCode::ModRM_0x80_Eb_Ib => 1
author	iximeow <me@iximeow.net>	2022-04-21 02:31:40 -0700
committer	iximeow <me@iximeow.net>	2022-05-30 11:16:52 -0700
commit	f338c74656f6eef8b3080fa9f249b1cb733fd1a9 (patch)
tree	a7aaa075893b66516f4a10935a81a3e6d0b7556b /src
parent	e7f4950985ab9976e9d00599c9225327c64f6439 (diff)