From 76418a5a934c99ef918070c3c740ce3eceb6c5bb Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 21 Apr 2022 02:31:40 -0700 Subject: reorder prefix checks, extract vex/evex prefix handling sharing vex/evex invalid prefix checks improves codegen a bit, but ordering prefix checks by likeliest prefix first reduces time falling through prefix handling arms. both together are a notable improvement in throughput on typical x86 code. bundled in here is some code motion to where `mem_size = 0` and `operand_count = 2` are executed; this is because, at least on zen2 and cascade lake parts, bunching all stores to the instruction together caused small stalls getting into the decoder. spreading out stores seems to mix these assignments with parts of code that was not using memory anyway, and pipelines better. --- src/long_mode/mod.rs | 337 +++++++++++++++++++++++++-------------------------- 1 file changed, 166 insertions(+), 171 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 7ab5f7d..8be5f61 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -6422,53 +6422,29 @@ fn read_with_annotations< words.mark(); let mut nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; let mut next_rec = OPCODES[nextb as usize]; - let mut prefixes = Prefixes::new(0); + instruction.prefixes = Prefixes::new(0); // default x86_64 registers to `[rax; 4]` instruction.regs = unsafe { core::mem::transmute(0u64) }; - instruction.mem_size = 0; // default operands to [RegRRR, Nothing, Nothing, Nothing] instruction.operands = unsafe { core::mem::transmute(0x00_00_00_01) }; - instruction.operand_count = 2; - let record: OpcodeRecord = loop { - let record = next_rec; - if nextb >= 0x40 && nextb < 0x50 { - let b = nextb; - sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::RexPrefix(b), - id: words.offset() as u32 * 8 - 8, - }); - if words.offset() >= 15 { - return Err(DecodeError::TooLong); - } - nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; - next_rec = unsafe { - core::ptr::read_volatile(&OPCODES[nextb as usize]) - }; - prefixes.rex_from(b); - } else if let Interpretation::Instruction(opc) = record.0 { - if words.offset() > 1 { - sink.record( - words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1, - InnerDescription::Boundary("prefixes end") - .with_id(words.offset() as u32 * 8 - 9) - ); - } - if opc != Opcode::Invalid { + let record: OperandCode = { + let prefixes = &mut instruction.prefixes; + let record = loop { + let record = next_rec; + if nextb >= 0x40 && nextb < 0x50 { + let b = nextb; sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::Opcode(opc), + desc: InnerDescription::RexPrefix(b), id: words.offset() as u32 * 8 - 8, }); - } - sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }), - id: words.offset() as u32 * 8 - 8 + 1, - }); - break record; - } else { - let b = nextb; - if b == 0x0f { + nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + next_rec = unsafe { + core::ptr::read_volatile(&OPCODES[nextb as usize]) + }; + prefixes.rex_from(b); + } else if let Interpretation::Instruction(opc) = record.0 { if words.offset() > 1 { sink.record( words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1, @@ -6476,126 +6452,49 @@ fn read_with_annotations< .with_id(words.offset() as u32 * 8 - 9) ); } - let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; - if b == 0x38 { - let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; - break read_0f38_opcode(b, &mut prefixes); - } else if b == 0x3a { - let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; - break read_0f3a_opcode(b, &mut prefixes); - } else { - break read_0f_opcode(b, &mut prefixes); - } - } - // some prefix seen after we saw rex, but before the 0f escape or an actual - // opcode. so we must forget the rex prefix! - // this is to handle sequences like 41660f21cf - // where if 660f21 were a valid opcode, 41 would apply a rex.b - // prefix, but since 660f21 is not valid, the opcode is interpreted - // as 0f21, where 66 is a prefix, which makes 41 not the last - // prefix before the opcode, and it's discarded. - - // 2.3.2 - // Any VEX-encoded instruction with a LOCK prefix preceding VEX will #UD. - // 2.3.3 - // Any VEX-encoded instruction with a 66H, F2H, or F3H prefix preceding VEX - // will #UD. - // 2.3.4 - // Any VEX-encoded instruction with a REX prefix proceeding VEX will #UD. - if b == 0xc5 { - if prefixes.rex_unchecked().present() || prefixes.lock() || prefixes.operand_size() || prefixes.rep() || prefixes.repnz() { - // rex and then vex is invalid! reject it. - return Err(DecodeError::InvalidPrefixes); - } else { - instruction.prefixes = prefixes; - sink.record( - words.offset() as u32 * 8 - 8, - words.offset() as u32 * 8 - 1, - InnerDescription::Misc("two-byte vex prefix (0xc5)") - .with_id(words.offset() as u32 * 8 - 8) - ); - vex::two_byte_vex(words, instruction, sink)?; - return Ok(()); - } - } else if b == 0xc4 { - if prefixes.rex_unchecked().present() || prefixes.lock() || prefixes.operand_size() || prefixes.rep() || prefixes.repnz() { - // rex and then vex is invalid! reject it. - return Err(DecodeError::InvalidPrefixes); - } else { - instruction.prefixes = prefixes; - sink.record( - words.offset() as u32 * 8 - 8, - words.offset() as u32 * 8 - 1, - InnerDescription::Misc("three-byte vex prefix (0xc4)") - .with_id(words.offset() as u32 * 8 - 8) - ); - vex::three_byte_vex(words, instruction, sink)?; - return Ok(()); - } - } else if b == 0x62 { - if prefixes.rex_unchecked().present() || prefixes.lock() || prefixes.operand_size() || prefixes.rep() || prefixes.repnz() { - // rex and then evex is invalid! reject it. - return Err(DecodeError::InvalidPrefixes); - } else { - instruction.prefixes = prefixes; - sink.record( - words.offset() as u32 * 8 - 8, - words.offset() as u32 * 8 - 1, - InnerDescription::Misc("evex prefix (0x62)") - .with_id(words.offset() as u32 * 8 - 8) - ); - evex::read_evex(words, instruction, None, sink)?; - return Ok(()); + if opc != Opcode::Invalid { + sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::Opcode(opc), + id: words.offset() as u32 * 8 - 8, + }); } - } - - nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; - next_rec = unsafe { - core::ptr::read_volatile(&OPCODES[nextb as usize]) - }; - if words.offset() >= 15 { - return Err(DecodeError::TooLong); - } - if prefixes.rex.bits != 0 { - sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::Misc("invalidates prior rex prefix"), - id: (words.offset() as u32 * 8 - 16) + 1, + sink.record((words.offset() - 1) as u32 * 8, (words.offset() - 1) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::OperandCode(OperandCodeWrapper { code: record.1 }), + id: words.offset() as u32 * 8 - 8 + 1, }); - } - prefixes.rex_from(0); - match b { - 0x26 | - 0x2e | - 0x36 | - 0x3e => { - /* no-op in amd64 */ - sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::Misc("ignored prefix in 64-bit mode"), - id: words.offset() as u32 * 8 - 16, - }); - }, - 0x64 => { - sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::SegmentPrefix(Segment::FS), - id: words.offset() as u32 * 8 - 16, - }); - prefixes.set_fs(); - }, - 0x65 => { - sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::SegmentPrefix(Segment::GS), - id: words.offset() as u32 * 8 - 16, - }); - prefixes.set_gs(); - }, - 0x66 => { + instruction.mem_size = 0; + instruction.operand_count = 2; + break record; + } else { + let b = nextb; + if b == 0x0f { + if words.offset() > 1 { + sink.record( + words.offset() as u32 * 8 - 8 - 1, words.offset() as u32 * 8 - 8 - 1, + InnerDescription::Boundary("prefixes end") + .with_id(words.offset() as u32 * 8 - 9) + ); + } + let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + instruction.mem_size = 0; + instruction.operand_count = 2; + if b == 0x38 { + let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + break read_0f38_opcode(b, prefixes); + } else if b == 0x3a { + let b = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + break read_0f3a_opcode(b, prefixes); + } else { + break read_0f_opcode(b, prefixes); + } + } + if b == 0x66 { sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { desc: InnerDescription::Misc("operand size override (to 16 bits)"), id: words.offset() as u32 * 8 - 16, }); prefixes.set_operand_size(); - }, - 0x67 => { + } else if b == 0x67 { sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { desc: InnerDescription::Misc("address size override (to 32 bits)"), id: words.offset() as u32 * 8 - 16, @@ -6603,39 +6502,80 @@ fn read_with_annotations< prefixes.set_address_size(); instruction.regs[1].bank = RegisterBank::D; instruction.regs[2].bank = RegisterBank::D; - }, - 0xf0 => { - sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { - desc: InnerDescription::Misc("lock prefix"), - id: words.offset() as u32 * 8 - 16, - }); - prefixes.set_lock(); - }, - 0xf2 => { + } else if b == 0xf2 { sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { desc: InnerDescription::Misc("repnz prefix"), id: words.offset() as u32 * 8 - 16, }); prefixes.set_repnz(); - }, - 0xf3 => { + } else if b == 0xf3 { sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { desc: InnerDescription::Misc("rep prefix"), id: words.offset() as u32 * 8 - 16, }); prefixes.set_rep(); - }, - _ => { unsafe { unreachable_unchecked(); } } + } else { + match b { + 0x26 | + 0x2e | + 0x36 | + 0x3e => { + /* no-op in amd64 */ + sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::Misc("ignored prefix in 64-bit mode"), + id: words.offset() as u32 * 8 - 16, + }); + }, + 0x64 => { + sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::SegmentPrefix(Segment::FS), + id: words.offset() as u32 * 8 - 16, + }); + prefixes.set_fs(); + }, + 0x65 => { + sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::SegmentPrefix(Segment::GS), + id: words.offset() as u32 * 8 - 16, + }); + prefixes.set_gs(); + }, + 0xf0 => { + sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::Misc("lock prefix"), + id: words.offset() as u32 * 8 - 16, + }); + prefixes.set_lock(); + }, + _ => { return read_avx_prefixed(b, words, instruction, sink); } + } + } + nextb = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + next_rec = unsafe { + core::ptr::read_volatile(&OPCODES[nextb as usize]) + }; + if prefixes.rex.bits != 0 { + sink.record((words.offset() - 2) as u32 * 8, (words.offset() - 2) as u32 * 8 + 7, FieldDescription { + desc: InnerDescription::Misc("invalidates prior rex prefix"), + id: (words.offset() as u32 * 8 - 16) + 1, + }); + } + prefixes.rex_from(0); + } + if words.offset() >= 15 { + return Err(DecodeError::TooLong); } + }; + + if let Interpretation::Instruction(opcode) = record.0 { + instruction.opcode = opcode; + } else { + unsafe { unreachable_unchecked(); } } + + record.1 }; - if let Interpretation::Instruction(opcode) = record.0 { - instruction.opcode = opcode; - } else { - unsafe { unreachable_unchecked(); } - } - instruction.prefixes = prefixes; - read_operands(decoder, words, instruction, record.1, sink)?; + read_operands(decoder, words, instruction, record, sink)?; if instruction.prefixes.lock() { if !LOCKABLE_INSTRUCTIONS.contains(&instruction.opcode) || !instruction.operands[0].is_memory() { @@ -6645,6 +6585,61 @@ fn read_with_annotations< Ok(()) } +#[inline(never)] +fn read_avx_prefixed< + T: Reader<::Address, ::Word>, + S: DescriptionSink, +>(b: u8, words: &mut T, instruction: &mut Instruction, sink: &mut S) -> Result<(), DecodeError> { + if instruction.prefixes.rex_unchecked().present() || instruction.prefixes.lock() || instruction.prefixes.operand_size() || instruction.prefixes.rep() || instruction.prefixes.repnz() { + // rex and then vex is invalid! reject it. + return Err(DecodeError::InvalidPrefixes); + } + instruction.mem_size = 0; + instruction.operand_count = 2; + + // some prefix seen after we saw rex, but before the 0f escape or an actual + // opcode. so we must forget the rex prefix! + // this is to handle sequences like 41660f21cf + // where if 660f21 were a valid opcode, 41 would apply a rex.b + // prefix, but since 660f21 is not valid, the opcode is interpreted + // as 0f21, where 66 is a prefix, which makes 41 not the last + // prefix before the opcode, and it's discarded. + + // 2.3.2 + // Any VEX-encoded instruction with a LOCK prefix preceding VEX will #UD. + // 2.3.3 + // Any VEX-encoded instruction with a 66H, F2H, or F3H prefix preceding VEX + // will #UD. + // 2.3.4 + // Any VEX-encoded instruction with a REX prefix proceeding VEX will #UD. + if b == 0xc5 { + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Misc("two-byte vex prefix (0xc5)") + .with_id(words.offset() as u32 * 8 - 8) + ); + vex::two_byte_vex(words, instruction, sink)?; + } else if b == 0xc4 { + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Misc("three-byte vex prefix (0xc4)") + .with_id(words.offset() as u32 * 8 - 8) + ); + vex::three_byte_vex(words, instruction, sink)?; + } else if b == 0x62 { + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Misc("evex prefix (0x62)") + .with_id(words.offset() as u32 * 8 - 8) + ); + evex::read_evex(words, instruction, None, sink)?; + } + return Ok(()); +} + /* likely cases OperandCode::Eb_R0 => 0 _op @ OperandCode::ModRM_0x80_Eb_Ib => 1 -- cgit v1.1