From c216e625815ce15d1c73cecd064888a843cca472 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 1 Jul 2021 01:10:32 -0700 Subject: complete yaxpeax-arch 0.1.0 adaptation, shore up .mem_size() --- src/long_mode/mod.rs | 91 ++++++++++++++++++++++++++++++++++++---------------- test/bench.rs | 4 +-- 2 files changed, 65 insertions(+), 30 deletions(-) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 578e2f2..f4c78c5 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -761,39 +761,35 @@ impl Operand { } /// return the width of this operand, in bytes. register widths are determined by the - /// register's class. - /// - /// TODO: /!\ MEMORY WIDTHS ARE ALWAYS REPORTED AS 8 /!\ - pub fn width(&self) -> u8 { + /// register's class. the widths of memory operands are recorded on the instruction this + /// `Operand` came from; `None` here means the authoritative width is `instr.mem_size()`. + pub fn width(&self) -> Option { match self { - Operand::Nothing => { - panic!("non-operand does not have a size"); - } Operand::Register(reg) => { - reg.width() + Some(reg.width()) } Operand::RegisterMaskMerge(reg, _, _) => { reg.width() } Operand::ImmediateI8(_) | Operand::ImmediateU8(_) => { - 1 + Some(1) } Operand::ImmediateI16(_) | Operand::ImmediateU16(_) => { - 2 + Some(2) } Operand::ImmediateI32(_) | Operand::ImmediateU32(_) => { - 4 + Some(4) } Operand::ImmediateI64(_) | Operand::ImmediateU64(_) => { - 8 + Some(8) } - // memory operands + // memory operands or `Nothing` _ => { - 8 + None } } } @@ -4169,6 +4165,23 @@ impl Default for Instruction { } } +pub struct MemoryAccessSize { + size: u8, +} +impl MemoryAccessSize { + pub fn bytes_size(&self) -> Option { + if self.size == 63 { + None + } else { + Some(self.size) + } + } + + pub fn size_name(&self) -> &'static str { + "name_strings" + } +} + impl Instruction { pub fn opcode(&self) -> Opcode { self.opcode @@ -4203,6 +4216,14 @@ impl Instruction { } } + pub fn mem_size(&self) -> Option { + if self.mem_size 
!= 0 { + Some(MemoryAccessSize { size: self.mem_size }) + } else { + None + } + } + /// build a new instruction representing nothing in particular. this is primarily useful as a /// default to pass to `decode_into`. pub fn invalid() -> Instruction { @@ -5589,6 +5610,7 @@ fn read_sib::Address, ::Address, ::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result { let modbits = modrm >> 6; let mmm = modrm & 7; @@ -7006,7 +7028,10 @@ fn read_instr::Address, 15 { + return Err(DecodeError::TooLong); + } if decoder != &InstDecoder::default() { decoder.revise_instruction(instruction)?; } @@ -7020,6 +7045,10 @@ fn read_instr::Address, 15 { + return Err(DecodeError::TooLong); + } if decoder != &InstDecoder::default() { decoder.revise_instruction(instruction)?; } @@ -7033,6 +7062,10 @@ fn read_instr::Address, 15 { + return Err(DecodeError::TooLong); + } if decoder != &InstDecoder::default() { decoder.revise_instruction(instruction)?; } @@ -7082,8 +7115,9 @@ fn read_instr::Address, 15 { + read_operands(decoder, words, instruction, record.1)?; + instruction.length = words.offset() as u8; + if words.offset() > 15 { return Err(DecodeError::TooLong); } @@ -7135,7 +7169,7 @@ fn read_instr::Address, ::Address, ::Word>>(words: &mut T, instruction: &mut Instruction, operand_code: OperandCode) -> Result<(), DecodeError> { +fn read_operands::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode) -> Result<(), DecodeError> { instruction.operands[0] = OperandSpec::RegRRR; instruction.operand_count = 2; let operand_code = OperandCodeBuilder::from_bits(operand_code as u16); @@ -7640,14 +7674,15 @@ fn read_operands::Address, { let operand_code: OperandCode = unsafe { core::mem::transmute(operand_code.bits()) }; - unlikely_operands(decoder, bytes_iter, instruction, operand_code, mem_oper)?; + unlikely_operands(decoder, words, instruction, operand_code, mem_oper)?; } }; } Ok(()) } -fn unlikely_operands>(decoder: 
&InstDecoder, mut bytes_iter: T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec, length: &mut u8) -> Result<(), DecodeError> { +#[inline(never)] +fn unlikely_operands::Address, ::Word>>(decoder: &InstDecoder, words: &mut T, instruction: &mut Instruction, operand_code: OperandCode, mem_oper: OperandSpec) -> Result<(), DecodeError> { match operand_code { OperandCode::G_E_mm_Ib => { let modrm = read_modrm(words)?; @@ -8653,7 +8688,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } OperandCode::ModRM_0xf30f38dc => { - read_operands(decoder, bytes_iter, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; if let OperandSpec::RegMMM = instruction.operands[1] { instruction.opcode = Opcode::LOADIWKEY; } else { @@ -8661,7 +8696,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } OperandCode::ModRM_0xf30f38dd => { - read_operands(decoder, bytes_iter, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8669,7 +8704,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } OperandCode::ModRM_0xf30f38de => { - read_operands(decoder, bytes_iter, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8677,7 +8712,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter } } OperandCode::ModRM_0xf30f38df => { - read_operands(decoder, bytes_iter, instruction, OperandCode::G_E_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_E_xmm)?; if let OperandSpec::RegMMM = instruction.operands[1] { return Err(DecodeError::InvalidOperand); } else { @@ -8686,13 +8721,13 @@ fn unlikely_operands>(decoder: &InstDecoder, mut 
bytes_iter } OperandCode::ModRM_0xf30f38fa => { instruction.opcode = Opcode::ENCODEKEY128; - read_operands(decoder, bytes_iter, instruction, OperandCode::G_U_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_U_xmm)?; instruction.modrm_rrr.bank = RegisterBank::D; instruction.modrm_mmm.bank = RegisterBank::D; } OperandCode::ModRM_0xf30f38fb => { instruction.opcode = Opcode::ENCODEKEY256; - read_operands(decoder, bytes_iter, instruction, OperandCode::G_U_xmm)?; + read_operands(decoder, words, instruction, OperandCode::G_U_xmm)?; instruction.modrm_rrr.bank = RegisterBank::D; instruction.modrm_mmm.bank = RegisterBank::D; } @@ -9947,7 +9982,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter OperandCode::x87_dd | OperandCode::x87_de | OperandCode::x87_df => { - return decode_x87(decoder, bytes_iter, instruction, operand_code); + return decode_x87(words, instruction, operand_code); } OperandCode::MOVDIR64B => { // at this point we've done a read as if it was Gv_M (`lea` operands). 
because the @@ -9991,7 +10026,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter Ok(()) } -fn decode_x87>(_decoder: &InstDecoder, mut bytes_iter: T, instruction: &mut Instruction, operand_code: OperandCode, length: &mut u8) -> Result<(), DecodeError> { +fn decode_x87::Address, ::Word>>(words: &mut T, instruction: &mut Instruction, operand_code: OperandCode) -> Result<(), DecodeError> { #[allow(non_camel_case_types)] enum OperandCodeX87 { Est, diff --git a/test/bench.rs b/test/bench.rs index 486fa6e..bf73b1b 100644 --- a/test/bench.rs +++ b/test/bench.rs @@ -87,11 +87,11 @@ const DECODE_DATA: [u8; 130] = [ fn do_decode_swathe() { // let mut buf = [0u8; 128]; - let mut iter = DECODE_DATA.iter().map(|x| *x); let mut result = yaxpeax_x86::long_mode::Instruction::invalid(); + let mut reader = yaxpeax_arch::U8Reader::new(&DECODE_DATA[..]); let decoder = yaxpeax_x86::long_mode::InstDecoder::default(); loop { - match decoder.decode_into(&mut result, &mut iter) { + match decoder.decode_into(&mut result, &mut reader) { Ok(()) => { #[cfg(feature = "capstone_bench")] test::black_box(write!(&mut buf[..], "{}", result)); -- cgit v1.1