From 404cb6e81988ed84a75c89d67bf324409e22a390 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 3 Jul 2021 23:52:58 -0700 Subject: update protected_mode to match long_mode docs, apis --- src/protected_mode/mod.rs | 188 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 153 insertions(+), 35 deletions(-) (limited to 'src/protected_mode/mod.rs') diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index cacfcbb..3678bf8 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -22,6 +22,16 @@ impl fmt::Display for DecodeError { } } +/// an `x86` register, including its number and type. if `fmt` is enabled, name too. +/// +/// ``` +/// use yaxpeax_x86::long_mode::{RegSpec, register_class}; +/// +/// assert_eq!(RegSpec::ecx().num(), 1); +/// assert_eq!(RegSpec::ecx().class(), register_class::D); +/// ``` +/// +/// some registers have classes of their own, and only one member: `eip` and `eflags`. #[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] #[derive(Copy, Clone, Debug, PartialOrd, Ord, Eq, PartialEq)] pub struct RegSpec { @@ -38,7 +48,15 @@ impl Hash for RegSpec { } } -#[derive(Debug)] +/// the condition for a conditional instruction. +/// +/// these are only obtained through [`Opcode::condition()`]: +/// ``` +/// use yaxpeax_x86::long_mode::{Opcode, ConditionCode}; +/// +/// assert_eq!(Opcode::JB.condition(), Some(ConditionCode::B)); +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ConditionCode { O, NO, @@ -63,10 +81,20 @@ impl RegSpec { /// the register `eip`. this register is in the class `eip`, which contains only it. pub const EIP: RegSpec = RegSpec::eip(); + /// the number of this register in its `RegisterClass`. + /// + /// for many registers this is a number in the name, but for registers harkening back to + /// `x86_32`, the first eight registers are `rax`, `rcx`, `rdx`, `rbx`, `rsp`, `rbp`, `rsi`, + /// and `rdi` (or `eXX` for the 32-bit forms, `XX` for 16-bit forms). pub fn num(&self) -> u8 { self.num } + /// the class of register this register is in. + /// + /// this corresponds to the register's size, but is by the register's usage in the instruction + /// set; `rax` and `mm0` are the same size, but different classes (`Q`(word) and `MM` (mmx) + /// respectively). pub fn class(&self) -> RegisterClass { RegisterClass { kind: self.bank } } @@ -78,6 +106,7 @@ impl RegSpec { display::regspec_label(self) } + /// construct a `RegSpec` for x87 register `st(num)` #[inline] pub fn st(num: u8) -> RegSpec { if num >= 8 { @@ -379,6 +408,10 @@ enum SizeCode { vd, } +/// an operand for an `x86` instruction. +/// +/// `Operand::Nothing` should be unreachable in practice; any such instructions should have an +/// operand count of 0 (or at least one fewer than the `Nothing` operand's position). #[derive(Clone, Debug, PartialEq)] #[non_exhaustive] pub enum Operand { @@ -485,7 +518,7 @@ impl OperandSpec { o => o, } } - pub fn is_memory(&self) -> bool { + fn is_memory(&self) -> bool { match self { OperandSpec::DispU16 | OperandSpec::DispU32 | @@ -529,6 +562,11 @@ impl OperandSpec { } } } +/// an `avx512` merging mode. +/// +/// the behavior for non-`avx512` instructions is equivalent to `merge`. `zero` is only useful in +/// conjunction with a mask register, where bits specified in the mask register correspond to +/// unmodified items in the instruction's desination. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum MergeMode { Merge, @@ -543,6 +581,7 @@ impl From for MergeMode { } } } +/// an `avx512` custom rounding mode. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum SaeMode { RoundNearest, @@ -557,6 +596,16 @@ const SAE_MODES: [SaeMode; 4] = [ SaeMode::RoundZero, ]; impl SaeMode { + /// a human-friendly label for this `SaeMode`: + /// + /// ``` + /// use yaxpeax_x86::long_mode::SaeMode; + /// + /// assert_eq!(SaeMode::RoundNearest.label(), "{rne-sae}"); + /// assert_eq!(SaeMode::RoundDown.label(), "{rd-sae}"); + /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}"); + /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}"); + /// ``` pub fn label(&self) -> &'static str { match self { SaeMode::RoundNearest => "{rne-sae}", @@ -722,6 +771,10 @@ impl Operand { } } } + /// returns `true` if this operand implies a memory access, `false` otherwise. + /// + /// notably, the `lea` instruction uses a memory operand without actually ever accessing + /// memory. pub fn is_memory(&self) -> bool { match self { Operand::DisplacementU16(_) | @@ -799,6 +852,10 @@ fn operand_size() { // assert_eq!(core::mem::size_of::(), 40); } +/// an `x86` register class - `qword`, `dword`, `xmmword`, `segment`, and so on. +/// +/// this is mostly useful for comparing a `RegSpec`'s [`RegSpec::class()`] with a constant out of +/// [`register_class`]. #[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct RegisterClass { @@ -970,6 +1027,11 @@ enum RegisterBank { K = 20, // AVX512 mask registers } +/// the segment register used by the corresponding instruction. +/// +/// typically this will be `ds` but can be overridden. some instructions have specific segment +/// registers used regardless of segment prefixes, and in these cases `yaxpeax-x86` will report the +/// actual segment register a physical processor would use. #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub enum Segment { DS = 0, CS, ES, FS, GS, SS @@ -2533,7 +2595,12 @@ impl PartialEq for Instruction { } } -#[derive(Debug, Clone, Eq)] +/// an `x86` instruction. +/// +/// typically an opcode will be inspected by [`Instruction::opcode()`], and an instruction has +/// [`Instruction::operand_count()`] many operands. operands are provided by +/// [`Instruction::operand()`]. +#[derive(Debug, Clone, Copy, Eq)] pub struct Instruction { pub prefixes: Prefixes, /* @@ -2649,8 +2716,10 @@ enum OperandSpec { // Foo for T == x86. This is only to access associated types // which themselves are bounded, but their #[derive] require T to // implement these traits. +/// a trivial struct for `yaxpeax_arch::Arch` to be implemented on. it's only interesting for the +/// associated type parameters. #[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] -#[derive(Hash, Eq, PartialEq, Debug)] +#[derive(Hash, Eq, PartialEq, Debug, Copy, Clone)] #[allow(non_camel_case_types)] pub struct Arch; @@ -2675,7 +2744,15 @@ impl LengthedInstruction for Instruction { } } -#[derive(PartialEq)] +/// an `x86` instruction decoder. +/// +/// fundamentally this is one or two primitives with no additional state kept during decoding. it +/// can be copied cheaply, hashed cheaply, compared cheaply. if you really want to share an +/// `InstDecoder` between threads, you could - but you might want to clone it instead. +/// +/// unless you're using an `Arc>`, which is _fine_ but i'd be very curious about +/// the design requiring that. +#[derive(PartialEq, Copy, Clone, Eq, Hash, PartialOrd, Ord)] pub struct InstDecoder { // extensions tracked here: // 0. SSE3 @@ -3171,6 +3248,7 @@ impl InstDecoder { self } + /// returns `true` if this `InstDecoder` has **all** `avx512` features enabled. pub fn avx512(&self) -> bool { let avx512_mask = (1 << 19) | @@ -3191,6 +3269,9 @@ impl InstDecoder { (self.flags & avx512_mask) == avx512_mask } + /// enable all `avx512` features on this `InstDecoder`. no real CPU, at time of writing, + /// actually has such a feature comination, but this is a useful overestimate for `avx512` + /// generally. pub fn with_avx512(mut self) -> Self { let avx512_mask = (1 << 19) | @@ -4114,6 +4195,8 @@ impl Decoder for InstDecoder { } impl Opcode { + /// get the [`ConditionCode`] for this instruction, if it is in fact conditional. x86's + /// conditional instructions are `Jcc`, `CMOVcc`, andd `SETcc`. pub fn condition(&self) -> Option { match self { Opcode::JO | @@ -4176,6 +4259,7 @@ impl Default for Instruction { } impl Instruction { + /// get the `Opcode` of this instruction. pub fn opcode(&self) -> Opcode { self.opcode } @@ -4209,6 +4293,11 @@ impl Instruction { } } + /// get the memory access information for this instruction, if it accesses memory. + /// + /// the corresponding `MemoryAccessSize` may report that the size of accessed memory is + /// indeterminate; this is the case for `xsave/xrestor`-style instructions whose operation size + /// varies based on physical processor. pub fn mem_size(&self) -> Option { if self.mem_size != 0 { Some(MemoryAccessSize { size: self.mem_size }) @@ -4222,7 +4311,7 @@ impl Instruction { pub fn invalid() -> Instruction { Instruction { prefixes: Prefixes::new(0), - opcode: Opcode::Invalid, + opcode: Opcode::NOP, mem_size: 0, regs: [RegSpec::eax(); 4], scale: 0, @@ -4234,13 +4323,10 @@ impl Instruction { } } - pub fn is_invalid(&self) -> bool { - match self.opcode { - Opcode::Invalid => true, - _ => false - } - } - + /// get the `Segment` that will *actually* be used for accessing the operand at index `i`. + /// + /// `stos`, `lods`, `movs`, and `cmps` specifically name some segments for use regardless of + /// prefixes. pub fn segment_override_for_op(&self, op: u8) -> Option { match self.opcode { Opcode::STOS => { @@ -4288,6 +4374,18 @@ impl Instruction { } #[cfg(feature = "fmt")] + /// wrap a reference to this instruction with a `DisplayStyle` to format the instruction with + /// later. see the documentation on [`display::DisplayStyle`] for more. + /// + /// ``` + /// use yaxpeax_x86::long_mode::{InstDecoder, DisplayStyle}; + /// + /// let decoder = InstDecoder::default(); + /// let inst = decoder.decode_slice(&[0x33, 0xc1]).unwrap(); + /// + /// assert_eq!("eax ^= ecx", inst.display_with(DisplayStyle::C).to_string()); + /// assert_eq!("xor eax, ecx", inst.display_with(DisplayStyle::Intel).to_string()); + /// ``` pub fn display_with<'a>(&'a self, style: display::DisplayStyle) -> display::InstructionDisplayer<'a> { display::InstructionDisplayer { style, @@ -4297,11 +4395,15 @@ impl Instruction { } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct EvexData { +struct EvexData { // data: present, z, b, Lp, Rp. aaa bits: u8, } +/// the prefixes on an instruction. +/// +/// `rep`, `repnz`, `lock`, and segment override prefixes are directly accessible here. `vex` and +/// `evex` prefixes are available through their associated helpers. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Prefixes { bits: u8, @@ -4320,26 +4422,32 @@ impl PrefixEvex { fn present(&self) -> bool { self.evex_data.present() } - fn vex(&self) -> &PrefixVex { + /// the `evex` prefix's parts that overlap with `vex` definitions - `L`, `W`, `R`, `X`, and `B` + /// bits. + pub fn vex(&self) -> &PrefixVex { &self.vex } - fn mask_reg(&self) -> u8 { + /// the `avx512` mask register in use. `0` indicates "no mask register". + pub fn mask_reg(&self) -> u8 { self.evex_data.aaa() } - fn broadcast(&self) -> bool { + pub fn broadcast(&self) -> bool { self.evex_data.b() } - fn merge(&self) -> bool { + pub fn merge(&self) -> bool { self.evex_data.z() } - fn lp(&self) -> bool { + /// the `evex` `L'` bit. + pub fn lp(&self) -> bool { self.evex_data.lp() } - fn rp(&self) -> bool { + /// the `evex` `R'` bit. + pub fn rp(&self) -> bool { self.evex_data.rp() } } +/// bits specified in an avx/avx2 [`vex`](https://en.wikipedia.org/wiki/VEX_prefix) prefix, `L`, `W`, `R`, `X`, and `B`. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct PrefixVex { bits: u8, @@ -4348,15 +4456,15 @@ pub struct PrefixVex { #[allow(dead_code)] impl PrefixVex { #[inline] - fn b(&self) -> bool { (self.bits & 0x01) == 0x01 } + pub fn b(&self) -> bool { (self.bits & 0x01) == 0x01 } #[inline] - fn x(&self) -> bool { (self.bits & 0x02) == 0x02 } + pub fn x(&self) -> bool { (self.bits & 0x02) == 0x02 } #[inline] - fn r(&self) -> bool { (self.bits & 0x04) == 0x04 } + pub fn r(&self) -> bool { (self.bits & 0x04) == 0x04 } #[inline] - fn w(&self) -> bool { (self.bits & 0x08) == 0x08 } + pub fn w(&self) -> bool { (self.bits & 0x08) == 0x08 } #[inline] - fn l(&self) -> bool { (self.bits & 0x10) == 0x10 } + pub fn l(&self) -> bool { (self.bits & 0x10) == 0x10 } #[inline] fn present(&self) -> bool { (self.bits & 0x80) == 0x80 } #[inline] @@ -4383,10 +4491,6 @@ impl Prefixes { #[inline] fn set_rep(&mut self) { self.bits = (self.bits & 0xcf) | 0x10 } #[inline] - pub fn repz(&self) -> bool { self.bits & 0x30 == 0x20 } - #[inline] - fn set_repz(&mut self) { self.bits = (self.bits & 0xcf) | 0x20 } - #[inline] pub fn repnz(&self) -> bool { self.bits & 0x30 == 0x30 } #[inline] fn set_repnz(&mut self) { self.bits = (self.bits & 0xcf) | 0x30 } @@ -4431,11 +4535,20 @@ impl Prefixes { #[inline] fn set_ss(&mut self) { self.segment = Segment::SS } #[inline] - fn vex(&self) -> PrefixVex { PrefixVex { bits: self.vex.bits } } + fn vex_unchecked(&self) -> PrefixVex { PrefixVex { bits: self.vex.bits } } + #[inline] + pub fn vex(&self) -> Option { + let vex = self.vex_unchecked(); + if vex.present() { + Some(vex) + } else { + None + } + } #[inline] fn evex_unchecked(&self) -> PrefixEvex { PrefixEvex { vex: PrefixVex { bits: self.vex.bits }, evex_data: self.evex_data } } #[inline] - fn evex(&self) -> Option { + pub fn evex(&self) -> Option { let evex = self.evex_unchecked(); if evex.present() { Some(evex) @@ -5484,19 +5597,24 @@ fn read_modrm_reg(instr: &mut Instruction, modrm: u8, reg_bank: RegisterBank) -> fn read_sib::Address, ::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result { let modbits = modrm >> 6; let sibbyte = words.next().ok().ok_or(DecodeError::ExhaustedInput)?; + instr.regs[1].num |= sibbyte & 7; + instr.regs[2].num |= (sibbyte >> 3) & 7; let disp = if modbits == 0b00 { if (sibbyte & 7) == 0b101 { - read_num(words, 4)? as i32 as u32 + read_num(words, 4)? as i32 } else { 0 } } else if modbits == 0b01 { - read_num(words, 1)? as i8 as i32 as u32 + read_num(words, 1)? as i8 as i32 } else { - read_num(words, 4)? as i32 as u32 + read_num(words, 4)? as i32 }; - instr.disp = disp; + instr.disp = disp as u32; + + let scale = 1u8 << (sibbyte >> 6); + instr.scale = scale; let op_spec = if (sibbyte & 7) == 0b101 { if ((sibbyte >> 3) & 7) == 0b100 { -- cgit v1.1