aboutsummaryrefslogtreecommitdiff
path: root/src/protected_mode/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/protected_mode/mod.rs')
-rw-r--r--src/protected_mode/mod.rs188
1 files changed, 153 insertions, 35 deletions
diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs
index cacfcbb..3678bf8 100644
--- a/src/protected_mode/mod.rs
+++ b/src/protected_mode/mod.rs
@@ -22,6 +22,16 @@ impl fmt::Display for DecodeError {
}
}
+/// an `x86` register, including its number and type. if `fmt` is enabled, name too.
+///
+/// ```
+/// use yaxpeax_x86::long_mode::{RegSpec, register_class};
+///
+/// assert_eq!(RegSpec::ecx().num(), 1);
+/// assert_eq!(RegSpec::ecx().class(), register_class::D);
+/// ```
+///
+/// some registers have classes of their own, and only one member: `eip` and `eflags`.
#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug, PartialOrd, Ord, Eq, PartialEq)]
pub struct RegSpec {
@@ -38,7 +48,15 @@ impl Hash for RegSpec {
}
}
-#[derive(Debug)]
+/// the condition for a conditional instruction.
+///
+/// these are only obtained through [`Opcode::condition()`]:
+/// ```
+/// use yaxpeax_x86::long_mode::{Opcode, ConditionCode};
+///
+/// assert_eq!(Opcode::JB.condition(), Some(ConditionCode::B));
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ConditionCode {
O,
NO,
@@ -63,10 +81,20 @@ impl RegSpec {
/// the register `eip`. this register is in the class `eip`, which contains only it.
pub const EIP: RegSpec = RegSpec::eip();
+ /// the number of this register in its `RegisterClass`.
+ ///
+ /// for many registers this is a number in the name, but for registers harkening back to
+ /// `x86_32`, the first eight registers are `rax`, `rcx`, `rdx`, `rbx`, `rsp`, `rbp`, `rsi`,
+ /// and `rdi` (or `eXX` for the 32-bit forms, `XX` for 16-bit forms).
pub fn num(&self) -> u8 {
self.num
}
+ /// the class of register this register is in.
+ ///
+ /// this corresponds to the register's size, but is by the register's usage in the instruction
+ /// set; `rax` and `mm0` are the same size, but different classes (`Q`(word) and `MM` (mmx)
+ /// respectively).
pub fn class(&self) -> RegisterClass {
RegisterClass { kind: self.bank }
}
@@ -78,6 +106,7 @@ impl RegSpec {
display::regspec_label(self)
}
+ /// construct a `RegSpec` for x87 register `st(num)`
#[inline]
pub fn st(num: u8) -> RegSpec {
if num >= 8 {
@@ -379,6 +408,10 @@ enum SizeCode {
vd,
}
+/// an operand for an `x86` instruction.
+///
+/// `Operand::Nothing` should be unreachable in practice; any such instructions should have an
+/// operand count of 0 (or at least one fewer than the `Nothing` operand's position).
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum Operand {
@@ -485,7 +518,7 @@ impl OperandSpec {
o => o,
}
}
- pub fn is_memory(&self) -> bool {
+ fn is_memory(&self) -> bool {
match self {
OperandSpec::DispU16 |
OperandSpec::DispU32 |
@@ -529,6 +562,11 @@ impl OperandSpec {
}
}
}
+/// an `avx512` merging mode.
+///
+/// the behavior for non-`avx512` instructions is equivalent to `merge`. `zero` is only useful in
+/// conjunction with a mask register, where bits specified in the mask register correspond to
+/// unmodified items in the instruction's desination.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum MergeMode {
Merge,
@@ -543,6 +581,7 @@ impl From<bool> for MergeMode {
}
}
}
+/// an `avx512` custom rounding mode.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SaeMode {
RoundNearest,
@@ -557,6 +596,16 @@ const SAE_MODES: [SaeMode; 4] = [
SaeMode::RoundZero,
];
impl SaeMode {
+ /// a human-friendly label for this `SaeMode`:
+ ///
+ /// ```
+ /// use yaxpeax_x86::long_mode::SaeMode;
+ ///
+ /// assert_eq!(SaeMode::RoundNearest.label(), "{rne-sae}");
+ /// assert_eq!(SaeMode::RoundDown.label(), "{rd-sae}");
+ /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}");
+ /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}");
+ /// ```
pub fn label(&self) -> &'static str {
match self {
SaeMode::RoundNearest => "{rne-sae}",
@@ -722,6 +771,10 @@ impl Operand {
}
}
}
+ /// returns `true` if this operand implies a memory access, `false` otherwise.
+ ///
+ /// notably, the `lea` instruction uses a memory operand without actually ever accessing
+ /// memory.
pub fn is_memory(&self) -> bool {
match self {
Operand::DisplacementU16(_) |
@@ -799,6 +852,10 @@ fn operand_size() {
// assert_eq!(core::mem::size_of::<Instruction>(), 40);
}
+/// an `x86` register class - `qword`, `dword`, `xmmword`, `segment`, and so on.
+///
+/// this is mostly useful for comparing a `RegSpec`'s [`RegSpec::class()`] with a constant out of
+/// [`register_class`].
#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct RegisterClass {
@@ -970,6 +1027,11 @@ enum RegisterBank {
K = 20, // AVX512 mask registers
}
+/// the segment register used by the corresponding instruction.
+///
+/// typically this will be `ds` but can be overridden. some instructions have specific segment
+/// registers used regardless of segment prefixes, and in these cases `yaxpeax-x86` will report the
+/// actual segment register a physical processor would use.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum Segment {
DS = 0, CS, ES, FS, GS, SS
@@ -2533,7 +2595,12 @@ impl PartialEq for Instruction {
}
}
-#[derive(Debug, Clone, Eq)]
+/// an `x86` instruction.
+///
+/// typically an opcode will be inspected by [`Instruction::opcode()`], and an instruction has
+/// [`Instruction::operand_count()`] many operands. operands are provided by
+/// [`Instruction::operand()`].
+#[derive(Debug, Clone, Copy, Eq)]
pub struct Instruction {
pub prefixes: Prefixes,
/*
@@ -2649,8 +2716,10 @@ enum OperandSpec {
// Foo<T> for T == x86. This is only to access associated types
// which themselves are bounded, but their #[derive] require T to
// implement these traits.
+/// a trivial struct for `yaxpeax_arch::Arch` to be implemented on. it's only interesting for the
+/// associated type parameters.
#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
-#[derive(Hash, Eq, PartialEq, Debug)]
+#[derive(Hash, Eq, PartialEq, Debug, Copy, Clone)]
#[allow(non_camel_case_types)]
pub struct Arch;
@@ -2675,7 +2744,15 @@ impl LengthedInstruction for Instruction {
}
}
-#[derive(PartialEq)]
+/// an `x86` instruction decoder.
+///
+/// fundamentally this is one or two primitives with no additional state kept during decoding. it
+/// can be copied cheaply, hashed cheaply, compared cheaply. if you really want to share an
+/// `InstDecoder` between threads, you could - but you might want to clone it instead.
+///
+/// unless you're using an `Arc<Mutex<InstDecoder>>`, which is _fine_ but i'd be very curious about
+/// the design requiring that.
+#[derive(PartialEq, Copy, Clone, Eq, Hash, PartialOrd, Ord)]
pub struct InstDecoder {
// extensions tracked here:
// 0. SSE3
@@ -3171,6 +3248,7 @@ impl InstDecoder {
self
}
+ /// returns `true` if this `InstDecoder` has **all** `avx512` features enabled.
pub fn avx512(&self) -> bool {
let avx512_mask =
(1 << 19) |
@@ -3191,6 +3269,9 @@ impl InstDecoder {
(self.flags & avx512_mask) == avx512_mask
}
+ /// enable all `avx512` features on this `InstDecoder`. no real CPU, at time of writing,
+ /// actually has such a feature comination, but this is a useful overestimate for `avx512`
+ /// generally.
pub fn with_avx512(mut self) -> Self {
let avx512_mask =
(1 << 19) |
@@ -4114,6 +4195,8 @@ impl Decoder<Arch> for InstDecoder {
}
impl Opcode {
+ /// get the [`ConditionCode`] for this instruction, if it is in fact conditional. x86's
+ /// conditional instructions are `Jcc`, `CMOVcc`, andd `SETcc`.
pub fn condition(&self) -> Option<ConditionCode> {
match self {
Opcode::JO |
@@ -4176,6 +4259,7 @@ impl Default for Instruction {
}
impl Instruction {
+ /// get the `Opcode` of this instruction.
pub fn opcode(&self) -> Opcode {
self.opcode
}
@@ -4209,6 +4293,11 @@ impl Instruction {
}
}
+ /// get the memory access information for this instruction, if it accesses memory.
+ ///
+ /// the corresponding `MemoryAccessSize` may report that the size of accessed memory is
+ /// indeterminate; this is the case for `xsave/xrestor`-style instructions whose operation size
+ /// varies based on physical processor.
pub fn mem_size(&self) -> Option<MemoryAccessSize> {
if self.mem_size != 0 {
Some(MemoryAccessSize { size: self.mem_size })
@@ -4222,7 +4311,7 @@ impl Instruction {
pub fn invalid() -> Instruction {
Instruction {
prefixes: Prefixes::new(0),
- opcode: Opcode::Invalid,
+ opcode: Opcode::NOP,
mem_size: 0,
regs: [RegSpec::eax(); 4],
scale: 0,
@@ -4234,13 +4323,10 @@ impl Instruction {
}
}
- pub fn is_invalid(&self) -> bool {
- match self.opcode {
- Opcode::Invalid => true,
- _ => false
- }
- }
-
+ /// get the `Segment` that will *actually* be used for accessing the operand at index `i`.
+ ///
+ /// `stos`, `lods`, `movs`, and `cmps` specifically name some segments for use regardless of
+ /// prefixes.
pub fn segment_override_for_op(&self, op: u8) -> Option<Segment> {
match self.opcode {
Opcode::STOS => {
@@ -4288,6 +4374,18 @@ impl Instruction {
}
#[cfg(feature = "fmt")]
+ /// wrap a reference to this instruction with a `DisplayStyle` to format the instruction with
+ /// later. see the documentation on [`display::DisplayStyle`] for more.
+ ///
+ /// ```
+ /// use yaxpeax_x86::long_mode::{InstDecoder, DisplayStyle};
+ ///
+ /// let decoder = InstDecoder::default();
+ /// let inst = decoder.decode_slice(&[0x33, 0xc1]).unwrap();
+ ///
+ /// assert_eq!("eax ^= ecx", inst.display_with(DisplayStyle::C).to_string());
+ /// assert_eq!("xor eax, ecx", inst.display_with(DisplayStyle::Intel).to_string());
+ /// ```
pub fn display_with<'a>(&'a self, style: display::DisplayStyle) -> display::InstructionDisplayer<'a> {
display::InstructionDisplayer {
style,
@@ -4297,11 +4395,15 @@ impl Instruction {
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub struct EvexData {
+struct EvexData {
// data: present, z, b, Lp, Rp. aaa
bits: u8,
}
+/// the prefixes on an instruction.
+///
+/// `rep`, `repnz`, `lock`, and segment override prefixes are directly accessible here. `vex` and
+/// `evex` prefixes are available through their associated helpers.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Prefixes {
bits: u8,
@@ -4320,26 +4422,32 @@ impl PrefixEvex {
fn present(&self) -> bool {
self.evex_data.present()
}
- fn vex(&self) -> &PrefixVex {
+ /// the `evex` prefix's parts that overlap with `vex` definitions - `L`, `W`, `R`, `X`, and `B`
+ /// bits.
+ pub fn vex(&self) -> &PrefixVex {
&self.vex
}
- fn mask_reg(&self) -> u8 {
+ /// the `avx512` mask register in use. `0` indicates "no mask register".
+ pub fn mask_reg(&self) -> u8 {
self.evex_data.aaa()
}
- fn broadcast(&self) -> bool {
+ pub fn broadcast(&self) -> bool {
self.evex_data.b()
}
- fn merge(&self) -> bool {
+ pub fn merge(&self) -> bool {
self.evex_data.z()
}
- fn lp(&self) -> bool {
+ /// the `evex` `L'` bit.
+ pub fn lp(&self) -> bool {
self.evex_data.lp()
}
- fn rp(&self) -> bool {
+ /// the `evex` `R'` bit.
+ pub fn rp(&self) -> bool {
self.evex_data.rp()
}
}
+/// bits specified in an avx/avx2 [`vex`](https://en.wikipedia.org/wiki/VEX_prefix) prefix, `L`, `W`, `R`, `X`, and `B`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct PrefixVex {
bits: u8,
@@ -4348,15 +4456,15 @@ pub struct PrefixVex {
#[allow(dead_code)]
impl PrefixVex {
#[inline]
- fn b(&self) -> bool { (self.bits & 0x01) == 0x01 }
+ pub fn b(&self) -> bool { (self.bits & 0x01) == 0x01 }
#[inline]
- fn x(&self) -> bool { (self.bits & 0x02) == 0x02 }
+ pub fn x(&self) -> bool { (self.bits & 0x02) == 0x02 }
#[inline]
- fn r(&self) -> bool { (self.bits & 0x04) == 0x04 }
+ pub fn r(&self) -> bool { (self.bits & 0x04) == 0x04 }
#[inline]
- fn w(&self) -> bool { (self.bits & 0x08) == 0x08 }
+ pub fn w(&self) -> bool { (self.bits & 0x08) == 0x08 }
#[inline]
- fn l(&self) -> bool { (self.bits & 0x10) == 0x10 }
+ pub fn l(&self) -> bool { (self.bits & 0x10) == 0x10 }
#[inline]
fn present(&self) -> bool { (self.bits & 0x80) == 0x80 }
#[inline]
@@ -4383,10 +4491,6 @@ impl Prefixes {
#[inline]
fn set_rep(&mut self) { self.bits = (self.bits & 0xcf) | 0x10 }
#[inline]
- pub fn repz(&self) -> bool { self.bits & 0x30 == 0x20 }
- #[inline]
- fn set_repz(&mut self) { self.bits = (self.bits & 0xcf) | 0x20 }
- #[inline]
pub fn repnz(&self) -> bool { self.bits & 0x30 == 0x30 }
#[inline]
fn set_repnz(&mut self) { self.bits = (self.bits & 0xcf) | 0x30 }
@@ -4431,11 +4535,20 @@ impl Prefixes {
#[inline]
fn set_ss(&mut self) { self.segment = Segment::SS }
#[inline]
- fn vex(&self) -> PrefixVex { PrefixVex { bits: self.vex.bits } }
+ fn vex_unchecked(&self) -> PrefixVex { PrefixVex { bits: self.vex.bits } }
+ #[inline]
+ pub fn vex(&self) -> Option<PrefixVex> {
+ let vex = self.vex_unchecked();
+ if vex.present() {
+ Some(vex)
+ } else {
+ None
+ }
+ }
#[inline]
fn evex_unchecked(&self) -> PrefixEvex { PrefixEvex { vex: PrefixVex { bits: self.vex.bits }, evex_data: self.evex_data } }
#[inline]
- fn evex(&self) -> Option<PrefixEvex> {
+ pub fn evex(&self) -> Option<PrefixEvex> {
let evex = self.evex_unchecked();
if evex.present() {
Some(evex)
@@ -5484,19 +5597,24 @@ fn read_modrm_reg(instr: &mut Instruction, modrm: u8, reg_bank: RegisterBank) ->
fn read_sib<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(words: &mut T, instr: &mut Instruction, modrm: u8) -> Result<OperandSpec, DecodeError> {
let modbits = modrm >> 6;
let sibbyte = words.next().ok().ok_or(DecodeError::ExhaustedInput)?;
+ instr.regs[1].num |= sibbyte & 7;
+ instr.regs[2].num |= (sibbyte >> 3) & 7;
let disp = if modbits == 0b00 {
if (sibbyte & 7) == 0b101 {
- read_num(words, 4)? as i32 as u32
+ read_num(words, 4)? as i32
} else {
0
}
} else if modbits == 0b01 {
- read_num(words, 1)? as i8 as i32 as u32
+ read_num(words, 1)? as i8 as i32
} else {
- read_num(words, 4)? as i32 as u32
+ read_num(words, 4)? as i32
};
- instr.disp = disp;
+ instr.disp = disp as u32;
+
+ let scale = 1u8 << (sibbyte >> 6);
+ instr.scale = scale;
let op_spec = if (sibbyte & 7) == 0b101 {
if ((sibbyte >> 3) & 7) == 0b100 {