diff options
author | iximeow <me@iximeow.net> | 2021-07-04 12:20:13 -0700 |
---|---|---|
committer | iximeow <me@iximeow.net> | 2021-07-04 12:36:03 -0700 |
commit | 48559b18574b44e2de879a5c641ab602ec22f0d8 (patch) | |
tree | 0195cd249c1ce2429b12a14d063447fe96e3283e /src | |
parent | 404cb6e81988ed84a75c89d67bf324409e22a390 (diff) |
fix several incorrect tests and docs in 64- and 32-bit modes
Diffstat (limited to 'src')
-rw-r--r-- | src/lib.rs | 34 | ||||
-rw-r--r-- | src/long_mode/mod.rs | 2 | ||||
-rw-r--r-- | src/long_mode/vex.rs | 10 | ||||
-rw-r--r-- | src/protected_mode/display.rs | 12 | ||||
-rw-r--r-- | src/protected_mode/mod.rs | 55 | ||||
-rw-r--r-- | src/protected_mode/vex.rs | 28 |
6 files changed, 90 insertions, 51 deletions
@@ -84,8 +84,8 @@ mod real_mode; pub use real_mode::Arch as x86_16; const MEM_SIZE_STRINGS: [&'static str; 64] = [ - "byte", "word", "BUG", "dword", "far", "ptr", "BUG", "qword", - "far", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", + "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", + "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ymmword", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", @@ -98,6 +98,11 @@ pub struct MemoryAccessSize { size: u8, } impl MemoryAccessSize { + /// return the number of bytes referenced by this memory access. + /// + /// if the number of bytes cannot be confidently known by the instruction in isolation (as is + /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this + /// function will return `None`. pub fn bytes_size(&self) -> Option<u8> { if self.size == 63 { None @@ -106,6 +111,31 @@ impl MemoryAccessSize { } } + /// a human-friendly label for the number of bytes this memory access references. + /// + /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86` + /// prefers to use consistent names for a width even if the way those bytes are used varies. + /// + /// the sizes `yaxpeax-x86` knows are as follows: + /// | size (bytes) | name | + /// |--------------|------------| + /// | 1 | `byte` | + /// | 2 | `word` | + /// | 4 | `dword` | + /// | 6 | `far` | + /// | 8 | `qword` | + /// | 10 | `mword` | + /// | 16 | `xmmword` | + /// | 32 | `ymmword` | + /// | 64 | `zmmword` | + /// | variable | `ptr` | + /// + /// "mword" refers to an x87-sized access - 80 bits, or 10 bytes. `mword` is also used for + /// 64-bit far calls, because they reference a contiguous ten bytes; two bytes of segment + /// selector and eight bytes of address. 
+ /// + /// "variable" accesses access a number of bytes dependent on the physical processor and its + /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions. pub fn size_name(&self) -> &'static str { MEM_SIZE_STRINGS[self.size as usize - 1] } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 600a81a..79f3fee 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -7673,7 +7673,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP { instruction.mem_size = 8; } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF { - instruction.mem_size = 9; + instruction.mem_size = 10; } } instruction.opcode = opcode; diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index 67c4965..5695b17 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -411,7 +411,7 @@ fn read_vex_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y instruction.opcode = if modrm & 0xc0 == 0xc0 { Opcode::VMOVHLPS } else { - instruction.mem_size = 4; + instruction.mem_size = 8; Opcode::VMOVLPS }; instruction.regs[0] = @@ -1925,7 +1925,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a } else { VEXOperandCode::G_V_E_xmm }), - 0xDA => (Opcode::VPMINSW, if L { + 0xDA => (Opcode::VPMINUB, if L { VEXOperandCode::G_V_E_ymm } else { VEXOperandCode::G_V_E_xmm @@ -3201,12 +3201,12 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a VEXOperandCode::G_E_xmm_imm8 }), 0x0A => (Opcode::VROUNDSS, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), 0x0B => (Opcode::VROUNDSD, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), @@ -3269,7 +3269,7 @@ fn read_vex_instruction<T: Reader<<Arch as 
yaxpeax_arch::Arch>::Address, <Arch a return Err(DecodeError::InvalidOpcode); } else { (Opcode::VINSERTF128, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_ymm_V_ymm_E_xmm_imm8 } else { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index baca1a2..c5892c2 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -1328,7 +1328,7 @@ const MNEMONICS: &[&'static str] = &[ "loopnz", "loopz", "loop", - "jrcxz", + "jecxz", "pusha", "popa", "bound", @@ -1337,8 +1337,8 @@ const MNEMONICS: &[&'static str] = &[ "aaa", "das", "daa", - "amx", - "adx", + "aam", + "aad", "movdir64b", "movdiri", "aesdec128kl", @@ -2332,8 +2332,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode { Opcode::AAS | Opcode::DAS | Opcode::DAA | - Opcode::ADX | - Opcode::AMX | + Opcode::AAD | + Opcode::AAM | Opcode::KADDB | Opcode::KANDB | Opcode::KANDNB | @@ -2403,7 +2403,7 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode { Opcode::LOOPNZ | Opcode::LOOPZ | Opcode::LOOP | - Opcode::JRCXZ | + Opcode::JECXZ | Opcode::CALL | Opcode::CALLF | Opcode::JMP | diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 3678bf8..a6ad2ee 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -888,7 +888,7 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[ "eflags", ]; -/// high-level register classes in an x86 machine, such as "8-byte general purpose", "xmm", "x87", +/// high-level register classes in an x86 machine, such as "4-byte general purpose", "xmm", "x87", /// and so on. constants in this module are useful for inspecting the register class of a decoded /// instruction. 
as an example: /// ``` @@ -2078,7 +2078,7 @@ pub enum Opcode { LOOPNZ, LOOPZ, LOOP, - JRCXZ, + JECXZ, PUSHA, POPA, @@ -2088,8 +2088,8 @@ pub enum Opcode { AAA, DAS, DAA, - AMX, - ADX, + AAM, + AAD, // started shipping in Tremont, 2020 sept 23 MOVDIR64B, @@ -5471,8 +5471,8 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd1_Ev_1), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd2_Eb_CL), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd3_Ev_CL), - OpcodeRecord(Interpretation::Instruction(Opcode::AMX), OperandCode::Ib), - OpcodeRecord(Interpretation::Instruction(Opcode::ADX), OperandCode::Ib), + OpcodeRecord(Interpretation::Instruction(Opcode::AAM), OperandCode::Ib), + OpcodeRecord(Interpretation::Instruction(Opcode::AAD), OperandCode::Ib), OpcodeRecord(Interpretation::Instruction(Opcode::SALC), OperandCode::Nothing), // XLAT OpcodeRecord(Interpretation::Instruction(Opcode::XLAT), OperandCode::Nothing), @@ -5496,7 +5496,7 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::LOOPNZ), OperandCode::Ibs), OpcodeRecord(Interpretation::Instruction(Opcode::LOOPZ), OperandCode::Ibs), OpcodeRecord(Interpretation::Instruction(Opcode::LOOP), OperandCode::Ibs), - OpcodeRecord(Interpretation::Instruction(Opcode::JRCXZ), OperandCode::Ibs), + OpcodeRecord(Interpretation::Instruction(Opcode::JECXZ), OperandCode::Ibs), OpcodeRecord(Interpretation::Instruction(Opcode::IN), OperandCode::AL_Ib), OpcodeRecord(Interpretation::Instruction(Opcode::IN), OperandCode::AX_Ib), OpcodeRecord(Interpretation::Instruction(Opcode::OUT), OperandCode::Ib_AL), @@ -5721,19 +5721,35 @@ fn read_M_16bit<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpea } }, 0b01 => { - instr.disp = read_num(words, 1)?; + instr.disp = read_num(words, 1)? 
as i8 as i32 as u32; if mmm > 3 { - Ok(OperandSpec::RegDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegDisp) + } else { + Ok(OperandSpec::Deref) + } } else { - Ok(OperandSpec::RegIndexBaseDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegIndexBaseDisp) + } else { + Ok(OperandSpec::RegIndexBase) + } } }, 0b10 => { - instr.disp = read_num(words, 2)?; + instr.disp = read_num(words, 2)? as i16 as i32 as u32; if mmm > 3 { - Ok(OperandSpec::RegDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegDisp) + } else { + Ok(OperandSpec::Deref) + } } else { - Ok(OperandSpec::RegIndexBaseDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegIndexBaseDisp) + } else { + Ok(OperandSpec::RegIndexBase) + } } }, _ => { @@ -7528,7 +7544,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP { instruction.mem_size = 4; } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF { - instruction.mem_size = 5; + instruction.mem_size = 6; } } instruction.opcode = opcode; @@ -7782,6 +7798,9 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y instruction.regs[0] = RegSpec { bank: RegisterBank::X, num: (modrm >> 3) & 7 }; instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + if instruction.opcode == Opcode::CVTTSD2SI || instruction.opcode == Opcode::CVTSD2SI { + instruction.regs[0].bank = RegisterBank::D; + } if instruction.operands[1] != OperandSpec::RegMMM { if [Opcode::PMOVSXBQ, Opcode::PMOVZXBQ].contains(&instruction.opcode) { instruction.mem_size = 2; @@ -8462,7 +8481,7 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y } 7 => { instruction.opcode = Opcode::RDPID; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4)?; if instruction.operands[0] != 
OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -9668,7 +9687,11 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y } 6 => { instruction.opcode = Opcode::UMONITOR; - instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::D); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::W); + } else { + instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::D); + }; instruction.operands[0] = OperandSpec::RegMMM; instruction.operand_count = 1; } diff --git a/src/protected_mode/vex.rs b/src/protected_mode/vex.rs index 053d1aa..3550f77 100644 --- a/src/protected_mode/vex.rs +++ b/src/protected_mode/vex.rs @@ -412,7 +412,7 @@ fn read_vex_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y instruction.opcode = if modrm & 0xc0 == 0xc0 { Opcode::VMOVHLPS } else { - instruction.mem_size = 4; + instruction.mem_size = 8; Opcode::VMOVLPS }; instruction.regs[0] = @@ -1854,7 +1854,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a } else { VEXOperandCode::G_V_E_xmm }), - 0xDA => (Opcode::VPMINSW, if L { + 0xDA => (Opcode::VPMINUB, if L { VEXOperandCode::G_V_E_ymm } else { VEXOperandCode::G_V_E_xmm @@ -3130,12 +3130,12 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a VEXOperandCode::G_E_xmm_imm8 }), 0x0A => (Opcode::VROUNDSS, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), 0x0B => (Opcode::VROUNDSD, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), @@ -3171,14 +3171,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a } else { VEXOperandCode::Ev_G_xmm_imm8 }), - 0x16 => if instruction.prefixes.vex_unchecked().w() { - (Opcode::VPEXTRQ, if L { - instruction.opcode = Opcode::Invalid; - return Err(DecodeError::InvalidOpcode); - } 
else { - VEXOperandCode::Ev_G_xmm_imm8 - }) - } else { + 0x16 => { (Opcode::VPEXTRD, if L { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); @@ -3198,7 +3191,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a return Err(DecodeError::InvalidOpcode); } else { (Opcode::VINSERTF128, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_ymm_V_ymm_E_xmm_imm8 } else { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); @@ -3232,14 +3225,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a } else { VEXOperandCode::G_V_E_xmm_imm8 }), - 0x22 => if instruction.prefixes.vex_unchecked().w() { - (Opcode::VPINSRQ, if L { - instruction.opcode = Opcode::Invalid; - return Err(DecodeError::InvalidOpcode); - } else { - VEXOperandCode::G_V_xmm_Ev_imm8 - }) - } else { + 0x22 => { (Opcode::VPINSRD, if L { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); |