From 48559b18574b44e2de879a5c641ab602ec22f0d8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 4 Jul 2021 12:20:13 -0700 Subject: fix several incorrect tests and docs in 64- and 32-bit modes --- src/lib.rs | 34 ++++++++++++++++++++++++-- src/long_mode/mod.rs | 2 +- src/long_mode/vex.rs | 10 ++++---- src/protected_mode/display.rs | 12 +++++----- src/protected_mode/mod.rs | 55 ++++++++++++++++++++++++++++++------------- src/protected_mode/vex.rs | 28 ++++++---------------- 6 files changed, 90 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/lib.rs b/src/lib.rs index cb879fd..057c125 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,8 +84,8 @@ mod real_mode; pub use real_mode::Arch as x86_16; const MEM_SIZE_STRINGS: [&'static str; 64] = [ - "byte", "word", "BUG", "dword", "far", "ptr", "BUG", "qword", - "far", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", + "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", + "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ymmword", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", @@ -98,6 +98,11 @@ pub struct MemoryAccessSize { size: u8, } impl MemoryAccessSize { + /// return the number of bytes referenced by this memory access. + /// + /// if the number of bytes cannot be confidently known by the instruction in isolation (as is + /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this + /// function will return `None`. pub fn bytes_size(&self) -> Option { if self.size == 63 { None @@ -106,6 +111,31 @@ impl MemoryAccessSize { } } + /// a human-friendly label for the number of bytes this memory access references. + /// + /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86` + /// prefers to use consistent names for a width even if the way those bytes are used varies. + /// + /// the sizes `yaxpeax-x86` knows are as follows: + /// | size (bytes) | name | + /// |--------------|------------| + /// | 1 | `byte` | + /// | 2 | `word` | + /// | 4 | `dword` | + /// | 6 | `far` | + /// | 8 | `qword` | + /// | 10 | `mword` | + /// | 16 | `xmmword` | + /// | 32 | `ymmword` | + /// | 64 | `zmmword` | + /// | variable | `ptr` | + /// + /// "mword" refers to an mmx-sized access - 80 bits, or 10 bytes. `mword` is also used for + /// 64-bit far calls, because they reference a contiguous ten bytes; two bytes of segment + /// selector and eight bytes of address. + /// + /// "variable" accesses access a number of bytes dependent on the physical processor and its + /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions. pub fn size_name(&self) -> &'static str { MEM_SIZE_STRINGS[self.size as usize - 1] } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 600a81a..79f3fee 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -7673,7 +7673,7 @@ fn read_operands::Address, ::Address, ::Address, (Opcode::VPMINSW, if L { + 0xDA => (Opcode::VPMINUB, if L { VEXOperandCode::G_V_E_ymm } else { VEXOperandCode::G_V_E_xmm @@ -3201,12 +3201,12 @@ fn read_vex_instruction::Address, (Opcode::VROUNDSS, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), 0x0B => (Opcode::VROUNDSD, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), @@ -3269,7 +3269,7 @@ fn read_vex_instruction::Address, Colorize for Opcode { Opcode::AAS | Opcode::DAS | Opcode::DAA | - Opcode::ADX | - Opcode::AMX | + Opcode::AAD | + Opcode::AAM | Opcode::KADDB | Opcode::KANDB | Opcode::KANDNB | @@ -2403,7 +2403,7 @@ impl Colorize for Opcode { Opcode::LOOPNZ | Opcode::LOOPZ | Opcode::LOOP | - Opcode::JRCXZ | + Opcode::JECXZ | Opcode::CALL | Opcode::CALLF | Opcode::JMP | diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 3678bf8..a6ad2ee 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -888,7 +888,7 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[ "eflags", ]; -/// high-level register classes in an x86 machine, such as "8-byte general purpose", "xmm", "x87", +/// high-level register classes in an x86 machine, such as "4-byte general purpose", "xmm", "x87", /// and so on. constants in this module are useful for inspecting the register class of a decoded /// instruction. as an example: /// ``` @@ -2078,7 +2078,7 @@ pub enum Opcode { LOOPNZ, LOOPZ, LOOP, - JRCXZ, + JECXZ, PUSHA, POPA, @@ -2088,8 +2088,8 @@ pub enum Opcode { AAA, DAS, DAA, - AMX, - ADX, + AAM, + AAD, // started shipping in Tremont, 2020 sept 23 MOVDIR64B, @@ -5471,8 +5471,8 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd1_Ev_1), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd2_Eb_CL), OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd3_Ev_CL), - OpcodeRecord(Interpretation::Instruction(Opcode::AMX), OperandCode::Ib), - OpcodeRecord(Interpretation::Instruction(Opcode::ADX), OperandCode::Ib), + OpcodeRecord(Interpretation::Instruction(Opcode::AAM), OperandCode::Ib), + OpcodeRecord(Interpretation::Instruction(Opcode::AAD), OperandCode::Ib), OpcodeRecord(Interpretation::Instruction(Opcode::SALC), OperandCode::Nothing), // XLAT OpcodeRecord(Interpretation::Instruction(Opcode::XLAT), OperandCode::Nothing), @@ -5496,7 +5496,7 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord(Interpretation::Instruction(Opcode::LOOPNZ), OperandCode::Ibs), OpcodeRecord(Interpretation::Instruction(Opcode::LOOPZ), OperandCode::Ibs), OpcodeRecord(Interpretation::Instruction(Opcode::LOOP), OperandCode::Ibs), - OpcodeRecord(Interpretation::Instruction(Opcode::JRCXZ), OperandCode::Ibs), + OpcodeRecord(Interpretation::Instruction(Opcode::JECXZ), OperandCode::Ibs), OpcodeRecord(Interpretation::Instruction(Opcode::IN), OperandCode::AL_Ib), OpcodeRecord(Interpretation::Instruction(Opcode::IN), OperandCode::AX_Ib), OpcodeRecord(Interpretation::Instruction(Opcode::OUT), OperandCode::Ib_AL), @@ -5721,19 +5721,35 @@ fn read_M_16bit::Address, { - instr.disp = read_num(words, 1)?; + instr.disp = read_num(words, 1)? as i8 as i32 as u32; if mmm > 3 { - Ok(OperandSpec::RegDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegDisp) + } else { + Ok(OperandSpec::Deref) + } } else { - Ok(OperandSpec::RegIndexBaseDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegIndexBaseDisp) + } else { + Ok(OperandSpec::RegIndexBase) + } } }, 0b10 => { - instr.disp = read_num(words, 2)?; + instr.disp = read_num(words, 2)? as i16 as i32 as u32; if mmm > 3 { - Ok(OperandSpec::RegDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegDisp) + } else { + Ok(OperandSpec::Deref) + } } else { - Ok(OperandSpec::RegIndexBaseDisp) + if instr.disp != 0 { + Ok(OperandSpec::RegIndexBaseDisp) + } else { + Ok(OperandSpec::RegIndexBase) + } } }, _ => { @@ -7528,7 +7544,7 @@ fn read_operands::Address, ::Address, > 3) & 7 }; instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; + if instruction.opcode == Opcode::CVTTSD2SI || instruction.opcode == Opcode::CVTSD2SI { + instruction.regs[0].bank = RegisterBank::D; + } if instruction.operands[1] != OperandSpec::RegMMM { if [Opcode::PMOVSXBQ, Opcode::PMOVZXBQ].contains(&instruction.opcode) { instruction.mem_size = 2; @@ -8462,7 +8481,7 @@ fn unlikely_operands::Address, { instruction.opcode = Opcode::RDPID; - instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; + instruction.operands[0] = read_E(words, instruction, modrm, 4)?; if instruction.operands[0] != OperandSpec::RegMMM { return Err(DecodeError::InvalidOperand); } @@ -9668,7 +9687,11 @@ fn unlikely_operands::Address, { instruction.opcode = Opcode::UMONITOR; - instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::D); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::W); + } else { + instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::D); + }; instruction.operands[0] = OperandSpec::RegMMM; instruction.operand_count = 1; } diff --git a/src/protected_mode/vex.rs b/src/protected_mode/vex.rs index 053d1aa..3550f77 100644 --- a/src/protected_mode/vex.rs +++ b/src/protected_mode/vex.rs @@ -412,7 +412,7 @@ fn read_vex_operands::Address, ::Address, (Opcode::VPMINSW, if L { + 0xDA => (Opcode::VPMINUB, if L { VEXOperandCode::G_V_E_ymm } else { VEXOperandCode::G_V_E_xmm @@ -3130,12 +3130,12 @@ fn read_vex_instruction::Address, (Opcode::VROUNDSS, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), 0x0B => (Opcode::VROUNDSD, if L { - VEXOperandCode::G_V_E_ymm_imm8 + VEXOperandCode::G_V_E_xmm_imm8 } else { VEXOperandCode::G_V_E_xmm_imm8 }), @@ -3171,14 +3171,7 @@ fn read_vex_instruction::Address, if instruction.prefixes.vex_unchecked().w() { - (Opcode::VPEXTRQ, if L { - instruction.opcode = Opcode::Invalid; - return Err(DecodeError::InvalidOpcode); - } else { - VEXOperandCode::Ev_G_xmm_imm8 - }) - } else { + 0x16 => { (Opcode::VPEXTRD, if L { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); @@ -3198,7 +3191,7 @@ fn read_vex_instruction::Address, ::Address, if instruction.prefixes.vex_unchecked().w() { - (Opcode::VPINSRQ, if L { - instruction.opcode = Opcode::Invalid; - return Err(DecodeError::InvalidOpcode); - } else { - VEXOperandCode::G_V_xmm_Ev_imm8 - }) - } else { + 0x22 => { (Opcode::VPINSRD, if L { instruction.opcode = Opcode::Invalid; return Err(DecodeError::InvalidOpcode); -- cgit v1.1