diff options
| author | iximeow <me@iximeow.net> | 2021-07-04 12:20:13 -0700 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2021-07-04 12:36:03 -0700 | 
| commit | 48559b18574b44e2de879a5c641ab602ec22f0d8 (patch) | |
| tree | 0195cd249c1ce2429b12a14d063447fe96e3283e /src | |
| parent | 404cb6e81988ed84a75c89d67bf324409e22a390 (diff) | |
fix several incorrect tests and docs in 64- and 32-bit modes
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 34 | ||||
| -rw-r--r-- | src/long_mode/mod.rs | 2 | ||||
| -rw-r--r-- | src/long_mode/vex.rs | 10 | ||||
| -rw-r--r-- | src/protected_mode/display.rs | 12 | ||||
| -rw-r--r-- | src/protected_mode/mod.rs | 55 | ||||
| -rw-r--r-- | src/protected_mode/vex.rs | 28 | 
6 files changed, 90 insertions, 51 deletions
@@ -84,8 +84,8 @@ mod real_mode;  pub use real_mode::Arch as x86_16;  const MEM_SIZE_STRINGS: [&'static str; 64] = [ -    "byte", "word", "BUG", "dword", "far", "ptr", "BUG", "qword", -    "far", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", +    "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", +    "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword",      "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",      "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ymmword",      "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", @@ -98,6 +98,11 @@ pub struct MemoryAccessSize {      size: u8,  }  impl MemoryAccessSize { +    /// return the number of bytes referenced by this memory access. +    /// +    /// if the number of bytes cannot be confidently known by the instruction in isolation (as is +    /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this +    /// function will return `None`.      pub fn bytes_size(&self) -> Option<u8> {          if self.size == 63 {              None @@ -106,6 +111,31 @@ impl MemoryAccessSize {          }      } +    /// a human-friendly label for the number of bytes this memory access references. +    /// +    /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86` +    /// prefers to use consistent names for a width even if the way those bytes are used varies. 
+    /// +    /// the sizes `yaxpeax-x86` knows are as follows: +    /// | size (bytes) | name       | +    /// |--------------|------------| +    /// | 1            | `byte`     | +    /// | 2            | `word`     | +    /// | 4            | `dword`    | +    /// | 6            | `far`      | +    /// | 8            | `qword`    | +    /// | 10           | `mword`    | +    /// | 16           | `xmmword`  | +    /// | 32           | `ymmword`  | +    /// | 64           | `zmmword`  | +    /// | variable     | `ptr`      | +    /// +    /// "mword" refers to an x87-sized access - 80 bits, or 10 bytes. `mword` is also used for +    /// 64-bit far calls, because they reference a contiguous ten bytes; two bytes of segment +    /// selector and eight bytes of address. +    /// +    /// "variable" accesses access a number of bytes dependent on the physical processor and its +    /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions.      pub fn size_name(&self) -> &'static str {          MEM_SIZE_STRINGS[self.size as usize - 1]      } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 600a81a..79f3fee 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -7673,7 +7673,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe                  if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP {                      instruction.mem_size = 8;                  } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF { -                    instruction.mem_size = 9; +                    instruction.mem_size = 10;                  }              }              instruction.opcode = opcode; diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index 67c4965..5695b17 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -411,7 +411,7 @@ fn read_vex_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y            
  instruction.opcode = if modrm & 0xc0 == 0xc0 {                  Opcode::VMOVHLPS              } else { -                instruction.mem_size = 4; +                instruction.mem_size = 8;                  Opcode::VMOVLPS              };              instruction.regs[0] = @@ -1925,7 +1925,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                          } else {                              VEXOperandCode::G_V_E_xmm                          }), -                        0xDA => (Opcode::VPMINSW, if L { +                        0xDA => (Opcode::VPMINUB, if L {                              VEXOperandCode::G_V_E_ymm                          } else {                              VEXOperandCode::G_V_E_xmm @@ -3201,12 +3201,12 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                          VEXOperandCode::G_E_xmm_imm8                      }),                      0x0A => (Opcode::VROUNDSS, if L { -                        VEXOperandCode::G_V_E_ymm_imm8 +                        VEXOperandCode::G_V_E_xmm_imm8                      } else {                          VEXOperandCode::G_V_E_xmm_imm8                      }),                      0x0B => (Opcode::VROUNDSD, if L { -                        VEXOperandCode::G_V_E_ymm_imm8 +                        VEXOperandCode::G_V_E_xmm_imm8                      } else {                          VEXOperandCode::G_V_E_xmm_imm8                      }), @@ -3269,7 +3269,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                          return Err(DecodeError::InvalidOpcode);                      } else {                          (Opcode::VINSERTF128, if L { -                            VEXOperandCode::G_V_E_ymm_imm8 +                            VEXOperandCode::G_ymm_V_ymm_E_xmm_imm8                          } else {                              instruction.opcode = Opcode::Invalid;                        
      return Err(DecodeError::InvalidOpcode); diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index baca1a2..c5892c2 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -1328,7 +1328,7 @@ const MNEMONICS: &[&'static str] = &[      "loopnz",      "loopz",      "loop", -    "jrcxz", +    "jecxz",      "pusha",      "popa",      "bound", @@ -1337,8 +1337,8 @@ const MNEMONICS: &[&'static str] = &[      "aaa",      "das",      "daa", -    "amx", -    "adx", +    "aam", +    "aad",      "movdir64b",      "movdiri",      "aesdec128kl", @@ -2332,8 +2332,8 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::AAS |              Opcode::DAS |              Opcode::DAA | -            Opcode::ADX | -            Opcode::AMX | +            Opcode::AAD | +            Opcode::AAM |              Opcode::KADDB |              Opcode::KANDB |              Opcode::KANDNB | @@ -2403,7 +2403,7 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode {              Opcode::LOOPNZ |              Opcode::LOOPZ |              Opcode::LOOP | -            Opcode::JRCXZ | +            Opcode::JECXZ |              Opcode::CALL |              Opcode::CALLF |              Opcode::JMP | diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 3678bf8..a6ad2ee 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -888,7 +888,7 @@ const REGISTER_CLASS_NAMES: &[&'static str] = &[      "eflags",  ]; -/// high-level register classes in an x86 machine, such as "8-byte general purpose", "xmm", "x87", +/// high-level register classes in an x86 machine, such as "4-byte general purpose", "xmm", "x87",  /// and so on. constants in this module are useful for inspecting the register class of a decoded  /// instruction. 
as an example:  /// ``` @@ -2078,7 +2078,7 @@ pub enum Opcode {      LOOPNZ,      LOOPZ,      LOOP, -    JRCXZ, +    JECXZ,      PUSHA,      POPA, @@ -2088,8 +2088,8 @@ pub enum Opcode {      AAA,      DAS,      DAA, -    AMX, -    ADX, +    AAM, +    AAD,      // started shipping in Tremont, 2020 sept 23      MOVDIR64B, @@ -5471,8 +5471,8 @@ const OPCODES: [OpcodeRecord; 256] = [      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd1_Ev_1),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd2_Eb_CL),      OpcodeRecord(Interpretation::Instruction(Opcode::Invalid), OperandCode::ModRM_0xd3_Ev_CL), -    OpcodeRecord(Interpretation::Instruction(Opcode::AMX), OperandCode::Ib), -    OpcodeRecord(Interpretation::Instruction(Opcode::ADX), OperandCode::Ib), +    OpcodeRecord(Interpretation::Instruction(Opcode::AAM), OperandCode::Ib), +    OpcodeRecord(Interpretation::Instruction(Opcode::AAD), OperandCode::Ib),      OpcodeRecord(Interpretation::Instruction(Opcode::SALC), OperandCode::Nothing),      // XLAT      OpcodeRecord(Interpretation::Instruction(Opcode::XLAT), OperandCode::Nothing), @@ -5496,7 +5496,7 @@ const OPCODES: [OpcodeRecord; 256] = [      OpcodeRecord(Interpretation::Instruction(Opcode::LOOPNZ), OperandCode::Ibs),      OpcodeRecord(Interpretation::Instruction(Opcode::LOOPZ), OperandCode::Ibs),      OpcodeRecord(Interpretation::Instruction(Opcode::LOOP), OperandCode::Ibs), -    OpcodeRecord(Interpretation::Instruction(Opcode::JRCXZ), OperandCode::Ibs), +    OpcodeRecord(Interpretation::Instruction(Opcode::JECXZ), OperandCode::Ibs),      OpcodeRecord(Interpretation::Instruction(Opcode::IN), OperandCode::AL_Ib),      OpcodeRecord(Interpretation::Instruction(Opcode::IN), OperandCode::AX_Ib),      OpcodeRecord(Interpretation::Instruction(Opcode::OUT), OperandCode::Ib_AL), @@ -5721,19 +5721,35 @@ fn read_M_16bit<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpea              }          
},          0b01 => { -            instr.disp = read_num(words, 1)?; +            instr.disp = read_num(words, 1)? as i8 as i32 as u32;              if mmm > 3 { -                Ok(OperandSpec::RegDisp) +                if instr.disp != 0 { +                    Ok(OperandSpec::RegDisp) +                } else { +                    Ok(OperandSpec::Deref) +                }              } else { -                Ok(OperandSpec::RegIndexBaseDisp) +                if instr.disp != 0 { +                    Ok(OperandSpec::RegIndexBaseDisp) +                } else { +                    Ok(OperandSpec::RegIndexBase) +                }              }          },          0b10 => { -            instr.disp = read_num(words, 2)?; +            instr.disp = read_num(words, 2)? as i16 as i32 as u32;              if mmm > 3 { -                Ok(OperandSpec::RegDisp) +                if instr.disp != 0 { +                    Ok(OperandSpec::RegDisp) +                } else { +                    Ok(OperandSpec::Deref) +                }              } else { -                Ok(OperandSpec::RegIndexBaseDisp) +                if instr.disp != 0 { +                    Ok(OperandSpec::RegIndexBaseDisp) +                } else { +                    Ok(OperandSpec::RegIndexBase) +                }              }          },          _ => { @@ -7528,7 +7544,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe                  if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP {                      instruction.mem_size = 4;                  } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF { -                    instruction.mem_size = 5; +                    instruction.mem_size = 6;                  }              }              instruction.opcode = opcode; @@ -7782,6 +7798,9 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y              
instruction.regs[0] = RegSpec { bank: RegisterBank::X, num: (modrm >> 3) & 7 };              instruction.operands[0] = OperandSpec::RegRRR;              instruction.operands[1] = read_E_xmm(words, instruction, modrm)?; +            if instruction.opcode == Opcode::CVTTSD2SI || instruction.opcode == Opcode::CVTSD2SI { +                instruction.regs[0].bank = RegisterBank::D; +            }              if instruction.operands[1] != OperandSpec::RegMMM {                  if [Opcode::PMOVSXBQ, Opcode::PMOVZXBQ].contains(&instruction.opcode) {                      instruction.mem_size = 2; @@ -8462,7 +8481,7 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y                      }                      7 => {                          instruction.opcode = Opcode::RDPID; -                        instruction.operands[0] = read_E(words, instruction, modrm, opwidth)?; +                        instruction.operands[0] = read_E(words, instruction, modrm, 4)?;                          if instruction.operands[0] != OperandSpec::RegMMM {                              return Err(DecodeError::InvalidOperand);                          } @@ -9668,7 +9687,11 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y                          }                          6 => {                              instruction.opcode = Opcode::UMONITOR; -                            instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::D); +                            if instruction.prefixes.address_size() { +                                instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::W); +                            } else { +                                instruction.regs[1] = RegSpec::from_parts(m, RegisterBank::D); +                            };                              instruction.operands[0] = OperandSpec::RegMMM;                              instruction.operand_count = 1;                          } diff --git 
a/src/protected_mode/vex.rs b/src/protected_mode/vex.rs index 053d1aa..3550f77 100644 --- a/src/protected_mode/vex.rs +++ b/src/protected_mode/vex.rs @@ -412,7 +412,7 @@ fn read_vex_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y              instruction.opcode = if modrm & 0xc0 == 0xc0 {                  Opcode::VMOVHLPS              } else { -                instruction.mem_size = 4; +                instruction.mem_size = 8;                  Opcode::VMOVLPS              };              instruction.regs[0] = @@ -1854,7 +1854,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                          } else {                              VEXOperandCode::G_V_E_xmm                          }), -                        0xDA => (Opcode::VPMINSW, if L { +                        0xDA => (Opcode::VPMINUB, if L {                              VEXOperandCode::G_V_E_ymm                          } else {                              VEXOperandCode::G_V_E_xmm @@ -3130,12 +3130,12 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                          VEXOperandCode::G_E_xmm_imm8                      }),                      0x0A => (Opcode::VROUNDSS, if L { -                        VEXOperandCode::G_V_E_ymm_imm8 +                        VEXOperandCode::G_V_E_xmm_imm8                      } else {                          VEXOperandCode::G_V_E_xmm_imm8                      }),                      0x0B => (Opcode::VROUNDSD, if L { -                        VEXOperandCode::G_V_E_ymm_imm8 +                        VEXOperandCode::G_V_E_xmm_imm8                      } else {                          VEXOperandCode::G_V_E_xmm_imm8                      }), @@ -3171,14 +3171,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                      } else {                          VEXOperandCode::Ev_G_xmm_imm8                      }), -                    0x16 
=> if instruction.prefixes.vex_unchecked().w() { -                        (Opcode::VPEXTRQ, if L { -                            instruction.opcode = Opcode::Invalid; -                            return Err(DecodeError::InvalidOpcode); -                        } else { -                            VEXOperandCode::Ev_G_xmm_imm8 -                        }) -                    } else { +                    0x16 => {                          (Opcode::VPEXTRD, if L {                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode); @@ -3198,7 +3191,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                          return Err(DecodeError::InvalidOpcode);                      } else {                          (Opcode::VINSERTF128, if L { -                            VEXOperandCode::G_V_E_ymm_imm8 +                            VEXOperandCode::G_ymm_V_ymm_E_xmm_imm8                          } else {                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode); @@ -3232,14 +3225,7 @@ fn read_vex_instruction<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch a                      } else {                          VEXOperandCode::G_V_E_xmm_imm8                      }), -                    0x22 => if instruction.prefixes.vex_unchecked().w() { -                        (Opcode::VPINSRQ, if L { -                            instruction.opcode = Opcode::Invalid; -                            return Err(DecodeError::InvalidOpcode); -                        } else { -                            VEXOperandCode::G_V_xmm_Ev_imm8 -                        }) -                    } else { +                    0x22 => {                          (Opcode::VPINSRD, if L {                              instruction.opcode = Opcode::Invalid;                              return Err(DecodeError::InvalidOpcode);  
| 
