From ab51fd1b2c7cf1b7bb6f84c5b07e06245f6b3d99 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 24 Jul 2023 06:41:02 -0700 Subject: fix handling of lar/lsl source register --- CHANGELOG | 5 +++++ src/long_mode/mod.rs | 33 +++++++++++++++++++++++++++------ src/protected_mode/mod.rs | 37 +++++++++++++++++++++++++++++-------- src/real_mode/mod.rs | 37 +++++++++++++++++++++++++++++-------- test/long_mode/mod.rs | 18 +++++++++++++----- test/protected_mode/mod.rs | 10 ++++++++-- test/real_mode/mod.rs | 10 ++++++++-- todo_notes | 1 - 8 files changed, 119 insertions(+), 32 deletions(-) delete mode 100644 todo_notes diff --git a/CHANGELOG b/CHANGELOG index 81a1f46..091aa06 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -25,6 +25,11 @@ encodings and bitness * in some cases, instructions loading a single-precision float reported 8-byte loads * in some cases, instructions loading a double-precision float reported 4-byte loads +* fix register sizes for lar/lsl + * 16 bits are read from the source register, but x86 docs state that the + source register is written as 16-bit, 32-bit, or 64-bit, as prefixes dictate. + memory is always written as `word [addr]`, which was correct before and + remains the case. ## 1.1.5 * fix several typos across crate docs - thank you Bruce! (aka github user waywardmonkeys) diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 71b2373..14f782c 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -5037,6 +5037,7 @@ enum OperandCase { AX_Ib, Ib_AL, Ib_AX, + Gv_Ew_LAR, Gv_Ew_LSL, Gdq_Ev, Gv_Ev_Ib, @@ -5374,6 +5375,7 @@ enum OperandCode { Zv_Ivq_R7 = OperandCodeBuilder::new().op0_is_rrr_and_Z_operand(ZOperandCategory::Zv_Ivq_R, 7).bits(), Gv_Eb = OperandCodeBuilder::new().read_E().operand_case(OperandCase::Gv_Eb).bits(), Gv_Ew = OperandCodeBuilder::new().read_E().operand_case(OperandCase::Gv_Ew).bits(), + Gv_Ew_LAR = OperandCodeBuilder::new().read_E().operand_case(OperandCase::Gv_Ew_LAR).bits(), Gv_Ew_LSL = OperandCodeBuilder::new().read_E().operand_case(OperandCase::Gv_Ew_LSL).bits(), Gdq_Ed = OperandCodeBuilder::new().read_E().operand_case(OperandCase::Gdq_Ed).bits(), Gd_Ed = OperandCodeBuilder::new().read_E().operand_case(OperandCase::Gd_Ed).bits(), @@ -8009,11 +8011,30 @@ fn read_operands< instruction.regs[1].bank = RegisterBank::MM; instruction.regs[1].num &= 0b111; }, + OperandCase::Gv_Ew_LAR => { + instruction.operands[1] = mem_oper; + // lar is weird. a segment selector is taken from the source register, which means + // either we read the low 16-bits of a register or read 16 bits from a memory operand. + // for whatever reason, the intel manual writes a source register as a dword/qword for + // larger modes even though the upper 16 bits would be ignored. + // + // so the registers are correct by the time we're here, we might just need to override + // mem size as well. + if instruction.operands[1] != OperandSpec::RegMMM { + instruction.mem_size = 2; + } + instruction.regs[0].bank = self.vqp_size(); + }, OperandCase::Gv_Ew_LSL => { instruction.operands[1] = mem_oper; - // lsl is weird. the full register width is written, but only the low 16 bits are used. + // lsl is weird. a segment selector is taken from the source register, which means + // either we read the low 16-bits of a register or read 16 bits from a memory operand. + // for whatever reason, the intel manual writes a source register as a dword for larger + // modes even though the upper 16 bits would be ignored. if instruction.operands[1] == OperandSpec::RegMMM { - instruction.regs[1].bank = RegisterBank::D; + if instruction.regs[1].bank == RegisterBank::Q { + instruction.regs[1].bank = RegisterBank::D; + } } else { instruction.mem_size = 2; } @@ -10869,7 +10890,7 @@ fn read_modrm::Address, { + OperandCase::Gv_Ew_LAR => { instruction.operands[1] = mem_oper; - // lsl is weird. the full register width is written, but only the low 16 bits are used. - if instruction.operands[1] == OperandSpec::RegMMM { - instruction.regs[1].bank = RegisterBank::D; + // lar is weird. a segment selector is taken from the source register, which means + // either we read the low 16-bits of a register or read 16 bits from a memory operand. + // for whatever reason, the intel manual writes a source register as a dword/qword for + // larger modes even though the upper 16 bits would be ignored. + // + // so the registers are correct by the time we're here, we might just need to override + // mem size as well. + if instruction.operands[1] != OperandSpec::RegMMM { + instruction.mem_size = 2; + } + instruction.regs[0].bank = if instruction.prefixes.operand_size() { + RegisterBank::W } else { + RegisterBank::D + }; + }, + OperandCase::Gv_Ew_LSL => { + instruction.operands[1] = mem_oper; + // lsl is weird. a segment selector is taken from the source register, which means + // either we read the low 16-bits of a register or read 16 bits from a memory operand. + // for whatever reason, the intel manual writes a source register as a dword for larger + // modes even though the upper 16 bits would be ignored. + if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 2; } instruction.regs[0].bank = if instruction.prefixes.operand_size() { @@ -10743,7 +10764,7 @@ fn read_modrm::Address, { + OperandCase::Gv_Ew_LAR => { instruction.operands[1] = mem_oper; - // lsl is weird. the full register width is written, but only the low 16 bits are used. - if instruction.operands[1] == OperandSpec::RegMMM { - instruction.regs[1].bank = RegisterBank::W; + // lar is weird. a segment selector is taken from the source register, which means + // either we read the low 16-bits of a register or read 16 bits from a memory operand. + // for whatever reason, the intel manual writes a source register as a dword/qword for + // larger modes even though the upper 16 bits would be ignored. + // + // so the registers are correct by the time we're here, we might just need to override + // mem size as well. + if instruction.operands[1] != OperandSpec::RegMMM { + instruction.mem_size = 2; + } + instruction.regs[0].bank = if !instruction.prefixes.operand_size() { + RegisterBank::W } else { + RegisterBank::D + }; + }, + OperandCase::Gv_Ew_LSL => { + instruction.operands[1] = mem_oper; + // lsl is weird. a segment selector is taken from the source register, which means + // either we read the low 16-bits of a register or read 16 bits from a memory operand. + // for whatever reason, the intel manual writes a source register as a dword for larger + // modes even though the upper 16 bits would be ignored. + if instruction.operands[1] != OperandSpec::RegMMM { instruction.mem_size = 2; } instruction.regs[0].bank = if !instruction.prefixes.operand_size() { @@ -10755,7 +10776,7 @@ fn read_modrm::Address,