From 4371ed02ac30cb56ec4ddbf60c87e85c183d860b Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 22 Jul 2021 00:31:02 -0700 Subject: fix incorrect decodes with scas and 67-prefixes --- CHANGELOG | 18 +++++++++++++ Cargo.toml | 2 +- src/long_mode/mod.rs | 67 +++++++++++++++++++++++++++++++++++++++------- src/protected_mode/mod.rs | 56 +++++++++++++++++++++++++++++++------- src/real_mode/mod.rs | 25 +++++++++++++---- test/long_mode/mod.rs | 9 +++++++ test/protected_mode/mod.rs | 10 +++++++ test/real_mode/mod.rs | 16 +++++++++++ 8 files changed, 179 insertions(+), 24 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5ae5dba..1d3103a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,21 @@ +## 1.0.4 + +in 64-, 32-, and 16-bit modes: + * fix incorrect decoding of `scas`; memory access is through `*di` not `*si`. + * fix incorrect segment register for `scas` memory operand; `es` segment is always used. + * fix incorrect decoding of some 67-prefixed string instructions: `movs`, `scas`, `lods`, `stos`, `cmps`. + - a 67-prefix selects an alternate addressing mode. in 64-bit mode, this + selects 32-bit registers for addressing, 32-bit selects 16-bit registers, + and 16-bit selects 32-bit registers. the decoder had ignored the 67 prefix + on these instructions. + +in 32- and 16-bit modes: + * fix incorrect decoding of 16-bit memory accesses with modrm where mod=00 and mmm=110. + - the memory access from this modrm is a disp16 memory access, which the + decoder reports. the decoder would then not read the subsequent 16-bit + displacement. this would typically result in a `Displacement(0)` operand, + and incorrect following instructions. + ## 1.0.3 * fix a few broken doc links, added example of yaxpeax-x86 usage through yaxpeax-arch traits diff --git a/Cargo.toml b/Cargo.toml index 86cbe2b..4536904 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "yaxpeax-x86" -version = "1.0.3" +version = "1.0.4" authors = [ "iximeow " ] license = "0BSD" repository = "http://git.iximeow.net/yaxpeax-x86/" diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index e967f4b..72ec802 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -382,6 +382,18 @@ impl RegSpec { RegSpec { bank: RegisterBank::D, num: 3 } } + // TODO: make pub in the 1.1 version bump + #[inline] + const fn esi() -> RegSpec { + RegSpec { bank: RegisterBank::D, num: 6 } + } + + // TODO: make pub in the 1.1 version bump + #[inline] + const fn edi() -> RegSpec { + RegSpec { bank: RegisterBank::D, num: 7 } + } + #[inline] pub const fn ax() -> RegSpec { RegSpec { bank: RegisterBank::W, num: 0 } @@ -582,6 +594,8 @@ impl OperandSpec { OperandSpec::DispU32 | OperandSpec::DispU64 | OperandSpec::Deref | + OperandSpec::Deref_esi | + OperandSpec::Deref_edi | OperandSpec::Deref_rsi | OperandSpec::Deref_rdi | OperandSpec::RegDisp | @@ -760,6 +774,12 @@ impl Operand { OperandSpec::Deref => { Operand::RegDeref(inst.regs[1]) } + OperandSpec::Deref_esi => { + Operand::RegDeref(RegSpec::esi()) + } + OperandSpec::Deref_edi => { + Operand::RegDeref(RegSpec::edi()) + } OperandSpec::Deref_rsi => { Operand::RegDeref(RegSpec::rsi()) } @@ -2782,6 +2802,8 @@ enum OperandSpec { DispU32, DispU64, Deref, + Deref_esi, + Deref_edi, Deref_rsi, Deref_rdi, RegDisp, @@ -4415,7 +4437,8 @@ impl Instruction { /// prefixes. pub fn segment_override_for_op(&self, op: u8) -> Option { match self.opcode { - Opcode::STOS => { + Opcode::STOS | + Opcode::SCAS => { if op == 0 { Some(Segment::ES) } else { @@ -9122,21 +9145,34 @@ fn unlikely_operands::Address, { instruction.regs[0] = RegSpec::al(); - instruction.regs[1] = RegSpec::rsi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::esi(); + } else { + instruction.regs[1] = RegSpec::rsi(); + } instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = OperandSpec::Deref; instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_Xb => { - instruction.operands[0] = OperandSpec::Deref_rdi; - instruction.operands[1] = OperandSpec::Deref_rsi; + if instruction.prefixes.address_size() { + instruction.operands[0] = OperandSpec::Deref_edi; + instruction.operands[1] = OperandSpec::Deref_esi; + } else { + instruction.operands[0] = OperandSpec::Deref_rdi; + instruction.operands[1] = OperandSpec::Deref_rsi; + } instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_AL => { instruction.regs[0] = RegSpec::al(); - instruction.regs[1] = RegSpec::rsi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::edi(); + } else { + instruction.regs[1] = RegSpec::rdi(); + } instruction.operands[0] = OperandSpec::Deref; instruction.operands[1] = OperandSpec::RegRRR; instruction.mem_size = 1; @@ -9150,7 +9186,11 @@ fn unlikely_operands::Address, RegSpec::rax(), _ => { unreachable!(); } }; - instruction.regs[1] = RegSpec::rsi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::esi(); + } else { + instruction.regs[1] = RegSpec::rsi(); + } instruction.operands[1] = OperandSpec::Deref; instruction.mem_size = opwidth; } @@ -9162,7 +9202,11 @@ fn unlikely_operands::Address, RegSpec::rax(), _ => { unreachable!(); } }; - instruction.regs[1] = RegSpec::rdi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::edi(); + } else { + instruction.regs[1] = RegSpec::rdi(); + } instruction.operands[0] = OperandSpec::Deref; instruction.operands[1] = OperandSpec::RegRRR; instruction.mem_size = opwidth; @@ -9170,8 +9214,13 @@ fn unlikely_operands::Address, { let opwidth = imm_width_from_prefixes_64(SizeCode::vqp, instruction.prefixes); instruction.mem_size = opwidth; - instruction.operands[0] = OperandSpec::Deref_rdi; - instruction.operands[1] = OperandSpec::Deref_rsi; + if instruction.prefixes.address_size() { + instruction.operands[0] = OperandSpec::Deref_edi; + instruction.operands[1] = OperandSpec::Deref_esi; + } else { + instruction.operands[0] = OperandSpec::Deref_rdi; + instruction.operands[1] = OperandSpec::Deref_rsi; + } } OperandCode::ModRM_0x0f12 => { instruction.regs[0].bank = RegisterBank::X; diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs index 6e503a0..19fe881 100644 --- a/src/protected_mode/mod.rs +++ b/src/protected_mode/mod.rs @@ -525,6 +525,8 @@ impl OperandSpec { OperandSpec::DispU16 | OperandSpec::DispU32 | OperandSpec::Deref | + OperandSpec::Deref_si | + OperandSpec::Deref_di | OperandSpec::Deref_esi | OperandSpec::Deref_edi | OperandSpec::RegDisp | @@ -704,6 +706,12 @@ impl Operand { OperandSpec::Deref => { Operand::RegDeref(inst.regs[1]) } + OperandSpec::Deref_si => { + Operand::RegDeref(RegSpec::si()) + } + OperandSpec::Deref_di => { + Operand::RegDeref(RegSpec::di()) + } OperandSpec::Deref_esi => { Operand::RegDeref(RegSpec::esi()) } @@ -2720,6 +2728,8 @@ enum OperandSpec { DispU16, DispU32, Deref, + Deref_si, + Deref_di, Deref_esi, Deref_edi, RegDisp, @@ -4357,7 +4367,8 @@ impl Instruction { /// prefixes. pub fn segment_override_for_op(&self, op: u8) -> Option { match self.opcode { - Opcode::STOS => { + Opcode::STOS | + Opcode::SCAS => { if op == 0 { Some(Segment::ES) } else { @@ -5748,6 +5759,7 @@ fn read_M_16bit::Address, > 6; let mmm = modrm & 7; if modbits == 0b00 && mmm == 0b110 { + instr.disp = read_num(words, 2)? as u16 as u32; return Ok(OperandSpec::DispU16); } match mmm { @@ -8975,21 +8987,34 @@ fn unlikely_operands::Address, { instruction.regs[0] = RegSpec::al(); - instruction.regs[1] = RegSpec::esi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::si(); + } else { + instruction.regs[1] = RegSpec::esi(); + } instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = OperandSpec::Deref; instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_Xb => { - instruction.operands[0] = OperandSpec::Deref_edi; - instruction.operands[1] = OperandSpec::Deref_esi; + if instruction.prefixes.address_size() { + instruction.operands[0] = OperandSpec::Deref_di; + instruction.operands[1] = OperandSpec::Deref_si; + } else { + instruction.operands[0] = OperandSpec::Deref_edi; + instruction.operands[1] = OperandSpec::Deref_esi; + } instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_AL => { instruction.regs[0] = RegSpec::al(); - instruction.regs[1] = RegSpec::esi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::di(); + } else { + instruction.regs[1] = RegSpec::edi(); + } instruction.operands[0] = OperandSpec::Deref; instruction.operands[1] = OperandSpec::RegRRR; instruction.mem_size = 1; @@ -9003,7 +9028,11 @@ fn unlikely_operands::Address, { @@ -9014,7 +9043,11 @@ fn unlikely_operands::Address, ::Address, { instruction.regs[0].bank = RegisterBank::X; diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 6913fb4..3c9c279 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -4367,7 +4367,8 @@ impl Instruction { /// prefixes. pub fn segment_override_for_op(&self, op: u8) -> Option { match self.opcode { - Opcode::STOS => { + Opcode::STOS | + Opcode::SCAS => { if op == 0 { Some(Segment::ES) } else { @@ -5758,6 +5759,7 @@ fn read_M_16bit::Address, > 6; let mmm = modrm & 7; if modbits == 0b00 && mmm == 0b110 { + instr.disp = read_num(words, 2)? as u16 as u32; return Ok(OperandSpec::DispU16); } match mmm { @@ -8994,21 +8996,34 @@ fn unlikely_operands::Address, { instruction.regs[0] = RegSpec::al(); - instruction.regs[1] = RegSpec::esi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::esi(); + } else { + instruction.regs[1] = RegSpec::si(); + } instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = OperandSpec::Deref; instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_Xb => { - instruction.operands[0] = OperandSpec::Deref_edi; - instruction.operands[1] = OperandSpec::Deref_esi; + if instruction.prefixes.address_size() { + instruction.operands[0] = OperandSpec::Deref_edi; + instruction.operands[1] = OperandSpec::Deref_esi; + } else { + instruction.operands[0] = OperandSpec::Deref_di; + instruction.operands[1] = OperandSpec::Deref_si; + } instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_AL => { instruction.regs[0] = RegSpec::al(); - instruction.regs[1] = RegSpec::esi(); + if instruction.prefixes.address_size() { + instruction.regs[1] = RegSpec::edi(); + } else { + instruction.regs[1] = RegSpec::di(); + } instruction.operands[0] = OperandSpec::Deref; instruction.operands[1] = OperandSpec::RegRRR; instruction.mem_size = 1; diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index ae79761..bfe21bd 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -2676,6 +2676,15 @@ fn prefixed_f30f() { } #[test] +fn only_64bit() { + test_display(&[0xae], "scas byte es:[rdi], al"); + test_display(&[0xaf], "scas dword es:[rdi], eax"); + test_display(&[0x67, 0xaf], "scas dword es:[edi], eax"); + test_display(&[0x67, 0xac], "lods al, byte ds:[esi]"); + test_display(&[0x67, 0xaa], "stos byte es:[edi], al"); +} + +#[test] fn test_adx() { test_display(&[0x66, 0x0f, 0x38, 0xf6, 0xc1], "adcx eax, ecx"); test_display(&[0x66, 0x0f, 0x38, 0xf6, 0x01], "adcx eax, dword [rcx]"); diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs index 28682ef..c03a19d 100644 --- a/test/protected_mode/mod.rs +++ b/test/protected_mode/mod.rs @@ -2363,6 +2363,16 @@ fn prefixed_f30f() { #[test] fn only_32bit() { + test_display(&[0x67, 0xac], "lods al, byte ds:[si]"); + test_display(&[0x67, 0xae], "scas byte es:[di], al"); + test_display(&[0xac], "lods al, byte ds:[esi]"); + test_display(&[0xae], "scas byte es:[edi], al"); + test_display(&[0x67, 0xf3, 0xa4], "rep movs byte es:[di], byte ds:[si]"); + test_display(&[0xf3, 0xa4], "rep movs byte es:[edi], byte ds:[esi]"); + test_display(&[0x67, 0xf3, 0xa5], "rep movs dword es:[di], dword ds:[si]"); + test_display(&[0xf3, 0xa5], "rep movs dword es:[edi], dword ds:[esi]"); + test_display(&[0x66, 0x67, 0x8b, 0x0e, 0x55, 0xaa], "mov cx, word [0xaa55]"); + test_display(&[0x66, 0x8b, 0x0e], "mov cx, word [esi]"); test_display(&[0x40], "inc eax"); test_display(&[0x41], "inc ecx"); test_display(&[0x47], "inc edi"); diff --git a/test/real_mode/mod.rs b/test/real_mode/mod.rs index 23744e1..82ccfab 100644 --- a/test/real_mode/mod.rs +++ b/test/real_mode/mod.rs @@ -50,6 +50,22 @@ fn test_display_under(decoder: &InstDecoder, data: &[u8], expected: &'static str } #[test] +fn only_16bit() { + test_display(&[0xac], "lods al, byte ds:[si]"); + test_display(&[0xae], "scas byte es:[di], al"); + test_display(&[0x67, 0xac], "lods al, byte ds:[esi]"); + test_display(&[0x67, 0xae], "scas byte es:[edi], al"); + test_display(&[0xf3, 0xa4], "rep movs byte es:[di], byte ds:[si]"); + test_display(&[0x67, 0xf3, 0xa4], "rep movs byte es:[edi], byte ds:[esi]"); + test_display(&[0xf3, 0xa5], "rep movs word es:[di], word ds:[si]"); + test_display(&[0x67, 0xf3, 0xa5], "rep movs word es:[edi], word ds:[esi]"); + test_display(&[0x8b, 0x0e, 0x55, 0xaa], "mov cx, word [0xaa55]"); + test_display(&[0x66, 0x8b, 0x0e, 0x55, 0xaa], "mov ecx, dword [0xaa55]"); + test_display(&[0x67, 0x8b, 0x0e], "mov cx, word [esi]"); + test_display(&[0x66, 0x67, 0x8b, 0x0e], "mov ecx, dword [esi]"); +} + +#[test] fn test_real_mode() { test_display(&[0x00, 0xcc], "add ah, cl"); test_display(&[0x03, 0x0b], "add cx, word [bp + di]"); -- cgit v1.1