From cdce6ba84e8aa5972fb4b5820fab87ce1b197d77 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 17 Mar 2019 01:04:40 -0700 Subject: several tweaks: * DisplacementI32 was never used, DisplacementU64 added to distinguish 8 and 4 byte addresses * Added setCC instructions * Fix sign extension bug for displacement as interpreted by E operands * Add operand code support for a0,a1,a2,a3 movs * Add operand code support for Ivs, Ibs * Complete support for 0x81 * Clean up tests --- src/lib.rs | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---- test/test.rs | 191 +++++++++++++++++++++++----------------------------- 2 files changed, 286 insertions(+), 120 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e6e72fb..a6f5824 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -95,9 +95,14 @@ impl fmt::Display for Operand { &Operand::ImmediateI8(imm) => write!(f, "0x{:x}", imm), &Operand::ImmediateU8(imm) => write!(f, "0x{:x}", imm), &Operand::DisplacementU32(imm) => write!(f, "[0x{:x}]", imm), - &Operand::DisplacementI32(imm) => write!(f, "[0x{:x}]", imm), - &Operand::RegDisp(ref spec, ref disp) => write!(f, "[{} + 0x{:x}]", - spec, disp), + &Operand::DisplacementU64(imm) => write!(f, "[0x{:x}]", imm), + &Operand::RegDisp(ref spec, ref disp) => { + if *disp < 0 { + write!(f, "[{} - 0x{:x}]", spec, -disp) + } else { + write!(f, "[{} + 0x{:x}]", spec, disp) + } + }, &Operand::RegDeref(ref spec) => write!(f, "[{}]", spec), &Operand::RegScale(ref spec, scale) => write!(f, "[{} * {}]", spec, scale), &Operand::RegScaleDisp(ref spec, scale, disp) => { @@ -140,8 +145,8 @@ pub enum Operand { ImmediateU64(u64), ImmediateI64(i64), Register(RegSpec), - DisplacementI32(u32), - DisplacementU32(i32), + DisplacementU32(u32), + DisplacementU64(u64), RegDeref(RegSpec), RegDisp(RegSpec, i32), RegScale(RegSpec, u8), @@ -263,6 +268,22 @@ impl fmt::Display for Opcode { &Opcode::MOVZX_b => write!(f, "{}", "movzx"), &Opcode::MOVZX_w => write!(f, "{}", "movzx"), &Opcode::MOVSX => write!(f, "{}", "movsx"), + &Opcode::SETO => write!(f, "{}", "seto"), + &Opcode::SETNO => write!(f, "{}", "setno"), + &Opcode::SETB => write!(f, "{}", "setb"), + &Opcode::SETAE => write!(f, "{}", "setae"), + &Opcode::SETZ => write!(f, "{}", "setz"), + &Opcode::SETNZ => write!(f, "{}", "setnz"), + &Opcode::SETBE => write!(f, "{}", "setbe"), + &Opcode::SETA => write!(f, "{}", "seta"), + &Opcode::SETS => write!(f, "{}", "sets"), + &Opcode::SETNS => write!(f, "{}", "setns"), + &Opcode::SETP => write!(f, "{}", "setp"), + &Opcode::SETNP => write!(f, "{}", "setnp"), + &Opcode::SETL => write!(f, "{}", "setl"), + &Opcode::SETGE => write!(f, "{}", "setge"), + &Opcode::SETLE => write!(f, "{}", "setle"), + &Opcode::SETG => write!(f, "{}", "setg"), &Opcode::Invalid => write!(f, "{}", "invalid") } } @@ -364,6 +385,22 @@ pub enum Opcode { NEG, NOT, CMPXCHG, + SETO, + SETNO, + SETB, + SETAE, + SETZ, + SETNZ, + SETBE, + SETA, + SETS, + SETNS, + SETP, + SETNP, + SETL, + SETGE, + SETLE, + SETG, Invalid } #[derive(Debug)] @@ -578,6 +615,7 @@ impl fmt::Display for Instruction { #[allow(non_camel_case_types)] #[derive(Copy, Clone, Debug)] pub enum OperandCode { + Eb_R0, ModRM_0xf6, ModRM_0xf7, Gv_Ev_Iv, @@ -836,6 +874,28 @@ fn read_opcode_0f_map>(bytes_iter: &mut T, instruction: &mu instruction.opcode = Opcode::JG; Ok(OperandCode::Jvds) }, + x if x < 0xa0 => { + instruction.prefixes = prefixes; + instruction.opcode = [ + Opcode::SETO, + Opcode::SETNO, + Opcode::SETB, + Opcode::SETAE, + Opcode::SETZ, + Opcode::SETNZ, + Opcode::SETBE, + Opcode::SETA, + Opcode::SETS, + Opcode::SETNS, + Opcode::SETP, + Opcode::SETNP, + Opcode::SETL, + Opcode::SETGE, + Opcode::SETLE, + Opcode::SETG + ][(x & 0xf) as usize]; + Ok(OperandCode::Eb_R0) + } 0xb0 => { instruction.prefixes = prefixes; instruction.opcode = Opcode::CMPXCHG; @@ -1396,16 +1456,16 @@ fn read_E>(bytes_iter: &mut T, prefixes: &Prefixes, m: u8, if base == 0b101 { let disp = if modbits == 0b00 { - read_num(bytes_iter, 4, length) + read_num(bytes_iter, 4, length) as i32 } else if modbits == 0b01 { - read_num(bytes_iter, 1, length) + read_num(bytes_iter, 1, length) as i8 as i32 } else { - read_num(bytes_iter, 4, length) + read_num(bytes_iter, 4, length) as i32 }; if index == 0b100 { if modbits == 0b00 && !prefixes.rex().x() { - *result = Operand::DisplacementU32(disp as i32); + *result = Operand::DisplacementU32(disp as u32); } else { let reg = RegSpec::gp_from_parts(0b100, prefixes.rex().x(), addr_width, prefixes.rex().present()); @@ -1453,9 +1513,9 @@ fn read_E>(bytes_iter: &mut T, prefixes: &Prefixes, m: u8, let disp = if modbits == 0b00 { 0 } else if modbits == 0b01 { - read_num(bytes_iter, 1, length) + read_num(bytes_iter, 1, length) as i8 as i32 } else { - read_num(bytes_iter, 4, length) + read_num(bytes_iter, 4, length) as i32 }; if index == 0b100 { @@ -1479,8 +1539,11 @@ fn read_E>(bytes_iter: &mut T, prefixes: &Prefixes, m: u8, if modbits == 0b00 { *result = Operand::RegDeref(reg); } else { - let disp_width = if modbits == 0b01 { 1 } else { 4 }; - let disp = read_num(bytes_iter, disp_width, length) as i32; + let disp = if modbits == 0b01 { + read_num(bytes_iter, 1, length) as i8 as i32 + } else { + read_num(bytes_iter, 4, length) as i8 as i32 + }; *result = Operand::RegDisp(reg, disp); } } @@ -1541,6 +1604,84 @@ fn read_operands>( Zv, Zv_AX, */ + OperandCode::Eb_R0 => { + let opwidth = 1; + // TODO: ... + let modrm = bytes_iter.next().unwrap(); + *length += 1; + let (mod_bits, r, m) = octets_of(modrm); + + if r != 0 { + instruction.opcode = Opcode::Invalid; + return Err("Invalid modr/m for opcode 0xc6".to_owned()); + } + + read_E(bytes_iter, &instruction.prefixes, m, mod_bits, opwidth, &mut instruction.operands[0], length) + }, + OperandCode::AL_Ob => { + let addr_width = if instruction.prefixes.address_size() { 4 } else { 8 }; + // stupid RCT thing: + let addr_width = if instruction.prefixes.address_size() { 2 } else { 4 }; + let opwidth = 1; + let imm = read_num(bytes_iter, addr_width, length); + instruction.operands = [ + Operand::Register(RegSpec::gp_from_parts(0, instruction.prefixes.rex().b(), opwidth, instruction.prefixes.rex().present())), + if instruction.prefixes.address_size() { + Operand::DisplacementU32(imm as u32) + } else { + Operand::DisplacementU64(imm) + } + ]; + Ok(()) + } + OperandCode::AX_Ov => { + let addr_width = if instruction.prefixes.address_size() { 4 } else { 8 }; + // stupid RCT thing: + let addr_width = if instruction.prefixes.address_size() { 2 } else { 4 }; + let opwidth = imm_width_from_prefixes_64(SizeCode::vqp, &instruction.prefixes); + let imm = read_num(bytes_iter, addr_width, length); + instruction.operands = [ + Operand::Register(RegSpec::gp_from_parts(0, instruction.prefixes.rex().b(), opwidth, instruction.prefixes.rex().present())), + if instruction.prefixes.address_size() { + Operand::DisplacementU32(imm as u32) + } else { + Operand::DisplacementU64(imm) + } + ]; + Ok(()) + } + OperandCode::Ob_AL => { + let addr_width = if instruction.prefixes.address_size() { 4 } else { 8 }; + // stupid RCT thing: + let addr_width = if instruction.prefixes.address_size() { 2 } else { 4 }; + let opwidth = 1; + let imm = read_num(bytes_iter, addr_width, length); + instruction.operands = [ + if instruction.prefixes.address_size() { + Operand::DisplacementU32(imm as u32) + } else { + Operand::DisplacementU64(imm) + }, + Operand::Register(RegSpec::gp_from_parts(0, instruction.prefixes.rex().b(), opwidth, instruction.prefixes.rex().present())) + ]; + Ok(()) + } + OperandCode::Ov_AX => { + let addr_width = if instruction.prefixes.address_size() { 4 } else { 8 }; + // stupid RCT thing: + let addr_width = if instruction.prefixes.address_size() { 2 } else { 4 }; + let opwidth = imm_width_from_prefixes_64(SizeCode::vqp, &instruction.prefixes); + let imm = read_num(bytes_iter, addr_width, length); + instruction.operands = [ + if instruction.prefixes.address_size() { + Operand::DisplacementU32(imm as u32) + } else { + Operand::DisplacementU64(imm) + }, + Operand::Register(RegSpec::gp_from_parts(0, instruction.prefixes.rex().b(), opwidth, instruction.prefixes.rex().present())) + ]; + Ok(()) + } OperandCode::ModRM_0x80_Eb_Ib => { let opwidth = 1; // TODO: ... @@ -1559,6 +1700,32 @@ fn read_operands>( Err(reason) => Err(reason) } }, + OperandCode::ModRM_0x81_Ev_Ivs => { + let opwidth = imm_width_from_prefixes_64(SizeCode::vqp, &instruction.prefixes); + // TODO: ... + let modrm = bytes_iter.next().unwrap(); + *length += 1; + let (mod_bits, r, m) = octets_of(modrm); + + match read_E(bytes_iter, &instruction.prefixes, m, mod_bits, opwidth, &mut instruction.operands[0], length) { + Ok(()) => { + match read_imm_signed(bytes_iter, if opwidth == 8 { 4 } else { opwidth }, opwidth, length) { + Ok(imm) => { + let opcode = BASE_OPCODE_MAP[r as usize].clone(); + instruction.opcode = opcode; + instruction.operands[1] = imm; + Ok(()) + }, + Err(reason) => { + instruction.opcode = Opcode::Invalid; + Err(reason) + } + + } + }, + Err(reason) => Err(reason) + } + }, OperandCode::ModRM_0xc0_Eb_Ib => { let opwidth = 1; // TODO: ... @@ -1606,7 +1773,8 @@ fn read_operands>( Ok(()) => { let num = read_num(bytes_iter, 1, length) as i8; if r != 0 { - return Err("modrm invalid".to_owned()); + instruction.opcode = Opcode::Invalid; + return Err("Invalid modr/m for opcode 0xc6".to_owned()); } instruction.opcode = Opcode::MOV; instruction.operands[1] = Operand::ImmediateI8(num); @@ -1986,6 +2154,25 @@ fn read_operands>( Err(reason) => Err(reason) } }, + OperandCode::Ibs => { + match read_imm_signed(bytes_iter, 1, 8, length) { + Ok(imm) => { + instruction.operands = [imm, Operand::Nothing]; + Ok(()) + }, + Err(reason) => Err(reason) + } + }, + OperandCode::Ivs => { + let opwidth = imm_width_from_prefixes_64(SizeCode::vd, &instruction.prefixes); + match read_imm_unsigned(bytes_iter, opwidth, length) { + Ok(imm) => { + instruction.operands = [imm, Operand::Nothing]; + Ok(()) + }, + Err(reason) => Err(reason) + } + }, OperandCode::ModRM_0x83_Ev_Ibs => { let modrm = bytes_iter.next().unwrap(); *length += 1; diff --git a/test/test.rs b/test/test.rs index 8a0b6cb..7f00432 100644 --- a/test/test.rs +++ b/test/test.rs @@ -1,146 +1,125 @@ +extern crate yaxpeax_arch; extern crate yaxpeax_x86; +use std::fmt::Write; + +use yaxpeax_arch::Decodable; use yaxpeax_x86::{Instruction, Opcode, decode_one}; fn decode(bytes: &[u8]) -> Option { let mut instr = Instruction::invalid(); - match decode_one(bytes, &mut instr) { + match decode_one(bytes.iter().map(|x| *x).take(16).collect::>(), &mut instr) { Some(()) => Some(instr), None => None } } +fn test_display(data: &[u8], expected: &'static str) { + let mut hex = String::new(); + for b in data { + write!(hex, "{:02x}", b); + } + match Instruction::decode(data.into_iter().map(|x| *x)) { + Some(instr) => { + let text = format!("{}", instr); + assert!( + text == expected, + "display error for {}:\n decoded: {:?}\n displayed: {}\n expected: {}\n", + hex, + instr, + text, + expected + ); + }, + None => { + assert!(false, "decode error for {}:\n expected: {}\n", hex, expected); + } + } +} + +#[test] +fn test_arithmetic() { + test_display(&[0x81, 0xec, 0x10, 0x03, 0x00, 0x00], "sub esp, 0x310"); +} + +#[test] +fn test_E_decode() { + test_display(&[0xff, 0x75, 0xb8], "push [rbp - 0x48]"); + test_display(&[0xff, 0x75, 0x08], "push [rbp + 0x8]"); +} + +// SETLE, SETNG, ... + #[test] fn test_mov() { - assert_eq!(&format!("{}", decode( - &[0x48, 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, 0x00] - ).unwrap()), "mov [rsp], 0x0"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x89, 0x44, 0x24, 0x08] - ).unwrap()), "mov [rsp + 0x8], rax"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x89, 0x43, 0x18] - ).unwrap()), "mov [rbx + 0x18], rax"); - assert_eq!(&format!("{}", decode( - &[0x48, 0xc7, 0x43, 0x10, 0x00, 0x00, 0x00, 0x00] - ).unwrap()), "mov [rbx + 0x10], 0x0"); - assert_eq!(&format!("{}", decode( - &[0x49, 0x89, 0x4e, 0x08] - ).unwrap()), "mov [r14 + 0x8], rcx"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x8b, 0x32] - ).unwrap()), "mov rsi, [rdx]"); - assert_eq!(&format!("{}", decode( - &[0x49, 0x89, 0x46, 0x10] - ).unwrap()), "mov [r14 + 0x10], rax"); - assert_eq!(&format!("{}", decode( - &[0x4d, 0x0f, 0x43, 0xec, 0x49] - ).unwrap()), "cmovnb r13, r12"); - assert_eq!(&format!("{}", decode( - &[0x0f, 0xb6, 0x06] - ).unwrap()), "movzx eax, byte [rsi]"); - assert_eq!(&format!("{}", decode( - &[0x0f, 0xb7, 0x06] - ).unwrap()), "movzx eax, word [rsi]"); + // test_display(&[0xa1, 0x93, 0x62, 0xc4, 0x00, 0x12, 0x34, 0x12, 0x34], "mov eax, [0x3412341200c46293]"); + // RCT.exe 32bit version, TODO: FIX + test_display(&[0xa1, 0x93, 0x62, 0xc4, 0x00], "mov eax, [0xc46293]"); + test_display(&[0x48, 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, 0x00], "mov [rsp], 0x0"); + test_display(&[0x48, 0x89, 0x44, 0x24, 0x08], "mov [rsp + 0x8], rax"); + test_display(&[0x48, 0x89, 0x43, 0x18], "mov [rbx + 0x18], rax"); + test_display(&[0x48, 0xc7, 0x43, 0x10, 0x00, 0x00, 0x00, 0x00], "mov [rbx + 0x10], 0x0"); + test_display(&[0x49, 0x89, 0x4e, 0x08], "mov [r14 + 0x8], rcx"); + test_display(&[0x48, 0x8b, 0x32], "mov rsi, [rdx]"); + test_display(&[0x49, 0x89, 0x46, 0x10], "mov [r14 + 0x10], rax"); + test_display(&[0x4d, 0x0f, 0x43, 0xec, 0x49], "cmovnb r13, r12"); + test_display(&[0x0f, 0xb6, 0x06], "movzx eax, byte [rsi]"); + test_display(&[0x0f, 0xb7, 0x06], "movzx eax, word [rsi]"); + test_display(&[0x89, 0x55, 0x94], "mov [rbp - 0x6c], edx"); } #[test] fn test_stack() { - assert_eq!(&format!("{}", decode( - &[0x66, 0x41, 0x50] - ).unwrap()), "push r8w"); + test_display(&[0x66, 0x41, 0x50], "push r8w"); } #[test] fn test_prefixes() { - assert_eq!(&format!("{}", decode( - &[0x66, 0x41, 0x31, 0xc0] - ).unwrap()), "xor r8w, ax"); - assert_eq!(&format!("{}", decode( - &[0x66, 0x41, 0x32, 0xc0] - ).unwrap()), "xor al, r8b"); - assert_eq!(&format!("{}", decode( - &[0x40, 0x32, 0xc5] - ).unwrap()), "xor al, bpl"); + test_display(&[0x66, 0x41, 0x31, 0xc0], "xor r8w, ax"); + test_display(&[0x66, 0x41, 0x32, 0xc0], "xor al, r8b"); + test_display(&[0x40, 0x32, 0xc5], "xor al, bpl"); } #[test] fn test_control_flow() { - assert_eq!(&format!("{}", decode( - &[0x73, 0x31] - ).unwrap()), "jnb 0x31"); - assert_eq!(&format!("{}", decode( - &[0x72, 0x5a] - ).unwrap()), "jb 0x5a"); - assert_eq!(&format!("{}", decode( - &[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00] - ).unwrap()), "jna 0x18b"); - assert_eq!(&format!("{}", decode( - &[0x74, 0x47] - ).unwrap()), "jz 0x47"); - assert_eq!(&format!("{}", decode( - &[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00] - ).unwrap()), "call [rip + 0x24727e]"); - assert_eq!(&format!("{}", decode( - &[0xc3] - ).unwrap()), "ret"); + test_display(&[0x73, 0x31], "jnb 0x31"); + test_display(&[0x72, 0x5a], "jb 0x5a"); + test_display(&[0x0f, 0x86, 0x8b, 0x01, 0x00, 0x00], "jna 0x18b"); + test_display(&[0x74, 0x47], "jz 0x47"); + test_display(&[0xff, 0x15, 0x7e, 0x72, 0x24, 0x00], "call [rip + 0x24727e]"); + test_display(&[0xc3], "ret"); } #[test] fn test_test_cmp() { - assert_eq!(&format!("{}", decode( - &[0x48, 0x3d, 0x01, 0xf0, 0xff, 0xff] - ).unwrap()), "cmp rax, 0xfffffffffffff001"); - assert_eq!(&format!("{}", decode( - &[0x3d, 0x01, 0xf0, 0xff, 0xff] - ).unwrap()), "cmp eax, 0xfffff001"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x83, 0xf8, 0xff] - ).unwrap()), "cmp rax, 0xffffffffffffffff"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x39, 0xc6] - ).unwrap()), "cmp rsi, rax"); + test_display(&[0x48, 0x3d, 0x01, 0xf0, 0xff, 0xff], "cmp rax, 0xfffffffffffff001"); + test_display(&[0x3d, 0x01, 0xf0, 0xff, 0xff], "cmp eax, 0xfffff001"); + test_display(&[0x48, 0x83, 0xf8, 0xff], "cmp rax, 0xffffffffffffffff"); + test_display(&[0x48, 0x39, 0xc6], "cmp rsi, rax"); } #[test] #[ignore] // VEX prefixes are not supported at the moment, in any form fn test_avx() { - assert_eq!(&format!("{}", decode( - &[0xc5, 0xf8, 0x10, 0x00] - ).unwrap()), "vmovups xmm0, xmmword [rax]"); + test_display(&[0xc5, 0xf8, 0x10, 0x00], "vmovups xmm0, xmmword [rax]"); +} + +#[test] +fn test_push_pop() { + test_display(&[0x5b], "pop rbx"); + test_display(&[0x41, 0x5e], "pop r14"); + test_display(&[0x68, 0x7f, 0x63, 0xc4, 0x00], "push 0xc4637f"); } #[test] fn test_misc() { - assert_eq!(&format!("{}", decode( - &[0x48, 0x8d, 0xa4, 0xc7, 0x20, 0x00, 0x00, 0x12] - ).unwrap()), "lea rsp, [rdi + rax * 8 + 0x12000020]"); - assert_eq!(&format!("{}", decode( - &[0x33, 0xc0] - ).unwrap()), "xor eax, eax"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x8d, 0x53, 0x08] - ).unwrap()), "lea rdx, [rbx + 0x8]"); - assert_eq!(&format!("{}", decode( - &[0x31, 0xc9] - ).unwrap()), "xor ecx, ecx"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x29, 0xc8] - ).unwrap()), "sub rax, rcx"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x03, 0x0b] - ).unwrap()), "add rcx, [rbx]"); - assert_eq!(&format!("{}", decode( - &[0x5b] - ).unwrap()), "pop rbx"); - assert_eq!(&format!("{}", decode( - &[0x41, 0x5e] - ).unwrap()), "pop r14"); - assert_eq!(&format!("{}", decode( - &[0x48, 0x8d, 0x0c, 0x12] - ).unwrap()), "lea rcx, [rdx + rdx]"); - assert_eq!(&format!("{}", decode( - &[0xf6, 0xc2, 0x18] - ).unwrap()), "test dl, 0x18"); + test_display(&[0x48, 0x8d, 0xa4, 0xc7, 0x20, 0x00, 0x00, 0x12], "lea rsp, [rdi + rax * 8 + 0x12000020]"); + test_display(&[0x33, 0xc0], "xor eax, eax"); + test_display(&[0x48, 0x8d, 0x53, 0x08], "lea rdx, [rbx + 0x8]"); + test_display(&[0x31, 0xc9], "xor ecx, ecx"); + test_display(&[0x48, 0x29, 0xc8], "sub rax, rcx"); + test_display(&[0x48, 0x03, 0x0b], "add rcx, [rbx]"); + test_display(&[0x48, 0x8d, 0x0c, 0x12], "lea rcx, [rdx + rdx]"); + test_display(&[0xf6, 0xc2, 0x18], "test dl, 0x18"); } -- cgit v1.1