From f314ecafcdf3f80cce2d79214bda046cd1535e8c Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 14 Mar 2021 23:27:14 -0700 Subject: alternate display mode for c-style expressions --- src/long_mode/display.rs | 400 ++++++++++++++++++++++++++++++++++++++--------- src/long_mode/mod.rs | 24 ++- 2 files changed, 346 insertions(+), 78 deletions(-) (limited to 'src/long_mode') diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 1be4a32..52018da 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -2201,10 +2201,40 @@ impl > Colorize fmt::Result { + self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) + } +} + +impl<'instr> fmt::Display for InstructionDisplayer<'instr> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { self.colorize(&NoColors, fmt) } } +/// enum controlling how `Instruction::display_with` renders instructions. `Intel` is more or less +/// intel syntax, though memory operand sizes are elided if they can be inferred from other +/// operands. +#[derive(Copy, Clone)] +pub enum DisplayStyle { + /// intel-style syntax for instructions, like + /// `add rax, [rdx + rcx * 2 + 0x1234]` + Intel, + /// C-style syntax for instructions, like + /// `rax += [rdx + rcx * 2 + 0x1234]` + C, + // one might imagine an ATT style here, which is mostly interesting for reversing operand + // order. + // well. + // it also complicates memory operands in an offset-only operand, and is just kind of awful, so + // it's just not implemented yet. + // ATT, +} + +pub struct InstructionDisplayer<'instr> { + pub(crate) instr: &'instr Instruction, + pub(crate) style: DisplayStyle, +} + /* * Can't implement this as accepting a formatter because rust * doesn't let me build one outside println! or write! or whatever. @@ -2218,7 +2248,7 @@ impl fmt::Display for Instruction { * so write to some Write thing i guess. bite me. i really just want to * stop thinking about how to support printing instructions... */ -impl > Colorize for Instruction { +impl <'instr, T: fmt::Write, Color: fmt::Display, Y: YaxColors> Colorize for InstructionDisplayer<'instr> { fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { // TODO: I DONT LIKE THIS, there is no address i can give contextualize here, // the address operand maybe should be optional.. @@ -2231,104 +2261,320 @@ struct NoContext; impl Instruction { pub fn write_to(&self, out: &mut T) -> fmt::Result { - self.contextualize(&NoColors, 0, Some(&NoContext), out) + self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) } } -impl > ShowContextual for Instruction { - fn contextualize(&self, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { - if self.prefixes.lock() { - write!(out, "lock ")?; - } +fn contextualize_intel>(instr: &Instruction, colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { + if instr.prefixes.lock() { + write!(out, "lock ")?; + } - if self.prefixes.rep_any() { - if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&self.opcode) { - // only a few of you actually use the prefix... - if self.prefixes.rep() { - write!(out, "rep ")?; - } else if self.prefixes.repz() { - write!(out, "repz ")?; - } else if self.prefixes.repnz() { - write!(out, "repnz ")?; - } + if instr.prefixes.rep_any() { + if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + // only a few of you actually use the prefix... + if instr.prefixes.rep() { + write!(out, "rep ")?; + } else if instr.prefixes.repz() { + write!(out, "repz ")?; + } else if instr.prefixes.repnz() { + write!(out, "repnz ")?; } } + } - out.write_str(self.opcode.name())?; + out.write_str(instr.opcode.name())?; - if self.opcode == Opcode::XBEGIN { - return write!(out, " $+{}", colors.number(signed_i32_hex(self.imm as i32))); - } + if instr.opcode == Opcode::XBEGIN { + return write!(out, " $+{}", colors.number(signed_i32_hex(instr.imm as i32))); + } - if self.operand_count > 0 { - out.write_str(" ")?; + if instr.operand_count > 0 { + out.write_str(" ")?; - if let Some(prefix) = self.segment_override_for_op(0) { - write!(out, "{}:", prefix)?; - } + if let Some(prefix) = instr.segment_override_for_op(0) { + write!(out, "{}:", prefix)?; + } - let x = Operand::from_spec(self, self.operands[0]); - x.colorize(colors, out)?; + let x = Operand::from_spec(instr, instr.operands[0]); + x.colorize(colors, out)?; - for i in 1..self.operand_count { - match self.opcode { - Opcode::MOVSX_b | - Opcode::MOVZX_b => { - match &self.operands[i as usize] { - &OperandSpec::Nothing => { - return Ok(()); - }, - &OperandSpec::RegMMM => { - out.write_str(", ")?; - } - _ => { - out.write_str(", byte ")?; - if let Some(prefix) = self.segment_override_for_op(i) { - write!(out, "{}:", prefix)?; - } - } + for i in 1..instr.operand_count { + match instr.opcode { + Opcode::MOVSX_b | + Opcode::MOVZX_b => { + match &instr.operands[i as usize] { + &OperandSpec::Nothing => { + return Ok(()); + }, + &OperandSpec::RegMMM => { + out.write_str(", ")?; } - let x = Operand::from_spec(self, self.operands[i as usize]); - x.colorize(colors, out)? - }, - Opcode::MOVSX_w | - Opcode::MOVZX_w => { - match &self.operands[i as usize] { - &OperandSpec::Nothing => { - return Ok(()); - }, - &OperandSpec::RegMMM => { - out.write_str(", ")?; + _ => { + out.write_str(", byte ")?; + if let Some(prefix) = instr.segment_override_for_op(i) { + write!(out, "{}:", prefix)?; } - _ => { - out.write_str(", word ")?; - if let Some(prefix) = self.segment_override_for_op(1) { - write!(out, "{}:", prefix)?; - } + } + } + let x = Operand::from_spec(instr, instr.operands[i as usize]); + x.colorize(colors, out)? + }, + Opcode::MOVSX_w | + Opcode::MOVZX_w => { + match &instr.operands[i as usize] { + &OperandSpec::Nothing => { + return Ok(()); + }, + &OperandSpec::RegMMM => { + out.write_str(", ")?; + } + _ => { + out.write_str(", word ")?; + if let Some(prefix) = instr.segment_override_for_op(1) { + write!(out, "{}:", prefix)?; } } - let x = Operand::from_spec(self, self.operands[i as usize]); - x.colorize(colors, out)? - }, - _ => { - match &self.operands[i as usize] { - &OperandSpec::Nothing => { - return Ok(()); - }, - _ => { - out.write_str(", ")?; - if let Some(prefix) = self.segment_override_for_op(1) { - write!(out, "{}:", prefix)?; - } - let x = Operand::from_spec(self, self.operands[i as usize]); - x.colorize(colors, out)? + } + let x = Operand::from_spec(instr, instr.operands[i as usize]); + x.colorize(colors, out)? + }, + _ => { + match &instr.operands[i as usize] { + &OperandSpec::Nothing => { + return Ok(()); + }, + _ => { + out.write_str(", ")?; + if let Some(prefix) = instr.segment_override_for_op(1) { + write!(out, "{}:", prefix)?; } + let x = Operand::from_spec(instr, instr.operands[i as usize]); + x.colorize(colors, out)? } } } } } - Ok(()) + } + Ok(()) +} + +fn contextualize_c>(instr: &Instruction, _colors: &Y, _address: u64, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { + let mut brace_count = 0; + + if instr.prefixes.lock() { + out.write_str("lock { ")?; + brace_count += 1; + } + + if instr.prefixes.rep_any() { + if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { + let word_str = match instr.mem_size { + 1 => "byte", + 2 => "word", + 4 => "dword", + 8 => "qword", + _ => { unreachable!("invalid word size") } + }; + + // only a few of you actually use the prefix... + if instr.prefixes.rep() { + out.write_str("rep ")?; + } else if instr.prefixes.repz() { + out.write_str("repz ")?; + } else if instr.prefixes.repnz() { + out.write_str("repnz ")?; + } // TODO: other rep kinds? + + out.write_str(word_str)?; + out.write_str(" { ")?; + brace_count += 1; + } + } + + match instr.opcode { + Opcode::Invalid => { out.write_str("invalid")?; }, + Opcode::MOVS => { + out.write_str("es:[rdi++] = ds:[rsi++]")?; + }, + Opcode::CMPS => { + out.write_str("rflags = flags(ds:[rsi++] - es:[rdi++])")?; + }, + Opcode::LODS => { + // TODO: size + out.write_str("rax = ds:[rsi++]")?; + }, + Opcode::STOS => { + // TODO: size + out.write_str("es:[rdi++] = rax")?; + }, + Opcode::INS => { + // TODO: size + out.write_str("es:[rdi++] = port(dx)")?; + }, + Opcode::OUTS => { + // TODO: size + out.write_str("port(dx) = ds:[rsi++]")?; + } + Opcode::ADD => { + write!(out, "{} += {}", instr.operand(0), instr.operand(1))?; + } + Opcode::OR => { + write!(out, "{} |= {}", instr.operand(0), instr.operand(1))?; + } + Opcode::ADC => { + write!(out, "{} += {} + rflags.cf", instr.operand(0), instr.operand(1))?; + } + Opcode::ADCX => { + write!(out, "{} += {} + rflags.cf", instr.operand(0), instr.operand(1))?; + } + Opcode::ADOX => { + write!(out, "{} += {} + rflags.of", instr.operand(0), instr.operand(1))?; + } + Opcode::SBB => { + write!(out, "{} -= {} + rflags.cf", instr.operand(0), instr.operand(1))?; + } + Opcode::AND => { + write!(out, "{} &= {}", instr.operand(0), instr.operand(1))?; + } + Opcode::XOR => { + write!(out, "{} ^= {}", instr.operand(0), instr.operand(1))?; + } + Opcode::SUB => { + write!(out, "{} -= {}", instr.operand(0), instr.operand(1))?; + } + Opcode::CMP => { + write!(out, "rflags = flags({} - {})", instr.operand(0), instr.operand(1))?; + } + Opcode::TEST => { + write!(out, "rflags = flags({} & {})", instr.operand(0), instr.operand(1))?; + } + Opcode::XADD => { + write!(out, "({}, {}) = ({} + {}, {})", instr.operand(0), instr.operand(1), instr.operand(0), instr.operand(1), instr.operand(0))?; + } + Opcode::BT => { + write!(out, "bt")?; + } + Opcode::BTS => { + write!(out, "bts")?; + } + Opcode::BTC => { + write!(out, "btc")?; + } + Opcode::BSR => { + write!(out, "{} = msb({})", instr.operand(0), instr.operand(1))?; + } + Opcode::BSF => { + write!(out, "{} = lsb({}) (x86 bsf)", instr.operand(0), instr.operand(1))?; + } + Opcode::TZCNT => { + write!(out, "{} = lsb({})", instr.operand(0), instr.operand(1))?; + } + Opcode::MOV => { + write!(out, "{} = {}", instr.operand(0), instr.operand(1))?; + } + Opcode::SAR => { + write!(out, "{} = {} >>> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::SAL => { + write!(out, "{} = {} <<< {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::SHR => { + write!(out, "{} = {} >> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::SHRX => { + write!(out, "{} = {} >> {} (x86 shrx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + } + Opcode::SHL => { + write!(out, "{} = {} << {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::SHLX => { + write!(out, "{} = {} << {} (x86 shlx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + } + Opcode::ROR => { + write!(out, "{} = {} ror {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::RORX => { + write!(out, "{} = {} ror {} (x86 rorx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + } + Opcode::ROL => { + write!(out, "{} = {} rol {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::RCR => { + write!(out, "{} = {} rcr {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::RCL => { + write!(out, "{} = {} rcl {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + } + Opcode::PUSH => { + write!(out, "push({})", instr.operand(0))?; + } + Opcode::POP => { + write!(out, "{} = pop()", instr.operand(0))?; + } + Opcode::MOVD => { + write!(out, "{} = movd({})", instr.operand(0), instr.operand(1))?; + } + Opcode::MOVQ => { + write!(out, "{} = movq({})", instr.operand(0), instr.operand(1))?; + } + Opcode::MOVNTQ => { + write!(out, "{} = movntq({})", instr.operand(0), instr.operand(1))?; + } + Opcode::INC => { + write!(out, "{}++", instr.operand(0))?; + } + Opcode::DEC => { + write!(out, "{}--", instr.operand(0))?; + } + Opcode::JG => { + write!(out, "if greater(rflags) then jmp {}", instr.operand(0))?; + } + Opcode::NOP => { + write!(out, "nop")?; + } + _ => { + if instr.operand_count() == 0 { + write!(out, "{}()", instr.opcode())?; + } else { + write!(out, "{} = {}({}", instr.operand(0), instr.opcode(), instr.operand(0))?; + let mut comma = true; + for i in 1..instr.operand_count() { + if comma { + write!(out, ", ")?; + } + write!(out, "{}", instr.operand(i))?; + comma = true; + } + write!(out, ")")?; + } + } + } + + while brace_count > 0 { + out.write_str(" }")?; + brace_count -= 1; + } + + Ok(()) +} + +impl <'instr, T: fmt::Write, Color: fmt::Display, Y: YaxColors> ShowContextual for InstructionDisplayer<'instr> { + fn contextualize(&self, colors: &Y, address: u64, context: Option<&NoContext>, out: &mut T) -> fmt::Result { + let InstructionDisplayer { + instr, + style, + } = self; + + match style { + DisplayStyle::Intel => { + contextualize_intel(instr, colors, address, context, out) + } + DisplayStyle::C => { + contextualize_c(instr, colors, address, context, out) + } + } } } diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index ff5e43b..6349aa4 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -3,6 +3,9 @@ mod vex; mod display; pub mod uarch; +#[cfg(feature = "fmt")] +pub use self::display::DisplayStyle; + use core::hint::unreachable_unchecked; use yaxpeax_arch::{AddressDiff, Decoder, LengthedInstruction}; @@ -1891,6 +1894,7 @@ pub struct Instruction { imm: u64, disp: u64, opcode: Opcode, + mem_size: u8, } impl yaxpeax_arch::Instruction for Instruction { @@ -3449,6 +3453,7 @@ impl Instruction { Instruction { prefixes: Prefixes::new(0), opcode: Opcode::Invalid, + mem_size: 1, modrm_rrr: RegSpec::rax(), modrm_mmm: RegSpec::rax(), // doubles as sib_base sib_index: RegSpec::rax(), @@ -3514,6 +3519,14 @@ impl Instruction { } } } + + #[cfg(feature = "fmt")] + pub fn display_with<'a>(&'a self, style: display::DisplayStyle) -> display::InstructionDisplayer<'a> { + display::InstructionDisplayer { + style, + instr: self, + } + } } #[derive(Debug, Copy, Clone)] @@ -5855,6 +5868,7 @@ fn read_instr>(decoder: &InstDecoder, mut bytes_iter: T, in // into one `mov 0, dword [instruction + modrm_mmm_offset]` instruction.modrm_mmm = RegSpec::rax(); instruction.sib_index = RegSpec::rax(); + instruction.mem_size = 0; fn escapes_are_prefixes_actually(prefixes: &mut Prefixes, opc_map: &mut Option) { match opc_map { @@ -6161,6 +6175,7 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, if !operand_code.has_byte_operands() { // further, this is an vdq E opwidth = imm_width_from_prefixes_64(SizeCode::vqp, instruction.prefixes); + instruction.mem_size = opwidth; if opwidth == 4 { bank = RegisterBank::D; } else if opwidth == 2 { @@ -6168,6 +6183,7 @@ fn read_operands>(decoder: &InstDecoder, mut bytes_iter: T, } } else { opwidth = 1; + instruction.mem_size = opwidth; if instruction.prefixes.rex().present() { bank = RegisterBank::rB; } else { @@ -7709,11 +7725,13 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.modrm_mmm = RegSpec::rsi(); instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = OperandSpec::Deref; + instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_Xb => { instruction.operands[0] = OperandSpec::Deref_rdi; instruction.operands[1] = OperandSpec::Deref_rsi; + instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::Yb_AL => { @@ -7721,6 +7739,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.modrm_mmm = RegSpec::rsi(); instruction.operands[0] = OperandSpec::Deref; instruction.operands[1] = OperandSpec::RegRRR; + instruction.mem_size = 1; instruction.operand_count = 2; } OperandCode::AX_Xv => { @@ -7733,6 +7752,7 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter }; instruction.modrm_mmm = RegSpec::rsi(); instruction.operands[1] = OperandSpec::Deref; + instruction.mem_size = opwidth; } OperandCode::Yv_AX => { let opwidth = imm_width_from_prefixes_64(SizeCode::vqp, instruction.prefixes); @@ -7745,9 +7765,11 @@ fn unlikely_operands>(decoder: &InstDecoder, mut bytes_iter instruction.modrm_mmm = RegSpec::rdi(); instruction.operands[0] = OperandSpec::Deref; instruction.operands[1] = OperandSpec::RegRRR; + instruction.mem_size = opwidth; } OperandCode::Yv_Xv => { - // TODO: repsect prefixes + let opwidth = imm_width_from_prefixes_64(SizeCode::vqp, instruction.prefixes); + instruction.mem_size = opwidth; instruction.operands[0] = OperandSpec::Deref_rdi; instruction.operands[1] = OperandSpec::Deref_rsi; } -- cgit v1.1