From 97dbde69221127d2552cb4fc442b90a2c0ff2a95 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 19 Jun 2026 03:02:35 +0000 Subject: add DisplayRules, docs, doc tests, .. this includes `trait DisplayRules` as a generic mechanism to control parts of instruction printing, a `DefaultRules` for the existing formatting style, and `AbsoluteAddressFormatter` to print instructions as at some location in an address space. --- src/long_mode/display.rs | 1092 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 873 insertions(+), 219 deletions(-) (limited to 'src/long_mode/display.rs') diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 9f2022c..4cef8f1 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -5,9 +5,13 @@ use core::fmt; // allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis #[allow(deprecated)] use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; +use yaxpeax_arch::{AddressBase, AddressDiff, LengthedInstruction}; use crate::MEM_SIZE_STRINGS; -use crate::long_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixRex}; +use crate::long_mode::{ + RegSpec, Opcode, Operand, OperandSpec, + MergeMode, InstDecoder, Instruction, Segment, PrefixRex +}; use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; @@ -301,23 +305,42 @@ impl fmt::Display for Operand { impl Colorize for Operand { fn colorize(&self, _colors: &Y, f: &mut T) -> fmt::Result { let mut f = yaxpeax_arch::display::FmtSink::new(f); + let rules = DefaultRules::for_style(DisplayStyle::Intel); let mut visitor = DisplayingOperandVisitor { - f: &mut f + instr_len: AddressDiff::from_const(0), + f: &mut f, + rules: &rules, }; self.visit(&mut visitor) } } -struct DisplayingOperandVisitor<'a, T> { +struct DisplayingOperandVisitor<'a, 'rules, T, R> { + instr_len: AddressDiff, f: &'a mut T, + rules: &'rules R, } -impl super::OperandVisitor for 
DisplayingOperandVisitor<'_, T> { +impl > DisplayingOperandVisitor<'_, '_, T, R> { + fn write_register(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + if self.rules.emit_register(reg, &mut self.f)? { + return Ok(()); + } + + self.f.write_reg(reg) + } +} + +impl > super::OperandVisitor for DisplayingOperandVisitor<'_, '_, T, R> { type Ok = (); type Error = core::fmt::Error; #[cfg_attr(feature="profiling", inline(never))] fn visit_u8(&mut self, imm: u8) -> Result { + if self.rules.emit_unsigned_immediate(imm as u64, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u8(imm)?; @@ -326,6 +349,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, imm: i8) -> Result { + if self.rules.emit_signed_immediate(imm as i64, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u8; if imm < 0 { @@ -339,6 +366,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_u16(&mut self, imm: u16) -> Result { + if self.rules.emit_unsigned_immediate(imm as u64, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u16(imm)?; @@ -347,6 +378,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_i16(&mut self, imm: i16) -> Result { + if self.rules.emit_signed_immediate(imm as i64, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u16; if imm < 0 { @@ -360,6 +395,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_u32(&mut self, imm: u32) -> Result { + if self.rules.emit_unsigned_immediate(imm as u64, self.f)? 
{ + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u32(imm)?; @@ -367,6 +406,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result { + if self.rules.emit_signed_immediate(imm as i64, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u32; if imm < 0 { @@ -380,6 +423,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_u64(&mut self, imm: u64) -> Result { + if self.rules.emit_unsigned_immediate(imm, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u64(imm)?; @@ -388,6 +435,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_i64(&mut self, imm: i64) -> Result { + if self.rules.emit_signed_immediate(imm, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u64; if imm < 0 { @@ -401,14 +452,14 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result { - self.f.write_reg(reg)?; + self.write_register(reg)?; Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.write_reg(spec)?; + self.write_register(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.write_reg(mask)?; + self.write_register(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -417,10 +468,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> Ok(()) } fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::long_mode::SaeMode) -> Result { - self.f.write_reg(spec)?; + self.write_register(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; 
- self.f.write_reg(mask)?; + self.write_register(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -430,10 +481,10 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> Ok(()) } fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result { - self.f.write_reg(spec)?; + self.write_register(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.write_reg(mask)?; + self.write_register(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -444,17 +495,21 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_abs_u32(&mut self, imm: u32) -> Result { self.f.write_fixed_size("[")?; - self.f.span_start_address(); - self.f.write_prefixed_u32(imm)?; - self.f.span_end_address(); + if !self.rules.emit_absolute_address(imm as u64, self.f)? { + self.f.span_start_address(); + self.f.write_prefixed_u32(imm)?; + self.f.span_end_address(); + } self.f.write_fixed_size("]")?; Ok(()) } fn visit_abs_u64(&mut self, imm: u64) -> Result { self.f.write_fixed_size("[")?; - self.f.span_start_address(); - self.f.write_prefixed_u64(imm)?; - self.f.span_end_address(); + if !self.rules.emit_absolute_address(imm as u64, self.f)? 
{ + self.f.span_start_address(); + self.f.write_prefixed_u64(imm)?; + self.f.span_end_address(); + } self.f.write_fixed_size("]")?; Ok(()) } @@ -462,19 +517,25 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> #[cfg_attr(feature="profiling", inline(never))] fn visit_disp(&mut self, base: RegSpec, disp: i32) -> Result { self.f.write_char('[')?; - self.f.write_reg(base)?; - self.f.write_fixed_size(" ")?; - self.f.write_displacement(disp)?; + let mut printed = false; + if base == RegSpec::rip() { + printed = self.rules.emit_relative_address(self.instr_len, disp, self.f)?; + } + if !printed { + self.write_register(base)?; + self.f.write_fixed_size(" ")?; + self.f.write_displacement(disp)?; + } self.f.write_fixed_size("]") } fn visit_deref(&mut self, base: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size("]") } fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; @@ -483,7 +544,7 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; @@ -492,18 +553,18 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]") } fn 
visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; @@ -512,97 +573,97 @@ impl super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_char('[')?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_char(' ')?; self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + 
self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" ")?; self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_char(' ')?; self.f.write_displacement(disp)?; self.f.write_char(']')?; 
self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } @@ -3558,9 +3619,9 @@ impl Colorize for Opcode { impl fmt::Display for Instruction { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - // to reuse one implementation, call the deprecated function for now. - #[allow(deprecated)] - self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) + let rules = DefaultRules::for_style(DisplayStyle::Intel); + let bundle = rules.display(self); + fmt::Display::fmt(&bundle, fmt) } } @@ -3576,13 +3637,20 @@ impl<'instr> fmt::Display for InstructionDisplayer<'instr> { /// intel syntax, though memory operand sizes are elided if they can be inferred from other /// operands. /// -/// note that `yaxpeax-x86` does not (and can not!) try to guarantee that formatting through any -/// `DisplayStyle` round-trips through an assembler to produce the same bytes as were intially -/// disassembled. opcode choice (for example, `0x31` vs `0x33` encodings of register-register -/// `xor`) may not be controllable, immediates and displacements may have multiple valid encodings, -/// and prefix handling in general is very lossy especially in the presence of repeat or -/// ineffectual prefixes. -#[derive(Copy, Clone, Debug)] +/// ## lossiness +/// +/// for some display styles, `yaxpeax-x86` tries to ensure instructions are formatted in a way that +/// is accepted by some corresponding assemblers. even so, such round-tripping is inherently lossy; +/// some instructions may have many equally-valid encodings, so re-assembling an instruction does +/// not guarantee an assembler would produce the same bytes as were initially decoded. 
+/// +/// a non-exhaustive list of ways that going through `bytes -> decode -> assemble -> bytes` can be lossy: +/// * opcode choice (for example, `0x31` vs `0x33` encodings of register-register `xor`) may not be controllable, +/// * immediates and displacements may have multiple valid encodings, +/// * non-effectual prefixes are generally not printed by `yaxpeax-x86`, +/// * repeated prefixes (imagine "`rep rep rep movsb`") are not printed and generally not accepted by assemblers, +#[non_exhaustive] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum DisplayStyle { /// intel-style syntax for instructions, like /// `add rax, [rdx + rcx * 2 + 0x1234]` @@ -3590,7 +3658,7 @@ pub enum DisplayStyle { /// C-style syntax for instructions, like /// `rax += [rdx + rcx * 2 + 0x1234]` C, - /// format instructions in the syntax used by the Microsoft Assembler (MASM), like + /// format instructions in the syntax used by the Microsoft Macro Assembler (MASM), like /// `add rax, dword ptr [rdx + rcx * 2 + 1234h]` /// /// some instructions are decoded by `dumpbin.exe` and `yaxpeax-x86` but cannot be assembled by @@ -3606,11 +3674,374 @@ pub enum DisplayStyle { /// implementation of [`Display`](fmt::Display) that renders instructions using a specified display /// style. +/// +/// this is less flexible than the full gamut of `DisplayRules`, in favor of using `yaxpeax-x86`'s +/// default address-insensitive instruction formatting. 
pub struct InstructionDisplayer<'instr> { pub(crate) instr: &'instr Instruction, pub(crate) style: DisplayStyle, } +pub struct InstructionRuleBundle<'instr, 'rules, Rules> { + pub(crate) instr: &'instr Instruction, + pub(crate) rules: &'rules Rules, +} + +impl<'instr, 'rules, Rules> InstructionRuleBundle<'instr, 'rules, Rules> { + pub fn new(instr: &'instr Instruction, rules: &'rules Rules) -> Self { + Self { instr, rules } + } +} + +impl<'instr, 'fmt, Rules> fmt::Display for + InstructionRuleBundle<'instr, 'fmt, Rules> where + Rules: for<'f, 'g> DisplayRules>> +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut sink = yaxpeax_arch::display::FmtSink::new(fmt); + let style = self.rules.display_style(); + match style { + DisplayStyle::Intel => format_intel(&self.instr, self.rules, &mut sink), + DisplayStyle::C => { + format_c(&self.instr, self.rules, &mut sink) + } + DisplayStyle::Masm => { + masm::contextualize(&self.instr, self.rules, &mut sink) + } + } + } +} + +pub struct DefaultRules { + style: DisplayStyle +} + +impl DefaultRules { + pub fn for_style(style: DisplayStyle) -> Self { + Self { style } + } + + pub fn display<'me, 'instr>(&'me self, instr: &'instr Instruction) -> InstructionRuleBundle<'instr, 'me, Self> { + InstructionRuleBundle { + instr, + rules: self, + } + } +} + +impl DisplayRules for DefaultRules { + fn display_style(&self) -> DisplayStyle { + self.style + } +} + +/// ```rust +/// use yaxpeax_x86::long_mode::{InstDecoder, AbsoluteAddressFormatter}; +/// +/// // `AbsoluteAddressFormatter` prints instructions as a contiguous sequence starting from the +/// // provided address. 
+/// let mut addr_formatter = AbsoluteAddressFormatter::new(0x10); +/// +/// let decoder = InstDecoder::default(); +/// +/// let instr = decoder.decode_slice(&[0x33, 0x05, 0x08, 0x00, 0x00, 0x00]) +/// .expect("can decode 'xor eax, dword [rip + 0x08]'"); +/// +/// // instructions are printed with `rip` taken to be the address in the formatter, which +/// // overrides rip-relative display. +/// let formatted = format!("{}", addr_formatter.display(&instr)); +/// assert_eq!(formatted, "xor eax, dword [0x1e]"); +/// // the program address in `addr_formatter` must be advanced to the next instruction. +/// addr_formatter.advance(&instr); +/// let formatted = format!("{}", addr_formatter.display(&instr)); +/// assert_eq!(formatted, "xor eax, dword [0x24]"); +/// addr_formatter.advance(&instr); +/// +/// let branch = decoder.decode_slice(&[0xeb, 0x70]) +/// .expect("can decode 'jmp $+0x70'"); +/// +/// // jump destinations are also made absolute. +/// let formatted = format!("{}", addr_formatter.display(&branch)); +/// // note this instruction is printed at 0x10 (formatter base) + 6 + 6. 
+/// assert_eq!(formatted, "jmp 0x8e"); +/// ``` +#[derive(Copy, Clone)] +pub struct AbsoluteAddressFormatter { + rip: u64, + style: DisplayStyle, +} + +impl AbsoluteAddressFormatter { + pub fn new(rip: u64) -> Self { + AbsoluteAddressFormatter { + rip, + style: DisplayStyle::Intel, + } + } + + pub fn with_style(mut self, style: DisplayStyle) -> Self { + self.style = style; + self + } + + pub fn display<'me, 'instr>(&'me self, instr: &'instr Instruction) -> InstructionRuleBundle<'instr, 'me, Self> { + InstructionRuleBundle { + instr, + rules: self, + } + } + + pub fn advance(&mut self, instr: &Instruction) { + use yaxpeax_arch::{AddressBase, LengthedInstruction}; + self.rip = self.rip.wrapping_offset(instr.len()); + } +} + +impl DisplayRules for AbsoluteAddressFormatter { + fn display_style(&self) -> DisplayStyle { + self.style + } + + unsafe fn max_size() -> Option { + // Safety: this is somewhat subtle. + // + // `MAX_INSTRUCTION_LEN` is sized for the longest possible instruction lengths. the longest + // strings that `emit_address` may write are u64-sized: "0xffffffffffffffff". the previous + // maximum length of replaced fields ranges from `$ + 0x12341234` to `rip + 0x12341234`. + // addresses are mildly longer now. + // + // instructions that include a rip-sensitive value have only one: a branch offset or a + // rip-relative memory reference. this means the actual addition to an instruction's + // maximum printed size is `8 (u32->u64 digits) - 4 ("$ + ")` four more bytes. + // `MAX_INSTRUCTION_LEN` has around 180 bytes of excess reservation, so this is within that + // margin of error. 
+ Some(crate::MAX_INSTRUCTION_LEN) + } + + fn instr_addr(&self) -> Option { + Some(self.rip) + } + + fn emit_address(&self, addr: u64, s: &mut S) -> Result { + fn needs_leading_0(imm: u64) -> bool { + let mut rem = imm; + let mut digit = 0; + while rem > 0 { + digit = rem & 0xf; + rem = rem >> 4; + } + + // digit is whatever the top non-zero hex digit was in the number + digit >= 10 + } + + fn hex_ambiguous(imm: u64) -> bool { + imm >= 10 + } + + s.span_start_immediate(); + if self.style != DisplayStyle::Masm { + s.write_fixed_size("0x")?; + s.write_u64(addr)?; + } else { + if needs_leading_0(addr) { + s.write_char('0')?; + } + write!(s, "{:X}", addr)?; + if hex_ambiguous(addr) { + s.write_char('h')?; + } + } + s.span_end_immediate(); + Ok(true) + } +} + +/// a set of functions controlling how instructions are formatted in an [`InstructionDisplayer`]. +/// +/// generally, functions on this trait return either `Option`, where `None` indicates "data not +/// available, caller should use some kind of default fallback behavior", +/// or `Result` where the bool indicates if the implementation has done the +/// formatting work that would otherwise be left up to the caller. +/// +/// functions on this trait default to returning `None` and `Ok(false)`, meaning a minimal +/// `impl DisplayRules for ... { }` will not override any display logic. +/// +/// ## interaction with `DisplayStyle` +/// +/// `DisplayStyle` controls the overall style used for instruction printing, through to how +/// registers and numbers are printed. as `DisplayRules` functions override default `yaxpeax-x86` +/// printing behavior, implementations must consider if they want to support variants of +/// `DisplayStyle` as well, or only format with an assumed `DisplayStyle`. +/// +/// the `DisplayStyle` used when formatting an instruction and invoking `DisplayRules` functions is +/// controlled by `DisplayRules::display_style`.
it is an implementation's decision to customize +/// this (as an example: [`AbsoluteAddressFormatter::with_style()`]) or to simply return one style +/// that is always used for instruction formatting. +/// +/// `DisplayStyle` variants in some cases also control the display (or non-display) of implicit +/// operands. for example, the `Intel` style writes x87 instructions with implicit operands, but +/// `Masm` omits some (such as `fld` or `fst`, where `masm.exe` will treat `st(0)` as a syntax +/// error). correspondingly, `DisplayRules::emit_operand` will be invoked only for the +/// normally-printed non-`st(0)` operand of `fld` or `fst`. in a handful of cases the differences +/// across display styles involve invoking more specific `DisplayRules` rules, particularly when +/// there may not be an explicit operand to format and a register or immediate is shown in its +/// stead; an example of this is formatting of the implicitly-read registers of `monitor` or +/// `mwait` under `DisplayStyle::Masm`. +/// +/// some other `DisplayStyle` variance includes size suffixes on opcodes, though there is no +/// comprehensive list of per-style formatting variance. +pub trait DisplayRules { + /// provide a `DisplayStyle` used when formatting instructions through this impl of + /// `DisplayRules`. + /// + /// if this impl of `DisplayRules` overrides some aspect of instruction formatting, it is that + /// override's responsibility to follow the `DisplayStyle` reported here. or ignore it, if you + /// so choose! + fn display_style(&self) -> DisplayStyle; + + /// report the maximum size of an instruction formatted by this `DisplayRules` implementation, + /// across all x86_64 instructions. + /// + /// `max_size()` is used to control the use of bounds checks when formatting into an + /// [`InstructionTextBuffer`]. this is fundamentally a hint, and may be ignored by future + /// versions of `yaxpeax-x86`. 
+ /// + /// # implementation guidance + /// + /// user code is extraordinarily unlikely to be able to safely return `Some` from this + /// function. + /// + /// implementations that return `Some` must never produce more than that many *bytes* of + /// output, including any output produced by `yaxpeax-x86`, in formatting an instruction. take + /// care to consider multi-byte UTF-8 codepoints, as `InstructionTextBuffer` writes into Rust + /// `alloc::string::String`, which uses that representation for character data. + /// + /// implementations that use arbitrary strings for symbols, for example, are extremely unlikely + /// to be able to uphold a fixed-max-size hint. + /// + /// note that `yaxpeax-x86` may write additional data in future `DisplayStyle` variants. new + /// instructions may have longer mnemonics. new extensions may define longer register names, or + /// memory label sizes. new encodings may have additional (longer!) decorators or otherwise + /// raise the maximum size of a formatted instruction. any of these may occur across a patch or + /// minor version. + unsafe fn max_size() -> Option { None } + + /// the address of the instruction to be printed. + /// + /// if this returns `None`, the default behavior is to show the relevant details of an + /// instruction in an address-insensitive way. see [`DisplayRules::emit_branch_addr`] and + /// [`DisplayRules::emit_relative_address`] for more. + fn instr_addr(&self) -> Option { + None + } + + /// override all aspects of formatting the instruction. + fn emit_instruction(&self, instr: &Instruction, s: &mut S) -> Result { + let _ = instr; + let _ = s; + Ok(false) + } + + /// override the display of the `op_idx`'th explicit operand in `instr`. + /// + /// if a memory access, the overridden region includes the operand's memory size label and + /// segment override. if the operand has descriptors (like AVX512 rounding or broadcast modes), + /// they are overridden here as well.
+ fn emit_operand(&self, instr: &Instruction, op_idx: u8, s: &mut S) -> Result { + let _ = instr; + let _ = op_idx; + let _ = s; + Ok(false) + } + + fn emit_register(&self, reg: RegSpec, s: &mut S) -> Result { + let _ = reg; + let _ = s; + Ok(false) + } + + /// override the display of a signed immediate in an instruction. + /// + /// this is the `0x50` in `push 0x50`, as well as `0x60` in `ret 0x60`, and `0x500098` in + /// `mov rax, 0x500098`. this function is a unified formatter for all sizes of immediate; the + /// size of the encoded value is not reported. + fn emit_signed_immediate(&self, imm: i64, s: &mut S) -> Result { + let _ = imm; + let _ = s; + Ok(false) + } + + /// override the display of an unsigned immediate in an instruction. + /// + /// this is typically an integer involved in SIMD instruction configuration, or immediates in + /// instructions that operate on single-byte registers. that is, this is the `0xa4` in + /// `cmp al, 0xa4`. this also reports u16 immediates as in `ret 0x1234` or `retf 0x1234`. + fn emit_unsigned_immediate(&self, imm: u64, s: &mut S) -> Result { + let _ = imm; + let _ = s; + Ok(false) + } + + /// override the display of a conditional or unconditional relative branch target. + /// + /// this is the `$+0x60` in `jmp $+0x60`. the default behavior is to check + /// [`DisplayRules::instr_addr()`] for the address of this instruction, convert the relative + /// branch to an absolute destination, and call [`DisplayRules::emit_address()`] with that + /// address. + /// + /// note that custom implementations of `emit_branch_addr` may want to retain the above + /// behavior. + fn emit_branch_addr(&self, inst_len: AddressDiff, rel: i32, s: &mut S) -> Result { + let Some(ip) = self.instr_addr() else { + return Ok(false); + }; + + let next = ip.wrapping_offset(inst_len); + let dest = next.wrapping_add(rel as i64 as u64); + self.emit_address(dest, s) + } + + /// override the display of a rip-relative address.
+ /// + /// this is the `rip + 0x1234` in `xor rax, [rip + 0x1234]`. + fn emit_relative_address(&self, inst_len: AddressDiff, rel: i32, s: &mut S) -> Result { + let Some(ip) = self.instr_addr() else { + return Ok(false); + }; + + let next = ip.wrapping_offset(inst_len); + let dest = next.wrapping_add(rel as i64 as u64); + self.emit_address(dest, s) + } + + /// override the display of a literal absolute address. + /// + /// this is the `0x10` in `mov rax, gs:[0x10]`, as well as + /// the `0x12345678` in `add rax, [0x12345678]`. the default behavior is to simply call + /// [`DisplayRules::emit_address`] with the absolute value being dereferenced. + fn emit_absolute_address(&self, abs: u64, s: &mut S) -> Result { + self.emit_address(abs, s) + } + + /// write some address out to the provided `DisplaySink`. + /// + /// notably, this does *not* include values like relocated immediates in expressions like + /// `mov rax, fn_ptr`; immediates are always printed through + /// [`DisplayRules::emit_signed_immediate`] or [`DisplayRules::emit_unsigned_immediate`]. + fn emit_address(&self, addr: u64, s: &mut S) -> Result { + let _ = addr; + let _ = s; + Ok(false) + } + + // 32-bit, 16-bit-only.. + /* + fn emit_far_address(&mut self, seg: u16, offs: u32, s: &mut S) -> Result { + Ok(false) + } + */ +} + /* * Can't implement this as accepting a formatter because rust * doesn't let me build one outside println! or write! or whatever. @@ -3648,19 +4079,32 @@ impl Instruction { #[cfg_attr(feature="profiling", inline(never))] pub fn write_to(&self, out: &mut T) -> fmt::Result { let mut out = yaxpeax_arch::display::FmtSink::new(out); - contextualize_intel(self, &mut out) + let rules = DefaultRules::for_style(DisplayStyle::Intel); + format_intel(self, &rules, &mut out) } /// format this instruction into `out`, which may perform additional styling based on its /// `DisplaySink` implementation.
#[cfg_attr(feature="profiling", inline(never))] pub fn display_into(&self, out: &mut T) -> fmt::Result { - contextualize_intel(self, out) + let rules = DefaultRules::for_style(DisplayStyle::Intel); + format_intel(self, &rules, out) + } + + /// format this instruction into `out`, using the provided `rules` to potentially override the + /// default formatting for a given `DisplayStyle`. + #[cfg_attr(feature="profiling", inline(never))] + pub fn format_into>(&self, rules: &Rules, out: &mut T) -> fmt::Result { + format_intel(self, rules, out) } } #[cfg_attr(feature="profiling", inline(never))] -pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn format_intel>(instr: &Instruction, rules: &R, out: &mut T) -> fmt::Result { + if rules.emit_instruction(instr, out)? { + return Ok(()); + } + if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -3686,30 +4130,46 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut if instr.operand_count > 0 { out.write_fixed_size(" ")?; - if instr.visit_operand(0, &mut RelativeBranchPrinter { - inst: instr, - out, - })? { - return Ok(()); + if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { + if RELATIVE_BRANCHES.contains(&instr.opcode) { + // relative branch instructions have only one operand, so print this one and we're + // done. relative branch instructions *also* have a ... relative branch ... as + // their only operand, so don't `emit_operand()` which would confuse these for a + // "normal" immediate. + if rules.emit_operand(instr, 0, out)? { + return Ok(()); + } + } + if instr.visit_operand(0, &mut RelativeBranchPrinter { + inst: instr, + rules, + out, + })? 
{ + return Ok(()); + } } - if instr.operands[0 as usize].is_memory() { - out.write_mem_size_label(instr.mem_size)?; - if let Some(prefix) = instr.segment_override_for_op(0) { - let name = prefix.name(); - out.write_char(' ')?; - out.write_char(name[0] as char)?; - out.write_char(name[1] as char)?; - out.write_fixed_size(":")?; - } else { - out.write_fixed_size(" ")?; + if !rules.emit_operand(instr, 0, out)? { + if instr.operands[0 as usize].is_memory() { + out.write_mem_size_label(instr.mem_size)?; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } } - } - let mut displayer = DisplayingOperandVisitor { - f: out, - }; - instr.visit_operand(0 as u8, &mut displayer)?; + let mut displayer = DisplayingOperandVisitor { + instr_len: instr.len(), + f: out, + rules, + }; + instr.visit_operand(0 as u8, &mut displayer)?; + } for i in 1..instr.operand_count { // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to @@ -3723,6 +4183,11 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut unsafe { unreachable_unchecked(); } } + if rules.emit_operand(instr, i, out)? { + // if the rule printed an operand out, continue on to the next one! 
+ continue; + } + if instr.operands[i as usize].is_memory() { out.write_mem_size_label(instr.mem_size)?; if let Some(prefix) = instr.segment_override_for_op(i) { @@ -3737,7 +4202,9 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut } let mut displayer = DisplayingOperandVisitor { + instr_len: instr.len(), f: out, + rules, }; instr.visit_operand(i as u8, &mut displayer)?; @@ -3802,7 +4269,7 @@ pub(crate) fn contextualize_intel(instr: &Instruction, out: &mut Ok(()) } -pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn format_c>(instr: &Instruction, rules: &R, out: &mut T) -> fmt::Result { let mut brace_count = 0; let mut prefixed = false; @@ -3848,300 +4315,429 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) } } - fn write_jmp_operand(op: Operand, out: &mut T) -> fmt::Result { - let mut out = yaxpeax_arch::display::FmtSink::new(out); - use core::fmt::Write; - match op { - Operand::ImmediateI8 { imm: rel } => { - let rel = if rel >= 0 { - out.write_str("$+")?; - rel as u8 - } else { - out.write_str("$-")?; - rel.unsigned_abs() + fn write_jmp_operand>(instr: &Instruction, idx: usize, rules: &Rules, out: &mut T) -> fmt::Result { + match instr.operands[idx] { + OperandSpec::ImmI8 => { + let mut rel_printer = RelativeBranchPrinter { + inst: instr, out, rules }; - out.write_prefixed_u8(rel) + instr.visit_operand(idx as u8, &mut rel_printer) + .map(|x| assert!(x)) } - Operand::ImmediateI32 { imm: rel } => { - let rel = if rel >= 0 { - out.write_str("$+")?; - rel as u32 - } else { - out.write_str("$-")?; - rel.unsigned_abs() + OperandSpec::ImmI32 => { + let mut rel_printer = RelativeBranchPrinter { + inst: instr, out, rules }; - out.write_prefixed_u32(rel) + instr.visit_operand(idx as u8, &mut rel_printer) + .map(|x| assert!(x)) } - other => { - write!(out, "{}", other) + _other => { + let mut displayer = DisplayingOperandVisitor { + instr_len: instr.len(), + f: out, + rules, + }; + 
instr.visit_operand(idx as u8, &mut displayer) } } } + let mut displayer = DisplayingOperandVisitor { + instr_len: instr.len(), + f: out, + rules, + }; + match instr.opcode { - Opcode::Invalid => { out.write_str("invalid")?; }, + Opcode::Invalid => { displayer.f.write_str("invalid")?; }, Opcode::MOVS => { - out.write_str("es:[rdi++] = ds:[rsi++]")?; + displayer.f.write_str("es:[rdi++] = ds:[rsi++]")?; }, Opcode::CMPS => { - out.write_str("rflags = flags(ds:[rsi++] - es:[rdi++])")?; + displayer.f.write_str("rflags = flags(ds:[rsi++] - es:[rdi++])")?; }, Opcode::LODS => { // TODO: size - out.write_str("rax = ds:[rsi++]")?; + displayer.f.write_str("rax = ds:[rsi++]")?; }, Opcode::STOS => { // TODO: size - out.write_str("es:[rdi++] = rax")?; + displayer.f.write_str("es:[rdi++] = rax")?; }, Opcode::INS => { // TODO: size - out.write_str("es:[rdi++] = port(dx)")?; + displayer.f.write_str("es:[rdi++] = port(dx)")?; }, Opcode::OUTS => { // TODO: size - out.write_str("port(dx) = ds:[rsi++]")?; + displayer.f.write_str("port(dx) = ds:[rsi++]")?; } Opcode::ADD => { - write!(out, "{} += {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::OR => { - write!(out, "{} |= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" |= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::ADC => { - write!(out, "{} += {} + rflags.cf", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("rflags.cf")?; } Opcode::ADCX => { - write!(out, "{} += {} + rflags.cf", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + 
displayer.f.write_str("rflags.cf")?; } Opcode::ADOX => { - write!(out, "{} += {} + rflags.of", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("rflags.of")?; } Opcode::SBB => { - write!(out, "{} -= {} + rflags.cf", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" -= ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("rflags.cf")?; } Opcode::AND => { - write!(out, "{} &= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" &= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::XOR => { - write!(out, "{} ^= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" ^= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SUB => { - write!(out, "{} -= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" -= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::CMP => { - write!(out, "rflags = flags({} - {})", instr.operand(0), instr.operand(1))?; + displayer.f.write_str("rflags")?; + displayer.f.write_str(" = flags(")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" - ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::TEST => { - write!(out, "rflags = flags({} & {})", instr.operand(0), instr.operand(1))?; + displayer.f.write_str("rflags")?; + displayer.f.write_str(" = flags(")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" & ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::XADD => { - write!(out, "({}, {}) = ({} + {}, {})", instr.operand(0), instr.operand(1), instr.operand(0), 
instr.operand(1), instr.operand(0))?; + // something like "({}, {}) = ({} + {}, {})"; + displayer.f.write_char('(')?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(", ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(") = (")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" + ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(", ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_char(')')?; } Opcode::BT => { - write!(out, "bt")?; + displayer.f.write_str("bt")?; } Opcode::BTS => { - write!(out, "bts")?; + displayer.f.write_str("bts")?; } Opcode::BTC => { - write!(out, "btc")?; + displayer.f.write_str("btc")?; } Opcode::BSR => { - write!(out, "{} = msb({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = msb(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::BSF => { - write!(out, "{} = lsb({}) (x86 bsf)", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = lsb(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(") (x86 bsf)")?; } Opcode::TZCNT => { - write!(out, "{} = lsb({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = lsb(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::MOV => { - write!(out, "{} = {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SAR => { - write!(out, "{} = {} >>> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" >>> ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SAL => { - 
write!(out, "{} = {} <<< {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" <<< ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SHR => { - write!(out, "{} = {} >> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" >> ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SHRX => { - write!(out, "{} = {} >> {} (x86 shrx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" >> ")?; + instr.visit_operand(2, &mut displayer)?; + displayer.f.write_str(" (x86 shrx)")?; } Opcode::SHL => { - write!(out, "{} = {} << {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" << ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SHLX => { - write!(out, "{} = {} << {} (x86 shlx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" << ")?; + instr.visit_operand(2, &mut displayer)?; + displayer.f.write_str(" (x86 shlx)")?; } Opcode::ROR => { - write!(out, "{} = {} ror {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" ror ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::RORX => { - write!(out, "{} = {} ror {} (x86 rorx)", instr.operand(0), instr.operand(1), 
instr.operand(2))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" ror ")?; + instr.visit_operand(2, &mut displayer)?; + displayer.f.write_str(" (x86 rorx)")?; } Opcode::ROL => { - write!(out, "{} = {} rol {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" rol ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::RCR => { - write!(out, "{} = {} rcr {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" rcr ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::RCL => { - write!(out, "{} = {} rcl {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" rcl ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::PUSH => { - write!(out, "push({})", instr.operand(0))?; + displayer.f.write_str("push(")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::POP => { - write!(out, "{} = pop()", instr.operand(0))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = pop()")?; } Opcode::MOVD => { - write!(out, "{} = movd({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = movd(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::MOVQ => { - write!(out, "{} = movq({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = movq(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::MOVNTQ 
=> { - write!(out, "{} = movntq({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = movntq(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::INC => { if instr.operand(0).is_memory() { match instr.mem_size { - 1 => { write!(out, "byte {}++", instr.operand(0))?; }, - 2 => { write!(out, "word {}++", instr.operand(0))?; }, - 4 => { write!(out, "dword {}++", instr.operand(0))?; }, - _ => { write!(out, "qword {}++", instr.operand(0))?; }, // sizes that are not 1, 2, or 4, *better* be 8. + 1 => { displayer.f.write_str("byte ")?; }, + 2 => { displayer.f.write_str("word ")?; }, + 4 => { displayer.f.write_str("dword ")?; }, + _ => { displayer.f.write_str("qword ")?; }, // sizes that are not 1, 2, or 4, *better* be 8. } - } else { - write!(out, "{}++", instr.operand(0))?; } + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str("++")?; } Opcode::DEC => { if instr.operand(0).is_memory() { match instr.mem_size { - 1 => { write!(out, "byte {}--", instr.operand(0))?; }, - 2 => { write!(out, "word {}--", instr.operand(0))?; }, - 4 => { write!(out, "dword {}--", instr.operand(0))?; }, - _ => { write!(out, "qword {}--", instr.operand(0))?; }, // sizes that are not 1, 2, or 4, *better* be 8. + 1 => { displayer.f.write_str("byte ")?; }, + 2 => { displayer.f.write_str("word ")?; }, + 4 => { displayer.f.write_str("dword ")?; }, + _ => { displayer.f.write_str("qword ")?; }, // sizes that are not 1, 2, or 4, *better* be 8. 
} - } else { - write!(out, "{}--", instr.operand(0))?; } + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str("--")?; } Opcode::JMP => { - out.write_str("jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; + }, + Opcode::CALL => { + displayer.f.write_str("call ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JRCXZ => { - out.write_str("if rcx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if rcx == 0 then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JECXZ => { - out.write_str("if ecx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if ecx == 0 then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::LOOP => { - out.write_str("rcx--; if rcx != 0 then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("rcx--; if rcx != 0 then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::LOOPZ => { - out.write_str("rcx--; if rcx != 0 and zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("rcx--; if rcx != 0 and zero(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::LOOPNZ => { - out.write_str("rcx--; if rcx != 0 and !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("rcx--; if rcx != 0 and !zero(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JO => { - out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if _(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNO => { - out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if _(rflags) then jmp ")?; + write_jmp_operand(instr, 0, 
rules, displayer.f)?; }, Opcode::JB => { - out.write_str("if /* unsigned */ below(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ below(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNB => { - out.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JZ => { - out.write_str("if zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if zero(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNZ => { - out.write_str("if !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if !zero(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNA => { - out.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JA => { - out.write_str("if /* unsigned */ above(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ above(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JS => { - out.write_str("if signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if signed(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNS => { - out.write_str("if !signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if !signed(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JP => { - out.write_str("if parity(rflags) 
then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if parity(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNP => { - out.write_str("if !parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if !parity(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JL => { - out.write_str("if /* signed */ less(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ less(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JGE => { - out.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JLE => { - out.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JG => { - out.write_str("if /* signed */ greater(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ greater(rflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::NOP => { - write!(out, "nop")?; + displayer.f.write_str("nop")?; } _ => { if instr.operand_count() == 0 { - write!(out, "{}()", instr.opcode())?; + displayer.f.write_opcode(instr.opcode)?; + displayer.f.write_str("()")?; } else { - write!(out, "{} = {}({}", instr.operand(0), instr.opcode(), instr.operand(0))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + displayer.f.write_opcode(instr.opcode)?; + displayer.f.write_str("(")?; + instr.visit_operand(0, &mut displayer)?; let mut comma = true; for i in 1..instr.operand_count() { if 
comma { - write!(out, ", ")?; + displayer.f.write_str(", ")?; } - write!(out, "{}", instr.operand(i))?; + instr.visit_operand(i, &mut displayer)?; comma = true; } - write!(out, ")")?; + displayer.f.write_str(")")?; } } } while brace_count > 0 { - out.write_str(" }")?; + displayer.f.write_str(" }")?; brace_count -= 1; } @@ -4159,15 +4755,16 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual let mut out = yaxpeax_arch::display::FmtSink::new(out); + let rules = DefaultRules::for_style(*style); match style { DisplayStyle::Intel => { - contextualize_intel(instr, &mut out) + format_intel(instr, &rules, &mut out) } DisplayStyle::C => { - contextualize_c(instr, &mut out) + format_c(instr, &rules, &mut out) } DisplayStyle::Masm => { - masm::contextualize(&instr, &mut out) + masm::contextualize(&instr, &rules, &mut out) } } } @@ -4180,6 +4777,8 @@ impl ShowContextual]>, out: &mut T) -> fmt::Result { let mut out = yaxpeax_arch::display::FmtSink::new(out); let out = &mut out; + let rules = DefaultRules::for_style(DisplayStyle::Intel); + let rules = &rules; use core::fmt::Write; if self.prefixes.lock() { @@ -4215,7 +4814,9 @@ impl ShowContextual ShowContextual { write!(out, ", ")?; let mut displayer = DisplayingOperandVisitor { + instr_len: self.len(), f: out, + rules, }; self.visit_operand(i as u8, &mut displayer)?; } @@ -4258,12 +4861,13 @@ static RELATIVE_BRANCHES: [Opcode; 23] = [ Opcode::JLE, Opcode::JG, ]; -struct RelativeBranchPrinter<'a, F: DisplaySink> { +struct RelativeBranchPrinter<'a, F: DisplaySink, Rules: DisplayRules> { inst: &'a Instruction, out: &'a mut F, + rules: &'a Rules, } -impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> { +impl<'a, F: DisplaySink, Rules: DisplayRules> super::OperandVisitor for RelativeBranchPrinter<'a, F, Rules> { // return true if we printed a relative branch offset, false otherwise type Ok = bool; // but errors are errors @@ -4281,6 +4885,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for 
RelativeBranchPrinter<'a, F> #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, rel: i8) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) { + if self.rules.emit_branch_addr(self.inst.len(), rel as i32, &mut self.out)? { + // the display rule declared it has fully printed the relative address, so we have + // nothing to do. + return Ok(true); + } + self.out.write_char('$')?; let mut v = rel as u8; if rel < 0 { @@ -4299,6 +4909,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> #[cfg_attr(feature="profiling", inline(never))] fn visit_i32(&mut self, rel: i32) -> Result { if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { + if self.rules.emit_branch_addr(self.inst.len(), rel, &mut self.out)? { + // the display rule declared it has fully printed the relative address, so we have + // nothing to do. + return Ok(true); + } + self.out.write_char('$')?; let mut v = rel as u32; if rel < 0 { @@ -4391,8 +5007,9 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> #[cfg(feature="alloc")] mod buffer_sink { use core::fmt; - use super::super::{DisplayStyle, InstructionDisplayer}; - use super::{contextualize_c, contextualize_intel}; + use super::super::{DisplayRules, DisplayStyle, Instruction, InstructionDisplayer}; + use super::{format_c, format_intel}; + use super::DefaultRules; /// helper to format `amd64` instructions with highest throughput and least configuration. this is /// functionally a buffer for one x86 instruction's text. @@ -4456,15 +5073,52 @@ mod buffer_sink { // never escape `format_inst`. 
let mut handle = unsafe { self.write_handle() }; + let rules = DefaultRules::for_style(display.style); match display.style { DisplayStyle::Intel => { - contextualize_intel(&display.instr, &mut handle)?; + format_intel(&display.instr, &rules, &mut handle)?; + } + DisplayStyle::C => { + format_c(&display.instr, &rules, &mut handle)?; + } + DisplayStyle::Masm => { + super::masm::contextualize(&display.instr, &rules, &mut handle)?; + } + } + + Ok(self.text_str()) + } + + /// format an instruction via some rules into this buffer. returns a borrow of that same + /// internal buffer for convenience. + /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this buffer, it will be overwritten. + pub fn format_inst_rules< + 'buf, 'instr, + Rules: for<'r> DisplayRules> + >(&'buf mut self, instr: &'instr Instruction, rules: &Rules) -> Result<&'buf str, fmt::Error> { + let Some(rule_max) = (unsafe { Rules::max_size() }) else { + return Err(fmt::Error); + }; + + if self.content.capacity() < rule_max { + return Err(fmt::Error); + } + + // Safety: this sink is used to format exactly one instruction and then dropped. it can + // never escape `format_inst`. + let mut handle = unsafe { self.write_handle() }; + + match rules.display_style() { + DisplayStyle::Intel => { + format_intel(&instr, rules, &mut handle)?; } DisplayStyle::C => { - contextualize_c(&display.instr, &mut handle)?; + format_c(&instr, rules, &mut handle)?; } DisplayStyle::Masm => { - super::masm::contextualize(&display.instr, &mut handle)?; + super::masm::contextualize(&instr, rules, &mut handle)?; } } -- cgit v1.1