diff options
Diffstat (limited to 'src/real_mode')
| -rw-r--r-- | src/real_mode/display.rs | 1059 | ||||
| -rw-r--r-- | src/real_mode/display/masm.rs | 841 | ||||
| -rw-r--r-- | src/real_mode/mod.rs | 20 |
3 files changed, 1325 insertions, 595 deletions
diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index d4d5944..98f292b 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -5,9 +5,13 @@ use core::fmt; // allowing these deprecated items for the time being, not yet breaking yaxpeax-x86 apis #[allow(deprecated)] use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; +use yaxpeax_arch::{AddressBase, AddressDiff, LengthedInstruction}; use crate::MEM_SIZE_STRINGS; -use crate::real_mode::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex}; +use crate::real_mode::{ + RegSpec, Opcode, Operand, OperandSpec, + MergeMode, InstDecoder, Instruction, Segment, PrefixVex +}; use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; @@ -292,23 +296,41 @@ impl fmt::Display for Operand { impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Operand { fn colorize(&self, _colors: &Y, f: &mut T) -> fmt::Result { let mut f = yaxpeax_arch::display::FmtSink::new(f); + let rules = DefaultRules::for_style(DisplayStyle::Intel); let mut visitor = DisplayingOperandVisitor { - f: &mut f + f: &mut f, + rules: &rules, }; self.visit(&mut visitor) } } -struct DisplayingOperandVisitor<'a, T> { +struct DisplayingOperandVisitor<'a, 'rules, T, R> { f: &'a mut T, + rules: &'rules R, } -impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> { + +impl <T: DisplaySink, R: DisplayRules<T>> DisplayingOperandVisitor<'_, '_, T, R> { + fn write_register(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + if self.rules.emit_register(reg, &mut self.f)? 
{ + return Ok(()); + } + + self.f.write_reg(reg) + } +} + +impl <T: DisplaySink, R: DisplayRules<T>> super::OperandVisitor for DisplayingOperandVisitor<'_, '_, T, R> { type Ok = (); type Error = core::fmt::Error; #[cfg_attr(feature="profiling", inline(never))] fn visit_u8(&mut self, imm: u8) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_unsigned_immediate(imm as u32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u8(imm)?; @@ -317,6 +339,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, imm: i8) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_signed_immediate(imm as i32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u8; if imm < 0 { @@ -330,6 +356,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_u16(&mut self, imm: u16) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_unsigned_immediate(imm as u32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u16(imm)?; @@ -338,6 +368,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_i16(&mut self, imm: i16) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_signed_immediate(imm as i32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u16; if imm < 0 { @@ -351,6 +385,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_u32(&mut self, imm: u32) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_unsigned_immediate(imm, self.f)? 
{ + return Ok(()); + } + self.f.span_start_immediate(); self.f.write_fixed_size("0x")?; self.f.write_u32(imm)?; @@ -358,6 +396,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_signed_immediate(imm, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let mut v = imm as u32; if imm < 0 { @@ -371,20 +413,14 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } #[cfg_attr(feature="profiling", inline(never))] fn visit_reg(&mut self, reg: RegSpec) -> Result<Self::Ok, Self::Error> { - self.f.span_start_register(); - self.f.write_reg(reg)?; - self.f.span_end_register(); + self.write_register(reg)?; Ok(()) } fn visit_reg_mask_merge(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result<Self::Ok, Self::Error> { - self.f.span_start_register(); - self.f.write_reg(spec)?; - self.f.span_end_register(); + self.write_register(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.span_start_register(); - self.f.write_reg(mask)?; - self.f.span_end_register(); + self.write_register(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -393,10 +429,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> Ok(()) } fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: super::SaeMode) -> Result<Self::Ok, Self::Error> { - self.f.write_reg(spec)?; + self.write_register(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.write_reg(mask)?; + self.write_register(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -406,10 +442,10 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> Ok(()) } fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result<Self::Ok, 
Self::Error> { - self.f.write_reg(spec)?; + self.write_register(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; - self.f.write_reg(mask)?; + self.write_register(mask)?; self.f.write_fixed_size("}")?; } if let MergeMode::Zero = merge_mode { @@ -420,15 +456,23 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_abs_u16(&mut self, imm: u16) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_fixed_size("0x")?; - self.f.write_u16(imm)?; + if !self.rules.emit_absolute_address(imm as u32, self.f)? { + self.f.span_start_address(); + self.f.write_fixed_size("0x")?; + self.f.write_u16(imm)?; + self.f.span_end_address(); + } self.f.write_fixed_size("]")?; Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_fixed_size("0x")?; - self.f.write_u32(imm)?; + if !self.rules.emit_absolute_address(imm, self.f)? { + self.f.span_start_address(); + self.f.write_fixed_size("0x")?; + self.f.write_u32(imm)?; + self.f.span_end_address(); + } self.f.write_fixed_size("]")?; Ok(()) } @@ -436,19 +480,19 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> #[cfg_attr(feature="profiling", inline(never))] fn visit_disp(&mut self, base: RegSpec, disp: i32) -> Result<Self::Ok, Self::Error> { self.f.write_char('[')?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" ")?; self.f.write_displacement(disp)?; self.f.write_fixed_size("]") } fn visit_deref(&mut self, base: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size("]") } fn visit_index_scale(&mut self, index: RegSpec, scale: u8) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; 
self.f.write_fixed_size("]")?; @@ -457,7 +501,7 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_index_scale_disp(&mut self, index: RegSpec, scale: u8, disp: i32) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; @@ -466,18 +510,18 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_base_index_scale(&mut self, base: RegSpec, index: RegSpec, scale: u8) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]") } fn visit_base_index_scale_disp(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; @@ -486,97 +530,97 @@ impl <T: DisplaySink> super::OperandVisitor for DisplayingOperandVisitor<'_, T> } fn visit_disp_masked(&mut self, base: RegSpec, disp: i32, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_char('[')?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_char(' ')?; self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_deref_masked(&mut self, base: RegSpec, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + 
self.write_register(base)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_scale_masked(&mut self, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_index_scale_disp_masked(&mut self, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size(" ")?; self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_masked(&mut self, base: RegSpec, index: RegSpec, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_disp_masked(&mut self, base: RegSpec, index: RegSpec, disp: i32, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" ")?; self.f.write_displacement(disp)?; self.f.write_char(']')?; 
self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_scale_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_fixed_size("]")?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } fn visit_base_index_scale_disp_masked(&mut self, base: RegSpec, index: RegSpec, scale: u8, disp: i32, mask_reg: RegSpec) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.write_reg(base)?; + self.write_register(base)?; self.f.write_fixed_size(" + ")?; - self.f.write_reg(index)?; + self.write_register(index)?; self.f.write_fixed_size(" * ")?; self.f.write_scale(scale)?; self.f.write_char(' ')?; self.f.write_displacement(disp)?; self.f.write_char(']')?; self.f.write_char('{')?; - self.f.write_reg(mask_reg)?; + self.write_register(mask_reg)?; self.f.write_char('}')?; Ok(()) } @@ -2121,9 +2165,9 @@ impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Opcode { impl fmt::Display for Instruction { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - // to reuse one implementation, call the deprecated function for now. - #[allow(deprecated)] - self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) + let rules = DefaultRules::for_style(DisplayStyle::Intel); + let bundle = rules.display(self); + fmt::Display::fmt(&bundle, fmt) } } @@ -2139,13 +2183,20 @@ impl<'instr> fmt::Display for InstructionDisplayer<'instr> { /// intel syntax, though memory operand sizes are elided if they can be inferred from other /// operands. /// -/// note that `yaxpeax-x86` does not (and can not!) 
try to guarantee that formatting through any -/// `DisplayStyle` round-trips through an assembler to produce the same bytes as were intially -/// disassembled. opcode choice (for example, `0x31` vs `0x33` encodings of register-register -/// `xor`) may not be controllable, immediates and displacements may have multiple valid encodings, -/// and prefix handling in general is very lossy especially in the presence of repeat or -/// ineffectual prefixes. -#[derive(Copy, Clone)] +/// ## lossiness +/// +/// for some display styles, `yaxpeax-x86` tries to ensure instructions are formatted in a way that +/// is accepted by some corresponding assemblers. even so, such round-tripping is inherently lossy; +/// some instructions may have many equally-valid encodings, so re-assembling an instruction does +/// not guarantee an assembler would produce the same bytes as were initially decoded. +/// +/// a non-exhaustive list of ways that going through `bytes -> decode -> assemble -> bytes` can be lossy: +/// * opcode choice (for example, `0x31` vs `0x33` encodings of register-register `xor`) may not be controllable, +/// * immediates and displacements may have multiple valid encodings, +/// * non-effectual prefixes are generally not printed by `yaxpeax-x86`, +/// * repeated prefixes (imagine "`rep rep rep movsb`") are not printed and generally not accepted by assemblers, +#[non_exhaustive] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum DisplayStyle { /// intel-style syntax for instructions, like /// `add eax, [edx + ecx * 2 + 0x1234]` @@ -2153,7 +2204,7 @@ pub enum DisplayStyle { /// C-style syntax for instructions, like /// `eax += [edx + ecx * 2 + 0x1234]` C, - /// format instructions in the syntax used by the Microsoft Assembler (MASM), like + /// format instructions in the syntax used by the Microsoft Macro Assembler (MASM), like /// `add eax, dword ptr [edx + ecx * 2 + 1234h]` /// /// some instructions are decoded by `dumpbin.exe` and `yaxpeax-x86` but cannot be assembled 
by @@ -2169,11 +2220,340 @@ pub enum DisplayStyle { /// implementation of [`Display`](fmt::Display) that renders instructions using a specified display /// style. +/// +/// this is less flexible than the full gamut of `DisplayRules`, in favor of using `yaxpeax-x86`'s +/// default address-insensitive instruction formatting. pub struct InstructionDisplayer<'instr> { pub(crate) instr: &'instr Instruction, pub(crate) style: DisplayStyle, } +pub struct InstructionRuleBundle<'instr, 'rules, Rules> { + pub(crate) instr: &'instr Instruction, + pub(crate) rules: &'rules Rules, +} + +impl<'instr, 'rules, Rules> InstructionRuleBundle<'instr, 'rules, Rules> { + pub fn new(instr: &'instr Instruction, rules: &'rules Rules) -> Self { + Self { instr, rules } + } +} + +impl<'instr, 'fmt, Rules> fmt::Display for + InstructionRuleBundle<'instr, 'fmt, Rules> where + Rules: for<'f, 'g> DisplayRules<yaxpeax_arch::display::FmtSink<'f, fmt::Formatter<'g>>> +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut sink = yaxpeax_arch::display::FmtSink::new(fmt); + let style = self.rules.display_style(); + match style { + DisplayStyle::Intel => format_intel(&self.instr, self.rules, &mut sink), + DisplayStyle::C => { + format_c(&self.instr, self.rules, &mut sink) + } + DisplayStyle::Masm => { + masm::contextualize(&self.instr, self.rules, &mut sink) + } + } + } +} + +pub struct DefaultRules { + style: DisplayStyle +} + +impl DefaultRules { + pub fn for_style(style: DisplayStyle) -> Self { + Self { style } + } + + pub fn display<'me, 'instr>(&'me self, instr: &'instr Instruction) -> InstructionRuleBundle<'instr, 'me, Self> { + InstructionRuleBundle { + instr, + rules: self, + } + } +} + +impl<S: DisplaySink> DisplayRules<S> for DefaultRules { + fn display_style(&self) -> DisplayStyle { + self.style + } +} + +/// ```rust +/// use yaxpeax_x86::protected_mode::{InstDecoder, AbsoluteAddressFormatter}; +/// +/// // `AbsoluteAddressFormatter` prints instructions as a contiguous 
sequence starting from the +/// // provided address. +/// let mut addr_formatter = AbsoluteAddressFormatter::new(0x10); +/// +/// let decoder = InstDecoder::default(); +/// +/// let branch = decoder.decode_slice(&[0xeb, 0x70]) +/// .expect("can decode 'jmp $+0x70'"); +/// +/// // jump destinations are also made absolute. +/// let formatted = format!("{}", addr_formatter.display(&branch)); +/// assert_eq!(formatted, "jmp 0x82"); +/// +/// // unlike in 64-bit mode, relative branches are really the only operands shown differently. +/// ``` +#[derive(Copy, Clone)] +pub struct AbsoluteAddressFormatter { + eip: u32, + style: DisplayStyle, +} + +impl AbsoluteAddressFormatter { + pub fn new(eip: u32) -> Self { + AbsoluteAddressFormatter { + eip, + style: DisplayStyle::Intel, + } + } + + pub fn with_style(mut self, style: DisplayStyle) -> Self { + self.style = style; + self + } + + pub fn display<'me, 'instr>(&'me self, instr: &'instr Instruction) -> InstructionRuleBundle<'instr, 'me, Self> { + InstructionRuleBundle { + instr, + rules: self, + } + } + + pub fn advance(&mut self, instr: &Instruction) { + use yaxpeax_arch::{AddressBase, LengthedInstruction}; + self.eip = self.eip.wrapping_offset(instr.len()); + } +} + +impl<S: DisplaySink> DisplayRules<S> for AbsoluteAddressFormatter { + fn display_style(&self) -> DisplayStyle { + self.style + } + + unsafe fn max_size() -> Option<usize> { + // Safety: this is more straightforward than in 64-bit code: what was `$ + <i32>` + // becomes simply `<u32>`. the substitution strictly reduces the maximum length.
+ Some(crate::MAX_INSTRUCTION_LEN) + } + + fn instr_addr(&self) -> Option<u32> { + Some(self.eip) + } + + fn emit_address(&self, addr: u32, s: &mut S) -> Result<bool, fmt::Error> { + fn needs_leading_0(imm: u32) -> bool { + let mut rem = imm; + let mut digit = 0; + while rem > 0 { + digit = rem & 0xf; + rem = rem >> 4; + } + + // digit is whatever the top non-zero hex digit was in the number + digit >= 10 + } + + fn hex_ambiguous(imm: u32) -> bool { + imm >= 10 + } + + s.span_start_immediate(); + if self.style != DisplayStyle::Masm { + s.write_fixed_size("0x")?; + s.write_u32(addr)?; + } else { + if needs_leading_0(addr) { + s.write_char('0')?; + } + write!(s, "{:X}", addr)?; + if hex_ambiguous(addr) { + s.write_char('h')?; + } + } + s.span_end_immediate(); + Ok(true) + } +} + +/// a set of functions controlling how instructions are formatted in an [`InstructionDisplayer`]. +/// +/// generally, functions on this trait return either `Option`, where `None` indicates "data not +/// available, caller should use some kind of default fallback behavior", +/// or `Result<bool, fmt::Error>` where the bool indicates if the implementation has done the +/// formatting work that would otherwise be left up to the caller. +/// +/// functions on this trait default to returning `None` and `Ok(false)`, meaning a minimal +/// `impl DisplayRules for ... { }` will not override any display logic. +/// +/// ## interaction with `DisplayStyle` +/// +/// `DisplayStyle` controls the overall style used for instruction printing, through to how +/// registers and numbers are printed. as `DisplayRules` functions override default `yaxpeax-x86` +/// printing behavior, implementations must consider if they want to support variants of +/// `DisplayStyle` as well, or only format with an assumed `DisplayStyle`. +/// +/// the `DisplayStyle` used when formatting an instruction and invoking `DisplayRules` functions is +/// controlled by `DisplayRules::display_style`.
it is an implementation's decision to customize +/// this (as an example: [`AbsoluteAddressFormatter::with_style()`]) or to simply return one style +/// that is always used for instruction formatting. +/// +/// `DisplayStyle` variants in some cases also control the display (or non-display) of implicit +/// operands. for example, the `Intel` style writes x87 instructions with implicit operands, but +/// `Masm` omits some (such as `fld` or `fst`, where `masm.exe` will treat `st(0)` as a syntax +/// error). correspondingly, `DisplayRules::emit_operand` will be invoked only for the +/// normally-printed non-`st(0)` operand of `fld` or `fst`. in a handful of cases the differences +/// across display styles involve invoking more specific `DisplayRules` rules, particularly when +/// there may not be an explicit operand to format and a register or immediate is shown in its +/// stead; an example of this is formatting of the implicitly-read registers of `monitor` or +/// `mwait` under `DisplayStyle::Masm`. +/// +/// some other `DisplayStyle` variance includes size suffixes on opcodes, though there is no +/// comprehensive list of per-style formatting variance. +pub trait DisplayRules<S: DisplaySink> { + /// provide a `DisplayStyle` used when formatting instructions through this impl of + /// `DisplayRules`. + /// + /// if this impl of `DisplayRules` overrides some aspect of instruction formatting, it is that + /// override's responsibility to follow the `DisplayStyle` reported here. or ignore it, if you + /// so choose! + fn display_style(&self) -> DisplayStyle; + + /// report the maximum size of an instruction formatted by this `DisplayRules` implementation, + /// across all x86_32 instructions. + /// + /// `max_size()` is used to control the use of bounds checks when formatting into an + /// [`InstructionTextBuffer`]. this is fundamentally a hint, and may be ignored by future + /// versions of `yaxpeax-x86`. 
+ /// + /// # implementation guidance + /// + /// user code is extraordinarily unlikely to be able to safely return `Some` from this + /// function. + /// + /// implementations that return `Some` must never produce more than that many *bytes* of + /// output, including any output produced by `yaxpeax-x86`, in formatting an instruction. take + /// care to consider multi-byte UTF-8 codepoints, as `InstructionTextBuffer` writes into Rust + /// `alloc::string::String`, which uses that representation for character data. + /// + /// implementations that use arbitrary strings for symbols, for example, are extremely unlikely + /// to be able to uphold a fixed-max-size hint. + /// + /// note that `yaxpeax-x86` may write additional data in future `DisplayStyle` variants. new + /// instructions may have longer mnemonics. new extensions may define longer register names, or + /// memory label sizes. new encodings may have additional (longer!) decorators or otherwise + /// raise the maximum size of a formatted instruction. any of these may occur across a patch or + /// minor version. + unsafe fn max_size() -> Option<usize> { None } + + /// the address of the instruction to be printed. + /// + /// if this returns `None`, the default behavior is to show the relevant details of an + /// instruction in an address-insensitive way. see [`DisplayRules::emit_branch_addr`] and + /// [`DisplayRules::emit_address`] for more. + fn instr_addr(&self) -> Option<u32> { + None + } + + /// override all aspects of formatting the instruction. + fn emit_instruction(&self, instr: &Instruction, s: &mut S) -> Result<bool, fmt::Error> { + let _ = instr; + let _ = s; + Ok(false) + } + + /// override the display of the `op_idx`'th explicit operand in `instr`. + /// + /// if a memory access, the overridden region includes the operand's memory size label and + /// segment override. if the operand has descriptors (like AVX512 rounding or broadcast modes), + /// they are overridden here as well.
+ fn emit_operand(&self, instr: &Instruction, op_idx: u8, s: &mut S) -> Result<bool, fmt::Error> { + let _ = instr; + let _ = op_idx; + let _ = s; + Ok(false) + } + + fn emit_register(&self, reg: RegSpec, s: &mut S) -> Result<bool, fmt::Error> { + let _ = reg; + let _ = s; + Ok(false) + } + + /// override the display of an immediate in an instruction. + /// + /// this is the `0x50` in `push 0x50`, as well as `0x60` in `ret 0x60`, and `0x500098` in + /// `mov eax, 0x500098`. this function is a unified formatter for all sizes of immediate; the + /// size of the encoded value is not reported. + fn emit_signed_immediate(&self, imm: i32, s: &mut S) -> Result<bool, fmt::Error> { + let _ = imm; + let _ = s; + Ok(false) + } + + /// override the display of an unsigned immediate in an instruction. + /// + /// this is typically an integer involved in SIMD instruction configuration, or immediates in + /// instructions that operate on single-byte registers. that is, this is the `0xa4` in + /// `cmp al, 0xa4`. this also reports u16 immediates as in `ret 0x1234` or `retf 0x1234`. + fn emit_unsigned_immediate(&self, imm: u32, s: &mut S) -> Result<bool, fmt::Error> { + let _ = imm; + let _ = s; + Ok(false) + } + + /// override the display of a conditional or unconditional relative branch target. + /// + /// this is the `$+0x60` in `jmp $+0x60`. the default behavior is to check + /// [`DisplayRules::instr_addr()`] for the address of this instruction, convert the relative + /// branch to an absolute destination, and call [`DisplayRules::emit_address()`] with that + /// address. + /// + /// note that custom implementations of `emit_branch_addr` may want to retain the above + /// behavior.
+ fn emit_branch_addr(&self, instr_len: AddressDiff<u32>, rel: i32, s: &mut S) -> Result<bool, fmt::Error> { + let Some(ip) = self.instr_addr() else { + return Ok(false); + }; + + let next = ip.wrapping_offset(instr_len); + let dest = next.wrapping_add(rel as u32); + self.emit_address(dest, s) + } + + /// override the display of a literal absolute address. + /// + /// this is the `0x10` in `mov eax, gs:[0x10]`, as well as + /// the `0x12345678` in `add eax, [0x12345678]`. the default behavior is to simply call + /// [`DisplayRules::emit_address`] with the absolute value being dereferenced. + fn emit_absolute_address(&self, abs: u32, s: &mut S) -> Result<bool, fmt::Error> { + self.emit_address(abs, s) + } + + /// write some address out to the provided `DisplaySink`. + /// + /// notably, this does *not* include values like relocated immediates in expressions like + /// `mov eax, fn_ptr`; immediates are always printed through [`DisplayRules::emit_signed_immediate`] or [`DisplayRules::emit_unsigned_immediate`]. + fn emit_address(&self, addr: u32, s: &mut S) -> Result<bool, fmt::Error> { + let _ = addr; + let _ = s; + Ok(false) + } + + /// write the destination of an absolute far jump or call to the provided `DisplaySink`. + fn emit_far_address(&self, seg: u16, offs: u32, s: &mut S) -> Result<bool, fmt::Error> { + let _ = seg; + let _ = offs; + let _ = s; + Ok(false) + } +} + /* * Can't implement this as accepting a formatter because rust * doesn't let me build one outside println! or write! or whatever. @@ -2201,22 +2581,38 @@ impl <'instr, T: fmt::Write, Y: YaxColors> Colorize<T, Y> for InstructionDisplay struct NoContext; impl Instruction { - /// format this instruction into `out` as a plain text string. + /// format this instruction into `out` as a plain text string, in the default display + /// configuration for an `x86` instruction (that is, roughly Intel syntax). + /// + /// for more customizable formatting options, see [`Instruction::display_with`].
#[cfg_attr(feature="profiling", inline(never))] pub fn write_to<T: fmt::Write>(&self, out: &mut T) -> fmt::Result { let mut out = yaxpeax_arch::display::FmtSink::new(out); - contextualize_intel(self, &mut out) + let rules = DefaultRules::for_style(DisplayStyle::Intel); + format_intel(self, &rules, &mut out) } /// format this instruction into `out`, which may perform additional styling based on its /// `DisplaySink` implementation. #[cfg_attr(feature="profiling", inline(never))] pub fn display_into<T: DisplaySink>(&self, out: &mut T) -> fmt::Result { - contextualize_intel(self, out) + let rules = DefaultRules::for_style(DisplayStyle::Intel); + format_intel(self, &rules, out) + } + + /// format this instruction into `out`, using the provided `rules` to potentially override the + /// default formatting for a given `DisplayStyle`. + #[cfg_attr(feature="profiling", inline(never))] + pub fn format_into<T: DisplaySink, Rules: DisplayRules<T>>(&self, rules: &Rules, out: &mut T) -> fmt::Result { + format_intel(self, rules, out) } } -pub(crate) fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn format_intel<T: DisplaySink, R: DisplayRules<T>>(instr: &Instruction, rules: &R, out: &mut T) -> fmt::Result { + if rules.emit_instruction(instr, out)? { + return Ok(()); + } + if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -2242,30 +2638,45 @@ pub(crate) fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut if instr.operand_count > 0 { out.write_fixed_size(" ")?; - if instr.visit_operand(0, &mut RelativeBranchPrinter { - inst: instr, - out, - })? { - return Ok(()); + if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { + if RELATIVE_BRANCHES.contains(&instr.opcode) { + // relative branch instructions have only one operand, so print this one and we're + // done. relative branch instructions *also* have a ... relative branch ... 
as + // their only operand, so don't `emit_operand()` which would confuse these for a + // "normal" immediate. + if rules.emit_operand(instr, 0, out)? { + return Ok(()); + } + } + if instr.visit_operand(0, &mut RelativeBranchPrinter { + inst: instr, + rules, + out, + })? { + return Ok(()); + } } - if instr.operands[0 as usize].is_memory() { - out.write_mem_size_label(instr.mem_size)?; - if let Some(prefix) = instr.segment_override_for_op(0) { - let name = prefix.name(); - out.write_char(' ')?; - out.write_char(name[0] as char)?; - out.write_char(name[1] as char)?; - out.write_fixed_size(":")?; - } else { - out.write_fixed_size(" ")?; + if !rules.emit_operand(instr, 0, out)? { + if instr.operands[0 as usize].is_memory() { + out.write_mem_size_label(instr.mem_size)?; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + out.write_char(' ')?; + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } else { + out.write_fixed_size(" ")?; + } } - } - let mut displayer = DisplayingOperandVisitor { - f: out, - }; - instr.visit_operand(0 as u8, &mut displayer)?; + let mut displayer = DisplayingOperandVisitor { + f: out, + rules, + }; + instr.visit_operand(0 as u8, &mut displayer)?; + } for i in 1..instr.operand_count { // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to @@ -2279,6 +2690,11 @@ pub(crate) fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut unsafe { unreachable_unchecked(); } } + if rules.emit_operand(instr, i, out)? { + // if the rule printed an operand out, continue on to the next one! 
+ continue; + } + if instr.operands[i as usize].is_memory() { out.write_mem_size_label(instr.mem_size)?; if let Some(prefix) = instr.segment_override_for_op(i) { @@ -2294,6 +2710,7 @@ pub(crate) fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut let mut displayer = DisplayingOperandVisitor { f: out, + rules, }; instr.visit_operand(i as u8, &mut displayer)?; @@ -2358,7 +2775,7 @@ pub(crate) fn contextualize_intel<T: DisplaySink>(instr: &Instruction, out: &mut Ok(()) } -pub(crate) fn contextualize_c<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn format_c<T: DisplaySink, R: DisplayRules<T>>(instr: &Instruction, rules: &R, out: &mut T) -> fmt::Result { let mut brace_count = 0; let mut prefixed = false; @@ -2404,294 +2821,421 @@ pub(crate) fn contextualize_c<T: DisplaySink>(instr: &Instruction, out: &mut T) } } - fn write_jmp_operand<T: fmt::Write>(op: Operand, out: &mut T) -> fmt::Result { - let mut out = yaxpeax_arch::display::FmtSink::new(out); - use core::fmt::Write; - match op { - Operand::ImmediateI8 { imm: rel } => { - let rel = if rel >= 0 { - out.write_str("$+")?; - rel as u8 - } else { - out.write_str("$-")?; - rel.unsigned_abs() + fn write_jmp_operand<T: DisplaySink, Rules: DisplayRules<T>>(instr: &Instruction, idx: usize, rules: &Rules, out: &mut T) -> fmt::Result { + match instr.operands[idx] { + OperandSpec::ImmI8 => { + let mut rel_printer = RelativeBranchPrinter { + inst: instr, out, rules }; - out.write_prefixed_u8(rel) + instr.visit_operand(idx as u8, &mut rel_printer) + .map(|x| assert!(x)) } - Operand::ImmediateI32 { imm: rel } => { - let rel = if rel >= 0 { - out.write_str("$+")?; - rel as u32 - } else { - out.write_str("$-")?; - rel.unsigned_abs() + OperandSpec::ImmI32 => { + let mut rel_printer = RelativeBranchPrinter { + inst: instr, out, rules }; - out.write_prefixed_u32(rel) + instr.visit_operand(idx as u8, &mut rel_printer) + .map(|x| assert!(x)) } - other => { - write!(out, "{}", other) + 
_other => { + let mut displayer = DisplayingOperandVisitor { + f: out, + rules, + }; + instr.visit_operand(idx as u8, &mut displayer) } } } + let mut displayer = DisplayingOperandVisitor { + f: out, + rules, + }; + match instr.opcode { - Opcode::Invalid => { out.write_str("invalid")?; }, + Opcode::Invalid => { displayer.f.write_str("invalid")?; }, Opcode::MOVS => { - out.write_str("es:[edi++] = ds:[esi++]")?; + displayer.f.write_str("es:[edi++] = ds:[esi++]")?; }, Opcode::CMPS => { - out.write_str("eflags = flags(ds:[esi++] - es:[edi++])")?; + displayer.f.write_str("eflags = flags(ds:[esi++] - es:[edi++])")?; }, Opcode::LODS => { // TODO: size - out.write_str("rax = ds:[esi++]")?; + displayer.f.write_str("rax = ds:[esi++]")?; }, Opcode::STOS => { // TODO: size - out.write_str("es:[edi++] = rax")?; + displayer.f.write_str("es:[edi++] = rax")?; }, Opcode::INS => { // TODO: size - out.write_str("es:[edi++] = port(dx)")?; + displayer.f.write_str("es:[edi++] = port(dx)")?; }, Opcode::OUTS => { // TODO: size - out.write_str("port(dx) = ds:[esi++]")?; + displayer.f.write_str("port(dx) = ds:[esi++]")?; } Opcode::ADD => { - write!(out, "{} += {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::OR => { - write!(out, "{} |= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" |= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::ADC => { - write!(out, "{} += {} + eflags.cf", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("eflags.cf")?; } Opcode::ADCX => { - write!(out, "{} += {} + eflags.cf", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + 
instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("eflags.cf")?; } Opcode::ADOX => { - write!(out, "{} += {} + eflags.of", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" += ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("eflags.of")?; } Opcode::SBB => { - write!(out, "{} -= {} + eflags.cf", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" -= ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" + ")?; + displayer.f.write_str("eflags.cf")?; } Opcode::AND => { - write!(out, "{} &= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" &= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::XOR => { - write!(out, "{} ^= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" ^= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SUB => { - write!(out, "{} -= {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" -= ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::CMP => { - write!(out, "eflags = flags({} - {})", instr.operand(0), instr.operand(1))?; + displayer.f.write_str("eflags")?; + displayer.f.write_str(" = flags(")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" - ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::TEST => { - write!(out, "eflags = flags({} & {})", instr.operand(0), instr.operand(1))?; + displayer.f.write_str("eflags")?; + displayer.f.write_str(" = flags(")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" & ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::XADD => { - write!(out, "({}, {}) = ({} + {}, 
{})", instr.operand(0), instr.operand(1), instr.operand(0), instr.operand(1), instr.operand(0))?; + // something like "({}, {}) = ({} + {}, {})"; + displayer.f.write_char('(')?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(", ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(") = (")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" + ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(", ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_char(')')?; } Opcode::BT => { - write!(out, "bt")?; + displayer.f.write_str("bt")?; } Opcode::BTS => { - write!(out, "bts")?; + displayer.f.write_str("bts")?; } Opcode::BTC => { - write!(out, "btc")?; + displayer.f.write_str("btc")?; } Opcode::BSR => { - write!(out, "{} = msb({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = msb(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::BSF => { - write!(out, "{} = lsb({}) (x86 bsf)", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = lsb(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(") (x86 bsf)")?; } Opcode::TZCNT => { - write!(out, "{} = lsb({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = lsb(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::MOV => { - write!(out, "{} = {}", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SAR => { - write!(out, "{} = {} >>> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" >>> ")?; + 
instr.visit_operand(1, &mut displayer)?; } Opcode::SAL => { - write!(out, "{} = {} <<< {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" <<< ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SHR => { - write!(out, "{} = {} >> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" >> ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SHRX => { - write!(out, "{} = {} >> {} (x86 shrx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" >> ")?; + instr.visit_operand(2, &mut displayer)?; + displayer.f.write_str(" (x86 shrx)")?; } Opcode::SHL => { - write!(out, "{} = {} << {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" << ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::SHLX => { - write!(out, "{} = {} << {} (x86 shlx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" << ")?; + instr.visit_operand(2, &mut displayer)?; + displayer.f.write_str(" (x86 shlx)")?; } Opcode::ROR => { - write!(out, "{} = {} ror {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" ror ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::RORX => { - write!(out, 
"{} = {} ror {} (x86 rorx)", instr.operand(0), instr.operand(1), instr.operand(2))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(" ror ")?; + instr.visit_operand(2, &mut displayer)?; + displayer.f.write_str(" (x86 rorx)")?; } Opcode::ROL => { - write!(out, "{} = {} rol {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" rol ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::RCR => { - write!(out, "{} = {} rcr {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" rcr ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::RCL => { - write!(out, "{} = {} rcl {}", instr.operand(0), instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" rcl ")?; + instr.visit_operand(1, &mut displayer)?; } Opcode::PUSH => { - write!(out, "push({})", instr.operand(0))?; + displayer.f.write_str("push(")?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::POP => { - write!(out, "{} = pop()", instr.operand(0))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = pop()")?; } Opcode::MOVD => { - write!(out, "{} = movd({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = movd(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::MOVQ => { - write!(out, "{} = movq({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = movq(")?; + instr.visit_operand(1, 
&mut displayer)?; + displayer.f.write_str(")")?; } Opcode::MOVNTQ => { - write!(out, "{} = movntq({})", instr.operand(0), instr.operand(1))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = movntq(")?; + instr.visit_operand(1, &mut displayer)?; + displayer.f.write_str(")")?; } Opcode::INC => { if instr.operand(0).is_memory() { match instr.mem_size { - 1 => { write!(out, "byte {}++", instr.operand(0))?; }, - 2 => { write!(out, "word {}++", instr.operand(0))?; }, - 4 => { write!(out, "dword {}++", instr.operand(0))?; }, - _ => { write!(out, "qword {}++", instr.operand(0))?; }, // sizes that are not 1, 2, or 4, *better* be 8. + 1 => { displayer.f.write_str("byte ")?; }, + 2 => { displayer.f.write_str("word ")?; }, + 4 => { displayer.f.write_str("dword ")?; }, + _ => { displayer.f.write_str("qword ")?; }, // sizes that are not 1, 2, or 4, *better* be 8. } - } else { - write!(out, "{}++", instr.operand(0))?; } + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str("++")?; } Opcode::DEC => { if instr.operand(0).is_memory() { match instr.mem_size { - 1 => { write!(out, "byte {}--", instr.operand(0))?; }, - 2 => { write!(out, "word {}--", instr.operand(0))?; }, - 4 => { write!(out, "dword {}--", instr.operand(0))?; }, - _ => { write!(out, "qword {}--", instr.operand(0))?; }, // sizes that are not 1, 2, or 4, *better* be 8. + 1 => { displayer.f.write_str("byte ")?; }, + 2 => { displayer.f.write_str("word ")?; }, + 4 => { displayer.f.write_str("dword ")?; }, + _ => { displayer.f.write_str("qword ")?; }, // sizes that are not 1, 2, or 4, *better* be 8. 
} - } else { - write!(out, "{}--", instr.operand(0))?; } + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str("--")?; } Opcode::JMP => { - out.write_str("jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; + }, + Opcode::CALL => { + displayer.f.write_str("call ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JECXZ => { - out.write_str("if ecx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if ecx == 0 then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JCXZ => { - out.write_str("if cx == 0 then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if cx == 0 then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::LOOP => { - out.write_str("cx--; if cx != 0 then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("cx--; if cx != 0 then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::LOOPZ => { - out.write_str("cx--; if cx != 0 and zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("cx--; if cx != 0 and zero(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::LOOPNZ => { - out.write_str("cx--; if cx != 0 and !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("cx--; if cx != 0 and !zero(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JO => { - out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if _(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNO => { - out.write_str("if _(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if _(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, 
displayer.f)?; }, Opcode::JB => { - out.write_str("if /* unsigned */ below(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ below(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNB => { - out.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ above_or_equal(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JZ => { - out.write_str("if zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if zero(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNZ => { - out.write_str("if !zero(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if !zero(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNA => { - out.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ below_or_equal(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JA => { - out.write_str("if /* unsigned */ above(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* unsigned */ above(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JS => { - out.write_str("if signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if signed(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNS => { - out.write_str("if !signed(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if !signed(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JP => { - out.write_str("if parity(rflags) then 
jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if parity(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JNP => { - out.write_str("if !parity(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if !parity(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JL => { - out.write_str("if /* signed */ less(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ less(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JGE => { - out.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ greater_or_equal(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JLE => { - out.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ less_or_equal(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::JG => { - out.write_str("if /* signed */ greater(rflags) then jmp ")?; - write_jmp_operand(instr.operand(0), out)?; + displayer.f.write_str("if /* signed */ greater(eflags) then jmp ")?; + write_jmp_operand(instr, 0, rules, displayer.f)?; }, Opcode::NOP => { - write!(out, "nop")?; + displayer.f.write_str("nop")?; } _ => { if instr.operand_count() == 0 { - write!(out, "{}()", instr.opcode())?; + displayer.f.write_opcode(instr.opcode)?; + displayer.f.write_str("()")?; } else { - write!(out, "{} = {}({}", instr.operand(0), instr.opcode(), instr.operand(0))?; + instr.visit_operand(0, &mut displayer)?; + displayer.f.write_str(" = ")?; + displayer.f.write_opcode(instr.opcode)?; + displayer.f.write_str("(")?; + instr.visit_operand(0, &mut displayer)?; let mut comma = true; for i in 1..instr.operand_count() { if comma { 
- write!(out, ", ")?; + displayer.f.write_str(", ")?; } - write!(out, "{}", instr.operand(i))?; + instr.visit_operand(i, &mut displayer)?; comma = true; } - write!(out, ")")?; + displayer.f.write_str(")")?; } } } @@ -2715,15 +3259,16 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual<u32, NoContext, T, Y> let mut out = yaxpeax_arch::display::FmtSink::new(out); + let rules = DefaultRules::for_style(*style); match style { DisplayStyle::Intel => { - contextualize_intel(instr, &mut out) + format_intel(instr, &rules, &mut out) } DisplayStyle::C => { - contextualize_c(instr, &mut out) + format_c(instr, &rules, &mut out) } DisplayStyle::Masm => { - masm::contextualize(&instr, &mut out) + masm::contextualize(&instr, &rules, &mut out) } } } @@ -2734,6 +3279,12 @@ impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual<u32, NoContext, T, Y> #[cfg(feature="std")] impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::String>], T, Y> for Instruction { fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option<alloc::string::String>]>, out: &mut T) -> fmt::Result { + let mut out = yaxpeax_arch::display::FmtSink::new(out); + let out = &mut out; + let rules = DefaultRules::for_style(DisplayStyle::Intel); + let rules = &rules; + use core::fmt::Write; + if self.prefixes.lock() { write!(out, "lock ")?; } @@ -2763,8 +3314,12 @@ impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::St } } } - let x = Operand::from_spec(self, self.operands[0]); - x.colorize(colors, out)?; + + let mut displayer = DisplayingOperandVisitor { + f: out, + rules, + }; + self.visit_operand(0, &mut displayer)?; } }; for i in 1..self.operand_count { @@ -2781,8 +3336,11 @@ impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::St if let Some(prefix) = self.segment_override_for_op(1) { write!(out, "{}:", prefix)?; } - let x = Operand::from_spec(self, self.operands[i]); - x.colorize(colors, out)? 
+ let mut displayer = DisplayingOperandVisitor { + f: out, + rules, + }; + self.visit_operand(i as u8, &mut displayer)?; } } } @@ -2806,12 +3364,13 @@ static RELATIVE_BRANCHES: [Opcode; 23] = [ Opcode::JLE, Opcode::JG, ]; -struct RelativeBranchPrinter<'a, F: DisplaySink> { +struct RelativeBranchPrinter<'a, F: DisplaySink, Rules: DisplayRules<F>> { inst: &'a Instruction, out: &'a mut F, + rules: &'a Rules, } -impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> { +impl<'a, F: DisplaySink, Rules: DisplayRules<F>> super::OperandVisitor for RelativeBranchPrinter<'a, F, Rules> { // return true if we printed a relative branch offset, false otherwise type Ok = bool; // but errors are errors @@ -2829,6 +3388,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, rel: i8) -> Result<Self::Ok, Self::Error> { if RELATIVE_BRANCHES.contains(&self.inst.opcode) { + if self.rules.emit_branch_addr(self.inst.len(), rel as i32, &mut self.out)? { + // the display rule declared it has fully printed the relative address, so we have + // nothing to do. + return Ok(true); + } + self.out.write_char('$')?; let mut v = rel as u8; if rel < 0 { @@ -2847,6 +3412,12 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> #[cfg_attr(feature="profiling", inline(never))] fn visit_i32(&mut self, rel: i32) -> Result<Self::Ok, Self::Error> { if RELATIVE_BRANCHES.contains(&self.inst.opcode) || self.inst.opcode == Opcode::XBEGIN { + if self.rules.emit_branch_addr(self.inst.len(), rel, &mut self.out)? { + // the display rule declared it has fully printed the relative address, so we have + // nothing to do. 
+ return Ok(true); + } + self.out.write_char('$')?; let mut v = rel as u32; if rel < 0 { @@ -2936,8 +3507,9 @@ impl<'a, F: DisplaySink> super::OperandVisitor for RelativeBranchPrinter<'a, F> #[cfg(feature="alloc")] mod buffer_sink { use core::fmt; - use super::super::{DisplayStyle, InstructionDisplayer}; - use super::{contextualize_c, contextualize_intel}; + use super::super::{DisplayRules, DisplayStyle, Instruction, InstructionDisplayer}; + use super::{format_c, format_intel}; + use super::DefaultRules; /// helper to format `amd64` instructions with highest throughput and least configuration. this is /// functionally a buffer for one x86 instruction's text. @@ -3001,15 +3573,52 @@ mod buffer_sink { // never escape `format_inst`. let mut handle = unsafe { self.write_handle() }; + let rules = DefaultRules::for_style(display.style); match display.style { DisplayStyle::Intel => { - contextualize_intel(&display.instr, &mut handle)?; + format_intel(&display.instr, &rules, &mut handle)?; + } + DisplayStyle::C => { + format_c(&display.instr, &rules, &mut handle)?; + } + DisplayStyle::Masm => { + super::masm::contextualize(&display.instr, &rules, &mut handle)?; + } + } + + Ok(self.text_str()) + } + + /// format an instruction via some rules into this buffer. returns a borrow of that same + /// internal buffer for convenience. + /// + /// this clears and reuses an internal buffer; if an instruction had been previously formatted + /// through this buffer, it will be overwritten. + pub fn format_inst_rules< + 'buf, 'instr, + Rules: for<'r> DisplayRules<yaxpeax_arch::display::InstructionTextSink<'r>> + >(&'buf mut self, instr: &'instr Instruction, rules: &Rules) -> Result<&'buf str, fmt::Error> { + let Some(rule_max) = (unsafe { Rules::max_size() }) else { + return Err(fmt::Error); + }; + + if self.content.capacity() < rule_max { + return Err(fmt::Error); + } + + // Safety: this sink is used to format exactly one instruction and then dropped. 
it can + // never escape `format_inst`. + let mut handle = unsafe { self.write_handle() }; + + match rules.display_style() { + DisplayStyle::Intel => { + format_intel(&instr, rules, &mut handle)?; } DisplayStyle::C => { - contextualize_c(&display.instr, &mut handle)?; + format_c(&instr, rules, &mut handle)?; } DisplayStyle::Masm => { - super::masm::contextualize(&display.instr, &mut handle)?; + super::masm::contextualize(&instr, rules, &mut handle)?; } } diff --git a/src/real_mode/display/masm.rs b/src/real_mode/display/masm.rs index 2885fc5..9f0097d 100644 --- a/src/real_mode/display/masm.rs +++ b/src/real_mode/display/masm.rs @@ -4,25 +4,29 @@ use yaxpeax_arch::AddressBase; use yaxpeax_arch::LengthedInstruction; use crate::real_mode::{ + DisplayRules, RegSpec, Opcode, Operand, OperandSpec, MergeMode, SaeMode, Instruction, RegisterBank, - display::DisplaySinkExt, OperandVisitor, + display::DisplaySinkExt, }; use yaxpeax_arch::display::DisplaySink; use yaxpeax_arch::safer_unchecked::GetSaferUnchecked as _; use yaxpeax_arch::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; -struct DisplayingOperandVisitor<'a, T> { +struct DisplayingOperandVisitor<'a, 'rules, T, R> { f: &'a mut T, - show_sae: bool, - sae_mode: Option<SaeMode>, + rules: &'rules R, } -impl<'a, T> DisplayingOperandVisitor<'a, T> { - pub fn new(f: &'a mut T) -> Self { - Self { f, show_sae: false, sae_mode: None } +impl<T: DisplaySink, R: DisplayRules<T>> DisplayingOperandVisitor<'_, '_, T, R> { + fn write_register(&mut self, reg: RegSpec) -> Result<(), core::fmt::Error> { + if self.rules.emit_register(reg, &mut self.f)? 
{ + return Ok(()); + } + + self.f.write_reg(reg) } } @@ -64,12 +68,13 @@ static RELATIVE_BRANCHES: [Opcode; 23] = [ Opcode::JLE, Opcode::JG, ]; -struct RelativeBranchPrinter<'a, F: DisplaySink> { +struct RelativeBranchPrinter<'a, F: DisplaySink, Rules: DisplayRules<F>> { inst: &'a Instruction, out: &'a mut F, + rules: &'a Rules, } -impl<'a, F: DisplaySink> crate::real_mode::OperandVisitor for RelativeBranchPrinter<'a, F> { +impl<'a, F: DisplaySink, Rules: DisplayRules<F>> crate::real_mode::OperandVisitor for RelativeBranchPrinter<'a, F, Rules> { // return true if we printed a relative branch offset, false otherwise type Ok = bool; // but errors are errors @@ -96,12 +101,14 @@ impl<'a, F: DisplaySink> crate::real_mode::OperandVisitor for RelativeBranchPrin } else { self.out.write_char('+')?; } - if needs_leading_0(v as u64) { - self.out.write_char('0')?; - } - write!(self.out, "{:X}", v)?; - if hex_ambiguous(v as u64) { - self.out.write_char('h')?; + if !self.rules.emit_signed_immediate(rel as i32, self.out)? { + if needs_leading_0(v as u64) { + self.out.write_char('0')?; + } + write!(self.out, "{:X}", v)?; + if hex_ambiguous(v as u64) { + self.out.write_char('h')?; + } } Ok(true) } else { @@ -129,12 +136,14 @@ impl<'a, F: DisplaySink> crate::real_mode::OperandVisitor for RelativeBranchPrin } else { self.out.write_char('+')?; } - if needs_leading_0(v as u64) { - self.out.write_char('0')?; - } - write!(self.out, "{:X}", v)?; - if hex_ambiguous(v as u64) { - self.out.write_char('h')?; + if !self.rules.emit_signed_immediate(rel as i32, self.out)? 
{ + if needs_leading_0(v as u64) { + self.out.write_char('0')?; + } + write!(self.out, "{:X}", v)?; + if hex_ambiguous(v as u64) { + self.out.write_char('h')?; + } } Ok(true) } else { @@ -232,12 +241,16 @@ fn masm_displacement<T: core::fmt::Write>(f: &mut T, disp: i32) -> Result<(), co Ok(()) } -impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisitor<'_, T> { +impl <T: DisplaySink, R: DisplayRules<T>> crate::real_mode::OperandVisitor for DisplayingOperandVisitor<'_, '_, T, R> { type Ok = (); type Error = core::fmt::Error; #[cfg_attr(feature="profiling", inline(never))] fn visit_u8(&mut self, imm: u8) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_unsigned_immediate(imm as u32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); if needs_leading_0(imm as u64) { self.f.write_char('0')?; @@ -251,6 +264,10 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_i8(&mut self, imm: i8) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_signed_immediate(imm as i32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let imm = imm as i32 as u32; if needs_leading_0(imm as u64) { @@ -265,6 +282,10 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_u16(&mut self, imm: u16) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_unsigned_immediate(imm as u32, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); if needs_leading_0(imm as u64) { self.f.write_char('0')?; @@ -278,6 +299,10 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_i16(&mut self, imm: i16) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_signed_immediate(imm as i32, self.f)? 
{ + return Ok(()); + } + self.f.span_start_immediate(); let imm = imm as i32 as u32; if needs_leading_0(imm as u64) { @@ -292,6 +317,10 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] fn visit_u32(&mut self, imm: u32) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_unsigned_immediate(imm, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); if needs_leading_0(imm as u64) { self.f.write_char('0')?; @@ -304,6 +333,10 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi Ok(()) } fn visit_i32(&mut self, imm: i32) -> Result<Self::Ok, Self::Error> { + if self.rules.emit_signed_immediate(imm, self.f)? { + return Ok(()); + } + self.f.span_start_immediate(); let imm = imm as u32; if needs_leading_0(imm as u64) { @@ -333,7 +366,7 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi } Ok(()) } - fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, sae_mode: crate::real_mode::SaeMode) -> Result<Self::Ok, Self::Error> { + fn visit_reg_mask_merge_sae(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode, _sae_mode: crate::real_mode::SaeMode) -> Result<Self::Ok, Self::Error> { self.f.write_reg(spec)?; if mask.num != 0 { self.f.write_fixed_size("{")?; @@ -343,8 +376,6 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi if let MergeMode::Zero = merge_mode { self.f.write_fixed_size("{z}")?; } - self.show_sae = true; - self.sae_mode = Some(sae_mode); Ok(()) } fn visit_reg_mask_merge_sae_noround(&mut self, spec: RegSpec, mask: RegSpec, merge_mode: MergeMode) -> Result<Self::Ok, Self::Error> { @@ -357,46 +388,50 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi if let MergeMode::Zero = merge_mode { self.f.write_fixed_size("{z}")?; } - self.show_sae = true; - self.sae_mode = None; Ok(()) } fn 
visit_abs_u16(&mut self, imm: u16) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.span_start_address(); - if imm >= 0x1000 && needs_leading_0(imm as u64) { - self.f.write_char('0')?; + if !self.rules.emit_absolute_address(imm as u32, self.f)? { + self.f.span_start_address(); + if imm >= 0x1000 && needs_leading_0(imm as u64) { + self.f.write_char('0')?; + } + write!(self.f, "{:04X}", imm)?; + self.f.write_char('h')?; + self.f.span_end_address(); } - write!(self.f, "{:04X}", imm)?; - self.f.write_char('h')?; - self.f.span_end_address(); self.f.write_fixed_size("]")?; Ok(()) } fn visit_abs_u32(&mut self, imm: u32) -> Result<Self::Ok, Self::Error> { self.f.write_fixed_size("[")?; - self.f.span_start_address(); - if imm >= 0x1000_0000 && needs_leading_0(imm as u64) { - self.f.write_char('0')?; + if !self.rules.emit_absolute_address(imm, self.f)? { + self.f.span_start_address(); + if imm >= 0x1000_0000 && needs_leading_0(imm as u64) { + self.f.write_char('0')?; + } + write!(self.f, "{:08X}", imm)?; + self.f.write_char('h')?; + self.f.span_end_address(); } - write!(self.f, "{:08X}", imm)?; - self.f.write_char('h')?; - self.f.span_end_address(); self.f.write_fixed_size("]")?; Ok(()) } fn visit_absolute_far_address(&mut self, segment: u16, address: u32) -> Result<Self::Ok, Self::Error> { - if needs_leading_0(segment as u64) { - self.f.write_char('0')?; - } - write!(self.f, "{:4X}", segment)?; - self.f.write_char('h')?; - self.f.write_fixed_size(":")?; - if needs_leading_0(address as u64) { - self.f.write_char('0')?; + if !self.rules.emit_far_address(segment, address, self.f)? 
{ + if needs_leading_0(segment as u64) { + self.f.write_char('0')?; + } + write!(self.f, "{:4X}", segment)?; + self.f.write_char('h')?; + self.f.write_fixed_size(":")?; + if needs_leading_0(address as u64) { + self.f.write_char('0')?; + } + write!(self.f, "{:4X}", address)?; + self.f.write_char('h')?; } - write!(self.f, "{:4X}", address)?; - self.f.write_char('h')?; Ok(()) } #[cfg_attr(not(feature="profiling"), inline(always))] @@ -570,7 +605,7 @@ impl <T: DisplaySink> crate::real_mode::OperandVisitor for DisplayingOperandVisi } #[cfg_attr(feature="profiling", inline(never))] -pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> fmt::Result { +pub(crate) fn contextualize<T: DisplaySink, R: DisplayRules<T>>(instr: &Instruction, rules: &R, out: &mut T) -> fmt::Result { if instr.xacquire() { out.write_fixed_size("xacquire ")?; } @@ -612,133 +647,119 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> match instr.opcode { Opcode::HRESET => { // dumpbin shows, and MASM needs, the implicit "eax" operand as an explicit textual operand. - out.write_fixed_size(" ")?; - instr.visit_operand(0, &mut DisplayingOperandVisitor::new(out))?; - out.write_fixed_size(", eax")?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + + visitor.f.write_fixed_size(" ")?; + if !rules.emit_operand(instr, 0, visitor.f)? { + instr.visit_operand(0, &mut visitor)?; + } + visitor.f.write_fixed_size(", ")?; + visitor.write_register(RegSpec::eax())?; return Ok(()); } Opcode::LSL => { // dumpbin shows, and MASM needs, the first and second operands to match in size. // this means `lsl eax, edx` is actually shown as `lsl eax, edx`. fix that up here. 
- let mut visitor = DisplayingOperandVisitor::new(out); + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; let Operand::Register { reg: dest } = instr.operand(0) else { panic!("impossible LSL dest"); }; if let Operand::Register { reg: mut src } = instr.operand(1) { visitor.f.write_fixed_size(" ")?; - instr.visit_operand(0, &mut visitor)?; + if !rules.emit_operand(instr, 0, visitor.f)? { + instr.visit_operand(0, &mut visitor)?; + } src.bank = dest.bank; visitor.f.write_fixed_size(", ")?; - visitor.visit_reg(src)?; + visitor.write_register(src)?; return Ok(()); } else { // don't need to do anything about memory sources }; } - Opcode::PREFETCHNTA => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::PREFETCH0 => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::PREFETCH1 => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::PREFETCH2 => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::PREFETCHW => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::INVLPG => { - // dumpbin doesn't bother with the memory size here, same for masm. 
- out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::CLFLUSH => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } - Opcode::CLFLUSHOPT => { - // dumpbin doesn't bother with the memory size here, same for masm. - out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; - - return Ok(()); - } + Opcode::PREFETCHNTA | + Opcode::PREFETCH0 | + Opcode::PREFETCH1 | + Opcode::PREFETCH2 | + Opcode::PREFETCHW | + Opcode::INVLPG | + Opcode::CLFLUSH | + Opcode::CLFLUSHOPT | Opcode::CLWB => { // dumpbin doesn't bother with the memory size here, same for masm. out.write_char(' ')?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + + if !rules.emit_operand(instr, 0, visitor.f)? { + instr.visit_operand(0, &mut visitor)?; + } return Ok(()); } Opcode::SGDT | Opcode::SIDT => { // masm uses "tbyte" as a memory size here. - out.write_fixed_size(" fword ptr ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; + out.write_char(' ')?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + + if !rules.emit_operand(instr, 0, visitor.f)? 
{ + visitor.f.write_fixed_size("fword ptr ")?; + instr.visit_operand(0, &mut visitor)?; + } return Ok(()); } Opcode::LSS => { - let mut visitor = DisplayingOperandVisitor::new(out); + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + visitor.f.write_char(' ')?; - instr.visit_operand(0, &mut visitor)?; - - match instr.mem_size { - 4 => { - visitor.f.write_fixed_size(", dword ptr ")?; - }, - 6 => { - visitor.f.write_fixed_size(", fword ptr ")?; - }, - o => { panic!("impossible memory size: {:?}", o); } + if !rules.emit_operand(instr, 0, visitor.f)? { + instr.visit_operand(0, &mut visitor)?; } + visitor.f.write_fixed_size(", ")?; + + if !rules.emit_operand(instr, 1, visitor.f)? { + match instr.mem_size { + 4 => { + visitor.f.write_fixed_size("dword ptr ")?; + }, + 6 => { + visitor.f.write_fixed_size("fword ptr ")?; + }, + o => { panic!("impossible memory size: {:?}", o); } + } - instr.visit_operand(1, &mut visitor)?; + instr.visit_operand(1, &mut visitor)?; + } return Ok(()); } Opcode::LGDT | Opcode::LIDT => { // masm uses "fword" as a memory size here. - out.write_fixed_size(" fword ptr ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; + out.write_char(' ')?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + + if !rules.emit_operand(instr, 0, visitor.f)? { + visitor.f.write_fixed_size("fword ptr ")?; + instr.visit_operand(0, &mut visitor)?; + } return Ok(()); } @@ -749,159 +770,171 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> return Ok(()); } }, - Opcode::VPSCATTERDD => { - // intel/xed/etc syntax has the mask register as an operand rather than normal memory masking. is xed wrong? 
- let mut visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_str(" dword ptr ")?; - instr.visit_operand(0, &mut visitor)?; - visitor.f.write_char('{')?; - visitor.f.write_reg(instr.regs[3])?; - visitor.f.write_fixed_size("}, ")?; - instr.visit_operand(2, &mut visitor)?; - return Ok(()); - }, - Opcode::VPSCATTERQD => { - // intel/xed/etc syntax has the mask register as an operand rather than normal memory masking. is xed wrong? - let mut visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_str(" dword ptr ")?; - instr.visit_operand(0, &mut visitor)?; - visitor.f.write_char('{')?; - visitor.f.write_reg(instr.regs[3])?; - visitor.f.write_fixed_size("}, ")?; - instr.visit_operand(2, &mut visitor)?; - return Ok(()); - }, - Opcode::VPSCATTERDQ => { + Opcode::VPSCATTERDD | Opcode::VPSCATTERQD => { // intel/xed/etc syntax has the mask register as an operand rather than normal memory masking. is xed wrong? - let mut visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_str(" qword ptr ")?; - instr.visit_operand(0, &mut visitor)?; - visitor.f.write_char('{')?; - visitor.f.write_reg(instr.regs[3])?; - visitor.f.write_fixed_size("}, ")?; - instr.visit_operand(2, &mut visitor)?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + + visitor.f.write_char(' ')?; + if !rules.emit_operand(instr, 0, visitor.f)? { + visitor.f.write_str("dword ptr ")?; + instr.visit_operand(0, &mut visitor)?; + visitor.f.write_char('{')?; + if !rules.emit_operand(instr, 1, visitor.f)? { + visitor.f.write_reg(instr.regs[3])?; + } + visitor.f.write_char('}')?; + } + visitor.f.write_fixed_size(", ")?; + if !rules.emit_operand(instr, 2, visitor.f)? { + instr.visit_operand(2, &mut visitor)?; + } return Ok(()); }, - Opcode::VPSCATTERQQ => { + Opcode::VPSCATTERDQ | Opcode::VPSCATTERQQ => { // intel/xed/etc syntax has the mask register as an operand rather than normal memory masking. is xed wrong? 
- let mut visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_str(" qword ptr ")?; - instr.visit_operand(0, &mut visitor)?; - visitor.f.write_char('{')?; - visitor.f.write_reg(instr.regs[3])?; - visitor.f.write_fixed_size("}, ")?; - instr.visit_operand(2, &mut visitor)?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + + visitor.f.write_char(' ')?; + if !rules.emit_operand(instr, 0, visitor.f)? { + visitor.f.write_str("qword ptr ")?; + instr.visit_operand(0, &mut visitor)?; + visitor.f.write_char('{')?; + if !rules.emit_operand(instr, 1, visitor.f)? { + visitor.f.write_reg(instr.regs[3])?; + } + visitor.f.write_char('}')?; + } + visitor.f.write_fixed_size(", ")?; + if !rules.emit_operand(instr, 2, visitor.f)? { + instr.visit_operand(2, &mut visitor)?; + } return Ok(()); }, - Opcode::MONITOR => { + Opcode::MONITOR | Opcode::MONITORX | Opcode::MWAITX => { // masm wants the implicit registers to all be ... explicit. - let visitor = DisplayingOperandVisitor::new(out); + let visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + visitor.f.write_char(' ')?; - visitor.f.write_reg(RegSpec::eax())?; + if !rules.emit_register(RegSpec::eax(), visitor.f)? { + visitor.f.write_reg(RegSpec::eax())?; + } visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::ecx())?; + if !rules.emit_register(RegSpec::ecx(), visitor.f)? { + visitor.f.write_reg(RegSpec::ecx())?; + } visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::edx())?; + if !rules.emit_register(RegSpec::edx(), visitor.f)? { + visitor.f.write_reg(RegSpec::edx())?; + } return Ok(()); } Opcode::MWAIT => { // masm wants the implicit registers to all be ... explicit. - let visitor = DisplayingOperandVisitor::new(out); + let visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + visitor.f.write_char(' ')?; - visitor.f.write_reg(RegSpec::eax())?; + if !rules.emit_register(RegSpec::eax(), visitor.f)? 
{ + visitor.f.write_reg(RegSpec::eax())?; + } visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::ecx())?; + if !rules.emit_register(RegSpec::ecx(), visitor.f)? { + visitor.f.write_reg(RegSpec::ecx())?; + } return Ok(()); } Opcode::INVLPGB => { // masm bug: it doesn't tolerate the mention of the second operand?! - let mut visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_char(' ')?; - instr.visit_operand(0, &mut visitor)?; - visitor.f.write_fixed_size(", ")?; - instr.visit_operand(2, &mut visitor)?; - return Ok(()); - } - Opcode::MONITORX => { - // masm wants the implicit registers to all be ... explicit. - let visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_char(' ')?; - visitor.f.write_reg(RegSpec::eax())?; - visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::ecx())?; - visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::edx())?; - return Ok(()); - } - Opcode::MWAITX => { - // masm wants the implicit registers to all be ... explicit. - let visitor = DisplayingOperandVisitor::new(out); + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + visitor.f.write_char(' ')?; - visitor.f.write_reg(RegSpec::eax())?; - visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::ecx())?; + if !rules.emit_operand(instr, 0, visitor.f)? { + instr.visit_operand(0, &mut visitor)?; + } visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::ebx())?; + if !rules.emit_operand(instr, 2, visitor.f)? { + instr.visit_operand(2, &mut visitor)?; + } return Ok(()); } Opcode::RDPRU => { // masm wants no implicit registers this time. 
return Ok(()); } - Opcode::SCAS => { + Opcode::SCAS | Opcode::STOS => { // masm does not want the implicit r/e/ax out.write_fixed_size(" ")?; - out.write_mem_size_label(instr.mem_size)?; - out.write_fixed_size(" ptr ")?; - if let Some(prefix) = instr.segment_override_for_op(0) { - let name = prefix.name(); - out.write_char(name[0] as char)?; - out.write_char(name[1] as char)?; - out.write_fixed_size(":")?; + if !rules.emit_operand(instr, 0, out)? { + out.write_mem_size_label(instr.mem_size)?; + out.write_fixed_size(" ptr ")?; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + out.write_char(name[0] as char)?; + out.write_char(name[1] as char)?; + out.write_fixed_size(":")?; + } + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + instr.visit_operand(0, &mut visitor)?; } - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; return Ok(()); } Opcode::LODS => { // masm does not want the implicit r/e/ax out.write_fixed_size(" ")?; - out.write_mem_size_label(instr.mem_size)?; - out.write_fixed_size(" ptr ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(1, &mut visitor)?; - return Ok(()); - } - Opcode::STOS => { - // masm does not want the implicit r/e/ax - out.write_fixed_size(" ")?; - out.write_mem_size_label(instr.mem_size)?; - out.write_fixed_size(" ptr ")?; - if let Some(prefix) = instr.segment_override_for_op(0) { - let name = prefix.name(); - out.write_char(name[0] as char)?; - out.write_char(name[1] as char)?; - out.write_fixed_size(":")?; + if !rules.emit_operand(instr, 1, out)? 
{ + out.write_mem_size_label(instr.mem_size)?; + out.write_fixed_size(" ptr ")?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + instr.visit_operand(1, &mut visitor)?; } - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; return Ok(()); } Opcode::PSMASH => { // masm wants the implicit eax operand out.write_fixed_size(" ")?; - out.write_reg(RegSpec::eax())?; + if !rules.emit_register(RegSpec::eax(), out)? { + out.write_reg(RegSpec::eax())?; + } return Ok(()); } Opcode::PVALIDATE | Opcode::RMPADJUST | Opcode::RMPUPDATE => { // masm wants the implicit registers to all be ... explicit. - let visitor = DisplayingOperandVisitor::new(out); + let visitor = DisplayingOperandVisitor { + f: out, + rules, + }; visitor.f.write_char(' ')?; - visitor.f.write_reg(RegSpec::eax())?; + if !rules.emit_register(RegSpec::eax(), visitor.f)? { + visitor.f.write_reg(RegSpec::eax())?; + } visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::ecx())?; + if !rules.emit_register(RegSpec::ecx(), visitor.f)? { + visitor.f.write_reg(RegSpec::ecx())?; + } visitor.f.write_fixed_size(", ")?; - visitor.f.write_reg(RegSpec::edx())?; + if !rules.emit_register(RegSpec::edx(), visitor.f)? { + visitor.f.write_reg(RegSpec::edx())?; + } return Ok(()); } Opcode::INCSSP => { @@ -929,35 +962,50 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> Opcode::PBLENDVB | Opcode::BLENDVPS | Opcode::BLENDVPD | Opcode::SHA256RNDS2 => { // masm wants the implicit xmm0 operand as ... explicit. out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + if !rules.emit_operand(instr, 0, visitor.f)? 
{ + instr.visit_operand(0, &mut visitor)?; + } visitor.f.write_str(", ")?; - if instr.operands[1].is_memory() { - visitor.f.write_mem_size_label(instr.mem_size)?; - visitor.f.write_fixed_size(" ptr")?; - visitor.f.write_char(' ')?; - if let Some(prefix) = instr.segment_override_for_op(1) { - let name = prefix.name(); - visitor.f.write_char(name[0] as char)?; - visitor.f.write_char(name[1] as char)?; - visitor.f.write_fixed_size(":")?; + if !rules.emit_operand(instr, 1, visitor.f)? { + if instr.operands[1].is_memory() { + visitor.f.write_mem_size_label(instr.mem_size)?; + visitor.f.write_fixed_size(" ptr")?; + visitor.f.write_char(' ')?; + if let Some(prefix) = instr.segment_override_for_op(1) { + let name = prefix.name(); + visitor.f.write_char(name[0] as char)?; + visitor.f.write_char(name[1] as char)?; + visitor.f.write_fixed_size(":")?; + } } + instr.visit_operand(1, &mut visitor)?; } - - instr.visit_operand(1, &mut visitor)?; visitor.f.write_str(", ")?; - visitor.f.write_reg(RegSpec::xmm0())?; + if !rules.emit_register(RegSpec::xmm0(), visitor.f)? { + visitor.f.write_reg(RegSpec::xmm0())?; + } return Ok(()); } Opcode::LEA => { // dumpbin/masm don't want the `<word> ptr` prefix on the memory access here.. out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - instr.visit_operand(0, &mut visitor)?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + if !rules.emit_operand(instr, 0, visitor.f)? { + instr.visit_operand(0, &mut visitor)?; + } visitor.f.write_str(", ")?; - instr.visit_operand(1, &mut visitor)?; + if !rules.emit_operand(instr, 1, visitor.f)? { + instr.visit_operand(1, &mut visitor)?; + } return Ok(()); } Opcode::PUSHF => { @@ -975,21 +1023,26 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> Opcode::FCOM | Opcode::FCOMP | Opcode::FICOM | Opcode::FICOMP => { // masm does not want the first operand *ever*? 
out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; - if instr.operands[1].is_memory() { - visitor.f.write_mem_size_label(instr.mem_size)?; - visitor.f.write_fixed_size(" ptr")?; - visitor.f.write_char(' ')?; - if let Some(prefix) = instr.segment_override_for_op(1) { - let name = prefix.name(); - visitor.f.write_char(name[0] as char)?; - visitor.f.write_char(name[1] as char)?; - visitor.f.write_fixed_size(":")?; + if !rules.emit_operand(instr, 1, visitor.f)? { + if instr.operands[1].is_memory() { + visitor.f.write_mem_size_label(instr.mem_size)?; + visitor.f.write_fixed_size(" ptr")?; + visitor.f.write_char(' ')?; + if let Some(prefix) = instr.segment_override_for_op(1) { + let name = prefix.name(); + visitor.f.write_char(name[0] as char)?; + visitor.f.write_char(name[1] as char)?; + visitor.f.write_fixed_size(":")?; + } } - } - instr.visit_operand(1, &mut visitor)?; + instr.visit_operand(1, &mut visitor)?; + } return Ok(()); } Opcode::FADD | Opcode::FMUL | Opcode::FSUB | Opcode::FSUBR | Opcode::FDIV | Opcode::FDIVR | @@ -997,105 +1050,123 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> Opcode::FIADD | Opcode::FIMUL | Opcode::FISUB | Opcode::FISUBR | Opcode::FIDIV | Opcode::FIDIVR | Opcode::FCMOVB | Opcode::FCMOVE | Opcode::FCMOVBE | Opcode::FCMOVU | Opcode::FCMOVNB | Opcode::FCMOVNE | Opcode::FCMOVNBE | Opcode::FCMOVNU | Opcode::FUCOMI | Opcode::FCOMI | Opcode::FUCOMIP | Opcode::FCOMIP => { + out.write_fixed_size(" ")?; + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; + if instr.operands[1].is_memory() { // masm does not want to see the implicit st(0). 
- out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - visitor.f.write_mem_size_label(instr.mem_size)?; - visitor.f.write_fixed_size(" ptr")?; - visitor.f.write_char(' ')?; - if let Some(prefix) = instr.segment_override_for_op(1) { - let name = prefix.name(); - visitor.f.write_char(name[0] as char)?; - visitor.f.write_char(name[1] as char)?; - visitor.f.write_fixed_size(":")?; + if !rules.emit_operand(instr, 1, visitor.f)? { + visitor.f.write_mem_size_label(instr.mem_size)?; + visitor.f.write_fixed_size(" ptr")?; + visitor.f.write_char(' ')?; + if let Some(prefix) = instr.segment_override_for_op(1) { + let name = prefix.name(); + visitor.f.write_char(name[0] as char)?; + visitor.f.write_char(name[1] as char)?; + visitor.f.write_fixed_size(":")?; + } + instr.visit_operand(1, &mut visitor)?; } - instr.visit_operand(1, &mut visitor)?; - return Ok(()); } else { // dumpbin writes `st` instead of `st(0)` as the first operand in reg-reg ops, replicate this. masm doesn't care. - out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); - if instr.operands[0] == OperandSpec::RegRRR { - if instr.regs[0] == RegSpec::st0() { - visitor.f.write_fixed_size("st")?; - } else { - instr.visit_operand(0, &mut visitor)?; + if !rules.emit_operand(instr, 0, visitor.f)? { + if instr.regs[0] == RegSpec::st0() { + visitor.f.write_fixed_size("st")?; + } else { + instr.visit_operand(0, &mut visitor)?; + } } visitor.f.write_fixed_size(", ")?; - instr.visit_operand(1, &mut visitor)?; + if !rules.emit_operand(instr, 1, visitor.f)? { + instr.visit_operand(1, &mut visitor)?; + } } else { debug_assert!(instr.operands[1] == OperandSpec::RegRRR); - instr.visit_operand(0, &mut visitor)?; + if !rules.emit_operand(instr, 0, visitor.f)? 
{ + instr.visit_operand(0, &mut visitor)?; + } visitor.f.write_fixed_size(", ")?; - if instr.regs[0] == RegSpec::st0() { - visitor.f.write_fixed_size("st")?; - } else { - instr.visit_operand(1, &mut visitor)?; + if !rules.emit_operand(instr, 1, visitor.f)? { + if instr.regs[0] == RegSpec::st0() { + visitor.f.write_fixed_size("st")?; + } else { + instr.visit_operand(1, &mut visitor)?; + } } }; - - return Ok(()); } + + return Ok(()); } Opcode::FBLD | Opcode::FLD | Opcode::FILD | Opcode::FXCH | Opcode::FUCOM | Opcode::FUCOMP => { // masm does not want to see the implicit st(0). out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; - if instr.operands[1].is_memory() { - if instr.mem_size == 10 { - visitor.f.write_fixed_size("tbyte")?; - } else { - visitor.f.write_mem_size_label(instr.mem_size)?; - } - visitor.f.write_fixed_size(" ptr")?; - visitor.f.write_char(' ')?; - if let Some(prefix) = instr.segment_override_for_op(1) { - let name = prefix.name(); - visitor.f.write_char(name[0] as char)?; - visitor.f.write_char(name[1] as char)?; - visitor.f.write_fixed_size(":")?; - } - instr.visit_operand(1, &mut visitor)?; - return Ok(()); - } else { - if instr.regs[1] == RegSpec::st0() { - visitor.f.write_fixed_size("st")?; - } else { + if !rules.emit_operand(instr, 1, visitor.f)? 
{ + if instr.operands[1].is_memory() { + if instr.mem_size == 10 { + visitor.f.write_fixed_size("tbyte")?; + } else { + visitor.f.write_mem_size_label(instr.mem_size)?; + } + visitor.f.write_fixed_size(" ptr")?; + visitor.f.write_char(' ')?; + if let Some(prefix) = instr.segment_override_for_op(1) { + let name = prefix.name(); + visitor.f.write_char(name[0] as char)?; + visitor.f.write_char(name[1] as char)?; + visitor.f.write_fixed_size(":")?; + } instr.visit_operand(1, &mut visitor)?; + } else { + if instr.regs[1] == RegSpec::st0() { + visitor.f.write_fixed_size("st")?; + } else { + instr.visit_operand(1, &mut visitor)?; + } } - return Ok(()); } + return Ok(()); } Opcode::FBSTP | Opcode::FST | Opcode::FSTP | Opcode::FIST | Opcode::FISTP | Opcode::FISTTP => { // masm does not want to see the implicit st(0). out.write_fixed_size(" ")?; - let mut visitor = DisplayingOperandVisitor::new(out); + let mut visitor = DisplayingOperandVisitor { + f: out, + rules, + }; - if instr.operands[0].is_memory() { - if instr.mem_size == 10 { - visitor.f.write_fixed_size("tbyte")?; + if !rules.emit_operand(instr, 0, visitor.f)? 
{ + if instr.operands[0].is_memory() { + if instr.mem_size == 10 { + visitor.f.write_fixed_size("tbyte")?; + } else { + visitor.f.write_mem_size_label(instr.mem_size)?; + } + visitor.f.write_fixed_size(" ptr")?; + visitor.f.write_char(' ')?; + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + visitor.f.write_char(name[0] as char)?; + visitor.f.write_char(name[1] as char)?; + visitor.f.write_fixed_size(":")?; + } + instr.visit_operand(0, &mut visitor)?; } else { - visitor.f.write_mem_size_label(instr.mem_size)?; - } - visitor.f.write_fixed_size(" ptr")?; - visitor.f.write_char(' ')?; - if let Some(prefix) = instr.segment_override_for_op(0) { - let name = prefix.name(); - visitor.f.write_char(name[0] as char)?; - visitor.f.write_char(name[1] as char)?; - visitor.f.write_fixed_size(":")?; + instr.visit_operand(0, &mut visitor)?; } - instr.visit_operand(0, &mut visitor)?; - return Ok(()); - } else { - instr.visit_operand(0, &mut visitor)?; - return Ok(()); } + return Ok(()); } _ => {} } @@ -1120,41 +1191,68 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> size_is_mmword = true; } - if instr.visit_operand(0, &mut RelativeBranchPrinter { - inst: instr, - out, - })? { - return Ok(()); - } - - if instr.operands[0 as usize].is_memory() { - // fxsave and friends get no "XXXword ptr" memory prefix, masm doesn't accept it - if instr.mem_size != 63 && instr.mem_size != 48 { // masm does not print "m384b" labels.. - if size_is_mmword && instr.mem_size == 8 { - out.write_fixed_size("mmword")?; - } else if instr.mem_size == 6 && (instr.opcode == Opcode::JMPF || instr.opcode == Opcode::CALLF) { - // "fword" in real mode instead of "far".. 
- out.write_fixed_size("fword")?; - } else { - out.write_mem_size_label(instr.mem_size)?; + if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { + if RELATIVE_BRANCHES.contains(&instr.opcode) { + // relative branch instructions have only one operand, so print this one and we're + // done. relative branch instructions *also* have a ... relative branch ... as + // their only operand, so don't `emit_operand()` which would confuse these for a + // "normal" immediate. + if rules.emit_branch_addr(instr.len(), instr.imm as i32, out)? { + return Ok(()); } - out.write_fixed_size(" ptr")?; - out.write_char(' ')?; } - if let Some(prefix) = instr.segment_override_for_op(0) { - let name = prefix.name(); - out.write_char(name[0] as char)?; - out.write_char(name[1] as char)?; - out.write_fixed_size(":")?; + + if instr.visit_operand(0, &mut RelativeBranchPrinter { + inst: instr, + rules, + out, + })? { + return Ok(()); } } + let mut show_sae = false; + let mut sae_mode = None; let mut displayer = DisplayingOperandVisitor { f: out, - show_sae: false, - sae_mode: None, + rules, }; - instr.visit_operand(0 as u8, &mut displayer)?; + + if instr.operands[0] == OperandSpec::RegRRR_maskmerge_sae || + instr.operands[0] == OperandSpec::RegRRR_maskmerge_sae_noround || + instr.operands[0] == OperandSpec::RegMMM_maskmerge_sae_noround { + show_sae = true; + if instr.operands[0] == OperandSpec::RegRRR_maskmerge_sae { + let instr_evex = instr.prefixes.evex_unchecked(); + sae_mode = Some(SaeMode::from(instr_evex.vex().l(), instr_evex.lp())); + } + } + + if !rules.emit_operand(instr, 0, displayer.f)? { + if instr.operands[0 as usize].is_memory() { + // fxsave and friends get no "XXXword ptr" memory prefix, masm doesn't accept it + if instr.mem_size != 63 && instr.mem_size != 48 { // masm does not print "m384b" labels.. 
+ if size_is_mmword && instr.mem_size == 8 { + displayer.f.write_fixed_size("mmword")?; + } else if instr.mem_size == 6 && (instr.opcode == Opcode::JMPF || instr.opcode == Opcode::CALLF) { + // "fword" in real mode instead of "far".. + displayer.f.write_fixed_size("fword")?; + } else { + displayer.f.write_mem_size_label(instr.mem_size)?; + } + displayer.f.write_fixed_size(" ptr")?; + displayer.f.write_char(' ')?; + } + if let Some(prefix) = instr.segment_override_for_op(0) { + let name = prefix.name(); + displayer.f.write_char(name[0] as char)?; + displayer.f.write_char(name[1] as char)?; + displayer.f.write_fixed_size(":")?; + } + } + + instr.visit_operand(0 as u8, &mut displayer)?; + } for i in 1..instr.operand_count { // don't worry about checking for `instr.operands[i] != Nothing`, it would be a bug to @@ -1168,6 +1266,11 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> unsafe { unreachable_unchecked(); } } + if rules.emit_operand(instr, i, displayer.f)? { + // if the rule printed an operand out, continue on to the next one! + continue; + } + if instr.operands[i as usize].is_memory() { // fxsave and friends get no "XXXword ptr" memory prefix, masm doesn't accept it if instr.mem_size != 63 && instr.mem_size != 48 { // masm does not print "m384b" labels.. 
@@ -1251,9 +1354,9 @@ pub(crate) fn contextualize<T: DisplaySink>(instr: &Instruction, out: &mut T) -> } } - if displayer.show_sae { + if show_sae { displayer.f.write_char(' ')?; - if let Some(sae_mode) = displayer.sae_mode.as_ref() { + if let Some(sae_mode) = sae_mode.as_ref() { displayer.f.write_sae_mode(*sae_mode)?; } else { displayer.f.write_str("{sae}")?; diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs index 274d472..4049a97 100644 --- a/src/real_mode/mod.rs +++ b/src/real_mode/mod.rs @@ -10,7 +10,13 @@ pub use crate::MemoryAccessSize; use crate::{Address, Word}; #[cfg(feature = "fmt")] -pub use self::display::{DisplayStyle, InstructionDisplayer}; +pub use self::display::{ + DisplayStyle, + DisplayRules, DefaultRules, + InstructionDisplayer, InstructionRuleBundle +}; +#[cfg(feature = "fmt")] +pub use self::display::AbsoluteAddressFormatter; #[cfg(all(feature = "fmt", feature = "alloc"))] pub use self::display::InstructionTextBuffer; @@ -3326,6 +3332,18 @@ impl Instruction { } } + // TODO: more docs + #[cfg(feature = "fmt")] + pub fn display_rules<'a, 'rules, Rules>( + &'a self, + rules: &'rules Rules + ) -> display::InstructionRuleBundle<'a, 'rules, Rules> { + display::InstructionRuleBundle { + instr: self, + rules, + } + } + /// does this instruction include the `xacquire` hint for hardware lock elision? pub fn xacquire(&self) -> bool { if self.prefixes.repnz() { |
