aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author iximeow <me@iximeow.net> 2022-08-27 17:11:19 -0700
committer iximeow <me@iximeow.net> 2023-01-02 08:50:23 -0800
commit 789797accee0caa6580fbba650c719a952945ac6 (patch)
tree 85fffbcc13a525bbea1df0464a85096744f0b3de
parent d2b4f3d1a454c7bbcc487ddfb2839b01dc1c9c9e (diff)
add a `generic` module for x86 disassembly
this module generally attempts to decode as 64-bit x86 instructions, on the assumption they are the most likely-desired instructions, falling back to 32-bit and then 16-bit decoding, in order. translation from a 64-bit `long_mode::Instruction` to `generic::Instruction` is close to free, where `protected_mode::Instruction` and `real_mode::Instruction` may be a little more costly in time but should still not be too bad. docs still need much touching up. most docs reference the `long_mode` structures and enums they're strongly inspired by.
-rw-r--r-- src/generic/display.rs 977
-rw-r--r-- src/generic/mod.rs 1878
2 files changed, 2855 insertions, 0 deletions
diff --git a/src/generic/display.rs b/src/generic/display.rs
new file mode 100644
index 0000000..61e58ac
--- /dev/null
+++ b/src/generic/display.rs
@@ -0,0 +1,977 @@
+use core::fmt;
+use crate::safer_unchecked::GetSaferUnchecked as _;
+
+use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors};
+use yaxpeax_arch::display::*;
+
+use crate::MEM_SIZE_STRINGS;
+use crate::generic::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec};
+
+impl fmt::Display for InstDecoder {
+    /// renders the decoder's feature set: `<all features>` when it equals the default decoder,
+    /// `<no features>` when it equals the minimal decoder, and otherwise the name of each enabled
+    /// feature followed by a single space.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if *self == InstDecoder::default() {
+            return f.write_str("<all features>");
+        }
+        if *self == InstDecoder::minimal() {
+            return f.write_str("<no features>");
+        }
+
+        // (enabled, label) pairs, in the order they are printed.
+        let features: &[(bool, &str)] = &[
+            (self.sse3(), "sse3 "),
+            (self.ssse3(), "ssse3 "),
+            (self.monitor(), "monitor "),
+            (self.vmx(), "vmx "),
+            (self.fma3(), "fma3 "),
+            (self.cmpxchg16b(), "cmpxchg16b "),
+            (self.sse4_1(), "sse4_1 "),
+            (self.sse4_2(), "sse4_2 "),
+            (self.movbe(), "movbe "),
+            (self.popcnt(), "popcnt "),
+            (self.aesni(), "aesni "),
+            (self.xsave(), "xsave "),
+            (self.rdrand(), "rdrand "),
+            (self.sgx(), "sgx "),
+            (self.bmi1(), "bmi1 "),
+            (self.avx2(), "avx2 "),
+            (self.bmi2(), "bmi2 "),
+            (self.invpcid(), "invpcid "),
+            (self.mpx(), "mpx "),
+            (self.avx512_f(), "avx512_f "),
+            (self.avx512_dq(), "avx512_dq "),
+            (self.rdseed(), "rdseed "),
+            (self.adx(), "adx "),
+            (self.avx512_fma(), "avx512_fma "),
+            (self.pcommit(), "pcommit "),
+            (self.clflushopt(), "clflushopt "),
+            (self.clwb(), "clwb "),
+            (self.avx512_pf(), "avx512_pf "),
+            (self.avx512_er(), "avx512_er "),
+            (self.avx512_cd(), "avx512_cd "),
+            (self.sha(), "sha "),
+            (self.avx512_bw(), "avx512_bw "),
+            (self.avx512_vl(), "avx512_vl "),
+            (self.prefetchwt1(), "prefetchwt1 "),
+            (self.avx512_vbmi(), "avx512_vbmi "),
+            (self.avx512_vbmi2(), "avx512_vbmi2 "),
+            (self.gfni(), "gfni "),
+            (self.vaes(), "vaes "),
+            (self.pclmulqdq(), "pclmulqdq "),
+            (self.avx_vnni(), "avx_vnni "),
+            (self.avx512_bitalg(), "avx512_bitalg "),
+            (self.avx512_vpopcntdq(), "avx512_vpopcntdq "),
+            (self.avx512_4vnniw(), "avx512_4vnniw "),
+            (self.avx512_4fmaps(), "avx512_4fmaps "),
+            (self.cx8(), "cx8 "),
+            (self.syscall(), "syscall "),
+            (self.rdtscp(), "rdtscp "),
+            (self.abm(), "abm "),
+            (self.sse4a(), "sse4a "),
+            (self._3dnowprefetch(), "_3dnowprefetch "),
+            (self.xop(), "xop "),
+            (self.skinit(), "skinit "),
+            (self.tbm(), "tbm "),
+            (self.intel_quirks(), "intel_quirks "),
+            (self.amd_quirks(), "amd_quirks "),
+            (self.avx(), "avx "),
+        ];
+
+        for &(enabled, label) in features {
+            if enabled {
+                f.write_str(label)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl fmt::Display for PrefixVex {
+    /// renders `vex:none` when no prefix is present; otherwise `vex:` followed by the `w`, `r`,
+    /// `x`, and `b` bits in that order, each shown as its letter when set and `-` when clear.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if !self.present() {
+            return f.write_str("vex:none");
+        }
+
+        let flag = |set: bool, letter: &'static str| if set { letter } else { "-" };
+        let (w, r, x, b) = (
+            flag(self.w(), "w"),
+            flag(self.r(), "r"),
+            flag(self.x(), "x"),
+            flag(self.b(), "b"),
+        );
+        write!(f, "vex:{}{}{}{}", w, r, x, b)
+    }
+}
+
+impl fmt::Display for Segment {
+    /// writes the lowercase two-letter name of the segment register.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let name = match *self {
+            Segment::CS => "cs",
+            Segment::DS => "ds",
+            Segment::ES => "es",
+            Segment::FS => "fs",
+            Segment::GS => "gs",
+            Segment::SS => "ss",
+        };
+        f.write_str(name)
+    }
+}
+
+// register names are laid out in groups of eight: `regspec_label` indexes this table at `num + (bank << 3)`.
+// xmm, ymm, and zmm each have 32 names here, so each of them spans four consecutive groups.
+const REG_NAMES: &[&'static str] = &[
+ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
+ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",
+ "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7",
+ "es", "cs", "ss", "ds", "fs", "gs", "", "",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
+ "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
+ "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
+ "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31",
+ "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31",
+ "st(0)", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+ "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7",
+ "eip", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
+ "eflags", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
+];
+
+/// looks up the display name of `spec` in `REG_NAMES`, at index `num + (bank << 3)`.
+pub(crate) fn regspec_label(spec: &RegSpec) -> &'static str {
+    let bank_base = (spec.bank as u16) << 3;
+    let index = (spec.num as u16 + bank_base) as usize;
+    // SAFETY: NOTE(review) - this relies on every (bank, num) pair a `RegSpec` can hold landing
+    // inside `REG_NAMES`. the bank numbering is defined outside this file; confirm it matches
+    // the table layout.
+    unsafe { REG_NAMES.get_kinda_unchecked(index) }
+}
+
+impl fmt::Display for RegSpec {
+    /// writes the register's name, as returned by `regspec_label`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let label = regspec_label(self);
+        f.write_str(label)
+    }
+}
+
+impl fmt::Display for Operand {
+    /// formats the operand the same way `colorize` does when given `NoColors`.
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        let plain = NoColors;
+        self.colorize(&plain, fmt)
+    }
+}
+
+impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Operand {
+    /// writes this operand to `f`.
+    ///
+    /// immediates go through `colors.number`, absolute displacements through `colors.address`,
+    /// registers are written by name, and register-based memory operands are written in `[...]`
+    /// form with scales through `colors.number`. masked register operands append `{mask}` only
+    /// when the mask register number is nonzero; masked memory operands always append `{mask}`.
+    /// `Operand::Nothing` writes nothing.
+    fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result {
+        match *self {
+            Operand::ImmediateU8(imm) => write!(f, "{}", colors.number(u8_hex(imm))),
+            Operand::ImmediateI8(imm) => write!(f, "{}", colors.number(signed_i8_hex(imm))),
+            Operand::ImmediateU16(imm) => write!(f, "{}", colors.number(u16_hex(imm))),
+            Operand::ImmediateI16(imm) => write!(f, "{}", colors.number(signed_i16_hex(imm))),
+            Operand::ImmediateU32(imm) => write!(f, "{}", colors.number(u32_hex(imm))),
+            Operand::ImmediateI32(imm) => write!(f, "{}", colors.number(signed_i32_hex(imm))),
+            Operand::AbsoluteFarAddress { segment, address } => {
+                write!(f, "{}:{}",
+                    colors.number(u16_hex(segment as u16)),
+                    colors.number(u32_hex(address as u32)),
+                )
+            }
+            Operand::Register(ref spec) => f.write_str(regspec_label(spec)),
+            Operand::RegisterMaskMerge(ref spec, ref mask, merge_mode) => {
+                write_masked_register(f, spec, mask, merge_mode)
+            }
+            Operand::RegisterMaskMergeSae(ref spec, ref mask, merge_mode, sae_mode) => {
+                write_masked_register(f, spec, mask, merge_mode)?;
+                f.write_str(sae_mode.label())
+            }
+            Operand::RegisterMaskMergeSaeNoround(ref spec, ref mask, merge_mode) => {
+                write_masked_register(f, spec, mask, merge_mode)?;
+                f.write_str("{sae}")
+            }
+            Operand::DisplacementU16(imm) => write!(f, "[{}]", colors.address(u16_hex(imm))),
+            Operand::DisplacementU32(imm) => write!(f, "[{}]", colors.address(u32_hex(imm))),
+            Operand::RegDisp(ref spec, disp) => {
+                write!(f, "[{} ", regspec_label(spec))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")
+            }
+            Operand::RegDeref(ref spec) => write!(f, "[{}]", regspec_label(spec)),
+            Operand::RegScale(ref spec, scale) => {
+                write!(f, "[{} * {}]", regspec_label(spec), colors.number(scale))
+            }
+            Operand::RegScaleDisp(ref spec, scale, disp) => {
+                write!(f, "[{} * {} ", regspec_label(spec), colors.number(scale))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")
+            }
+            Operand::RegIndexBase(ref base, ref index) => {
+                write!(f, "[{} + {}]", regspec_label(base), regspec_label(index))
+            }
+            Operand::RegIndexBaseDisp(ref base, ref index, disp) => {
+                write!(f, "[{} + {} ", regspec_label(base), regspec_label(index))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")
+            }
+            Operand::RegIndexBaseScale(ref base, ref index, scale) => {
+                write!(f, "[{} + {} * {}]",
+                    regspec_label(base),
+                    regspec_label(index),
+                    colors.number(scale),
+                )
+            }
+            Operand::RegIndexBaseScaleDisp(ref base, ref index, scale, disp) => {
+                write!(f, "[{} + {} * {} ",
+                    regspec_label(base),
+                    regspec_label(index),
+                    colors.number(scale),
+                )?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")
+            }
+            Operand::RegDispMasked(ref spec, disp, ref mask_reg) => {
+                write!(f, "[{} ", regspec_label(spec))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegDerefMasked(ref spec, ref mask_reg) => {
+                write!(f, "[{}]", regspec_label(spec))?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegScaleMasked(ref spec, scale, ref mask_reg) => {
+                write!(f, "[{} * {}]", regspec_label(spec), colors.number(scale))?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegScaleDispMasked(ref spec, scale, disp, ref mask_reg) => {
+                write!(f, "[{} * {} ", regspec_label(spec), colors.number(scale))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegIndexBaseMasked(ref base, ref index, ref mask_reg) => {
+                write!(f, "[{} + {}]", regspec_label(base), regspec_label(index))?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegIndexBaseDispMasked(ref base, ref index, disp, ref mask_reg) => {
+                write!(f, "[{} + {} ", regspec_label(base), regspec_label(index))?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegIndexBaseScaleMasked(ref base, ref index, scale, ref mask_reg) => {
+                write!(f, "[{} + {} * {}]",
+                    regspec_label(base),
+                    regspec_label(index),
+                    colors.number(scale),
+                )?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::RegIndexBaseScaleDispMasked(ref base, ref index, scale, disp, ref mask_reg) => {
+                write!(f, "[{} + {} * {} ",
+                    regspec_label(base),
+                    regspec_label(index),
+                    colors.number(scale),
+                )?;
+                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?;
+                f.write_str("]")?;
+                write_mask_suffix(f, mask_reg)
+            }
+            Operand::Nothing => Ok(()),
+        }
+    }
+}
+
+/// writes a register name, then `{mask}` if the mask register number is nonzero, then `{z}` if
+/// the merge mode is `MergeMode::Zero`.
+fn write_masked_register<T: fmt::Write>(out: &mut T, spec: &RegSpec, mask: &RegSpec, merge_mode: MergeMode) -> fmt::Result {
+    out.write_str(regspec_label(spec))?;
+    if mask.num != 0 {
+        out.write_str("{")?;
+        out.write_str(regspec_label(mask))?;
+        out.write_str("}")?;
+    }
+    if let MergeMode::Zero = merge_mode {
+        out.write_str("{z}")?;
+    }
+    Ok(())
+}
+
+/// writes the `{mask}` suffix that follows a masked memory operand.
+fn write_mask_suffix<T: fmt::Write>(out: &mut T, mask_reg: &RegSpec) -> fmt::Result {
+    write!(out, "{{{}}}", regspec_label(mask_reg))
+}
+
+impl fmt::Display for Instruction {
+    /// formats the instruction in `DisplayStyle::Intel` syntax, without colors.
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        let displayer = self.display_with(DisplayStyle::Intel);
+        displayer.colorize(&NoColors, fmt)
+    }
+}
+
+impl<'instr> fmt::Display for InstructionDisplayer<'instr> {
+    /// formats the wrapped instruction in this displayer's style, without colors.
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        let plain = NoColors;
+        self.colorize(&plain, fmt)
+    }
+}
+
+/// selects the syntax `Instruction::display_with` uses to render an instruction. `Intel` is more
+/// or less intel syntax; a memory operand is preceded by a size string from `MEM_SIZE_STRINGS`.
+/// `C` renders the instruction as a C-like pseudocode statement instead.
+#[derive(Copy, Clone)]
+pub enum DisplayStyle {
+ /// intel-style syntax for instructions, like
+ /// `add eax, [edx + ecx * 2 + 0x1234]`
+ Intel,
+ /// C-style syntax for instructions, like
+ /// `eax += [edx + ecx * 2 + 0x1234]`
+ C,
+ // one might imagine an AT&T style here, which is mostly interesting for reversing operand
+ // order.
+ // well.
+ // it also complicates memory operands in an offset-only operand, and is just kind of awful, so
+ // it's just not implemented yet.
+ // ATT,
+}
+
+/// implementation of [`Display`](fmt::Display) that renders a borrowed instruction using a
+/// specified display style.
+pub struct InstructionDisplayer<'instr> {
+ pub(crate) instr: &'instr Instruction, // the instruction to render
+ pub(crate) style: DisplayStyle, // the syntax to render it in
+}
+
+/*
+ * Can't implement this as accepting a formatter because rust
+ * doesn't let me build one outside println! or write! or whatever.
+ *
+ * can't write this as an intermediate struct because i refuse to copy
+ * all data into the struct, and having a function producing a struct with
+ * some lifetimes gets really hairy if it's from a trait - same GAT kind
+ * of nonsense as i saw with ContextRead, because someone could hold onto
+ * the dang intermediate struct forever.
+ *
+ * so write to some Write thing i guess. bite me. i really just want to
+ * stop thinking about how to support printing instructions...
+ */
+impl <'instr, T: fmt::Write, Y: YaxColors> Colorize<T, Y> for InstructionDisplayer<'instr> {
+    /// renders the instruction through `contextualize`, with address 0 and a `NoContext`.
+    fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result {
+        // TODO: there is no meaningful address to hand to `contextualize` here; the address
+        // operand maybe should be optional.
+        let no_context = NoContext;
+        self.contextualize(colors, 0, Some(&no_context), out)
+    }
+}
+
+/// marker type passed as the context when an instruction is rendered with no per-operand context.
+struct NoContext;
+
+impl Instruction {
+    /// writes this instruction to `out` in `DisplayStyle::Intel` syntax, without colors.
+    pub fn write_to<T: fmt::Write>(&self, out: &mut T) -> fmt::Result {
+        let displayer = self.display_with(DisplayStyle::Intel);
+        displayer.contextualize(&NoColors, 0, Some(&NoContext), out)
+    }
+}
+
+/// writes `instr` to `out` in intel-style syntax.
+///
+/// output is, in order: `xacquire`/`xrelease`/`lock` prefixes, `rep`/`repnz` for string
+/// instructions, the opcode name, then the operands separated by `, `. relative branches print
+/// their target as `$+<rel>` or `$<rel>`. each memory operand is preceded by its size string and
+/// any segment override, and an EVEX-broadcast memory operand is followed by `{1toN}`.
+/// `_address` and `_context` are unused.
+fn contextualize_intel<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result {
+    // the only opcodes a `rep`/`repnz` prefix is printed for.
+    const STRING_OPS: [Opcode; 6] = [
+        Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS,
+    ];
+
+    const RELATIVE_BRANCHES: [Opcode; 21] = [
+        Opcode::JMP, Opcode::JECXZ,
+        Opcode::LOOP, Opcode::LOOPZ, Opcode::LOOPNZ,
+        Opcode::JO, Opcode::JNO,
+        Opcode::JB, Opcode::JNB,
+        Opcode::JZ, Opcode::JNZ,
+        Opcode::JNA, Opcode::JA,
+        Opcode::JS, Opcode::JNS,
+        Opcode::JP, Opcode::JNP,
+        Opcode::JL, Opcode::JGE,
+        Opcode::JLE, Opcode::JG,
+    ];
+
+    if instr.xacquire() {
+        out.write_str("xacquire ")?;
+    }
+    if instr.xrelease() {
+        out.write_str("xrelease ")?;
+    }
+    if instr.prefixes.lock() {
+        out.write_str("lock ")?;
+    }
+
+    if instr.prefixes.rep_any() && STRING_OPS.contains(&instr.opcode) {
+        if instr.prefixes.rep() {
+            out.write_str("rep ")?;
+        } else if instr.prefixes.repnz() {
+            out.write_str("repnz ")?;
+        }
+    }
+
+    out.write_str(instr.opcode.name())?;
+
+    if instr.opcode == Opcode::XBEGIN {
+        let rel = instr.imm as i32;
+        let sigil = if rel >= 0 { " $+" } else { " $" };
+        return write!(out, "{}{}", sigil, colors.number(signed_i32_hex(rel)));
+    }
+
+    if instr.operand_count < 1 {
+        return Ok(());
+    }
+
+    out.write_str(" ")?;
+
+    let first = Operand::from_spec(instr, instr.operands[0]);
+
+    let first_is_rel_imm = instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32;
+    if first_is_rel_imm && RELATIVE_BRANCHES.contains(&instr.opcode) {
+        let rel = match first {
+            Operand::ImmediateI8(rel) => rel as i32,
+            Operand::ImmediateI32(rel) => rel,
+            _ => { unreachable!() }
+        };
+        let sigil = if rel >= 0 { "$+" } else { "$" };
+        return write!(out, "{}{}", sigil, colors.number(signed_i32_hex(rel)));
+    }
+
+    if first.is_memory() {
+        out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?;
+        out.write_str(" ")?;
+    }
+    if let Some(prefix) = instr.segment_override_for_op(0) {
+        write!(out, "{}:", prefix)?;
+    }
+    first.colorize(colors, out)?;
+
+    for i in 1..instr.operand_count {
+        let spec = instr.operands[i as usize];
+        if let OperandSpec::Nothing = spec {
+            return Ok(());
+        }
+
+        out.write_str(", ")?;
+        let operand = Operand::from_spec(instr, spec);
+        if operand.is_memory() {
+            out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?;
+            out.write_str(" ")?;
+        }
+        if let Some(prefix) = instr.segment_override_for_op(i) {
+            write!(out, "{}:", prefix)?;
+        }
+        operand.colorize(colors, out)?;
+
+        // only an EVEX-broadcast memory operand gets a `{1toN}` suffix.
+        let evex = match instr.prefixes.evex() {
+            Some(evex) => evex,
+            None => continue,
+        };
+        if !(evex.broadcast() && operand.is_memory()) {
+            continue;
+        }
+
+        let scale = match instr.opcode {
+            Opcode::VFPCLASSPS | Opcode::VCVTNEPS2BF16 => {
+                if evex.vex().l() {
+                    8
+                } else if evex.lp() {
+                    16
+                } else {
+                    4
+                }
+            }
+            Opcode::VCVTPD2PS | Opcode::VCVTTPD2UDQ | Opcode::VCVTPD2UDQ | Opcode::VCVTUDQ2PD |
+            Opcode::VCVTPS2PD | Opcode::VCVTQQ2PS | Opcode::VCVTDQ2PD | Opcode::VCVTTPD2DQ |
+            Opcode::VFPCLASSPD | Opcode::VCVTUQQ2PS | Opcode::VCVTPD2DQ | Opcode::VCVTTPS2UQQ |
+            Opcode::VCVTPS2UQQ | Opcode::VCVTTPS2QQ | Opcode::VCVTPS2QQ => {
+                // vcvtpd2ps is "cool": in broadcast mode, it can read a double-precision float
+                // (qword), resize to single-precision, then broadcast that to the whole
+                // destination register. this means we need to show `xmm, qword [addr]{1to4}` if
+                // vector size is 256. likewise, scale of 8 for the same truncation reason if
+                // vector size is 512.
+                // vcvtudq2pd is the same story.
+                // vfpclassp{s,d} is a mystery to me.
+                if evex.vex().l() {
+                    4
+                } else if evex.lp() {
+                    8
+                } else {
+                    2
+                }
+            }
+            _ => {
+                // this should never be `None` - that would imply two memory operands for a
+                // broadcasted operation.
+                if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() {
+                    width / instr.mem_size
+                } else {
+                    0
+                }
+            }
+        };
+        write!(out, "{{1to{}}}", scale)?;
+    }
+    Ok(())
+}
+
+/// writes `instr` to `out` as a C-like pseudocode statement.
+///
+/// `xacquire`/`xrelease`/`lock` prefixes open one `{ ... }` group, and a `rep`/`repnz` prefix on a
+/// string instruction opens another, preceded by the operand size word. every open group is
+/// closed after the statement. opcodes with no dedicated rendering fall back to
+/// `dest = opcode(operands...)`, or `opcode()` when there are no operands.
+/// `_address` and `_context` are unused.
+fn contextualize_c<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result {
+    /// writes a branch target: relative immediates as `$+<rel>` when non-negative and `$<rel>`
+    /// when negative; any other operand through its `Display` impl.
+    fn write_jmp_operand<T: fmt::Write, Y: YaxColors>(op: Operand, colors: &Y, out: &mut T) -> fmt::Result {
+        let rel = match op {
+            Operand::ImmediateI8(rel) => rel as i32,
+            Operand::ImmediateI32(rel) => rel,
+            other => {
+                return write!(out, "{}", other);
+            }
+        };
+        let sigil = if rel >= 0 { "$+" } else { "$" };
+        write!(out, "{}{}", sigil, colors.number(signed_i32_hex(rel)))
+    }
+
+    let mut brace_count = 0;
+
+    let mut prefixed = false;
+    if instr.xacquire() {
+        out.write_str("xacquire ")?;
+        prefixed = true;
+    }
+    if instr.xrelease() {
+        out.write_str("xrelease ")?;
+        prefixed = true;
+    }
+    if instr.prefixes.lock() {
+        out.write_str("lock ")?;
+        prefixed = true;
+    }
+    if prefixed {
+        out.write_str("{ ")?;
+        brace_count += 1;
+    }
+
+    if instr.prefixes.rep_any() {
+        if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) {
+            let word_str = match instr.mem_size {
+                1 => "byte",
+                2 => "word",
+                4 => "dword",
+                8 => "qword",
+                _ => { unreachable!("invalid word size") }
+            };
+
+            // only a few of you actually use the prefix...
+            if instr.prefixes.rep() {
+                out.write_str("rep ")?;
+            } else if instr.prefixes.repnz() {
+                out.write_str("repnz ")?;
+            } // TODO: other rep kinds?
+
+            out.write_str(word_str)?;
+            out.write_str(" { ")?;
+            brace_count += 1;
+        }
+    }
+
+    // text that precedes the branch target, for opcodes rendered as (conditional) jumps.
+    let branch_lead: Option<&'static str> = match instr.opcode {
+        Opcode::JMP => Some("jmp "),
+        Opcode::JECXZ => Some("if ecx == 0 then jmp "),
+        Opcode::LOOP => Some("ecx--; if ecx != 0 then jmp "),
+        Opcode::LOOPZ => Some("ecx--; if ecx != 0 and zero(rflags) then jmp "),
+        Opcode::LOOPNZ => Some("ecx--; if ecx != 0 and !zero(rflags) then jmp "),
+        // these two used to share the placeholder `_(rflags)`, which made `jo` and `jno`
+        // indistinguishable in the output.
+        Opcode::JO => Some("if overflow(rflags) then jmp "),
+        Opcode::JNO => Some("if !overflow(rflags) then jmp "),
+        Opcode::JB => Some("if /* unsigned */ below(rflags) then jmp "),
+        Opcode::JNB => Some("if /* unsigned */ above_or_equal(rflags) then jmp "),
+        Opcode::JZ => Some("if zero(rflags) then jmp "),
+        Opcode::JNZ => Some("if !zero(rflags) then jmp "),
+        Opcode::JNA => Some("if /* unsigned */ below_or_equal(rflags) then jmp "),
+        Opcode::JA => Some("if /* unsigned */ above(rflags) then jmp "),
+        Opcode::JS => Some("if signed(rflags) then jmp "),
+        Opcode::JNS => Some("if !signed(rflags) then jmp "),
+        Opcode::JP => Some("if parity(rflags) then jmp "),
+        Opcode::JNP => Some("if !parity(rflags) then jmp "),
+        Opcode::JL => Some("if /* signed */ less(rflags) then jmp "),
+        Opcode::JGE => Some("if /* signed */ greater_or_equal(rflags) then jmp "),
+        Opcode::JLE => Some("if /* signed */ less_or_equal(rflags) then jmp "),
+        Opcode::JG => Some("if /* signed */ greater(rflags) then jmp "),
+        _ => None,
+    };
+
+    if let Some(lead) = branch_lead {
+        out.write_str(lead)?;
+        write_jmp_operand(instr.operand(0), colors, out)?;
+    } else {
+        match instr.opcode {
+            Opcode::Invalid => out.write_str("invalid")?,
+            Opcode::MOVS => out.write_str("es:[edi++] = ds:[esi++]")?,
+            Opcode::CMPS => out.write_str("eflags = flags(ds:[esi++] - es:[edi++])")?,
+            // TODO: size (applies to LODS, STOS, INS, and OUTS)
+            Opcode::LODS => out.write_str("rax = ds:[esi++]")?,
+            Opcode::STOS => out.write_str("es:[edi++] = rax")?,
+            Opcode::INS => out.write_str("es:[edi++] = port(dx)")?,
+            Opcode::OUTS => out.write_str("port(dx) = ds:[esi++]")?,
+            Opcode::ADD => write!(out, "{} += {}", instr.operand(0), instr.operand(1))?,
+            Opcode::OR => write!(out, "{} |= {}", instr.operand(0), instr.operand(1))?,
+            Opcode::ADC | Opcode::ADCX => {
+                write!(out, "{} += {} + eflags.cf", instr.operand(0), instr.operand(1))?
+            }
+            Opcode::ADOX => write!(out, "{} += {} + eflags.of", instr.operand(0), instr.operand(1))?,
+            Opcode::SBB => write!(out, "{} -= {} + eflags.cf", instr.operand(0), instr.operand(1))?,
+            Opcode::AND => write!(out, "{} &= {}", instr.operand(0), instr.operand(1))?,
+            Opcode::XOR => write!(out, "{} ^= {}", instr.operand(0), instr.operand(1))?,
+            Opcode::SUB => write!(out, "{} -= {}", instr.operand(0), instr.operand(1))?,
+            Opcode::CMP => write!(out, "eflags = flags({} - {})", instr.operand(0), instr.operand(1))?,
+            Opcode::TEST => write!(out, "eflags = flags({} & {})", instr.operand(0), instr.operand(1))?,
+            Opcode::XADD => {
+                write!(out, "({}, {}) = ({} + {}, {})", instr.operand(0), instr.operand(1), instr.operand(0), instr.operand(1), instr.operand(0))?
+            }
+            Opcode::BT => out.write_str("bt")?,
+            Opcode::BTS => out.write_str("bts")?,
+            Opcode::BTC => out.write_str("btc")?,
+            Opcode::BSR => write!(out, "{} = msb({})", instr.operand(0), instr.operand(1))?,
+            Opcode::BSF => write!(out, "{} = lsb({}) (x86 bsf)", instr.operand(0), instr.operand(1))?,
+            Opcode::TZCNT => write!(out, "{} = lsb({})", instr.operand(0), instr.operand(1))?,
+            Opcode::MOV => write!(out, "{} = {}", instr.operand(0), instr.operand(1))?,
+            Opcode::SAR => write!(out, "{} = {} >>> {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::SAL => write!(out, "{} = {} <<< {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::SHR => write!(out, "{} = {} >> {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::SHRX => write!(out, "{} = {} >> {} (x86 shrx)", instr.operand(0), instr.operand(1), instr.operand(2))?,
+            Opcode::SHL => write!(out, "{} = {} << {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::SHLX => write!(out, "{} = {} << {} (x86 shlx)", instr.operand(0), instr.operand(1), instr.operand(2))?,
+            Opcode::ROR => write!(out, "{} = {} ror {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::RORX => write!(out, "{} = {} ror {} (x86 rorx)", instr.operand(0), instr.operand(1), instr.operand(2))?,
+            Opcode::ROL => write!(out, "{} = {} rol {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::RCR => write!(out, "{} = {} rcr {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::RCL => write!(out, "{} = {} rcl {}", instr.operand(0), instr.operand(0), instr.operand(1))?,
+            Opcode::PUSH => write!(out, "push({})", instr.operand(0))?,
+            Opcode::POP => write!(out, "{} = pop()", instr.operand(0))?,
+            Opcode::MOVD => write!(out, "{} = movd({})", instr.operand(0), instr.operand(1))?,
+            Opcode::MOVQ => write!(out, "{} = movq({})", instr.operand(0), instr.operand(1))?,
+            Opcode::MOVNTQ => write!(out, "{} = movntq({})", instr.operand(0), instr.operand(1))?,
+            Opcode::INC | Opcode::DEC => {
+                let suffix = if instr.opcode == Opcode::INC { "++" } else { "--" };
+                if instr.operand(0).is_memory() {
+                    let size = match instr.mem_size {
+                        1 => "byte",
+                        2 => "word",
+                        4 => "dword",
+                        _ => "qword", // sizes that are not 1, 2, or 4, *better* be 8.
+                    };
+                    write!(out, "{} {}{}", size, instr.operand(0), suffix)?;
+                } else {
+                    write!(out, "{}{}", instr.operand(0), suffix)?;
+                }
+            }
+            Opcode::NOP => out.write_str("nop")?,
+            _ => {
+                if instr.operand_count() == 0 {
+                    write!(out, "{}()", instr.opcode())?;
+                } else {
+                    // the first operand is shown both as the destination and as the first
+                    // argument; the remaining operands follow as further arguments.
+                    write!(out, "{} = {}({}", instr.operand(0), instr.opcode(), instr.operand(0))?;
+                    for i in 1..instr.operand_count() {
+                        write!(out, ", {}", instr.operand(i))?;
+                    }
+                    out.write_str(")")?;
+                }
+            }
+        }
+    }
+
+    // close every group opened by the prefixes above.
+    while brace_count > 0 {
+        out.write_str(" }")?;
+        brace_count -= 1;
+    }
+
+    Ok(())
+}
+
+impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual<u32, NoContext, T, Y> for InstructionDisplayer<'instr> {
+    /// dispatches to the renderer that matches this displayer's `DisplayStyle`.
+    fn contextualize(&self, colors: &Y, address: u32, context: Option<&NoContext>, out: &mut T) -> fmt::Result {
+        match self.style {
+            DisplayStyle::Intel => contextualize_intel(self.instr, colors, address, context, out),
+            DisplayStyle::C => contextualize_c(self.instr, colors, address, context, out),
+        }
+    }
+}
+
+#[cfg(feature="std")]
+impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::String>], T, Y> for Instruction {
+    /// writes this instruction to `out`, substituting caller-provided text for operands.
+    ///
+    /// when `context[i]` exists and is `Some`, that string is written in place of operand `i`.
+    /// every other operand is rendered normally, preceded by the segment override that applies
+    /// to that operand, if any. a `context` slice shorter than the operand list is treated as
+    /// having no replacement for the missing entries. output stops at the first operand whose
+    /// spec is `OperandSpec::Nothing` and has no replacement. `_address` is unused.
+    fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option<alloc::string::String>]>, out: &mut T) -> fmt::Result {
+        // replacement text for operand `index`, if the caller supplied one. `get` keeps a short
+        // context slice from panicking.
+        let replacement = |index: usize| {
+            context
+                .and_then(|xs| xs.get(index))
+                .and_then(|text| text.as_ref())
+        };
+
+        if self.prefixes.lock() {
+            out.write_str("lock ")?;
+        }
+
+        if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&self.opcode) {
+            // only a few of you actually use the prefix...
+            if self.prefixes.rep() {
+                out.write_str("rep ")?;
+            } else if self.prefixes.repnz() {
+                out.write_str("repnz ")?;
+            }
+        }
+
+        self.opcode.colorize(colors, out)?;
+
+        match replacement(0) {
+            Some(text) => write!(out, " {}", text)?,
+            None => {
+                if let OperandSpec::Nothing = self.operands[0] {
+                    return Ok(());
+                }
+                out.write_str(" ")?;
+                if let Some(prefix) = self.segment_override_for_op(0) {
+                    write!(out, "{}:", prefix)?;
+                }
+                Operand::from_spec(self, self.operands[0]).colorize(colors, out)?;
+            }
+        }
+
+        for i in 1..self.operand_count {
+            let spec = self.operands[i as usize];
+            match replacement(i as usize) {
+                Some(text) => write!(out, ", {}", text)?,
+                None => {
+                    if let OperandSpec::Nothing = spec {
+                        return Ok(());
+                    }
+                    out.write_str(", ")?;
+                    // ask about *this* operand's override; this previously always queried
+                    // operand 1, which was wrong for any later operand.
+                    if let Some(prefix) = self.segment_override_for_op(i) {
+                        write!(out, "{}:", prefix)?;
+                    }
+                    Operand::from_spec(self, spec).colorize(colors, out)?;
+                }
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/generic/mod.rs b/src/generic/mod.rs
new file mode 100644
index 0000000..8ea1ebd
--- /dev/null
+++ b/src/generic/mod.rs
@@ -0,0 +1,1878 @@
+mod display;
+
+//#[cfg(feature = "fmt")]
+//mod display;
+
+use crate::MemoryAccessSize;
+
+use core::cmp::PartialEq;
+use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked;
+pub use crate::generated::opcode::Opcode as Opcode;
+
+use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction};
+use yaxpeax_arch::annotation::{AnnotatingDecoder, DescriptionSink, NullSink};
+use yaxpeax_arch::{DecodeError as ArchDecodeError};
+
+use core::fmt;
+/// a `DecodeError` displays as the text returned by its `description()`.
+impl fmt::Display for DecodeError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let message = self.description();
+        f.write_str(message)
+    }
+}
+
+use core::hash::Hash;
+use core::hash::Hasher;
+/// a `RegSpec` hashes as one `u16`: the bank in the high byte, the register number in the low
+/// byte.
+impl Hash for RegSpec {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let bank_bits = (self.bank as u16) << 8;
+        let num_bits = self.num as u16;
+        (bank_bits | num_bits).hash(state);
+    }
+}
+
+/// the condition for a conditional instruction.
+///
+/// these are only obtained through [`Opcode::condition()`]:
+/// ```
+/// use yaxpeax_x86::long_mode::{Opcode, ConditionCode};
+///
+/// assert_eq!(Opcode::JB.condition(), Some(ConditionCode::B));
+/// ```
+///
+/// each variant is named for a condition suffix as it appears in a mnemonic; as the example above
+/// shows, `B` is the condition of `jb`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum ConditionCode {
+ O,
+ NO,
+ B,
+ AE,
+ Z,
+ NZ,
+ A,
+ BE,
+ S,
+ NS,
+ P,
+ NP,
+ L,
+ GE,
+ G,
+ LE,
+}
+
+// define one `pub const fn $name() -> RegSpec` per `name => number` pair, all in bank `$bank`.
+// pairs are comma-separated; a trailing comma is not accepted.
+macro_rules! register {
+    ($bank:ident, $($name:ident => $num:expr),+) => {
+        $(
+            #[inline]
+            pub const fn $name() -> RegSpec {
+                RegSpec { bank: RegisterBank::$bank, num: $num }
+            }
+        )+
+    };
+}
+
+#[allow(non_snake_case)]
+impl RegSpec {
+    /// the register `rip`. this register is in the class `rip`, which contains only it.
+    pub const RIP: RegSpec = RegSpec::rip();
+
+    /// the number of this register in its `RegisterClass`.
+    ///
+    /// for many registers this is a number in the name, but for registers harkening back to
+    /// `x86_32`, the first eight registers are `rax`, `rcx`, `rdx`, `rbx`, `rsp`, `rbp`, `rsi`,
+    /// and `rdi` (or `eXX` for the 32-bit forms, `XX` for 16-bit forms).
+    pub fn num(&self) -> u8 {
+        self.num
+    }
+
+    /// the class of register this register is in.
+    ///
+    /// this corresponds to the register's size, but is by the register's usage in the instruction
+    /// set; `rax` and `mm0` are the same size, but different classes (`Q`(word) and `MM` (mmx)
+    /// respectively).
+    pub fn class(&self) -> RegisterClass {
+        RegisterClass { kind: self.bank }
+    }
+
+    /// return a human-friendly name for this register. the returned name is the same as would be
+    /// used to render this register in an instruction.
+    #[cfg(feature = "fmt")]
+    pub fn name(&self) -> &'static str {
+        display::regspec_label(self)
+    }
+
+    /// construct a `RegSpec` for x87 register `st(num)`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 8 or greater.
+    #[inline]
+    pub fn st(num: u8) -> RegSpec {
+        assert!(num < 8, "invalid x87 reg st({})", num);
+
+        RegSpec { num, bank: RegisterBank::ST }
+    }
+
+    /// construct a `RegSpec` for xmm reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 32 or greater.
+    #[inline]
+    pub fn xmm(num: u8) -> RegSpec {
+        assert!(num < 32, "invalid x86 xmm reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::X }
+    }
+
+    /// construct a `RegSpec` for ymm reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 32 or greater.
+    #[inline]
+    pub fn ymm(num: u8) -> RegSpec {
+        assert!(num < 32, "invalid x86 ymm reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::Y }
+    }
+
+    /// construct a `RegSpec` for zmm reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 32 or greater.
+    #[inline]
+    pub fn zmm(num: u8) -> RegSpec {
+        assert!(num < 32, "invalid x86 zmm reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::Z }
+    }
+
+    /// construct a `RegSpec` for qword reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 16 or greater.
+    #[inline]
+    pub fn q(num: u8) -> RegSpec {
+        assert!(num < 16, "invalid x86 qword reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::Q }
+    }
+
+    /// construct a `RegSpec` for mask reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 8 or greater.
+    #[inline]
+    pub fn mask(num: u8) -> RegSpec {
+        assert!(num < 8, "invalid x86 mask reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::K }
+    }
+
+    /// construct a `RegSpec` for dword reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 16 or greater.
+    #[inline]
+    pub fn d(num: u8) -> RegSpec {
+        assert!(num < 16, "invalid x86 dword reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::D }
+    }
+
+    /// construct a `RegSpec` for word reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 16 or greater.
+    #[inline]
+    pub fn w(num: u8) -> RegSpec {
+        assert!(num < 16, "invalid x86 word reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::W }
+    }
+
+    /// construct a `RegSpec` for rex byte reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 16 or greater.
+    #[inline]
+    pub fn rb(num: u8) -> RegSpec {
+        assert!(num < 16, "invalid x86 rex-byte reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::rB }
+    }
+
+    /// construct a `RegSpec` for non-rex byte reg `num`
+    ///
+    /// # Panics
+    ///
+    /// panics if `num` is 8 or greater.
+    #[inline]
+    pub fn b(num: u8) -> RegSpec {
+        assert!(num < 8, "invalid x86 non-rex byte reg {}", num);
+
+        RegSpec { num, bank: RegisterBank::B }
+    }
+
+    // build a register in `bank` from a three-bit register number plus an extension bit, which
+    // selects the upper eight registers of the bank.
+    #[inline]
+    fn from_parts(num: u8, extended: bool, bank: RegisterBank) -> RegSpec {
+        RegSpec {
+            num: num + if extended { 0b1000 } else { 0 },
+            bank,
+        }
+    }
+
+    // as `from_parts`, but the bank is chosen by `width_to_gp_reg_bank(width, rex)`.
+    #[inline]
+    fn gp_from_parts(num: u8, extended: bool, width: u8, rex: bool) -> RegSpec {
+        RegSpec {
+            num: num + if extended { 0b1000 } else { 0 },
+            bank: width_to_gp_reg_bank(width, rex)
+        }
+    }
+
+    register!(RIP, rip => 0);
+    register!(EIP, eip => 0);
+
+    register!(RFlags, rflags => 0);
+    register!(EFlags, eflags => 0);
+
+    register!(S, es => 0, cs => 1, ss => 2, ds => 3, fs => 4, gs => 5);
+
+    // in each bank below `r12`/`r13` are registers 12 and 13. they were previously declared as 8
+    // and 9, which made them indistinguishable from `r8`/`r9`.
+    register!(Q,
+        rax => 0, rcx => 1, rdx => 2, rbx => 3,
+        rsp => 4, rbp => 5, rsi => 6, rdi => 7,
+        r8 => 8, r9 => 9, r10 => 10, r11 => 11,
+        r12 => 12, r13 => 13, r14 => 14, r15 => 15
+    );
+
+    register!(D,
+        eax => 0, ecx => 1, edx => 2, ebx => 3,
+        esp => 4, ebp => 5, esi => 6, edi => 7,
+        r8d => 8, r9d => 9, r10d => 10, r11d => 11,
+        r12d => 12, r13d => 13, r14d => 14, r15d => 15
+    );
+
+    register!(W,
+        ax => 0, cx => 1, dx => 2, bx => 3,
+        sp => 4, bp => 5, si => 6, di => 7,
+        r8w => 8, r9w => 9, r10w => 10, r11w => 11,
+        r12w => 12, r13w => 13, r14w => 14, r15w => 15
+    );
+
+    register!(B,
+        al => 0, cl => 1, dl => 2, bl => 3,
+        ah => 4, ch => 5, dh => 6, bh => 7
+    );
+
+    register!(rB,
+        spl => 4, bpl => 5, sil => 6, dil => 7,
+        r8b => 8, r9b => 9, r10b => 10, r11b => 11,
+        r12b => 12, r13b => 13, r14b => 14, r15b => 15
+    );
+
+    /// the register `zmm0`.
+    #[inline]
+    pub const fn zmm0() -> RegSpec {
+        RegSpec { bank: RegisterBank::Z, num: 0 }
+    }
+
+    /// the register `ymm0`.
+    #[inline]
+    pub const fn ymm0() -> RegSpec {
+        RegSpec { bank: RegisterBank::Y, num: 0 }
+    }
+
+    /// the register `xmm0`.
+    #[inline]
+    pub const fn xmm0() -> RegSpec {
+        RegSpec { bank: RegisterBank::X, num: 0 }
+    }
+
+    /// the x87 register `st(0)`.
+    #[inline]
+    pub const fn st0() -> RegSpec {
+        RegSpec { bank: RegisterBank::ST, num: 0 }
+    }
+
+    /// the mmx register `mm0`.
+    #[inline]
+    pub const fn mm0() -> RegSpec {
+        RegSpec { bank: RegisterBank::MM, num: 0 }
+    }
+
+    /// return the size of this register, in bytes.
+    #[inline]
+    pub fn width(&self) -> u8 {
+        self.class().width()
+    }
+}
+
+// operand size codes. nothing in the nearby code constructs these, which is presumably why
+// `dead_code` is allowed here.
+// NOTE(review): the meaning of each code is not documented at this site - confirm against the
+// decoder before relying on them.
+#[allow(non_camel_case_types)]
+#[allow(dead_code)]
+enum SizeCode {
+ b,
+ vd,
+ vq,
+ vqp
+}
+
+/// an operand for an `x86_64` instruction.
+///
+/// `Operand::Nothing` should be unreachable in practice; any such instructions should have an
+/// operand count of 0 (or at least one fewer than the `Nothing` operand's position).
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum Operand {
+ /// a sign-extended byte
+ ImmediateI8(i8),
+ /// a zero-extended byte
+ ImmediateU8(u8),
+ /// a sign-extended word
+ ImmediateI16(i16),
+ /// a zero-extended word
+ ImmediateU16(u16),
+ /// a sign-extended dword
+ ImmediateI32(i32),
+ /// a zero-extended dword
+ ImmediateU32(u32),
+ /// a sign-extended qword
+ ImmediateI64(i64),
+ /// a zero-extended qword
+ ImmediateU64(u64),
+ /// a bare register operand, such as `rcx`.
+ Register(RegSpec),
+ /// an `avx512` register operand with optional mask register and merge mode, such as
+ /// `zmm3{k4}{z}`.
+ ///
+ /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is
+ /// `MergeMode::Merge`.
+ RegisterMaskMerge(RegSpec, RegSpec, MergeMode),
+ /// an `avx512` register operand with optional mask register, merge mode, and suppressed
+ /// exceptions, such as `zmm3{k4}{z}{rd-sae}`.
+ ///
+ /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is
+ /// `MergeMode::Merge`.
+ RegisterMaskMergeSae(RegSpec, RegSpec, MergeMode, SaeMode),
+ /// an `avx512` register operand with optional mask register, merge mode, and suppressed
+ /// exceptions, with no overridden rounding mode, such as `zmm3{k4}{z}{sae}`.
+ ///
+ /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is
+ /// `MergeMode::Merge`.
+ RegisterMaskMergeSaeNoround(RegSpec, RegSpec, MergeMode),
+ /// a memory access to a literal word address. it's extremely rare that a well-formed x86
+ /// instruction uses this mode. as an example, `[0x1133]`
+ DisplacementU16(u16),
+ /// a memory access to a literal dword address. it's extremely rare that a well-formed x86
+ /// instruction uses this mode. as an example, `[0x11335577]`
+ DisplacementU32(u32),
+ /// a memory access to a literal qword address. it's relatively rare that a well-formed x86
+ /// instruction uses this mode, but plausible. for example, `gs:[0x14]`. segment overrides,
+ /// however, are maintained on the instruction itself.
+ DisplacementU64(u64),
+ /// a simple dereference of the address held in some register. for example: `[rsi]`.
+ RegDeref(RegSpec),
+ /// a dereference of the address held in some register with offset. for example: `[rsi + 0x14]`.
+ RegDisp(RegSpec, i32),
+ /// a dereference of the address held in some register scaled by 1, 2, 4, or 8. this is almost always used with the `lea` instruction. for example: `[rdx * 4]`.
+ RegScale(RegSpec, u8),
+ /// a dereference of the address from summing two registers. for example: `[rbp + rax]`
+ RegIndexBase(RegSpec, RegSpec),
+ /// a dereference of the address from summing two registers with offset. for example: `[rdi + rcx + 0x40]`
+ RegIndexBaseDisp(RegSpec, RegSpec, i32),
+ /// a dereference of the address held in some register scaled by 1, 2, 4, or 8 with offset. this is almost always used with the `lea` instruction. for example: `[rax * 4 + 0x30]`.
+ RegScaleDisp(RegSpec, u8, i32),
+ /// a dereference of the address from summing a register and index register scaled by 1, 2, 4,
+ /// or 8. for
+ /// example: `[rsi + rcx * 4]`
+ RegIndexBaseScale(RegSpec, RegSpec, u8),
+ /// a dereference of the address from summing a register and index register scaled by 1, 2, 4,
+ /// or 8, with offset. for
+ /// example: `[rsi + rcx * 4 + 0x1234]`
+ RegIndexBaseScaleDisp(RegSpec, RegSpec, u8, i32),
+ /// an `avx512` dereference of register with optional masking. for example: `[rdx]{k3}`
+ RegDerefMasked(RegSpec, RegSpec),
+ /// an `avx512` dereference of register plus offset, with optional masking. for example: `[rsp + 0x40]{k3}`
+ RegDispMasked(RegSpec, i32, RegSpec),
+ /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8, with optional masking. this
+ /// seems extraordinarily unlikely to occur in practice. for example: `[rsi * 4]{k2}`
+ RegScaleMasked(RegSpec, u8, RegSpec),
+ /// an `avx512` dereference of a register plus an unscaled index register, with optional
+ /// masking. for example: `[rsi + rax]{k6}`
+ RegIndexBaseMasked(RegSpec, RegSpec, RegSpec),
+ /// an `avx512` dereference of a register plus an unscaled index register plus offset, with
+ /// optional masking. for example: `[rsi + rax + 0x1313]{k6}`
+ RegIndexBaseDispMasked(RegSpec, RegSpec, i32, RegSpec),
+ /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8 plus offset, with optional
+ /// masking. this seems extraordinarily unlikely to occur in practice. for example: `[rsi *
+ /// 4 + 0x1357]{k2}`
+ RegScaleDispMasked(RegSpec, u8, i32, RegSpec),
+ /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional
+ /// masking. for example: `[rsi + rax * 4]{k6}`
+ RegIndexBaseScaleMasked(RegSpec, RegSpec, u8, RegSpec),
+ /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8 and offset, with
+ /// optional masking. for example: `[rsi + rax * 4 + 0x1313]{k6}`
+ RegIndexBaseScaleDispMasked(RegSpec, RegSpec, u8, i32, RegSpec),
+ /// no operand. it is a bug for `yaxpeax-x86` to construct an `Operand` of this kind for public
+ /// use; the instruction's `operand_count` should be reduced so as to make this invisible to
+ /// library clients.
+ Nothing,
+}
+
+impl OperandSpec {
+    // the masked counterpart of this operand kind. kinds without a masked counterpart listed
+    // here are returned unchanged.
+    fn masked(self) -> Self {
+        match self {
+            // memory operands gain a mask
+            OperandSpec::Deref => OperandSpec::Deref_mask,
+            OperandSpec::RegDisp => OperandSpec::RegDisp_mask,
+            OperandSpec::RegScale => OperandSpec::RegScale_mask,
+            OperandSpec::RegScaleDisp => OperandSpec::RegScaleDisp_mask,
+            OperandSpec::RegIndexBaseScale => OperandSpec::RegIndexBaseScale_mask,
+            OperandSpec::RegIndexBaseScaleDisp => OperandSpec::RegIndexBaseScaleDisp_mask,
+            // register operands gain a mask and merge mode
+            OperandSpec::RegRRR => OperandSpec::RegRRR_maskmerge,
+            OperandSpec::RegMMM => OperandSpec::RegMMM_maskmerge,
+            OperandSpec::RegVex => OperandSpec::RegVex_maskmerge,
+            unmasked => unmasked,
+        }
+    }
+
+    // `true` for the operand kinds that name a memory location, `false` for immediates,
+    // registers, and `Nothing`. the match is deliberately free of a wildcard arm so every kind
+    // has to be classified.
+    fn is_memory(&self) -> bool {
+        match *self {
+            OperandSpec::ImmI8 |
+            OperandSpec::ImmI16 |
+            OperandSpec::ImmI32 |
+            OperandSpec::ImmI64 |
+            OperandSpec::ImmU8 |
+            OperandSpec::ImmU16 |
+            OperandSpec::RegRRR |
+            OperandSpec::RegRRR_maskmerge |
+            OperandSpec::RegRRR_maskmerge_sae |
+            OperandSpec::RegRRR_maskmerge_sae_noround |
+            OperandSpec::RegMMM |
+            OperandSpec::RegMMM_maskmerge |
+            OperandSpec::RegMMM_maskmerge_sae_noround |
+            OperandSpec::RegVex |
+            OperandSpec::RegVex_maskmerge |
+            OperandSpec::Reg4 |
+            OperandSpec::ImmInDispField |
+            OperandSpec::Nothing => false,
+
+            OperandSpec::DispU32 |
+            OperandSpec::DispU64 |
+            OperandSpec::Deref |
+            OperandSpec::Deref_esi |
+            OperandSpec::Deref_edi |
+            OperandSpec::Deref_rsi |
+            OperandSpec::Deref_rdi |
+            OperandSpec::RegDisp |
+            OperandSpec::RegScale |
+            OperandSpec::RegScaleDisp |
+            OperandSpec::RegIndexBaseScale |
+            OperandSpec::RegIndexBaseScaleDisp |
+            OperandSpec::Deref_mask |
+            OperandSpec::RegDisp_mask |
+            OperandSpec::RegScale_mask |
+            OperandSpec::RegScaleDisp_mask |
+            OperandSpec::RegIndexBaseScale_mask |
+            OperandSpec::RegIndexBaseScaleDisp_mask => true,
+        }
+    }
+}
+
+/// an `avx512` merging mode.
+///
+/// the behavior for non-`avx512` instructions is equivalent to `merge`. `zero` is only useful in
+/// conjunction with a mask register, where bits specified in the mask register correspond to
+/// unmodified items in the instruction's destination.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum MergeMode {
+    Merge,
+    Zero,
+}
+
+/// `true` selects `MergeMode::Zero`, `false` selects `MergeMode::Merge`.
+impl From<bool> for MergeMode {
+    fn from(b: bool) -> Self {
+        match b {
+            true => MergeMode::Zero,
+            false => MergeMode::Merge,
+        }
+    }
+}
+/// an `avx512` custom rounding mode.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum SaeMode {
+    RoundNearest,
+    RoundDown,
+    RoundUp,
+    RoundZero,
+}
+
+// rounding modes in index order; `SaeMode::from` computes the index from two prefix bits.
+const SAE_MODES: [SaeMode; 4] = [
+    SaeMode::RoundNearest,
+    SaeMode::RoundDown,
+    SaeMode::RoundUp,
+    SaeMode::RoundZero,
+];
+
+impl SaeMode {
+    /// a human-friendly label for this `SaeMode`:
+    ///
+    /// ```
+    /// use yaxpeax_x86::long_mode::SaeMode;
+    ///
+    /// assert_eq!(SaeMode::RoundNearest.label(), "{rne-sae}");
+    /// assert_eq!(SaeMode::RoundDown.label(), "{rd-sae}");
+    /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}");
+    /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}");
+    /// ```
+    pub fn label(&self) -> &'static str {
+        match *self {
+            SaeMode::RoundZero => "{rz-sae}",
+            SaeMode::RoundUp => "{ru-sae}",
+            SaeMode::RoundDown => "{rd-sae}",
+            SaeMode::RoundNearest => "{rne-sae}",
+        }
+    }
+
+    // pick a rounding mode from two bits: `l` is bit 0 and `lp` is bit 1 of the index into
+    // `SAE_MODES`.
+    fn from(l: bool, lp: bool) -> Self {
+        let idx = (l as usize) | ((lp as usize) << 1);
+        SAE_MODES[idx]
+    }
+}
+impl Operand {
+ // expand the compact `spec` into a self-contained `Operand`, reading registers, immediate,
+ // displacement, scale, and evex mask/merge state out of `inst`.
+ fn from_spec(inst: &Instruction, spec: OperandSpec) -> Operand {
+     let regs = &inst.regs;
+     let scale = inst.scale;
+     let disp = inst.disp as i32;
+
+     // evex state is read through closures so that only the operand kinds that need the evex
+     // prefix ever touch it.
+     let mask_num = || inst.prefixes.evex_unchecked().mask_reg();
+     let mask = || RegSpec::mask(mask_num());
+     let merge = || MergeMode::from(inst.prefixes.evex_unchecked().merge());
+
+     match spec {
+         OperandSpec::Nothing => Operand::Nothing,
+
+         // the register in modrm_rrr
+         OperandSpec::RegRRR => Operand::Register(regs[0]),
+         OperandSpec::RegRRR_maskmerge => {
+             Operand::RegisterMaskMerge(regs[0], mask(), merge())
+         }
+         OperandSpec::RegRRR_maskmerge_sae => {
+             let evex = inst.prefixes.evex_unchecked();
+             Operand::RegisterMaskMergeSae(
+                 regs[0],
+                 mask(),
+                 merge(),
+                 SaeMode::from(evex.vex().l(), evex.lp()),
+             )
+         }
+         OperandSpec::RegRRR_maskmerge_sae_noround => {
+             Operand::RegisterMaskMergeSaeNoround(regs[0], mask(), merge())
+         }
+
+         // the register in modrm_mmm (eg modrm mod bits were 11)
+         OperandSpec::RegMMM => Operand::Register(regs[1]),
+         OperandSpec::RegMMM_maskmerge => {
+             Operand::RegisterMaskMerge(regs[1], mask(), merge())
+         }
+         OperandSpec::RegMMM_maskmerge_sae_noround => {
+             Operand::RegisterMaskMergeSaeNoround(regs[1], mask(), merge())
+         }
+
+         OperandSpec::RegVex => Operand::Register(regs[3]),
+         OperandSpec::RegVex_maskmerge => {
+             Operand::RegisterMaskMerge(regs[3], mask(), merge())
+         }
+         // register number comes from the immediate; the bank is borrowed from `regs[3]`.
+         OperandSpec::Reg4 => {
+             Operand::Register(RegSpec { num: inst.imm as u8, bank: regs[3].bank })
+         }
+
+         OperandSpec::ImmI8 => Operand::ImmediateI8(inst.imm as i8),
+         OperandSpec::ImmU8 => Operand::ImmediateU8(inst.imm as u8),
+         OperandSpec::ImmI16 => Operand::ImmediateI16(inst.imm as i16),
+         OperandSpec::ImmU16 => Operand::ImmediateU16(inst.imm as u16),
+         OperandSpec::ImmI32 => Operand::ImmediateI32(inst.imm as i32),
+         OperandSpec::ImmI64 => Operand::ImmediateI64(inst.imm as i64),
+         OperandSpec::ImmInDispField => Operand::ImmediateU16(inst.disp as u16),
+
+         OperandSpec::DispU32 => Operand::DisplacementU32(inst.disp as u32),
+         OperandSpec::DispU64 => Operand::DisplacementU64(inst.disp as u64),
+
+         OperandSpec::Deref => Operand::RegDeref(regs[1]),
+         OperandSpec::Deref_esi => Operand::RegDeref(RegSpec::esi()),
+         OperandSpec::Deref_edi => Operand::RegDeref(RegSpec::edi()),
+         OperandSpec::Deref_rsi => Operand::RegDeref(RegSpec::rsi()),
+         OperandSpec::Deref_rdi => Operand::RegDeref(RegSpec::rdi()),
+         OperandSpec::RegDisp => Operand::RegDisp(regs[1], disp),
+         OperandSpec::RegScale => Operand::RegScale(regs[2], scale),
+         OperandSpec::RegScaleDisp => Operand::RegScaleDisp(regs[2], scale, disp),
+         OperandSpec::RegIndexBaseScale => {
+             Operand::RegIndexBaseScale(regs[1], regs[2], scale)
+         }
+         OperandSpec::RegIndexBaseScaleDisp => {
+             Operand::RegIndexBaseScaleDisp(regs[1], regs[2], scale, disp)
+         }
+
+         // masked memory operands: a mask register number of zero yields the unmasked form.
+         OperandSpec::Deref_mask => match mask_num() {
+             0 => Operand::RegDeref(regs[1]),
+             k => Operand::RegDerefMasked(regs[1], RegSpec::mask(k)),
+         },
+         OperandSpec::RegDisp_mask => match mask_num() {
+             0 => Operand::RegDisp(regs[1], disp),
+             k => Operand::RegDispMasked(regs[1], disp, RegSpec::mask(k)),
+         },
+         OperandSpec::RegScale_mask => match mask_num() {
+             0 => Operand::RegScale(regs[2], scale),
+             k => Operand::RegScaleMasked(regs[2], scale, RegSpec::mask(k)),
+         },
+         OperandSpec::RegScaleDisp_mask => match mask_num() {
+             0 => Operand::RegScaleDisp(regs[2], scale, disp),
+             k => Operand::RegScaleDispMasked(regs[2], scale, disp, RegSpec::mask(k)),
+         },
+         OperandSpec::RegIndexBaseScale_mask => match mask_num() {
+             0 => Operand::RegIndexBaseScale(regs[1], regs[2], scale),
+             k => Operand::RegIndexBaseScaleMasked(regs[1], regs[2], scale, RegSpec::mask(k)),
+         },
+         OperandSpec::RegIndexBaseScaleDisp_mask => match mask_num() {
+             0 => Operand::RegIndexBaseScaleDisp(regs[1], regs[2], scale, disp),
+             k => Operand::RegIndexBaseScaleDispMasked(regs[1], regs[2], scale, disp, RegSpec::mask(k)),
+         },
+     }
+ }
+
+ /// returns `true` if this operand implies a memory access, `false` otherwise.
+ ///
+ /// notably, the `lea` instruction uses a memory operand without actually ever accessing
+ /// memory.
+ pub fn is_memory(&self) -> bool {
+     // no wildcard arm: every `Operand` variant has to be classified here. `DisplacementU16`
+     // was previously missing from both arms.
+     match self {
+         Operand::DisplacementU16(_) |
+         Operand::DisplacementU32(_) |
+         Operand::DisplacementU64(_) |
+         Operand::RegDeref(_) |
+         Operand::RegDisp(_, _) |
+         Operand::RegScale(_, _) |
+         Operand::RegIndexBase(_, _) |
+         Operand::RegIndexBaseDisp(_, _, _) |
+         Operand::RegScaleDisp(_, _, _) |
+         Operand::RegIndexBaseScale(_, _, _) |
+         Operand::RegIndexBaseScaleDisp(_, _, _, _) |
+         Operand::RegDerefMasked(_, _) |
+         Operand::RegDispMasked(_, _, _) |
+         Operand::RegScaleMasked(_, _, _) |
+         Operand::RegIndexBaseMasked(_, _, _) |
+         Operand::RegIndexBaseDispMasked(_, _, _, _) |
+         Operand::RegScaleDispMasked(_, _, _, _) |
+         Operand::RegIndexBaseScaleMasked(_, _, _, _) |
+         Operand::RegIndexBaseScaleDispMasked(_, _, _, _, _) => {
+             true
+         },
+         Operand::ImmediateI8(_) |
+         Operand::ImmediateU8(_) |
+         Operand::ImmediateI16(_) |
+         Operand::ImmediateU16(_) |
+         Operand::ImmediateU32(_) |
+         Operand::ImmediateI32(_) |
+         Operand::ImmediateU64(_) |
+         Operand::ImmediateI64(_) |
+         Operand::Register(_) |
+         Operand::RegisterMaskMerge(_, _, _) |
+         Operand::RegisterMaskMergeSae(_, _, _, _) |
+         Operand::RegisterMaskMergeSaeNoround(_, _, _) |
+         Operand::Nothing => {
+             false
+         }
+     }
+ }
+
+ /// return the width of this operand, in bytes. register widths are determined by the
+ /// register's class. the widths of memory operands are recorded on the instruction this
+ /// `Operand` came from; `None` here means the authoritative width is `instr.mem_size()`.
+ pub fn width(&self) -> Option<u8> {
+     match self {
+         // every register form reports the width of its register, including the `sae` forms,
+         // which used to fall through to `None` with the memory operands.
+         Operand::Register(reg) |
+         Operand::RegisterMaskMerge(reg, _, _) |
+         Operand::RegisterMaskMergeSae(reg, _, _, _) |
+         Operand::RegisterMaskMergeSaeNoround(reg, _, _) => {
+             Some(reg.width())
+         }
+         Operand::ImmediateI8(_) |
+         Operand::ImmediateU8(_) => {
+             Some(1)
+         }
+         Operand::ImmediateI16(_) |
+         Operand::ImmediateU16(_) => {
+             Some(2)
+         }
+         Operand::ImmediateI32(_) |
+         Operand::ImmediateU32(_) => {
+             Some(4)
+         }
+         Operand::ImmediateI64(_) |
+         Operand::ImmediateU64(_) => {
+             Some(8)
+         }
+         // memory operands or `Nothing`
+         _ => {
+             None
+         }
+     }
+ }
+}
+
+// pin the in-memory size of the small per-operand types.
+#[test]
+fn operand_size() {
+    use core::mem::size_of;
+
+    assert_eq!(size_of::<OperandSpec>(), 1);
+    assert_eq!(size_of::<RegSpec>(), 2);
+    // assert_eq!(core::mem::size_of::<Prefixes>(), 4);
+    // assert_eq!(core::mem::size_of::<Instruction>(), 40);
+}
+
+/// a trivial struct for `yaxpeax_arch::Arch` to be implemented on. it's only interesting for the
+/// associated type parameters.
+#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
+#[derive(Hash, Eq, PartialEq, Debug, Copy, Clone)]
+#[allow(non_camel_case_types)]
+pub struct Arch;
+
+// ties this module's types into `yaxpeax_arch`: addresses are `u64`, the instruction stream is
+// read as `u8` words, and the remaining types are the ones declared in this module.
+impl yaxpeax_arch::Arch for Arch {
+ type Address = u64;
+ type Word = u8;
+ type Instruction = Instruction;
+ type DecodeError = DecodeError;
+ type Decoder = InstDecoder;
+ type Operand = Operand;
+}
+
+/// the decoder for this module's instructions.
+///
+/// this type is named by the public `Arch::Decoder` associated type, so it has to be public
+/// itself; a private struct there is a private type in a public interface.
+#[derive(PartialEq, Copy, Clone, Eq, Hash, PartialOrd, Ord)]
+pub struct InstDecoder {
+    // decoder feature flags; `InstDecoder::minimal()` sets this to zero.
+    // NOTE(review): presumably one bit per isa extension - confirm against the feature accessors.
+    flags: u64,
+}
+
+impl InstDecoder {
+    /// a decoder whose feature flags are all clear.
+    pub fn minimal() -> Self {
+        Self { flags: 0 }
+    }
+
+    /// decode a single instruction from the start of `data`.
+    pub fn decode_slice(&self, data: &[u8]) -> Result<Instruction, DecodeError> {
+        let mut words = yaxpeax_arch::U8Reader::new(data);
+        self.decode(&mut words)
+    }
+
+    // the 64-bit decoder to try for this configuration; currently always the default one.
+    // TODO: map isa extensions over. maybe codegen the whole thing?
+    fn as_64b_best_effort(&self) -> crate::long_mode::InstDecoder {
+        Default::default()
+    }
+
+    // the 32-bit decoder to try for this configuration; currently always the default one.
+    // TODO: map isa extensions over. maybe codegen the whole thing?
+    fn as_32b_best_effort(&self) -> crate::protected_mode::InstDecoder {
+        Default::default()
+    }
+
+    // the 16-bit decoder to try for this configuration; currently always the default one.
+    // TODO: map isa extensions over. maybe codegen the whole thing?
+    fn as_16b_best_effort(&self) -> crate::real_mode::InstDecoder {
+        Default::default()
+    }
+}
+
+// this is layout-compatible with 64-bit RegSpec (`RegisterBank` is the same), but not 32-bit or
+// 16-bit (fewer register banks). hopefully this remains generally true.
+/// an `x86` register, including its number and type. if `fmt` is enabled, name too.
+///
+/// ```
+/// use yaxpeax_x86::generic::{RegSpec, register_class};
+///
+/// assert_eq!(RegSpec::ecx().num(), 1);
+/// assert_eq!(RegSpec::ecx().class(), register_class::D);
+/// ```
+///
+/// some registers have classes of their own, and only one member: `rip`, `eip`, `rflags`, and
+/// `eflags`.
+// `Hash` is not derived here; it is implemented by hand near the top of this module.
+#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
+#[derive(Copy, Clone, Debug, PartialOrd, Ord, Eq, PartialEq)]
+pub struct RegSpec {
+ num: u8, // the register's number within its bank; see `RegSpec::num()`
+ bank: RegisterBank, // the bank this register belongs to; see `RegSpec::class()`
+}
+
+/// an `x86_64` register class - `qword`, `dword`, `xmmword`, `segment`, and so on.
+///
+/// this is mostly useful for comparing a `RegSpec`'s [`RegSpec::class()`] with a constant out of
+/// [`register_class`].
+#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
+#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct RegisterClass {
+ kind: RegisterBank, // the bank that identifies this class
+}
+
+// filler for table slots that no `RegisterBank` discriminant selects.
+const UNUSED_CLASS_NAME: &str = "BUG. PLEASE REPORT.";
+
+// register class names, indexed by `RegisterBank` discriminant (see `RegisterClass::name`).
+const REGISTER_CLASS_NAMES: &[&str] = &[
+    "qword",            // 0
+    UNUSED_CLASS_NAME,
+    "dword",            // 2
+    UNUSED_CLASS_NAME,
+    "word",             // 4
+    UNUSED_CLASS_NAME,
+    "byte",             // 6
+    UNUSED_CLASS_NAME,
+    "rex-byte",         // 8
+    UNUSED_CLASS_NAME,
+    "cr",               // 10
+    UNUSED_CLASS_NAME,
+    "dr",               // 12
+    UNUSED_CLASS_NAME,
+    "segment",          // 14
+    "xmm",              // 15
+    UNUSED_CLASS_NAME,
+    UNUSED_CLASS_NAME,
+    UNUSED_CLASS_NAME,
+    "ymm",              // 19
+    UNUSED_CLASS_NAME,
+    UNUSED_CLASS_NAME,
+    UNUSED_CLASS_NAME,
+    "zmm",              // 23
+    UNUSED_CLASS_NAME,
+    UNUSED_CLASS_NAME,
+    UNUSED_CLASS_NAME,
+    "x87-stack",        // 27
+    "mmx",              // 28
+    "k",                // 29
+    "eip",              // 30
+    "rip",              // 31
+    "eflags",           // 32
+    "rflags",           // 33
+];
+
+/// high-level register classes in an x86 machine, such as "8-byte general purpose", "xmm", "x87",
+/// and so on. constants in this module are useful for inspecting the register class of a decoded
+/// instruction. as an example:
+/// ```
+/// use yaxpeax_x86::long_mode::{self as amd64};
+/// use yaxpeax_x86::long_mode::{Opcode, Operand, RegisterClass};
+/// use yaxpeax_arch::{Decoder, U8Reader};
+///
+/// let movsx_eax_cl = &[0x0f, 0xbe, 0xc1];
+/// let decoder = amd64::InstDecoder::default();
+/// let instruction = decoder
+/// .decode(&mut U8Reader::new(movsx_eax_cl))
+/// .expect("can decode");
+///
+/// assert_eq!(instruction.opcode(), Opcode::MOVSX);
+///
+/// fn show_register_class_info(regclass: RegisterClass) {
+/// match regclass {
+/// amd64::register_class::D => {
+/// println!(" and is a dword register");
+/// }
+/// amd64::register_class::B => {
+/// println!(" and is a byte register");
+/// }
+/// other => {
+/// panic!("unexpected and invalid register class {:?}", other);
+/// }
+/// }
+/// }
+///
+/// if let Operand::Register(regspec) = instruction.operand(0) {
+/// #[cfg(feature="fmt")]
+/// println!("first operand is {}", regspec);
+/// show_register_class_info(regspec.class());
+/// }
+///
+/// if let Operand::Register(regspec) = instruction.operand(1) {
+/// #[cfg(feature="fmt")]
+/// println!("second operand is {}", regspec);
+/// show_register_class_info(regspec.class());
+/// }
+/// ```
+///
+/// this is preferable to alternatives like checking register names against a known list: a
+/// register class is one byte and "is qword general-purpose" can then be a simple one-byte
+/// compare, instead of 16 string compares.
+///
+/// `yaxpeax-x86` does not attempt to further distinguish between, for example, register
+/// suitability as operands. as an example, `cl` is only a byte register, with no additional
+/// register class to describe its use as an implicit shift operand.
+pub mod register_class {
+ use super::{RegisterBank, RegisterClass};
+ /// quadword registers: rax through r15
+ pub const Q: RegisterClass = RegisterClass { kind: RegisterBank::Q };
+ /// doubleword registers: eax through r15d
+ pub const D: RegisterClass = RegisterClass { kind: RegisterBank::D };
+ /// word registers: ax through r15w
+ pub const W: RegisterClass = RegisterClass { kind: RegisterBank::W };
+ /// byte registers: al, cl, dl, bl, ah, ch, dh, bh. `B` registers do *not* have a rex prefix.
+ pub const B: RegisterClass = RegisterClass { kind: RegisterBank::B };
+ /// byte registers with rex prefix present: al through r15b. `RB` registers have a rex prefix.
+ pub const RB: RegisterClass = RegisterClass { kind: RegisterBank::rB };
+ /// control registers cr0 through cr15.
+ pub const CR: RegisterClass = RegisterClass { kind: RegisterBank::CR};
+ /// debug registers dr0 through dr15.
+ pub const DR: RegisterClass = RegisterClass { kind: RegisterBank::DR };
+ /// segment registers es, cs, ss, ds, fs, gs.
+ pub const S: RegisterClass = RegisterClass { kind: RegisterBank::S };
+ /// xmm registers xmm0 through xmm31.
+ pub const X: RegisterClass = RegisterClass { kind: RegisterBank::X };
+ /// ymm registers ymm0 through ymm31.
+ pub const Y: RegisterClass = RegisterClass { kind: RegisterBank::Y };
+ /// zmm registers zmm0 through zmm31.
+ pub const Z: RegisterClass = RegisterClass { kind: RegisterBank::Z };
+ /// x87 floating point stack entries st(0) through st(7).
+ pub const ST: RegisterClass = RegisterClass { kind: RegisterBank::ST };
+ /// mmx registers mm0 through mm7.
+ pub const MM: RegisterClass = RegisterClass { kind: RegisterBank::MM };
+ /// `avx512` mask registers k0 through k7.
+ pub const K: RegisterClass = RegisterClass { kind: RegisterBank::K };
+ /// the full instruction pointer register.
+ pub const RIP: RegisterClass = RegisterClass { kind: RegisterBank::RIP };
+ /// the low 32 bits of `rip`.
+ pub const EIP: RegisterClass = RegisterClass { kind: RegisterBank::EIP };
+ /// the full cpu flags register.
+ pub const RFLAGS: RegisterClass = RegisterClass { kind: RegisterBank::RFlags };
+ /// the low 32 bits of rflags.
+ pub const EFLAGS: RegisterClass = RegisterClass { kind: RegisterBank::EFlags };
+}
+
+impl RegisterClass {
+    /// return a human-friendly name for this register class
+    pub fn name(&self) -> &'static str {
+        // the bank's discriminant is the index of its name.
+        let idx = self.kind as usize;
+        REGISTER_CLASS_NAMES[idx]
+    }
+
+    /// return the size of this register class, in bytes
+    pub fn width(&self) -> u8 {
+        // banks are grouped by width; there is no wildcard arm, so a new bank must be given a
+        // width here.
+        match self.kind {
+            RegisterBank::B |
+            RegisterBank::rB => 1,
+
+            RegisterBank::W |
+            RegisterBank::S => 2,
+
+            RegisterBank::D |
+            RegisterBank::EIP |
+            RegisterBank::EFlags => 4,
+
+            RegisterBank::Q |
+            RegisterBank::CR |
+            RegisterBank::DR |
+            RegisterBank::RIP |
+            RegisterBank::RFlags |
+            RegisterBank::MM |
+            RegisterBank::K => 8,
+
+            RegisterBank::ST => 10,
+            RegisterBank::X => 16,
+            RegisterBank::Y => 32,
+            RegisterBank::Z => 64,
+        }
+    }
+}
+
+// the bank a register belongs to. the explicit discriminants double as indices into
+// `REGISTER_CLASS_NAMES` (see `RegisterClass::name`), so the numbering here and the layout of
+// that table have to change together.
+#[allow(non_camel_case_types)]
+#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))]
+#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
+enum RegisterBank {
+ Q = 0, D = 2, W = 4, B = 6, rB = 8, // Quadword, Dword, Word, Byte
+ CR = 10, DR = 12, S = 14, EIP = 30, RIP = 31, EFlags = 32, RFlags = 33, // Control reg, Debug reg, Selector, ...
+ X = 15, Y = 19, Z = 23, // XMM, YMM, ZMM
+ ST = 27, MM = 28, // ST, MM regs (x87, mmx)
+ K = 29, // AVX512 mask registers
+}
+
+// long-mode, protected-mode, and real-mode each declare this enum identically, so translating
+// one of theirs into this one can be a "simple" transmute. keep the discriminants in sync.
+/// the segment register used by the corresponding instruction.
+///
+/// this is usually `ds`, unless overridden. some instructions always use a specific segment
+/// register no matter which segment prefixes are present; for those, `yaxpeax-x86` reports the
+/// segment register a physical processor would actually use.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub enum Segment {
+    DS = 0,
+    CS = 1,
+    ES = 2,
+    FS = 3,
+    GS = 4,
+    SS = 5,
+}
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+struct EvexData {
+ // layout, high to low: bit 7 present, bit 6 R' (rp), bit 5 L' (lp), bit 4 z, bit 3 b, bits 2..=0 aaa
+ bits: u8,
+}
+
+// `Prefixes` should be layout-compatible with 64-bit prefixes, and convertible from 32-bit and
+// 16-bit forms. this is a distinct type from 64-bit prefixes in case they stop being
+// layout-compatible one day.
+//
+// this type must be `pub`: it is the type of the public `Instruction::prefixes` field, and a
+// private type there is a private-in-public error.
+/// the prefixes on an instruction.
+///
+/// `rep`, `repnz`, `lock`, and segment override prefixes are directly accessible here. `rex`,
+/// `vex`, and `evex` prefixes are available through their associated helpers.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct Prefixes {
+    // flag bits: 0x01 operand-size, 0x02 address-size, 0x04 lock, 0x30 rep/repnz selector.
+    bits: u8,
+    // raw `rex` bits; the same byte is reinterpreted for `vex`/`evex` by the `*_unchecked`
+    // helpers.
+    rex: PrefixRex,
+    // the segment selected by prefixes, `ds` when there is no override.
+    segment: Segment,
+    // `evex`-only data: mask register, broadcast, `z`, `L'`, `R'`.
+    evex_data: EvexData,
+}
+
+/// the `avx512`-related data from an [`evex`](https://en.wikipedia.org/wiki/EVEX_prefix) prefix.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct PrefixEvex {
+    vex: PrefixVex,
+    evex_data: EvexData,
+}
+
+impl PrefixEvex {
+    /// whether the underlying `evex` data is marked as present.
+    fn present(&self) -> bool {
+        let data = self.evex_data;
+        data.present()
+    }
+
+    /// the portion of the `evex` prefix shared with `vex` definitions: the `L`, `W`, `R`, `X`,
+    /// and `B` bits.
+    pub fn vex(&self) -> PrefixVex {
+        let PrefixEvex { vex, .. } = *self;
+        vex
+    }
+
+    /// the `avx512` mask register in use. `0` indicates "no mask register".
+    pub fn mask_reg(&self) -> u8 {
+        let data = self.evex_data;
+        data.aaa()
+    }
+
+    /// the `evex` `b` bit.
+    pub fn broadcast(&self) -> bool {
+        let data = self.evex_data;
+        data.b()
+    }
+
+    /// the `evex` `z` bit, returned as-is.
+    ///
+    /// NOTE(review): despite the name, this is `true` exactly when `z` is set; confirm callers
+    /// treat a set `z` as zeroing rather than merging.
+    pub fn merge(&self) -> bool {
+        let data = self.evex_data;
+        data.z()
+    }
+
+    /// the `evex` `L'` bit.
+    pub fn lp(&self) -> bool {
+        let data = self.evex_data;
+        data.lp()
+    }
+
+    /// the `evex` `R'` bit.
+    pub fn rp(&self) -> bool {
+        let data = self.evex_data;
+        data.rp()
+    }
+}
+
+/// bits specified in an avx/avx2 [`vex`](https://en.wikipedia.org/wiki/VEX_prefix) prefix, `L`, `W`, `R`, `X`, and `B`.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct PrefixVex {
+    bits: u8,
+}
+
+#[allow(dead_code)]
+impl PrefixVex {
+    /// `true` when the single-bit `mask` is set in this prefix's bits.
+    #[inline]
+    fn is_set(&self, mask: u8) -> bool {
+        self.bits & mask != 0
+    }
+
+    /// whether a `vex` prefix was recorded at all (bit `0x80`).
+    #[inline]
+    fn present(&self) -> bool {
+        self.is_set(0x80)
+    }
+
+    /// the `B` bit (`0x01`).
+    #[inline]
+    pub fn b(&self) -> bool {
+        self.is_set(0x01)
+    }
+
+    /// the `X` bit (`0x02`).
+    #[inline]
+    pub fn x(&self) -> bool {
+        self.is_set(0x02)
+    }
+
+    /// the `R` bit (`0x04`).
+    #[inline]
+    pub fn r(&self) -> bool {
+        self.is_set(0x04)
+    }
+
+    /// the `W` bit (`0x08`).
+    #[inline]
+    pub fn w(&self) -> bool {
+        self.is_set(0x08)
+    }
+
+    /// the `L` bit (`0x10`).
+    #[inline]
+    pub fn l(&self) -> bool {
+        self.is_set(0x10)
+    }
+
+    /// the compressed-displacement flag (bit `0x20`).
+    #[inline]
+    fn compressed_disp(&self) -> bool {
+        self.is_set(0x20)
+    }
+}
+
+/// bits specified in an x86_64
+/// [`rex`](https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix) prefix.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct PrefixRex {
+ bits: u8 // b/x/r/w live in 0x01/0x02/0x04/0x08; `present()` requires the top two bits to be `01`
+}
+
+impl Prefixes {
+    /// build prefixes from raw flag `bits`, with no `rex`/`vex`/`evex` data recorded and the
+    /// segment set to `ds`.
+    fn new(bits: u8) -> Prefixes {
+        Prefixes {
+            bits,
+            rex: PrefixRex { bits: 0 },
+            segment: Segment::DS,
+            evex_data: EvexData { bits: 0 },
+        }
+    }
+
+    /// the two-bit rep selector, still in place at `0x30`.
+    #[inline]
+    fn rep_selector(&self) -> u8 {
+        self.bits & 0x30
+    }
+
+    /// `true` when the rep selector is `0x10`.
+    #[inline]
+    pub fn rep(&self) -> bool {
+        self.rep_selector() == 0x10
+    }
+
+    /// `true` when the rep selector is `0x30`.
+    #[inline]
+    pub fn repnz(&self) -> bool {
+        self.rep_selector() == 0x30
+    }
+
+    /// `true` when any rep selector bit is set.
+    #[inline]
+    pub fn rep_any(&self) -> bool {
+        self.rep_selector() != 0x00
+    }
+
+    /// the operand-size flag (bit `0x1`).
+    #[inline]
+    fn operand_size(&self) -> bool {
+        self.bits & 0x1 != 0
+    }
+
+    /// the address-size flag (bit `0x2`).
+    #[inline]
+    fn address_size(&self) -> bool {
+        self.bits & 0x2 != 0
+    }
+
+    /// the `lock` flag (bit `0x4`).
+    #[inline]
+    pub fn lock(&self) -> bool {
+        self.bits & 0x4 != 0
+    }
+
+    /// unlike the other segment helpers this is a setter: it selects `cs` as the segment.
+    #[inline]
+    pub fn cs(&mut self) {
+        self.segment = Segment::CS;
+    }
+
+    /// is the selected segment `ds`?
+    #[inline]
+    pub fn ds(&self) -> bool {
+        Segment::DS == self.segment
+    }
+
+    /// is the selected segment `es`?
+    #[inline]
+    pub fn es(&self) -> bool {
+        Segment::ES == self.segment
+    }
+
+    /// is the selected segment `fs`?
+    #[inline]
+    pub fn fs(&self) -> bool {
+        Segment::FS == self.segment
+    }
+
+    /// is the selected segment `gs`?
+    #[inline]
+    pub fn gs(&self) -> bool {
+        Segment::GS == self.segment
+    }
+
+    /// is the selected segment `ss`?
+    #[inline]
+    pub fn ss(&self) -> bool {
+        Segment::SS == self.segment
+    }
+
+    /// the stored `rex` bits, without checking that a `rex` prefix is present.
+    #[inline]
+    fn rex_unchecked(&self) -> PrefixRex {
+        self.rex
+    }
+
+    /// the `rex` prefix, if one is present.
+    #[inline]
+    pub fn rex(&self) -> Option<PrefixRex> {
+        Some(self.rex_unchecked()).filter(|rex| rex.present())
+    }
+
+    /// the stored `rex` bits viewed as `vex` bits, without checking presence.
+    #[inline]
+    fn vex_unchecked(&self) -> PrefixVex {
+        let bits = self.rex.bits;
+        PrefixVex { bits }
+    }
+
+    /// the `vex` prefix, if one is present.
+    #[inline]
+    pub fn vex(&self) -> Option<PrefixVex> {
+        Some(self.vex_unchecked()).filter(|vex| vex.present())
+    }
+
+    /// the stored `rex` bits and `evex` data viewed as an `evex` prefix, without checking
+    /// presence.
+    #[inline]
+    fn evex_unchecked(&self) -> PrefixEvex {
+        let vex = PrefixVex { bits: self.rex.bits };
+        PrefixEvex { vex, evex_data: self.evex_data }
+    }
+
+    /// the `evex` prefix, if one is present.
+    #[inline]
+    pub fn evex(&self) -> Option<PrefixEvex> {
+        Some(self.evex_unchecked()).filter(|evex| evex.present())
+    }
+}
+
+impl EvexData {
+    /// `true` when any bit of `mask` is set in the packed `evex` data.
+    fn any(&self, mask: u8) -> bool {
+        self.bits & mask != 0
+    }
+
+    /// whether `evex` data was recorded at all (bit 7).
+    pub(crate) fn present(&self) -> bool {
+        self.any(0b1000_0000)
+    }
+
+    /// the three-bit `aaa` field (bits 2..=0).
+    pub(crate) fn aaa(&self) -> u8 {
+        self.bits & 0b0000_0111
+    }
+
+    /// the `b` bit (bit 3).
+    pub(crate) fn b(&self) -> bool {
+        self.any(0b0000_1000)
+    }
+
+    /// the `z` bit (bit 4).
+    pub(crate) fn z(&self) -> bool {
+        self.any(0b0001_0000)
+    }
+
+    /// the `L'` bit (bit 5).
+    pub(crate) fn lp(&self) -> bool {
+        self.any(0b0010_0000)
+    }
+
+    /// the `R'` bit (bit 6).
+    pub(crate) fn rp(&self) -> bool {
+        self.any(0b0100_0000)
+    }
+}
+
+impl PrefixRex {
+    /// whether these bits describe a `rex` prefix: the top two bits must be exactly `01`.
+    #[inline]
+    fn present(&self) -> bool {
+        self.bits & 0xc0 == 0x40
+    }
+
+    /// the `B` bit (`0x01`).
+    #[inline]
+    pub fn b(&self) -> bool {
+        self.bits & 0x01 != 0
+    }
+
+    /// the `X` bit (`0x02`).
+    #[inline]
+    pub fn x(&self) -> bool {
+        self.bits & 0x02 != 0
+    }
+
+    /// the `R` bit (`0x04`).
+    #[inline]
+    pub fn r(&self) -> bool {
+        self.bits & 0x04 != 0
+    }
+
+    /// the `W` bit (`0x08`).
+    #[inline]
+    pub fn w(&self) -> bool {
+        self.bits & 0x08 != 0
+    }
+}
+
+#[cfg(feature = "std")]
+extern crate std;
+#[cfg(feature = "std")]
+impl std::error::Error for DecodeError {
+    /// forwards to the `yaxpeax_arch::DecodeError` description of this error.
+    fn description(&self) -> &str {
+        yaxpeax_arch::DecodeError::description(self)
+    }
+}
+
+#[allow(non_camel_case_types)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+enum OperandSpec { // how one operand slot of an `Instruction` is to be interpreted (see `Instruction::operand`)
+ Nothing,
+ // the register in regs[0]
+ RegRRR,
+ // the register in regs[0], EVEX-encoded: it may have a mask register, and is merged or
+ // zeroed.
+ RegRRR_maskmerge,
+ // the register in regs[0], EVEX-encoded (may have a mask register, is merged or
+ // zeroed). additionally, this instruction has exceptions suppressed with a potentially
+ // custom rounding mode.
+ RegRRR_maskmerge_sae,
+ // the register in regs[0], EVEX-encoded (may have a mask register, is merged or
+ // zeroed). additionally, this instruction has exceptions suppressed.
+ RegRRR_maskmerge_sae_noround,
+ // the register in modrm_mmm (eg modrm mod bits were 11)
+ RegMMM,
+ // like `RegRRR_maskmerge`, but the register is given by modrm's `mmm` bits; it may be masked.
+ RegMMM_maskmerge,
+ RegMMM_maskmerge_sae_noround,
+ // the register selected by vex-vvvv bits
+ RegVex,
+ RegVex_maskmerge,
+ // the register selected by a handful of avx2 vex-coded instructions,
+ // stuffed in imm4.
+ Reg4,
+ ImmI8,
+ ImmI16,
+ ImmI32,
+ ImmI64,
+ ImmU8,
+ ImmU16,
+ // ENTER is a two-immediate instruction, where the first immediate is stored in the disp field.
+ // for this case, a second immediate-style operand is needed.
+ // it turns out `insertq` and `extrq` are also two-immediate instructions, so this variant is
+ // generalized to cover them too.
+ ImmInDispField,
+ DispU32,
+ DispU64,
+ Deref,
+ Deref_esi,
+ Deref_edi,
+ Deref_rsi,
+ Deref_rdi,
+ RegDisp,
+ RegScale,
+ RegScaleDisp,
+ RegIndexBaseScale,
+ RegIndexBaseScaleDisp,
+ Deref_mask,
+ RegDisp_mask,
+ RegScale_mask,
+ RegScaleDisp_mask,
+ RegIndexBaseScale_mask,
+ RegIndexBaseScaleDisp_mask,
+
+ // variants added for protected mode
+ DispU16,
+ Deref_si,
+ Deref_di,
+ RegIndexBase_mask,
+ RegIndexBaseDisp_mask,
+ // u16:u{16,32} immediate address for a far call
+ AbsoluteFarAddress,
+
+ // variants added for real mode
+ RegIndexBase,
+ RegIndexBaseDisp,
+}
+
+/// an x86 instruction, as produced by this module's decoder from 64-, 32-, or 16-bit decoding.
+///
+/// typically an opcode will be inspected by [`Instruction::opcode()`], and an instruction has
+/// [`Instruction::operand_count()`] many operands. operands are provided by
+/// [`Instruction::operand()`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Instruction {
+ pub prefixes: Prefixes,
+ /* the `regs` array below stands in for these fields, presumably in this order (TODO confirm):
+ modrm_rrr: RegSpec,
+ modrm_mmm: RegSpec, // doubles as sib_base
+ sib_index: RegSpec,
+ vex_reg: RegSpec,
+ */
+ regs: [RegSpec; 4],
+ scale: u8,
+ length: u8, // reported by `LengthedInstruction::len`
+ operand_count: u8, // how many entries of `operands` are meaningful
+ operands: [OperandSpec; 4],
+ imm: u64,
+ disp: u64,
+ pub(crate) opcode: Opcode,
+ mem_size: u8, // `0` means "no memory access"; see `Instruction::mem_size`
+}
+
+impl yaxpeax_arch::Instruction for Instruction {
+    /// unconditionally reports `true`.
+    ///
+    /// TODO: this is incorrect! no check of the instruction is performed here.
+    fn well_defined(&self) -> bool { true }
+}
+
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+#[non_exhaustive]
+pub enum DecodeError { // the ways decoding an instruction can fail
+ ExhaustedInput, // described as "exhausted input"; reported by `data_exhausted()`
+ InvalidOpcode, // described as "invalid opcode"; reported by `bad_opcode()`
+ InvalidOperand, // described as "invalid operand"; reported by `bad_operand()`
+ InvalidPrefixes, // described as "invalid prefixes"
+ TooLong, // described as "too long"
+ IncompleteDecoder, // described as "the decoder is incomplete"
+}
+
+impl yaxpeax_arch::DecodeError for DecodeError {
+    /// did decoding fail because the input ran out?
+    fn data_exhausted(&self) -> bool {
+        *self == DecodeError::ExhaustedInput
+    }
+
+    /// did decoding fail because of an invalid opcode?
+    fn bad_opcode(&self) -> bool {
+        *self == DecodeError::InvalidOpcode
+    }
+
+    /// did decoding fail because of an invalid operand?
+    fn bad_operand(&self) -> bool {
+        *self == DecodeError::InvalidOperand
+    }
+
+    /// a short, fixed description of this error.
+    fn description(&self) -> &'static str {
+        match *self {
+            DecodeError::ExhaustedInput => "exhausted input",
+            DecodeError::InvalidOpcode => "invalid opcode",
+            DecodeError::InvalidOperand => "invalid operand",
+            DecodeError::InvalidPrefixes => "invalid prefixes",
+            DecodeError::TooLong => "too long",
+            DecodeError::IncompleteDecoder => "the decoder is incomplete",
+        }
+    }
+}
+
+impl LengthedInstruction for Instruction {
+    type Unit = AddressDiff<u64>;
+
+    /// the recorded length of this instruction.
+    #[inline]
+    fn len(&self) -> Self::Unit {
+        let length = u64::from(self.length);
+        AddressDiff::from_const(length)
+    }
+
+    /// the smallest length an instruction can have: one unit.
+    #[inline]
+    fn min_size() -> Self::Unit {
+        let smallest = 1;
+        AddressDiff::from_const(smallest)
+    }
+}
+
+impl Default for InstDecoder {
+    /// Builds an x86 decoder with every flag bit set, which is probably what you want.
+    ///
+    /// It attempts to match real processors in its interpretation of undefined sequences, and
+    /// decodes any instruction defined in any extension.
+    fn default() -> Self {
+        InstDecoder { flags: 0xffff_ffff_ffff_ffff }
+    }
+}
+
+impl Decoder<Arch> for InstDecoder {
+    /// decode one instruction from `words`, returning it by value.
+    ///
+    /// this starts from `Instruction::default()` and defers to `decode_into`; any decode error
+    /// is returned unchanged.
+    fn decode<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(&self, words: &mut T) -> Result<Instruction, <Arch as yaxpeax_arch::Arch>::DecodeError> {
+        let mut inst = crate::generic::Instruction::default();
+
+        self.decode_into(&mut inst, words)?;
+
+        Ok(inst)
+    }
+
+    /// decode one instruction from `words` into `instr`, discarding annotations.
+    ///
+    /// this is `decode_with_annotation` with a `NullSink`; it decodes straight into the
+    /// caller's instruction, so no scratch instruction is built here.
+    fn decode_into<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(&self, instr: &mut Instruction, words: &mut T) -> Result<(), <Arch as yaxpeax_arch::Arch>::DecodeError> {
+        self.decode_with_annotation(instr, words, &mut NullSink)
+    }
+}
+
+impl AnnotatingDecoder<Arch> for InstDecoder {
+    type FieldDescription = FieldDescription;
+
+    /// decode one instruction from `words` into `instr`, trying 64-bit decoding first, then
+    /// 32-bit, then 16-bit, and keeping the first interpretation that succeeds.
+    ///
+    /// up to 15 words are read from `words` before any decoding is attempted, regardless of how
+    /// long the decoded instruction turns out to be. only the words actually read are offered to
+    /// the mode-specific decoders, so truncated input is reported as such rather than being
+    /// decoded as if it were padded with zero bytes.
+    ///
+    /// annotations are not yet forwarded: `sink` currently receives nothing (see the TODOs).
+    ///
+    /// if every mode fails, the shared error is returned when all three agree; otherwise the
+    /// 32-bit decoder's error is returned.
+    fn decode_with_annotation<
+        T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>,
+        S: DescriptionSink<Self::FieldDescription>
+    >(&self, instr: &mut Instruction, words: &mut T, sink: &mut S) -> Result<(), <Arch as yaxpeax_arch::Arch>::DecodeError> {
+        // we only get one shot to read words from `&mut T`, so buffer up the max length of an
+        // x86 instruction's worth of bytes...
+        let mut buffer = [0u8; 15];
+        let mut available = 0;
+        for slot in buffer.iter_mut() {
+            match words.next() {
+                Ok(word) => {
+                    *slot = word;
+                    available += 1;
+                }
+                Err(_) => break,
+            }
+        }
+        // ... and only ever decode from the bytes that were really there. decoding from the
+        // whole zero-initialized buffer would invent trailing `0x00` bytes for short input.
+        let bytes = &buffer[..available];
+
+        // first try as a 64-bit instruction..
+        let decoder_64b = self.as_64b_best_effort();
+        let mut instr_64b = crate::long_mode::Instruction::default();
+        let mut sink_64b = NullSink;
+        let res = decoder_64b.decode_with_annotation(&mut instr_64b, &mut yaxpeax_arch::U8Reader::new(bytes), &mut sink_64b);
+        let err_64b = match res {
+            Ok(()) => {
+                // TODO: flush sink_64b to sink
+                *instr = instr_64b.to_generic();
+                return Ok(());
+            }
+            Err(e) => {
+                e.to_generic()
+            }
+        };
+
+        // next try as a 32-bit instruction..
+        let decoder_32b = self.as_32b_best_effort();
+        let mut instr_32b = crate::protected_mode::Instruction::default();
+        let mut sink_32b = NullSink;
+        let res = decoder_32b.decode_with_annotation(&mut instr_32b, &mut yaxpeax_arch::U8Reader::new(bytes), &mut sink_32b);
+        let err_32b = match res {
+            Ok(()) => {
+                // TODO: flush sink_32b to sink
+                *instr = instr_32b.to_generic();
+                return Ok(());
+            }
+            Err(e) => {
+                e.to_generic()
+            }
+        };
+
+        // lastly as a 16-bit instruction..
+        let decoder_16b = self.as_16b_best_effort();
+        let mut instr_16b = crate::real_mode::Instruction::default();
+        let mut sink_16b = NullSink;
+        let res = decoder_16b.decode_with_annotation(&mut instr_16b, &mut yaxpeax_arch::U8Reader::new(bytes), &mut sink_16b);
+        let err_16b = match res {
+            Ok(()) => {
+                // TODO: flush sink_16b to sink
+                *instr = instr_16b.to_generic();
+                return Ok(());
+            }
+            Err(e) => {
+                e.to_generic()
+            }
+        };
+
+        // if all errors are the same, just return it. if the errors do not all agree, either:
+        // * they disagree by having different strings associated (64b/32b/16b-specific text)
+        // or
+        // * they disagree semantically due to differences in 64b/32b/16b decoding.
+        //
+        // for generic decoding, we try 64-bit decoding first, falling back to 32-bit and 16-bit as
+        // a last-ditch, but 16-bit decoding is *probably* not what a user of yaxpeax-x86 intends
+        // to decode. for many errors, 64b and 32b versions should compare the same and get the
+        // first arm here, but for some they might not quite be the same. since 32b is the
+        // likely-intended fallback mode and we've fallen back (and through it) in trying to
+        // decode, we'll return that error in the case of disagreements. there's no direct way to
+        // access a 16-bit decode error through the generic decode interface.
+        if err_64b == err_32b && err_32b == err_16b {
+            Err(err_64b)
+        } else {
+            Err(err_32b)
+        }
+    }
+}
+impl Default for Instruction {
+    /// the default instruction is the placeholder built by [`Instruction::invalid()`].
+    fn default() -> Self {
+        Self::invalid()
+    }
+}
+
+impl Instruction {
+    /// the `Opcode` of this instruction.
+    pub fn opcode(&self) -> Opcode {
+        let Instruction { opcode, .. } = *self;
+        opcode
+    }
+
+    /// the `Operand` at index `i`, built from that slot's operand spec.
+    ///
+    /// panics if the index is `>= 4`.
+    pub fn operand(&self, i: u8) -> Operand {
+        assert!(i < 4);
+        let spec = self.operands[i as usize];
+        Operand::from_spec(self, spec)
+    }
+
+    /// how many operands this instruction has. handy for walking an instruction's operands
+    /// generically.
+    pub fn operand_count(&self) -> u8 {
+        let Instruction { operand_count, .. } = *self;
+        operand_count
+    }
+
+    /// is operand `i` an actual operand? always `false` for `i >= inst.operand_count()`.
+    ///
+    /// panics if the index is `>= 4`.
+    pub fn operand_present(&self, i: u8) -> bool {
+        assert!(i < 4);
+        if i >= self.operand_count {
+            return false;
+        }
+
+        self.operands[i as usize] != OperandSpec::Nothing
+    }
+
+    /// the memory access information for this instruction, or `None` if it does not access
+    /// memory.
+    ///
+    /// the returned `MemoryAccessSize` may say the size of accessed memory is indeterminate;
+    /// this happens for `xsave/xrestor`-style instructions, whose operation size depends on the
+    /// physical processor.
+    pub fn mem_size(&self) -> Option<MemoryAccessSize> {
+        match self.mem_size {
+            0 => None,
+            size => Some(MemoryAccessSize { size }),
+        }
+    }
+
+    /// an instruction representing nothing in particular: a `NOP` with no operands, no prefixes,
+    /// and zero length. mostly useful as a starting value to hand to `decode_into`.
+    pub fn invalid() -> Instruction {
+        Instruction {
+            prefixes: Prefixes::new(0),
+            regs: [RegSpec::rax(); 4],
+            scale: 0,
+            length: 0,
+            operand_count: 0,
+            operands: [OperandSpec::Nothing; 4],
+            imm: 0,
+            disp: 0,
+            opcode: Opcode::NOP,
+            mem_size: 0,
+        }
+    }
+
+    /// the `Segment` that will *actually* be used to access the operand at index `op`, if a
+    /// segment is worth reporting for it.
+    ///
+    /// `stos`, `scas`, `lods`, `movs`, and `cmps` name specific segments for some operands
+    /// regardless of prefixes. for any other instruction, a segment is reported only for memory
+    /// operands and only when the selected segment is not `ds`.
+    pub fn segment_override_for_op(&self, op: u8) -> Option<Segment> {
+        let selected = self.prefixes.segment;
+
+        match (self.opcode, op) {
+            (Opcode::STOS, 0) | (Opcode::SCAS, 0) => Some(Segment::ES),
+            (Opcode::STOS, _) | (Opcode::SCAS, _) => None,
+
+            (Opcode::LODS, 1) => Some(selected),
+            (Opcode::LODS, _) => None,
+
+            (Opcode::MOVS, 0) => Some(Segment::ES),
+            (Opcode::MOVS, 1) => Some(selected),
+            (Opcode::MOVS, _) => None,
+
+            (Opcode::CMPS, 0) => Some(selected),
+            (Opcode::CMPS, 1) => Some(Segment::ES),
+            (Opcode::CMPS, _) => None,
+
+            _ => {
+                // most operands are pretty simple. the operand is looked up first, so an
+                // out-of-range `op` still panics here.
+                let is_memory = self.operands[op as usize].is_memory();
+                if is_memory && selected != Segment::DS {
+                    Some(selected)
+                } else {
+                    None
+                }
+            }
+        }
+    }
+
+    #[cfg(feature = "fmt")]
+    /// pair a reference to this instruction with a `DisplayStyle`, to be formatted later. the
+    /// documentation on [`display::DisplayStyle`] has more detail.
+    ///
+    /// ```
+    /// use yaxpeax_x86::long_mode::{InstDecoder, DisplayStyle};
+    ///
+    /// let decoder = InstDecoder::default();
+    /// let inst = decoder.decode_slice(&[0x33, 0xc1]).unwrap();
+    ///
+    /// assert_eq!("eax ^= ecx", inst.display_with(DisplayStyle::C).to_string());
+    /// assert_eq!("xor eax, ecx", inst.display_with(DisplayStyle::Intel).to_string());
+    /// ```
+    pub fn display_with<'a>(&'a self, style: display::DisplayStyle) -> display::InstructionDisplayer<'a> {
+        display::InstructionDisplayer { instr: self, style }
+    }
+
+    /// does this instruction include the `xacquire` hint for hardware lock elision?
+    pub fn xacquire(&self) -> bool {
+        if !self.prefixes.repnz() {
+            return false;
+        }
+
+        // xacquire is permitted on typical `lock` instructions, OR `xchg` with memory operand,
+        // regardless of `lock` prefix.
+        if self.prefixes.lock() {
+            return true;
+        }
+
+        match self.opcode {
+            Opcode::XCHG => {
+                self.operands[0] != OperandSpec::RegMMM && self.operands[1] != OperandSpec::RegMMM
+            }
+            _ => false,
+        }
+    }
+
+    /// does this instruction include the `xrelease` hint for hardware lock elision?
+    pub fn xrelease(&self) -> bool {
+        if !self.prefixes.rep() {
+            return false;
+        }
+
+        // xrelease is permitted on typical `lock` instructions, OR `xchg` with memory operand,
+        // regardless of `lock` prefix. additionally, xrelease is permitted on some forms of mov.
+        if self.prefixes.lock() {
+            return true;
+        }
+
+        match self.opcode {
+            Opcode::XCHG => {
+                self.operands[0] != OperandSpec::RegMMM && self.operands[1] != OperandSpec::RegMMM
+            }
+            Opcode::MOV => {
+                let source_allowed = match self.operands[1] {
+                    OperandSpec::RegRRR
+                    | OperandSpec::ImmI8
+                    | OperandSpec::ImmI16
+                    | OperandSpec::ImmI32
+                    | OperandSpec::ImmI64 => true,
+                    _ => false,
+                };
+                self.operands[0] != OperandSpec::RegMMM && source_allowed
+            }
+            _ => false,
+        }
+    }
+}
+
+/// pick the general-purpose register bank for an operand `width` in bytes.
+///
+/// `width` is expected to be 1, 2, 4, or 8. `rex` only matters for byte-wide registers, where
+/// it selects `rB` instead of `B`.
+///
+/// callers must not pass a `width` whose trailing-zero count is 4 or more (for example `0` or
+/// `16`): that is undefined behavior in release builds and a panic in debug builds.
+#[inline]
+fn width_to_gp_reg_bank(width: u8, rex: bool) -> RegisterBank {
+    const BANK_LUT: [RegisterBank; 8] = [
+        RegisterBank::B, RegisterBank::rB,
+        RegisterBank::W, RegisterBank::W,
+        RegisterBank::D, RegisterBank::D,
+        RegisterBank::Q, RegisterBank::Q,
+    ];
+
+    // transform (width, rex) into an index into a LUT, instead of branching as `match` would.
+    let index = ((width.trailing_zeros() << 1) | (rex as u32)) as usize;
+
+    // catch violations of the width invariant in debug builds, before the unchecked path below
+    // can be reached.
+    debug_assert!(index < BANK_LUT.len(), "unsupported general-purpose register width: {}", width);
+
+    // SAFETY: for the supported widths (1, 2, 4, 8) `trailing_zeros()` is at most 3, so `index`
+    // is at most 7 and the lookup always succeeds; the fallback is never reached.
+    *BANK_LUT.get(index).unwrap_or_else(|| unsafe { unreachable_unchecked() })
+}
+
+/// a wrapper to hide internal library implementation details. it is only useful for the inner
+/// content's `Display` impl, which itself is unstable and suitable only for human consumption.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct OperandCodeWrapper { code: () } // the wrapped content is currently just `()`
+// TODO: something with OperandCodeWrapper...
+
+/// the actual description for a selection of bits involved in decoding a [`generic::Instruction`].
+///
+/// TODO: adjust wording w.r.t generic instructions, this can't be entirely precise..!
+/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full
+/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable
+/// and only useful for displaying for human consumption.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum InnerDescription {
+ /// the literal byte read for a `rex` prefix, `0x4_`.
+ RexPrefix(u8),
+ /// the segment selected by a segment override prefix. this is not necessarily the actual
+ /// segment used in the instruction's memory accesses, if any are made.
+ SegmentPrefix(Segment),
+ /// the opcode read for this instruction. this may be reported multiple times in an instruction
+ /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two
+ /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from
+ /// decoding the same instruction. this invariant is not well-tested, and violations may occur
+ /// in practice.
+ Opcode(Opcode),
+ /// the operand code indicating how to read operands for this instruction. this is an internal
+ /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the
+ /// decoding process. `OperandCode` names are unstable, and this variant is only useful for
+ /// displaying for human consumption.
+ OperandCode(OperandCodeWrapper),
+ /// a decoded register: a name for the bits used to decode it, the register number those bits
+ /// specify, and the fully-constructed [`RegSpec`] that was decoded.
+ RegisterNumber(&'static str, u8, RegSpec),
+ /// a miscellaneous string describing some bits of the instruction. this may describe a prefix,
+ /// internal details of a prefix, error or constraints on an opcode, operand encoding details,
+ /// or other items involved in an instruction.
+ Misc(&'static str),
+ /// a number involved in the instruction: typically either a displacement or immediate. the
+ /// string describes which. the `i64` member is typically a sign-extended value from the
+ /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign
+ /// extending to `-1`. bug reports are highly encouraged for unexpected values.
+ Number(&'static str, i64),
+ /// a boundary between two logically distinct sections of an instruction. these typically
+ /// separate the leading prefix string (if any), opcode, and operands (if any). the included
+ /// string describes which boundary this is. boundary names should not be considered stable,
+ /// and are useful at most for displaying for human consumption.
+ Boundary(&'static str),
+}
+
+impl InnerDescription {
+    /// attach `id` to this description, producing a `FieldDescription`.
+    fn with_id(self, id: u32) -> FieldDescription {
+        FieldDescription { id, desc: self }
+    }
+}
+
+cfg_if::cfg_if! {
+    if #[cfg(feature="fmt")] {
+        // with formatting support, each kind of description gets its own human-oriented text.
+        impl fmt::Display for InnerDescription {
+            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+                match self {
+                    InnerDescription::RexPrefix(bits) => {
+                        // one letter per set rex bit, in w/r/x/b order; `-` where a bit is clear.
+                        let flag = |mask: u8, letter: &'static str| -> &'static str {
+                            if bits & mask != 0 { letter } else { "-" }
+                        };
+                        write!(
+                            f,
+                            "rex prefix: {}{}{}{}",
+                            flag(0x8, "w"),
+                            flag(0x4, "r"),
+                            flag(0x2, "x"),
+                            flag(0x1, "b"),
+                        )
+                    }
+                    InnerDescription::SegmentPrefix(segment) => {
+                        write!(f, "segment override: {}", segment)
+                    }
+                    InnerDescription::Opcode(opc) => {
+                        write!(f, "opcode `{}`", opc)
+                    }
+                    InnerDescription::OperandCode(wrapper) => {
+                        write!(f, "operand code `{:?}`", wrapper.code)
+                    }
+                    InnerDescription::RegisterNumber(name, num, reg) => {
+                        write!(f, "`{}` (`{}` selects register number {})", reg, name, num)
+                    }
+                    InnerDescription::Number(text, num) => {
+                        write!(f, "{}: {:#x}", text, num)
+                    }
+                    InnerDescription::Misc(text) | InnerDescription::Boundary(text) => {
+                        f.write_str(text)
+                    }
+                }
+            }
+        }
+    } else {
+        // without formatting support there is nothing useful to show; emit a fixed marker.
+        impl fmt::Display for InnerDescription {
+            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+                f.write_str("non-fmt build")
+            }
+        }
+    }
+}
+
+#[cfg_attr(feature="fmt", derive(Debug))]
+#[derive(Clone, PartialEq, Eq)]
+pub struct FieldDescription { // an `InnerDescription` paired with an id; built by `InnerDescription::with_id`
+ desc: InnerDescription, // what the described bits mean; exposed through `desc()`
+ id: u32, // returned by the `yaxpeax_arch` `FieldDescription::id` impl
+}
+
+impl FieldDescription {
+    /// borrow the description associated with this bitfield.
+    pub fn desc(&self) -> &InnerDescription {
+        let FieldDescription { desc, .. } = self;
+        desc
+    }
+}
+
+impl yaxpeax_arch::annotation::FieldDescription for FieldDescription {
+    /// the id this description was created with.
+    fn id(&self) -> u32 {
+        let FieldDescription { id, .. } = *self;
+        id
+    }
+
+    /// only `InnerDescription::Boundary` descriptions count as separators.
+    fn is_separator(&self) -> bool {
+        match self.desc {
+            InnerDescription::Boundary(_) => true,
+            _ => false,
+        }
+    }
+}
+
+impl fmt::Display for FieldDescription {
+    /// displays exactly as the wrapped `InnerDescription` does; the id is not shown.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        <InnerDescription as fmt::Display>::fmt(&self.desc, f)
+    }
+}