diff options
| author | iximeow <me@iximeow.net> | 2022-08-27 17:11:19 -0700 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2023-01-02 08:50:23 -0800 | 
| commit | 789797accee0caa6580fbba650c719a952945ac6 (patch) | |
| tree | 85fffbcc13a525bbea1df0464a85096744f0b3de /src | |
| parent | d2b4f3d1a454c7bbcc487ddfb2839b01dc1c9c9e (diff) | |
add a `generic` module for x86 disassembly
this module generally attempts to decode as 64-bit x86 instructions, on
the assumption they are the most likely-desired instructions, falling
back to 32-bit and then 16-bit decoding, in order.
translation from a 64-bit `long_mode::Instruction` to
`generic::Instruction` is close to free, where
`protected_mode::Instruction` and `real_mode::Instruction` may be a
little more costly in time but should still not be too bad.
docs still need much touching up. most docs reference the `long_mode`
structures and enums they're strongly inspired by.
Diffstat (limited to 'src')
| -rw-r--r-- | src/generic/display.rs | 977 | ||||
| -rw-r--r-- | src/generic/mod.rs | 1878 | 
2 files changed, 2855 insertions, 0 deletions
| diff --git a/src/generic/display.rs b/src/generic/display.rs new file mode 100644 index 0000000..61e58ac --- /dev/null +++ b/src/generic/display.rs @@ -0,0 +1,977 @@ +use core::fmt; +use crate::safer_unchecked::GetSaferUnchecked as _; + +use yaxpeax_arch::{Colorize, ShowContextual, NoColors, YaxColors}; +use yaxpeax_arch::display::*; + +use crate::MEM_SIZE_STRINGS; +use crate::generic::{RegSpec, Opcode, Operand, MergeMode, InstDecoder, Instruction, Segment, PrefixVex, OperandSpec}; + +impl fmt::Display for InstDecoder { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        if self == &InstDecoder::default() { +            return write!(f, "<all features>"); +        } else if self == &InstDecoder::minimal() { +            return write!(f, "<no features>"); +        } +        if self.sse3() { write!(f, "sse3 ")? } +        if self.ssse3() { write!(f, "ssse3 ")? } +        if self.monitor() { write!(f, "monitor ")? } +        if self.vmx() { write!(f, "vmx ")? } +        if self.fma3() { write!(f, "fma3 ")? } +        if self.cmpxchg16b() { write!(f, "cmpxchg16b ")? } +        if self.sse4_1() { write!(f, "sse4_1 ")? } +        if self.sse4_2() { write!(f, "sse4_2 ")? } +        if self.movbe() { write!(f, "movbe ")? } +        if self.popcnt() { write!(f, "popcnt ")? } +        if self.aesni() { write!(f, "aesni ")? } +        if self.xsave() { write!(f, "xsave ")? } +        if self.rdrand() { write!(f, "rdrand ")? } +        if self.sgx() { write!(f, "sgx ")? } +        if self.bmi1() { write!(f, "bmi1 ")? } +        if self.avx2() { write!(f, "avx2 ")? } +        if self.bmi2() { write!(f, "bmi2 ")? } +        if self.invpcid() { write!(f, "invpcid ")? } +        if self.mpx() { write!(f, "mpx ")? } +        if self.avx512_f() { write!(f, "avx512_f ")? } +        if self.avx512_dq() { write!(f, "avx512_dq ")? } +        if self.rdseed() { write!(f, "rdseed ")? } +        if self.adx() { write!(f, "adx ")? } +        if self.avx512_fma() { write!(f, "avx512_fma ")? } +        if self.pcommit() { write!(f, "pcommit ")? } +        if self.clflushopt() { write!(f, "clflushopt ")? } +        if self.clwb() { write!(f, "clwb ")? } +        if self.avx512_pf() { write!(f, "avx512_pf ")? } +        if self.avx512_er() { write!(f, "avx512_er ")? } +        if self.avx512_cd() { write!(f, "avx512_cd ")? } +        if self.sha() { write!(f, "sha ")? } +        if self.avx512_bw() { write!(f, "avx512_bw ")? } +        if self.avx512_vl() { write!(f, "avx512_vl ")? } +        if self.prefetchwt1() { write!(f, "prefetchwt1 ")? } +        if self.avx512_vbmi() { write!(f, "avx512_vbmi ")? } +        if self.avx512_vbmi2() { write!(f, "avx512_vbmi2 ")? } +        if self.gfni() { write!(f, "gfni ")? } +        if self.vaes() { write!(f, "vaes ")? } +        if self.pclmulqdq() { write!(f, "pclmulqdq ")? } +        if self.avx_vnni() { write!(f, "avx_vnni ")? } +        if self.avx512_bitalg() { write!(f, "avx512_bitalg ")? } +        if self.avx512_vpopcntdq() { write!(f, "avx512_vpopcntdq ")? } +        if self.avx512_4vnniw() { write!(f, "avx512_4vnniw ")? } +        if self.avx512_4fmaps() { write!(f, "avx512_4fmaps ")? } +        if self.cx8() { write!(f, "cx8 ")? } +        if self.syscall() { write!(f, "syscall ")? } +        if self.rdtscp() { write!(f, "rdtscp ")? } +        if self.abm() { write!(f, "abm ")? } +        if self.sse4a() { write!(f, "sse4a ")? } +        if self._3dnowprefetch() { write!(f, "_3dnowprefetch ")? } +        if self.xop() { write!(f, "xop ")? } +        if self.skinit() { write!(f, "skinit ")? } +        if self.tbm() { write!(f, "tbm ")? } +        if self.intel_quirks() { write!(f, "intel_quirks ")? } +        if self.amd_quirks() { write!(f, "amd_quirks ")? } +        if self.avx() { write!(f, "avx ")? } +        Ok(()) +    } +} + +impl fmt::Display for PrefixVex { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        if self.present() { +            write!(f, "vex:{}{}{}{}", +                if self.w() { "w" } else { "-" }, +                if self.r() { "r" } else { "-" }, +                if self.x() { "x" } else { "-" }, +                if self.b() { "b" } else { "-" }, +            ) +        } else { +            write!(f, "vex:none") +        } +    } +} + +impl fmt::Display for Segment { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        match self { +            Segment::CS => write!(f, "cs"), +            Segment::DS => write!(f, "ds"), +            Segment::ES => write!(f, "es"), +            Segment::FS => write!(f, "fs"), +            Segment::GS => write!(f, "gs"), +            Segment::SS => write!(f, "ss"), +        } +    } +} + +// register names are grouped by indices scaled by 16. +// xmm, ymm, zmm all get two indices. +const REG_NAMES: &[&'static str] = &[ +    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", +    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", +    "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", +    "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", +    "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", +    "es", "cs", "ss", "ds", "fs", "gs", "", "", +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", +    "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", +    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", +    "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31", +    "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", +    "st(0)", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", +    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", +    "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", +    "eip", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", +    "eflags", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", +]; + +pub(crate) fn regspec_label(spec: &RegSpec) -> &'static str { +    unsafe { REG_NAMES.get_kinda_unchecked((spec.num as u16 + ((spec.bank as u16) << 3)) as usize) } +} + +impl fmt::Display for RegSpec { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        f.write_str(regspec_label(self)) +    } +} + +impl fmt::Display for Operand { +    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { +        self.colorize(&NoColors, fmt) +    } +} + +impl <T: fmt::Write, Y: YaxColors> Colorize<T, Y> for Operand { +    fn colorize(&self, colors: &Y, f: &mut T) -> fmt::Result { +        match self { +            &Operand::ImmediateU8(imm) => { +                write!(f, "{}", colors.number(u8_hex(imm))) +            } +            &Operand::ImmediateI8(imm) => { +                write!(f, "{}", +                    colors.number(signed_i8_hex(imm))) +            }, +            &Operand::ImmediateU16(imm) => { +                write!(f, "{}", colors.number(u16_hex(imm))) +            } +            &Operand::ImmediateI16(imm) => { +                write!(f, "{}", +                    colors.number(signed_i16_hex(imm))) +            }, +            &Operand::ImmediateU32(imm) => { +                write!(f, "{}", colors.number(u32_hex(imm))) +            } +            &Operand::ImmediateI32(imm) => { +                write!(f, "{}", +                    colors.number(signed_i32_hex(imm))) +            }, +            &Operand::AbsoluteFarAddress { segment, address } => { +                write!(f, "{}:{}", +                    colors.number(u16_hex(segment as u16)), +                    colors.number(u32_hex(address as u32)), +                ) +            }, +            &Operand::Register(ref spec) => { +                f.write_str(regspec_label(spec)) +            } +            &Operand::RegisterMaskMerge(ref spec, ref mask, merge_mode) => { +                f.write_str(regspec_label(spec))?; +                if mask.num != 0 { +                    f.write_str("{")?; +                    f.write_str(regspec_label(mask))?; +                    f.write_str("}")?; +                } +                if let MergeMode::Zero = merge_mode { +                    f.write_str("{z}")?; +                } +                Ok(()) +            } +            &Operand::RegisterMaskMergeSae(ref spec, ref mask, merge_mode, sae_mode) => { +                f.write_str(regspec_label(spec))?; +                if mask.num != 0 { +                    f.write_str("{")?; +                    f.write_str(regspec_label(mask))?; +                    f.write_str("}")?; +                } +                if let MergeMode::Zero = merge_mode { +                    f.write_str("{z}")?; +                } +                f.write_str(sae_mode.label())?; +                Ok(()) +            } +            &Operand::RegisterMaskMergeSaeNoround(ref spec, ref mask, merge_mode) => { +                f.write_str(regspec_label(spec))?; +                if mask.num != 0 { +                    f.write_str("{")?; +                    f.write_str(regspec_label(mask))?; +                    f.write_str("}")?; +                } +                if let MergeMode::Zero = merge_mode { +                    f.write_str("{z}")?; +                } +                f.write_str("{sae}")?; +                Ok(()) +            } +            &Operand::DisplacementU16(imm) => { +                write!(f, "[{}]", colors.address(u16_hex(imm))) +            } +            &Operand::DisplacementU32(imm) => { +                write!(f, "[{}]", colors.address(u32_hex(imm))) +            } +            &Operand::RegDisp(ref spec, disp) => { +                write!(f, "[{} ", regspec_label(spec))?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]") +            }, +            &Operand::RegDeref(ref spec) => { +                f.write_str("[")?; +                f.write_str(regspec_label(spec))?; +                f.write_str("]") +            }, +            &Operand::RegScale(ref spec, scale) => { +                write!(f, "[{} * {}]", +                    regspec_label(spec), +                    colors.number(scale) +                ) +            }, +            &Operand::RegScaleDisp(ref spec, scale, disp) => { +                write!(f, "[{} * {} ", +                    regspec_label(spec), +                    colors.number(scale), +                )?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]") +            }, +            &Operand::RegIndexBase(ref base, ref index) => { +                f.write_str("[")?; +                f.write_str(regspec_label(base))?; +                f.write_str(" + ")?; +                f.write_str(regspec_label(index))?; +                f.write_str("]") +            } +            &Operand::RegIndexBaseDisp(ref base, ref index, disp) => { +                write!(f, "[{} + {} ", +                    regspec_label(base), +                    regspec_label(index), +                )?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]") +            }, +            &Operand::RegIndexBaseScale(ref base, ref index, scale) => { +                write!(f, "[{} + {} * {}]", +                    regspec_label(base), +                    regspec_label(index), +                    colors.number(scale) +                ) +            } +            &Operand::RegIndexBaseScaleDisp(ref base, ref index, scale, disp) => { +                write!(f, "[{} + {} * {} ", +                    regspec_label(base), +                    regspec_label(index), +                    colors.number(scale), +                )?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]") +            }, +            &Operand::RegDispMasked(ref spec, disp, ref mask_reg) => { +                write!(f, "[{} ", regspec_label(spec))?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]")?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            }, +            &Operand::RegDerefMasked(ref spec, ref mask_reg) => { +                f.write_str("[")?; +                f.write_str(regspec_label(spec))?; +                f.write_str("]")?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            }, +            &Operand::RegScaleMasked(ref spec, scale, ref mask_reg) => { +                write!(f, "[{} * {}]", +                    regspec_label(spec), +                    colors.number(scale) +                )?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            }, +            &Operand::RegScaleDispMasked(ref spec, scale, disp, ref mask_reg) => { +                write!(f, "[{} * {} ", +                    regspec_label(spec), +                    colors.number(scale), +                )?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]")?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            }, +            &Operand::RegIndexBaseMasked(ref base, ref index, ref mask_reg) => { +                f.write_str("[")?; +                f.write_str(regspec_label(base))?; +                f.write_str(" + ")?; +                f.write_str(regspec_label(index))?; +                f.write_str("]")?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            } +            &Operand::RegIndexBaseDispMasked(ref base, ref index, disp, ref mask_reg) => { +                write!(f, "[{} + {} ", +                    regspec_label(base), +                    regspec_label(index), +                )?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]")?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            }, +            &Operand::RegIndexBaseScaleMasked(ref base, ref index, scale, ref mask_reg) => { +                write!(f, "[{} + {} * {}]", +                    regspec_label(base), +                    regspec_label(index), +                    colors.number(scale) +                )?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            } +            &Operand::RegIndexBaseScaleDispMasked(ref base, ref index, scale, disp, ref mask_reg) => { +                write!(f, "[{} + {} * {} ", +                    regspec_label(base), +                    regspec_label(index), +                    colors.number(scale), +                )?; +                format_number_i32(colors, f, disp, NumberStyleHint::HexSignedWithSignSplit)?; +                write!(f, "]")?; +                write!(f, "{{{}}}", regspec_label(mask_reg)) +            }, +            &Operand::Nothing => { Ok(()) }, +        } +    } +} + +impl fmt::Display for Instruction { +    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { +        self.display_with(DisplayStyle::Intel).colorize(&NoColors, fmt) +    } +} + +impl<'instr> fmt::Display for InstructionDisplayer<'instr> { +    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { +        self.colorize(&NoColors, fmt) +    } +} + +/// enum controlling how `Instruction::display_with` renders instructions. `Intel` is more or less +/// intel syntax, though memory operand sizes are elided if they can be inferred from other +/// operands. +#[derive(Copy, Clone)] +pub enum DisplayStyle { +    /// intel-style syntax for instructions, like +    /// `add eax, [edx + ecx * 2 + 0x1234]` +    Intel, +    /// C-style syntax for instructions, like +    /// `eax += [edx + ecx * 2 + 0x1234]` +    C, +    // one might imagine an ATT style here, which is mostly interesting for reversing operand +    // order. +    // well. +    // it also complicates memory operands in an offset-only operand, and is just kind of awful, so +    // it's just not implemented yet. +    // ATT, +} + +/// implementation of [`Display`](fmt::Display) that renders instructions using a specified display +/// style. +pub struct InstructionDisplayer<'instr> { +    pub(crate) instr: &'instr Instruction, +    pub(crate) style: DisplayStyle, +} + +/* + * Can't implement this as accepting a formatter because rust + * doesn't let me build one outside println! or write! or whatever. + * + * can't write this as an intermediate struct because i refuse to copy + * all data into the struct, and having a function producing a struct with + * some lifetimes gets really hairy if it's from a trait - same GAT kind + * of nonsense as i saw with ContextRead, because someone could hold onto + * the dang intermediate struct forever. + * + * so write to some Write thing i guess. bite me. i really just want to + * stop thinking about how to support printing instructions... + */ +impl <'instr, T: fmt::Write, Y: YaxColors> Colorize<T, Y> for InstructionDisplayer<'instr> { +    fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result { +        // TODO: I DONT LIKE THIS, there is no address i can give contextualize here, +        // the address operand maybe should be optional.. +        self.contextualize(colors, 0, Some(&NoContext), out) +    } +} + +/// No per-operand context when contextualizing an instruction! +struct NoContext; + +impl Instruction { +    pub fn write_to<T: fmt::Write>(&self, out: &mut T) -> fmt::Result { +        self.display_with(DisplayStyle::Intel).contextualize(&NoColors, 0, Some(&NoContext), out) +    } +} + +fn contextualize_intel<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +    if instr.xacquire() { +        out.write_str("xacquire ")?; +    } +    if instr.xrelease() { +        out.write_str("xrelease ")?; +    } +    if instr.prefixes.lock() { +        out.write_str("lock ")?; +    } + +    if instr.prefixes.rep_any() { +        if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { +            if instr.prefixes.rep() { +                write!(out, "rep ")?; +            } else if instr.prefixes.repnz() { +                write!(out, "repnz ")?; +            } +        } +    } + +    out.write_str(instr.opcode.name())?; + +    if instr.opcode == Opcode::XBEGIN { +        if (instr.imm as i32) >= 0 { +            return write!(out, " $+{}", colors.number(signed_i32_hex(instr.imm as i32))); +        } else { +            return write!(out, " ${}", colors.number(signed_i32_hex(instr.imm as i32))); +        } +    } + +    if instr.operand_count > 0 { +        out.write_str(" ")?; + +        let x = Operand::from_spec(instr, instr.operands[0]); + +        const RELATIVE_BRANCHES: [Opcode; 21] = [ +            Opcode::JMP, Opcode::JECXZ, +            Opcode::LOOP, Opcode::LOOPZ, Opcode::LOOPNZ, +            Opcode::JO, Opcode::JNO, +            Opcode::JB, Opcode::JNB, +            Opcode::JZ, Opcode::JNZ, +            Opcode::JNA, Opcode::JA, +            Opcode::JS, Opcode::JNS, +            Opcode::JP, Opcode::JNP, +            Opcode::JL, Opcode::JGE, +            Opcode::JLE, Opcode::JG, +        ]; + +        if instr.operands[0] == OperandSpec::ImmI8 || instr.operands[0] == OperandSpec::ImmI32 { +            if RELATIVE_BRANCHES.contains(&instr.opcode) { +                return match x { +                    Operand::ImmediateI8(rel) => { +                        if rel >= 0 { +                            write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) +                        } else { +                            write!(out, "${}", colors.number(signed_i32_hex(rel as i32))) +                        } +                    } +                    Operand::ImmediateI32(rel) => { +                        if rel >= 0 { +                            write!(out, "$+{}", colors.number(signed_i32_hex(rel))) +                        } else { +                            write!(out, "${}", colors.number(signed_i32_hex(rel))) +                        } +                    } +                    _ => { unreachable!() } +                }; +            } +        } + +        if x.is_memory() { +            out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; +            out.write_str(" ")?; +        } + +        if let Some(prefix) = instr.segment_override_for_op(0) { +            write!(out, "{}:", prefix)?; +        } +        x.colorize(colors, out)?; + +        for i in 1..instr.operand_count { +            match instr.opcode { +                _ => { +                    match &instr.operands[i as usize] { +                        &OperandSpec::Nothing => { +                            return Ok(()); +                        }, +                        _ => { +                            out.write_str(", ")?; +                            let x = Operand::from_spec(instr, instr.operands[i as usize]); +                            if x.is_memory() { +                                out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; +                                out.write_str(" ")?; +                            } +                            if let Some(prefix) = instr.segment_override_for_op(i) { +                                write!(out, "{}:", prefix)?; +                            } +                            x.colorize(colors, out)?; +                            if let Some(evex) = instr.prefixes.evex() { +                                if evex.broadcast() && x.is_memory() { +                                    let scale = if instr.opcode == Opcode::VCVTPD2PS || instr.opcode == Opcode::VCVTTPD2UDQ || instr.opcode == Opcode::VCVTPD2UDQ || instr.opcode == Opcode::VCVTUDQ2PD || instr.opcode == Opcode::VCVTPS2PD || instr.opcode == Opcode::VCVTQQ2PS || instr.opcode == Opcode::VCVTDQ2PD || instr.opcode == Opcode::VCVTTPD2DQ || instr.opcode == Opcode::VFPCLASSPS || instr.opcode == Opcode::VFPCLASSPD || instr.opcode == Opcode::VCVTNEPS2BF16 || instr.opcode == Opcode::VCVTUQQ2PS || instr.opcode == Opcode::VCVTPD2DQ || instr.opcode == Opcode::VCVTTPS2UQQ || instr.opcode == Opcode::VCVTPS2UQQ || instr.opcode == Opcode::VCVTTPS2QQ || instr.opcode == Opcode::VCVTPS2QQ { +                                        if instr.opcode == Opcode::VFPCLASSPS || instr.opcode ==  Opcode::VCVTNEPS2BF16 { +                                            if evex.vex().l() { +                                                8 +                                            } else if evex.lp() { +                                                16 +                                            } else { +                                                4 +                                            } +                                        } else if instr.opcode == Opcode::VFPCLASSPD { +                                            if evex.vex().l() { +                                                4 +                                            } else if evex.lp() { +                                                8 +                                            } else { +                                                2 +                                            } +                                        } else { +                                            // vcvtpd2ps is "cool": in broadcast mode, it can read a +                                            // double-precision float (qword), resize to single-precision, +                                            // then broadcast that to the whole destination register. this +                                            // means we need to show `xmm, qword [addr]{1to4}` if vector +                                            // size is 256. likewise, scale of 8 for the same truncation +                                            // reason if vector size is 512. +                                            // vcvtudq2pd is the same story. +                                            // vfpclassp{s,d} is a mystery to me. +                                            if evex.vex().l() { +                                                4 +                                            } else if evex.lp() { +                                                8 +                                            } else { +                                                2 +                                            } +                                        } +                                    } else { +                                        // this should never be `None` - that would imply two +                                        // memory operands for a broadcasted operation. +                                        if let Some(width) = Operand::from_spec(instr, instr.operands[i as usize - 1]).width() { +                                            width / instr.mem_size +                                        } else { +                                            0 +                                        } +                                    }; +                                    write!(out, "{{1to{}}}", scale)?; +                                } +                            } +                        } +                    } +                } +            } +        } +    } +    Ok(()) +} + +fn contextualize_c<T: fmt::Write, Y: YaxColors>(instr: &Instruction, colors: &Y, _address: u32, _context: Option<&NoContext>, out: &mut T) -> fmt::Result { +    let mut brace_count = 0; + +    let mut prefixed = false; + +    if instr.xacquire() { +        out.write_str("xacquire ")?; +        prefixed = true; +    } +    if instr.xrelease() { +        out.write_str("xrelease ")?; +        prefixed = true; +    } +    if instr.prefixes.lock() { +        out.write_str("lock ")?; +        prefixed = true; +    } + +    if prefixed { +        out.write_str("{ ")?; +        brace_count += 1; +    } + +    if instr.prefixes.rep_any() { +        if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&instr.opcode) { +            let word_str = match instr.mem_size { +                1 => "byte", +                2 => "word", +                4 => "dword", +                8 => "qword", +                _ => { unreachable!("invalid word size") } +            }; + +            // only a few of you actually use the prefix... +            if instr.prefixes.rep() { +                out.write_str("rep ")?; +            } else if instr.prefixes.repnz() { +                out.write_str("repnz ")?; +            } // TODO: other rep kinds? + +            out.write_str(word_str)?; +            out.write_str(" { ")?; +            brace_count += 1; +        } +    } + +    fn write_jmp_operand<T: fmt::Write, Y: YaxColors>(op: Operand, colors: &Y, out: &mut T) -> fmt::Result { +        match op { +            Operand::ImmediateI8(rel) => { +                if rel >= 0 { +                    write!(out, "$+{}", colors.number(signed_i32_hex(rel as i32))) +                } else { +                    write!(out, "${}", colors.number(signed_i32_hex(rel as i32))) +                } +            } +            Operand::ImmediateI32(rel) => { +                if rel >= 0 { +                    write!(out, "$+{}", colors.number(signed_i32_hex(rel))) +                } else { +                    write!(out, "${}", colors.number(signed_i32_hex(rel))) +                } +            } +            other => { +                write!(out, "{}", other) +            } +        } +    } + +    match instr.opcode { +        Opcode::Invalid => { out.write_str("invalid")?; }, +        Opcode::MOVS => { +            out.write_str("es:[edi++] = ds:[esi++]")?; +        }, +        Opcode::CMPS => { +            out.write_str("eflags = flags(ds:[esi++] - es:[edi++])")?; +        }, +        Opcode::LODS => { +            // TODO: size +            out.write_str("rax = ds:[esi++]")?; +        }, +        Opcode::STOS => { +            // TODO: size +            out.write_str("es:[edi++] = rax")?; +        }, +        Opcode::INS => { +            // TODO: size +            out.write_str("es:[edi++] = port(dx)")?; +        }, +        Opcode::OUTS => { +            // TODO: size +            out.write_str("port(dx) = ds:[esi++]")?; +        } +        Opcode::ADD => { +            write!(out, "{} += {}", instr.operand(0), instr.operand(1))?; +        } +        Opcode::OR => { +            write!(out, "{} |= {}", instr.operand(0), instr.operand(1))?; +        } +        Opcode::ADC => { +            write!(out, "{} += {} + eflags.cf", instr.operand(0), instr.operand(1))?; +        } +        Opcode::ADCX => { +            write!(out, "{} += {} + eflags.cf", instr.operand(0), instr.operand(1))?; +        } +        Opcode::ADOX => { +            write!(out, "{} += {} + eflags.of", instr.operand(0), instr.operand(1))?; +        } +        Opcode::SBB => { +            write!(out, "{} -= {} + eflags.cf", instr.operand(0), instr.operand(1))?; +        } +        Opcode::AND => { +            write!(out, "{} &= {}", instr.operand(0), instr.operand(1))?; +        } +        Opcode::XOR => { +            write!(out, "{} ^= {}", instr.operand(0), instr.operand(1))?; +        } +        Opcode::SUB => { +            write!(out, "{} -= {}", instr.operand(0), instr.operand(1))?; +        } +        Opcode::CMP => { +            write!(out, "eflags = flags({} - {})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::TEST => { +            write!(out, "eflags = flags({} & {})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::XADD => { +            write!(out, "({}, {}) = ({} + {}, {})", instr.operand(0), instr.operand(1), instr.operand(0), instr.operand(1), instr.operand(0))?; +        } +        Opcode::BT => { +            write!(out, "bt")?; +        } +        Opcode::BTS => { +            write!(out, "bts")?; +        } +        Opcode::BTC => { +            write!(out, "btc")?; +        } +        Opcode::BSR => { +            write!(out, "{} = msb({})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::BSF => { +            write!(out, "{} = lsb({}) (x86 bsf)", instr.operand(0), instr.operand(1))?; +        } +        Opcode::TZCNT => { +            write!(out, "{} = lsb({})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::MOV => { +            write!(out, "{} = {}", instr.operand(0), instr.operand(1))?; +        } +        Opcode::SAR => { +            write!(out, "{} = {} >>> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::SAL => { +            write!(out, "{} = {} <<< {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::SHR => { +            write!(out, "{} = {} >> {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::SHRX => { +            write!(out, "{} = {} >> {} (x86 shrx)", instr.operand(0), instr.operand(1), instr.operand(2))?; +        } +        Opcode::SHL => { +            write!(out, "{} = {} << {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::SHLX => { +            write!(out, "{} = {} << {} (x86 shlx)", instr.operand(0), instr.operand(1), instr.operand(2))?; +        } +        Opcode::ROR => { +            write!(out, "{} = {} ror {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::RORX => { +            write!(out, "{} = {} ror {} (x86 rorx)", instr.operand(0), instr.operand(1), instr.operand(2))?; +        } +        Opcode::ROL => { +            write!(out, "{} = {} rol {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::RCR => { +            write!(out, "{} = {} rcr {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::RCL => { +            write!(out, "{} = {} rcl {}", instr.operand(0), instr.operand(0), instr.operand(1))?; +        } +        Opcode::PUSH => { +            write!(out, "push({})", instr.operand(0))?; +        } +        Opcode::POP => { +            write!(out, "{} = pop()", instr.operand(0))?; +        } +        Opcode::MOVD => { +            write!(out, "{} = movd({})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::MOVQ => { +            write!(out, "{} = movq({})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::MOVNTQ => { +            write!(out, "{} = movntq({})", instr.operand(0), instr.operand(1))?; +        } +        Opcode::INC => { +            if instr.operand(0).is_memory() { +                match instr.mem_size { +                    1 => { write!(out, "byte {}++", instr.operand(0))?; }, +                    2 => { write!(out, "word {}++", instr.operand(0))?; }, +                    4 => { write!(out, "dword {}++", instr.operand(0))?; }, +                    _ => { write!(out, "qword {}++", instr.operand(0))?; }, // sizes that are not 1, 2, or 4, *better* be 8. +                } +            } else { +                write!(out, "{}++", instr.operand(0))?; +            } +        } +        Opcode::DEC => { +            if instr.operand(0).is_memory() { +                match instr.mem_size { +                    1 => { write!(out, "byte {}--", instr.operand(0))?; }, +                    2 => { write!(out, "word {}--", instr.operand(0))?; }, +                    4 => { write!(out, "dword {}--", instr.operand(0))?; }, +                    _ => { write!(out, "qword {}--", instr.operand(0))?; }, // sizes that are not 1, 2, or 4, *better* be 8. +                } +            } else { +                write!(out, "{}--", instr.operand(0))?; +            } +        } +        Opcode::JMP => { +            out.write_str("jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JECXZ => { +            out.write_str("if ecx == 0 then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::LOOP => { +            out.write_str("ecx--; if ecx != 0 then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::LOOPZ => { +            out.write_str("ecx--; if ecx != 0 and zero(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::LOOPNZ => { +            out.write_str("ecx--; if ecx != 0 and !zero(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JO => { +            out.write_str("if _(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JNO => { +            out.write_str("if _(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JB => { +            out.write_str("if /* unsigned */ below(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JNB => { +            out.write_str("if /* unsigned */ above_or_equal(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JZ => { +            out.write_str("if zero(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JNZ => { +            out.write_str("if !zero(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JNA => { +            out.write_str("if /* unsigned */ below_or_equal(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JA => { +            out.write_str("if /* unsigned */ above(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JS => { +            out.write_str("if signed(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JNS => { +            out.write_str("if !signed(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JP => { +            out.write_str("if parity(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JNP => { +            out.write_str("if !parity(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JL => { +            out.write_str("if /* signed */ less(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JGE => { +            out.write_str("if /* signed */ greater_or_equal(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JLE => { +            out.write_str("if /* signed */ less_or_equal(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::JG => { +            out.write_str("if /* signed */ greater(rflags) then jmp ")?; +            write_jmp_operand(instr.operand(0), colors, out)?; +        }, +        Opcode::NOP => { +            write!(out, "nop")?; +        } +        _ => { +            if instr.operand_count() == 0 { +                write!(out, "{}()", instr.opcode())?; +            } else { +                write!(out, "{} = {}({}", instr.operand(0), instr.opcode(), instr.operand(0))?; +                let mut comma = true; +                for i in 1..instr.operand_count() { +                    if comma { +                        write!(out, ", ")?; +                    } +                    write!(out, "{}", instr.operand(i))?; +                    comma = true; +                } +                write!(out, ")")?; +            } +        } +    } + +    while brace_count > 0 { +        out.write_str(" }")?; +        brace_count -= 1; +    } + +    Ok(()) +} + +impl <'instr, T: fmt::Write, Y: YaxColors> ShowContextual<u32, NoContext, T, Y> for InstructionDisplayer<'instr> { +    fn contextualize(&self, colors: &Y, address: u32, context: Option<&NoContext>, out: &mut T) -> fmt::Result { +        let InstructionDisplayer { +            instr, +            style, +        } = self; + +        match style { +            DisplayStyle::Intel => { +                contextualize_intel(instr, colors, address, context, out) +            } +            DisplayStyle::C => { +                contextualize_c(instr, colors, address, context, out) +            } +        } +    } +} + +#[cfg(feature="std")] +impl <T: fmt::Write, Y: YaxColors> ShowContextual<u64, [Option<alloc::string::String>], T, Y> for Instruction { +    fn contextualize(&self, colors: &Y, _address: u64, context: Option<&[Option<alloc::string::String>]>, out: &mut T) -> fmt::Result { +        if self.prefixes.lock() { +            write!(out, "lock ")?; +        } + +        if [Opcode::MOVS, Opcode::CMPS, Opcode::LODS, Opcode::STOS, Opcode::INS, Opcode::OUTS].contains(&self.opcode) { +            // only a few of you actually use the prefix... +            if self.prefixes.rep() { +                write!(out, "rep ")?; +            } else if self.prefixes.repnz() { +                write!(out, "repnz ")?; +            } +        } + +        self.opcode.colorize(colors, out)?; + +        match context.and_then(|xs| xs[0].as_ref()) { +            Some(s) => { write!(out, " {}", s)?; }, +            None => { +                match self.operands[0] { +                    OperandSpec::Nothing => { +                        return Ok(()); +                    }, +                    _ => { +                        write!(out, " ")?; +                        if let Some(prefix) = self.segment_override_for_op(0) { +                            write!(out, "{}:", prefix)?; +                        } +                    } +                } +                let x = Operand::from_spec(self, self.operands[0]); +                x.colorize(colors, out)?; +            } +        }; +        for i in 1..self.operand_count { +            let i = i as usize; +            match context.and_then(|xs| xs[i].as_ref()) { +                Some(s) => { write!(out, ", {}", s)? } +                None => { +                    match &self.operands[i] { +                        &OperandSpec::Nothing => { +                            return Ok(()); +                        }, +                        _ => { +                            write!(out, ", ")?; +                            if let Some(prefix) = self.segment_override_for_op(1) { +                                write!(out, "{}:", prefix)?; +                            } +                            let x = Operand::from_spec(self, self.operands[i]); +                            x.colorize(colors, out)? +                        } +                    } +                } +            } +        } +        Ok(()) +    } +} diff --git a/src/generic/mod.rs b/src/generic/mod.rs new file mode 100644 index 0000000..8ea1ebd --- /dev/null +++ b/src/generic/mod.rs @@ -0,0 +1,1878 @@ +mod display; + +//#[cfg(feature = "fmt")] +//mod display; + +use crate::MemoryAccessSize; + +use core::cmp::PartialEq; +use crate::safer_unchecked::unreachable_kinda_unchecked as unreachable_unchecked; +pub use crate::generated::opcode::Opcode as Opcode; + +use yaxpeax_arch::{AddressDiff, Decoder, Reader, LengthedInstruction}; +use yaxpeax_arch::annotation::{AnnotatingDecoder, DescriptionSink, NullSink}; +use yaxpeax_arch::{DecodeError as ArchDecodeError}; + +use core::fmt; +impl fmt::Display for DecodeError { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        f.write_str(self.description()) +    } +} + +use core::hash::Hash; +use core::hash::Hasher; +impl Hash for RegSpec { +    fn hash<H: Hasher>(&self, state: &mut H) { +        let code = ((self.bank as u16) << 8) | (self.num as u16); +        code.hash(state); +    } +} + +/// the condition for a conditional instruction. +/// +/// these are only obtained through [`Opcode::condition()`]: +/// ``` +/// use yaxpeax_x86::long_mode::{Opcode, ConditionCode}; +/// +/// assert_eq!(Opcode::JB.condition(), Some(ConditionCode::B)); +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ConditionCode { +    O, +    NO, +    B, +    AE, +    Z, +    NZ, +    A, +    BE, +    S, +    NS, +    P, +    NP, +    L, +    GE, +    G, +    LE, +} + +macro_rules! register { +    ($bank:ident, $name:ident => $num:expr, $($tail:tt)+) => { +        #[inline] +        pub const fn $name() -> RegSpec { +            RegSpec { bank: RegisterBank::$bank, num: $num } +        } + +        register!($bank, $($tail)*); +    }; +    ($bank:ident, $name:ident => $num:expr) => { +        #[inline] +        pub const fn $name() -> RegSpec { +            RegSpec { bank: RegisterBank::$bank, num: $num } +        } +    }; +} + +#[allow(non_snake_case)] +impl RegSpec { +    /// the register `rip`. this register is in the class `rip`, which contains only it. +    pub const RIP: RegSpec = RegSpec::rip(); + +    /// the number of this register in its `RegisterClass`. +    /// +    /// for many registers this is a number in the name, but for registers harkening back to +    /// `x86_32`, the first eight registers are `rax`, `rcx`, `rdx`, `rbx`, `rsp`, `rbp`, `rsi`, +    /// and `rdi` (or `eXX` for the 32-bit forms, `XX` for 16-bit forms). +    pub fn num(&self) -> u8 { +        self.num +    } + +    /// the class of register this register is in. +    /// +    /// this corresponds to the register's size, but is by the register's usage in the instruction +    /// set; `rax` and `mm0` are the same size, but different classes (`Q`(word) and `MM` (mmx) +    /// respectively). +    pub fn class(&self) -> RegisterClass { +        RegisterClass { kind: self.bank } +    } + +    #[cfg(feature = "fmt")] +    /// return a human-friendly name for this register. the returned name is the same as would be +    /// used to render this register in an instruction. +    pub fn name(&self) -> &'static str { +        display::regspec_label(self) +    } + +    /// construct a `RegSpec` for x87 register `st(num)` +    #[inline] +    pub fn st(num: u8) -> RegSpec { +        if num >= 8 { +            panic!("invalid x87 reg st({})", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::ST +        } +    } + +    /// construct a `RegSpec` for xmm reg `num` +    #[inline] +    pub fn xmm(num: u8) -> RegSpec { +        if num >= 32 { +            panic!("invalid x86 xmm reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::X +        } +    } + +    /// construct a `RegSpec` for ymm reg `num` +    #[inline] +    pub fn ymm(num: u8) -> RegSpec { +        if num >= 32 { +            panic!("invalid x86 ymm reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::Y +        } +    } + +    /// construct a `RegSpec` for zmm reg `num` +    #[inline] +    pub fn zmm(num: u8) -> RegSpec { +        if num >= 32 { +            panic!("invalid x86 zmm reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::Z +        } +    } + +    /// construct a `RegSpec` for qword reg `num` +    #[inline] +    pub fn q(num: u8) -> RegSpec { +        if num >= 16 { +            panic!("invalid x86 qword reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::Q +        } +    } + +    /// construct a `RegSpec` for mask reg `num` +    #[inline] +    pub fn mask(num: u8) -> RegSpec { +        if num >= 8 { +            panic!("invalid x86 mask reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::K +        } +    } + +    /// construct a `RegSpec` for dword reg `num` +    #[inline] +    pub fn d(num: u8) -> RegSpec { +        if num >= 16 { +            panic!("invalid x86 dword reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::D +        } +    } + +    /// construct a `RegSpec` for word reg `num` +    #[inline] +    pub fn w(num: u8) -> RegSpec { +        if num >= 16 { +            panic!("invalid x86 word reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::W +        } +    } + +    /// construct a `RegSpec` for non-rex byte reg `num` +    #[inline] +    pub fn rb(num: u8) -> RegSpec { +        if num >= 16 { +            panic!("invalid x86 rex-byte reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::rB +        } +    } + +    /// construct a `RegSpec` for non-rex byte reg `num` +    #[inline] +    pub fn b(num: u8) -> RegSpec { +        if num >= 8 { +            panic!("invalid x86 non-rex byte reg {}", num); +        } + +        RegSpec { +            num, +            bank: RegisterBank::B +        } +    } + +    #[inline] +    fn from_parts(num: u8, extended: bool, bank: RegisterBank) -> RegSpec { +        RegSpec { +            num: num + if extended { 0b1000 } else { 0 }, +            bank: bank +        } +    } + +    #[inline] +    fn gp_from_parts(num: u8, extended: bool, width: u8, rex: bool) -> RegSpec { +        RegSpec { +            num: num + if extended { 0b1000 } else { 0 }, +            bank: width_to_gp_reg_bank(width, rex) +        } +    } + +    register!(RIP, rip => 0); +    register!(EIP, eip => 0); + +    register!(RFlags, rflags => 0); +    register!(EFlags, eflags => 0); + +    register!(S, es => 0, cs => 1, ss => 2, ds => 3, fs => 4, gs => 5); + +    register!(Q, +        rax => 0, rcx => 1, rdx => 2, rbx => 3, +        rsp => 4, rbp => 5, rsi => 6, rdi => 7, +        r8 => 8, r9 => 9, r10 => 10, r11 => 11, +        r12 => 8, r13 => 9, r14 => 14, r15 => 15 +    ); + +    register!(D, +        eax => 0, ecx => 1, edx => 2, ebx => 3, +        esp => 4, ebp => 5, esi => 6, edi => 7, +        r8d => 8, r9d => 9, r10d => 10, r11d => 11, +        r12d => 8, r13d => 9, r14d => 14, r15d => 15 +    ); + +    register!(W, +        ax => 0, cx => 1, dx => 2, bx => 3, +        sp => 4, bp => 5, si => 6, di => 7, +        r8w => 8, r9w => 9, r10w => 10, r11w => 11, +        r12w => 8, r13w => 9, r14w => 14, r15w => 15 +    ); + +    register!(B, +        al => 0, cl => 1, dl => 2, bl => 3, +        ah => 4, ch => 5, dh => 6, bh => 7 +    ); + +    register!(rB, +        spl => 4, bpl => 5, sil => 6, dil => 7, +        r8b => 8, r9b => 9, r10b => 10, r11b => 11, +        r12b => 8, r13b => 9, r14b => 14, r15b => 15 +    ); + +    #[inline] +    pub const fn zmm0() -> RegSpec { +        RegSpec { bank: RegisterBank::Z, num: 0 } +    } + +    #[inline] +    pub const fn ymm0() -> RegSpec { +        RegSpec { bank: RegisterBank::Y, num: 0 } +    } + +    #[inline] +    pub const fn xmm0() -> RegSpec { +        RegSpec { bank: RegisterBank::X, num: 0 } +    } + +    #[inline] +    pub const fn st0() -> RegSpec { +        RegSpec { bank: RegisterBank::ST, num: 0 } +    } + +    #[inline] +    pub const fn mm0() -> RegSpec { +        RegSpec { bank: RegisterBank::MM, num: 0 } +    } + +    /// return the size of this register, in bytes. +    #[inline] +    pub fn width(&self) -> u8 { +        self.class().width() +    } +} + +#[allow(non_camel_case_types)] +#[allow(dead_code)] +enum SizeCode { +    b, +    vd, +    vq, +    vqp +} + +/// an operand for an `x86_64` instruction. +/// +/// `Operand::Nothing` should be unreachable in practice; any such instructions should have an +/// operand count of 0 (or at least one fewer than the `Nothing` operand's position). +#[derive(Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum Operand { +    /// a sign-extended byte +    ImmediateI8(i8), +    /// a zero-extended byte +    ImmediateU8(u8), +    /// a sign-extended word +    ImmediateI16(i16), +    /// a zero-extended word +    ImmediateU16(u16), +    /// a sign-extended dword +    ImmediateI32(i32), +    /// a zero-extended dword +    ImmediateU32(u32), +    /// a sign-extended qword +    ImmediateI64(i64), +    /// a zero-extended qword +    ImmediateU64(u64), +    /// a bare register operand, such as `rcx`. +    Register(RegSpec), +    /// an `avx512` register operand with optional mask register and merge mode, such as +    /// `zmm3{k4}{z}`. +    /// +    /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is +    /// `MergeMode::Merge`. +    RegisterMaskMerge(RegSpec, RegSpec, MergeMode), +    /// an `avx512` register operand with optional mask register, merge mode, and suppressed +    /// exceptions, such as `zmm3{k4}{z}{rd-sae}`. +    /// +    /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is +    /// `MergeMode::Merge`. +    RegisterMaskMergeSae(RegSpec, RegSpec, MergeMode, SaeMode), +    /// an `avx512` register operand with optional mask register, merge mode, and suppressed +    /// exceptions, with no overridden rounding mode, such as `zmm3{k4}{z}{sae}`. +    /// +    /// if the mask register is `k0`, there is no masking applied, and the default x86 operation is +    /// `MergeMode::Merge`. +    RegisterMaskMergeSaeNoround(RegSpec, RegSpec, MergeMode), +    /// a memory access to a literal word address. it's extremely rare that a well-formed x86 +    /// instruction uses this mode. as an example, `[0x1133]` +    DisplacementU16(u16), +    /// a memory access to a literal dword address. it's extremely rare that a well-formed x86 +    /// instruction uses this mode. as an example, `[0x11335577]` +    DisplacementU32(u32), +    /// a memory access to a literal qword address. it's relatively rare that a well-formed x86 +    /// instruction uses this mode, but plausibe. for example, `gs:[0x14]`. segment overrides, +    /// however, are maintained on the instruction itself. +    DisplacementU64(u64), +    /// a simple dereference of the address held in some register. for example: `[rsi]`. +    RegDeref(RegSpec), +    /// a dereference of the address held in some register with offset. for example: `[rsi + 0x14]`. +    RegDisp(RegSpec, i32), +    /// a dereference of the address held in some register scaled by 1, 2, 4, or 8. this is almost always used with the `lea` instruction. for example: `[rdx * 4]`. +    RegScale(RegSpec, u8), +    /// a dereference of the address from summing two registers. for example: `[rbp + rax]` +    RegIndexBase(RegSpec, RegSpec), +    /// a dereference of the address from summing two registers with offset. for example: `[rdi + rcx + 0x40]` +    RegIndexBaseDisp(RegSpec, RegSpec, i32), +    /// a dereference of the address held in some register scaled by 1, 2, 4, or 8 with offset. this is almost always used with the `lea` instruction. for example: `[rax * 4 + 0x30]`. +    RegScaleDisp(RegSpec, u8, i32), +    /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, +    /// or 8. for +    /// example: `[rsi + rcx * 4]` +    RegIndexBaseScale(RegSpec, RegSpec, u8), +    /// a dereference of the address from summing a register and index register scaled by 1, 2, 4, +    /// or 8, with offset. for +    /// example: `[rsi + rcx * 4 + 0x1234]` +    RegIndexBaseScaleDisp(RegSpec, RegSpec, u8, i32), +    /// an `avx512` dereference of register with optional masking. for example: `[rdx]{k3}` +    RegDerefMasked(RegSpec, RegSpec), +    /// an `avx512` dereference of register plus offset, with optional masking. for example: `[rsp + 0x40]{k3}` +    RegDispMasked(RegSpec, i32, RegSpec), +    /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8, with optional masking. this +    /// seems extraordinarily unlikely to occur in practice. for example: `[rsi * 4]{k2}` +    RegScaleMasked(RegSpec, u8, RegSpec), +    /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional masking. +    /// for example: `[rsi + rax * 4]{k6}` +    RegIndexBaseMasked(RegSpec, RegSpec, RegSpec), +    /// an `avx512` dereference of a register plus offset, with optional masking.  for example: +    /// `[rsi + rax + 0x1313]{k6}` +    RegIndexBaseDispMasked(RegSpec, RegSpec, i32, RegSpec), +    /// an `avx512` dereference of a register scaled by 1, 2, 4, or 8 plus offset, with optional +    /// masking. this seems extraordinarily unlikely to occur in practice. for example: `[rsi * +    /// 4 + 0x1357]{k2}` +    RegScaleDispMasked(RegSpec, u8, i32, RegSpec), +    /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8, with optional +    /// masking.  for example: `[rsi + rax * 4]{k6}` +    RegIndexBaseScaleMasked(RegSpec, RegSpec, u8, RegSpec), +    /// an `avx512` dereference of a register plus index scaled by 1, 2, 4, or 8 and offset, with +    /// optional masking.  for example: `[rsi + rax * 4 + 0x1313]{k6}` +    RegIndexBaseScaleDispMasked(RegSpec, RegSpec, u8, i32, RegSpec), +    /// no operand. it is a bug for `yaxpeax-x86` to construct an `Operand` of this kind for public +    /// use; the instruction's `operand_count` should be reduced so as to make this invisible to +    /// library clients. +    Nothing, +} + +impl OperandSpec { +    fn masked(self) -> Self { +        match self { +            OperandSpec::RegRRR => OperandSpec::RegRRR_maskmerge, +            OperandSpec::RegMMM => OperandSpec::RegMMM_maskmerge, +            OperandSpec::RegVex => OperandSpec::RegVex_maskmerge, +            OperandSpec::Deref => OperandSpec::Deref_mask, +            OperandSpec::RegDisp => OperandSpec::RegDisp_mask, +            OperandSpec::RegScale => OperandSpec::RegScale_mask, +            OperandSpec::RegScaleDisp => OperandSpec::RegScaleDisp_mask, +            OperandSpec::RegIndexBaseScale => OperandSpec::RegIndexBaseScale_mask, +            OperandSpec::RegIndexBaseScaleDisp => OperandSpec::RegIndexBaseScaleDisp_mask, +            o => o, +        } +    } +    fn is_memory(&self) -> bool { +        match self { +            OperandSpec::DispU32 | +            OperandSpec::DispU64 | +            OperandSpec::Deref | +            OperandSpec::Deref_esi | +            OperandSpec::Deref_edi | +            OperandSpec::Deref_rsi | +            OperandSpec::Deref_rdi | +            OperandSpec::RegDisp | +            OperandSpec::RegScale | +            OperandSpec::RegScaleDisp | +            OperandSpec::RegIndexBaseScale | +            OperandSpec::RegIndexBaseScaleDisp | +            OperandSpec::Deref_mask | +            OperandSpec::RegDisp_mask | +            OperandSpec::RegScale_mask | +            OperandSpec::RegScaleDisp_mask | +            OperandSpec::RegIndexBaseScale_mask | +            OperandSpec::RegIndexBaseScaleDisp_mask => { +                true +            }, +            OperandSpec::ImmI8 | +            OperandSpec::ImmI16 | +            OperandSpec::ImmI32 | +            OperandSpec::ImmI64 | +            OperandSpec::ImmU8 | +            OperandSpec::ImmU16 | +            OperandSpec::RegRRR | +            OperandSpec::RegRRR_maskmerge | +            OperandSpec::RegRRR_maskmerge_sae | +            OperandSpec::RegRRR_maskmerge_sae_noround | +            OperandSpec::RegMMM | +            OperandSpec::RegMMM_maskmerge | +            OperandSpec::RegMMM_maskmerge_sae_noround | +            OperandSpec::RegVex | +            OperandSpec::RegVex_maskmerge | +            OperandSpec::Reg4 | +            OperandSpec::ImmInDispField | +            OperandSpec::Nothing => { +                false +            } +        } +    } +} + +/// an `avx512` merging mode. +/// +/// the behavior for non-`avx512` instructions is equivalent to `merge`.  `zero` is only useful in +/// conjunction with a mask register, where bits specified in the mask register correspond to +/// unmodified items in the instruction's desination. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum MergeMode { +    Merge, +    Zero, +} +impl From<bool> for MergeMode { +    fn from(b: bool) -> Self { +        if b { +            MergeMode::Zero +        } else { +            MergeMode::Merge +        } +    } +} +/// an `avx512` custom rounding mode. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SaeMode { +    RoundNearest, +    RoundDown, +    RoundUp, +    RoundZero, +} +const SAE_MODES: [SaeMode; 4] = [ +    SaeMode::RoundNearest, +    SaeMode::RoundDown, +    SaeMode::RoundUp, +    SaeMode::RoundZero, +]; +impl SaeMode { +    /// a human-friendly label for this `SaeMode`: +    /// +    /// ``` +    /// use yaxpeax_x86::long_mode::SaeMode; +    /// +    /// assert_eq!(SaeMode::RoundNearest.label(), "{rne-sae}"); +    /// assert_eq!(SaeMode::RoundDown.label(), "{rd-sae}"); +    /// assert_eq!(SaeMode::RoundUp.label(), "{ru-sae}"); +    /// assert_eq!(SaeMode::RoundZero.label(), "{rz-sae}"); +    /// ``` +    pub fn label(&self) -> &'static str { +        match self { +            SaeMode::RoundNearest => "{rne-sae}", +            SaeMode::RoundDown => "{rd-sae}", +            SaeMode::RoundUp => "{ru-sae}", +            SaeMode::RoundZero => "{rz-sae}", +        } +    } + +    fn from(l: bool, lp: bool) -> Self { +        let mut idx = 0; +        if l { +            idx |= 1; +        } +        if lp { +            idx |= 2; +        } +        SAE_MODES[idx] +    } +} +impl Operand { +    fn from_spec(inst: &Instruction, spec: OperandSpec) -> Operand { +        match spec { +            OperandSpec::Nothing => { +                Operand::Nothing +            } +            // the register in modrm_rrr +            OperandSpec::RegRRR => { +                Operand::Register(inst.regs[0]) +            } +            OperandSpec::RegRRR_maskmerge => { +                Operand::RegisterMaskMerge( +                    inst.regs[0], +                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), +                    MergeMode::from(inst.prefixes.evex_unchecked().merge()), +                ) +            } +            OperandSpec::RegRRR_maskmerge_sae => { +                Operand::RegisterMaskMergeSae( +                    inst.regs[0], +                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), +                    MergeMode::from(inst.prefixes.evex_unchecked().merge()), +                    SaeMode::from(inst.prefixes.evex_unchecked().vex().l(), inst.prefixes.evex_unchecked().lp()), +                ) +            } +            OperandSpec::RegRRR_maskmerge_sae_noround => { +                Operand::RegisterMaskMergeSaeNoround( +                    inst.regs[0], +                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), +                    MergeMode::from(inst.prefixes.evex_unchecked().merge()), +                ) +            } +            // the register in modrm_mmm (eg modrm mod bits were 11) +            OperandSpec::RegMMM => { +                Operand::Register(inst.regs[1]) +            } +            OperandSpec::RegMMM_maskmerge => { +                Operand::RegisterMaskMerge( +                    inst.regs[1], +                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), +                    MergeMode::from(inst.prefixes.evex_unchecked().merge()), +                ) +            } +            OperandSpec::RegMMM_maskmerge_sae_noround => { +                Operand::RegisterMaskMergeSaeNoround( +                    inst.regs[1], +                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), +                    MergeMode::from(inst.prefixes.evex_unchecked().merge()), +                ) +            } +            OperandSpec::RegVex => { +                Operand::Register(inst.regs[3]) +            } +            OperandSpec::RegVex_maskmerge => { +                Operand::RegisterMaskMerge( +                    inst.regs[3], +                    RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg()), +                    MergeMode::from(inst.prefixes.evex_unchecked().merge()), +                ) +            } +            OperandSpec::Reg4 => { +                Operand::Register(RegSpec { num: inst.imm as u8, bank: inst.regs[3].bank }) +            } +            OperandSpec::ImmI8 => Operand::ImmediateI8(inst.imm as i8), +            OperandSpec::ImmU8 => Operand::ImmediateU8(inst.imm as u8), +            OperandSpec::ImmI16 => Operand::ImmediateI16(inst.imm as i16), +            OperandSpec::ImmU16 => Operand::ImmediateU16(inst.imm as u16), +            OperandSpec::ImmI32 => Operand::ImmediateI32(inst.imm as i32), +            OperandSpec::ImmI64 => Operand::ImmediateI64(inst.imm as i64), +            OperandSpec::ImmInDispField => Operand::ImmediateU16(inst.disp as u16), +            OperandSpec::DispU32 => Operand::DisplacementU32(inst.disp as u32), +            OperandSpec::DispU64 => Operand::DisplacementU64(inst.disp as u64), +            OperandSpec::Deref => { +                Operand::RegDeref(inst.regs[1]) +            } +            OperandSpec::Deref_esi => { +                Operand::RegDeref(RegSpec::esi()) +            } +            OperandSpec::Deref_edi => { +                Operand::RegDeref(RegSpec::edi()) +            } +            OperandSpec::Deref_rsi => { +                Operand::RegDeref(RegSpec::rsi()) +            } +            OperandSpec::Deref_rdi => { +                Operand::RegDeref(RegSpec::rdi()) +            } +            OperandSpec::RegDisp => { +                Operand::RegDisp(inst.regs[1], inst.disp as i32) +            } +            OperandSpec::RegScale => { +                Operand::RegScale(inst.regs[2], inst.scale) +            } +            OperandSpec::RegScaleDisp => { +                Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) +            } +            OperandSpec::RegIndexBaseScale => { +                Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) +            } +            OperandSpec::RegIndexBaseScaleDisp => { +                Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) +            } +            OperandSpec::Deref_mask => { +                if inst.prefixes.evex_unchecked().mask_reg() != 0 { +                    Operand::RegDerefMasked(inst.regs[1], RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) +                } else { +                    Operand::RegDeref(inst.regs[1]) +                } +            } +            OperandSpec::RegDisp_mask => { +                if inst.prefixes.evex_unchecked().mask_reg() != 0 { +                    Operand::RegDispMasked(inst.regs[1], inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) +                } else { +                    Operand::RegDisp(inst.regs[1], inst.disp as i32) +                } +            } +            OperandSpec::RegScale_mask => { +                if inst.prefixes.evex_unchecked().mask_reg() != 0 { +                    Operand::RegScaleMasked(inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) +                } else { +                    Operand::RegScale(inst.regs[2], inst.scale) +                } +            } +            OperandSpec::RegScaleDisp_mask => { +                if inst.prefixes.evex_unchecked().mask_reg() != 0 { +                    Operand::RegScaleDispMasked(inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) +                } else { +                    Operand::RegScaleDisp(inst.regs[2], inst.scale, inst.disp as i32) +                } +            } +            OperandSpec::RegIndexBaseScale_mask => { +                if inst.prefixes.evex_unchecked().mask_reg() != 0 { +                    Operand::RegIndexBaseScaleMasked(inst.regs[1], inst.regs[2], inst.scale, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) +                } else { +                    Operand::RegIndexBaseScale(inst.regs[1], inst.regs[2], inst.scale) +                } +            } +            OperandSpec::RegIndexBaseScaleDisp_mask => { +                if inst.prefixes.evex_unchecked().mask_reg() != 0 { +                    Operand::RegIndexBaseScaleDispMasked(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32, RegSpec::mask(inst.prefixes.evex_unchecked().mask_reg())) +                } else { +                    Operand::RegIndexBaseScaleDisp(inst.regs[1], inst.regs[2], inst.scale, inst.disp as i32) +                } +            } +        } +    } + +    /// returns `true` if this operand implies a memory access, `false` otherwise. +    /// +    /// notably, the `lea` instruction uses a memory operand without actually ever accessing +    /// memory. +    pub fn is_memory(&self) -> bool { +        match self { +            Operand::DisplacementU32(_) | +            Operand::DisplacementU64(_) | +            Operand::RegDeref(_) | +            Operand::RegDisp(_, _) | +            Operand::RegScale(_, _) | +            Operand::RegIndexBase(_, _) | +            Operand::RegIndexBaseDisp(_, _, _) | +            Operand::RegScaleDisp(_, _, _) | +            Operand::RegIndexBaseScale(_, _, _) | +            Operand::RegIndexBaseScaleDisp(_, _, _, _) | +            Operand::RegDerefMasked(_, _) | +            Operand::RegDispMasked(_, _, _) | +            Operand::RegScaleMasked(_, _, _) | +            Operand::RegIndexBaseMasked(_, _, _) | +            Operand::RegIndexBaseDispMasked(_, _, _, _) | +            Operand::RegScaleDispMasked(_, _, _, _) | +            Operand::RegIndexBaseScaleMasked(_, _, _, _) | +            Operand::RegIndexBaseScaleDispMasked(_, _, _, _, _) => { +                true +            }, +            Operand::ImmediateI8(_) | +            Operand::ImmediateU8(_) | +            Operand::ImmediateI16(_) | +            Operand::ImmediateU16(_) | +            Operand::ImmediateU32(_) | +            Operand::ImmediateI32(_) | +            Operand::ImmediateU64(_) | +            Operand::ImmediateI64(_) | +            Operand::Register(_) | +            Operand::RegisterMaskMerge(_, _, _) | +            Operand::RegisterMaskMergeSae(_, _, _, _) | +            Operand::RegisterMaskMergeSaeNoround(_, _, _) | +            Operand::Nothing => { +                false +            } +        } +    } + +    /// return the width of this operand, in bytes. register widths are determined by the +    /// register's class. the widths of memory operands are recorded on the instruction this +    /// `Operand` came from; `None` here means the authoritative width is `instr.mem_size()`. +    pub fn width(&self) -> Option<u8> { +        match self { +            Operand::Register(reg) => { +                Some(reg.width()) +            } +            Operand::RegisterMaskMerge(reg, _, _) => { +                Some(reg.width()) +            } +            Operand::ImmediateI8(_) | +            Operand::ImmediateU8(_) => { +                Some(1) +            } +            Operand::ImmediateI16(_) | +            Operand::ImmediateU16(_) => { +                Some(2) +            } +            Operand::ImmediateI32(_) | +            Operand::ImmediateU32(_) => { +                Some(4) +            } +            Operand::ImmediateI64(_) | +            Operand::ImmediateU64(_) => { +                Some(8) +            } +            // memory operands or `Nothing` +            _ => { +                None +            } +        } +    } +} + +#[test] +fn operand_size() { +    assert_eq!(core::mem::size_of::<OperandSpec>(), 1); +    assert_eq!(core::mem::size_of::<RegSpec>(), 2); +    // assert_eq!(core::mem::size_of::<Prefixes>(), 4); +    // assert_eq!(core::mem::size_of::<Instruction>(), 40); +} + +/// a trivial struct for `yaxpeax_arch::Arch` to be implemented on. it's only interesting for the +/// associated type parameters. +#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] +#[derive(Hash, Eq, PartialEq, Debug, Copy, Clone)] +#[allow(non_camel_case_types)] +pub struct Arch; + +impl yaxpeax_arch::Arch for Arch { +    type Address = u64; +    type Word = u8; +    type Instruction = Instruction; +    type DecodeError = DecodeError; +    type Decoder = InstDecoder; +    type Operand = Operand; +} + +#[derive(PartialEq, Copy, Clone, Eq, Hash, PartialOrd, Ord)] +struct InstDecoder { +    flags: u64, +} + +impl InstDecoder { +    pub fn minimal() -> Self { +        InstDecoder { +            flags: 0, +        } +    } + +    pub fn decode_slice(&self, data: &[u8]) -> Result<Instruction, DecodeError> { +        let mut reader = yaxpeax_arch::U8Reader::new(data); +        self.decode(&mut reader) +    } + +    // TODO: map isa extensions over. maybe codegen the whole thing? +    fn as_64b_best_effort(&self) -> crate::long_mode::InstDecoder { +        crate::long_mode::InstDecoder::default() +    } + +    // TODO: map isa extensions over. maybe codegen the whole thing? +    fn as_32b_best_effort(&self) -> crate::protected_mode::InstDecoder { +        crate::protected_mode::InstDecoder::default() +    } + +    // TODO: map isa extensions over. maybe codegen the whole thing? +    fn as_16b_best_effort(&self) -> crate::real_mode::InstDecoder { +        crate::real_mode::InstDecoder::default() +    } +} + +// this is layout-compatible with 64-bit RegSpec (`RegisterBank` is the same), but not 32-bit or +// 16-bit (fewer register banks). hopefully this remains generally true. +/// an `x86` register, including its number and type. if `fmt` is enabled, name too. +/// +/// ``` +/// use yaxpeax_x86::generic::{RegSpec, register_class}; +/// +/// assert_eq!(RegSpec::ecx().num(), 1); +/// assert_eq!(RegSpec::ecx().class(), register_class::D); +/// ``` +/// +/// some registers have classes of their own, and only one member: `rip`, `eip`, `rflags`, and +/// `eflags`. +#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] +#[derive(Copy, Clone, Debug, PartialOrd, Ord, Eq, PartialEq)] +pub struct RegSpec { +    num: u8, +    bank: RegisterBank, +} + +/// an `x86_64` register class - `qword`, `dword`, `xmmword`, `segment`, and so on. +/// +/// this is mostly useful for comparing a `RegSpec`'s [`RegSpec::class()`] with a constant out of +/// [`register_class`]. +#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct RegisterClass { +    kind: RegisterBank, +} + +const REGISTER_CLASS_NAMES: &[&'static str] = &[ +    "qword", +    "BUG. PLEASE REPORT.", +    "dword", +    "BUG. PLEASE REPORT.", +    "word", +    "BUG. PLEASE REPORT.", +    "byte", +    "BUG. PLEASE REPORT.", +    "rex-byte", +    "BUG. PLEASE REPORT.", +    "cr", +    "BUG. PLEASE REPORT.", +    "dr", +    "BUG. PLEASE REPORT.", +    "segment", +    "xmm", +    "BUG. PLEASE REPORT.", +    "BUG. PLEASE REPORT.", +    "BUG. PLEASE REPORT.", +    "ymm", +    "BUG. PLEASE REPORT.", +    "BUG. PLEASE REPORT.", +    "BUG. PLEASE REPORT.", +    "zmm", +    "BUG. PLEASE REPORT.", +    "BUG. PLEASE REPORT.", +    "BUG. PLEASE REPORT.", +    "x87-stack", +    "mmx", +    "k", +    "eip", +    "rip", +    "eflags", +    "rflags", +]; + +/// high-level register classes in an x86 machine, such as "8-byte general purpose", "xmm", "x87", +/// and so on. constants in this module are useful for inspecting the register class of a decoded +/// instruction. as an example: +/// ``` +/// use yaxpeax_x86::long_mode::{self as amd64}; +/// use yaxpeax_x86::long_mode::{Opcode, Operand, RegisterClass}; +/// use yaxpeax_arch::{Decoder, U8Reader}; +/// +/// let movsx_eax_cl = &[0x0f, 0xbe, 0xc1]; +/// let decoder = amd64::InstDecoder::default(); +/// let instruction = decoder +///     .decode(&mut U8Reader::new(movsx_eax_cl)) +///     .expect("can decode"); +/// +/// assert_eq!(instruction.opcode(), Opcode::MOVSX); +/// +/// fn show_register_class_info(regclass: RegisterClass) { +///     match regclass { +///         amd64::register_class::D => { +///             println!("  and is a dword register"); +///         } +///         amd64::register_class::B => { +///             println!("  and is a byte register"); +///         } +///         other => { +///             panic!("unexpected and invalid register class {:?}", other); +///         } +///     } +/// } +/// +/// if let Operand::Register(regspec) = instruction.operand(0) { +///     #[cfg(feature="fmt")] +///     println!("first operand is {}", regspec); +///     show_register_class_info(regspec.class()); +/// } +/// +/// if let Operand::Register(regspec) = instruction.operand(1) { +///     #[cfg(feature="fmt")] +///     println!("first operand is {}", regspec); +///     show_register_class_info(regspec.class()); +/// } +/// ``` +/// +/// this is preferable to alternatives like checking register names against a known list: a +/// register class is one byte and "is qword general-purpose" can then be a simple one-byte +/// compare, instead of 16 string compares. +/// +/// `yaxpeax-x86` does not attempt to further distinguish between, for example, register +/// suitability as operands. as an example, `cl` is only a byte register, with no additional +/// register class to describe its use as an implicit shift operand. +pub mod register_class { +    use super::{RegisterBank, RegisterClass}; +    /// quadword registers: rax through r15 +    pub const Q: RegisterClass = RegisterClass { kind: RegisterBank::Q }; +    /// doubleword registers: eax through r15d +    pub const D: RegisterClass = RegisterClass { kind: RegisterBank::D }; +    /// word registers: ax through r15w +    pub const W: RegisterClass = RegisterClass { kind: RegisterBank::W }; +    /// byte registers: al, cl, dl, bl, ah, ch, dh, bh. `B` registers do *not* have a rex prefix. +    pub const B: RegisterClass = RegisterClass { kind: RegisterBank::B }; +    /// byte registers with rex prefix present: al through r15b. `RB` registers have a rex prefix. +    pub const RB: RegisterClass = RegisterClass { kind: RegisterBank::rB }; +    /// control registers cr0 through cr15. +    pub const CR: RegisterClass = RegisterClass { kind: RegisterBank::CR}; +    /// debug registers dr0 through dr15. +    pub const DR: RegisterClass = RegisterClass { kind: RegisterBank::DR }; +    /// segment registers es, cs, ss, ds, fs, gs. +    pub const S: RegisterClass = RegisterClass { kind: RegisterBank::S }; +    /// xmm registers xmm0 through xmm31. +    pub const X: RegisterClass = RegisterClass { kind: RegisterBank::X }; +    /// ymm registers ymm0 through ymm31. +    pub const Y: RegisterClass = RegisterClass { kind: RegisterBank::Y }; +    /// zmm registers zmm0 through zmm31. +    pub const Z: RegisterClass = RegisterClass { kind: RegisterBank::Z }; +    /// x87 floating point stack entries st(0) through st(7). +    pub const ST: RegisterClass = RegisterClass { kind: RegisterBank::ST }; +    /// mmx registers mm0 through mm7. +    pub const MM: RegisterClass = RegisterClass { kind: RegisterBank::MM }; +    /// `avx512` mask registers k0 through k7. +    pub const K: RegisterClass = RegisterClass { kind: RegisterBank::K }; +    /// the full instruction pointer register. +    pub const RIP: RegisterClass = RegisterClass { kind: RegisterBank::RIP }; +    /// the low 32 bits of `rip`. +    pub const EIP: RegisterClass = RegisterClass { kind: RegisterBank::EIP }; +    /// the full cpu flags register. +    pub const RFLAGS: RegisterClass = RegisterClass { kind: RegisterBank::RFlags }; +    /// the low 32 bits of rflags. +    pub const EFLAGS: RegisterClass = RegisterClass { kind: RegisterBank::EFlags }; +} + +impl RegisterClass { +    /// return a human-friendly name for this register class +    pub fn name(&self) -> &'static str { +        REGISTER_CLASS_NAMES[self.kind as usize] +    } + +    /// return the size of this register class, in bytes +    pub fn width(&self) -> u8 { +        match self.kind { +            RegisterBank::Q => 8, +            RegisterBank::D => 4, +            RegisterBank::W => 2, +            RegisterBank::B | +            RegisterBank::rB => { +                1 +            }, +            RegisterBank::CR | +            RegisterBank::DR => { +                8 +            }, +            RegisterBank::S => { +                2 +            }, +            RegisterBank::EIP => { +                4 +            } +            RegisterBank::RIP => { +                8 +            } +            RegisterBank::EFlags => { +                4 +            } +            RegisterBank::RFlags => { +                8 +            } +            RegisterBank::X => { +                16 +            } +            RegisterBank::Y => { +                32 +            } +            RegisterBank::Z => { +                64 +            } +            RegisterBank::ST => { +                10 +            } +            RegisterBank::MM => { +                8 +            } +            RegisterBank::K => { +                8 +            } +        } +    } +} + +#[allow(non_camel_case_types)] +#[cfg_attr(feature="use-serde", derive(Serialize, Deserialize))] +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +enum RegisterBank { +    Q = 0, D = 2, W = 4, B = 6, rB = 8, // Quadword, Dword, Word, Byte +    CR = 10, DR = 12, S = 14, EIP = 30, RIP = 31, EFlags = 32, RFlags = 33,  // Control reg, Debug reg, Selector, ... +    X = 15, Y = 19, Z = 23,    // XMM, YMM, ZMM +    ST = 27, MM = 28,     // ST, MM regs (x87, mmx) +    K = 29, // AVX512 mask registers +} + +// this enum is identical across long-mode, protected-mode, and real-mode forms. translating +// between these can be a "simple" transmute. +/// the segment register used by the corresponding instruction. +/// +/// typically this will be `ds` but can be overridden. some instructions have specific segment +/// registers used regardless of segment prefixes, and in these cases `yaxpeax-x86` will report the +/// actual segment register a physical processor would use. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub enum Segment { +    DS = 0, CS, ES, FS, GS, SS +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +struct EvexData { +    // data: present, z, b, Lp, Rp. aaa +    bits: u8, +} + +// `Prefixes` should be layout-compatbile with 64-bit prefixes, and convertible from 32-bit and +// 16-bit forms. this is a distinct type from 64-bit prefixes in case they stop being +// layout-compatible one day. +/// the prefixes on an instruction. +/// +/// `rep`, `repnz`, `lock`, and segment override prefixes are directly accessible here. `rex`, +/// `vex`, and `evex` prefixes are available through their associated helpers. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +struct Prefixes { +    bits: u8, +    rex: PrefixRex, +    segment: Segment, +    evex_data: EvexData, +} + +/// the `avx512`-related data from an [`evex`](https://en.wikipedia.org/wiki/EVEX_prefix) prefix. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct PrefixEvex { +    vex: PrefixVex, +    evex_data: EvexData, +} + +impl PrefixEvex { +    fn present(&self) -> bool { +        self.evex_data.present() +    } +    /// the `evex` prefix's parts that overlap with `vex` definitions - `L`, `W`, `R`, `X`, and `B` +    /// bits. +    pub fn vex(&self) -> PrefixVex { +        self.vex +    } +    /// the `avx512` mask register in use. `0` indicates "no mask register". +    pub fn mask_reg(&self) -> u8 { +        self.evex_data.aaa() +    } +    pub fn broadcast(&self) -> bool { +        self.evex_data.b() +    } +    pub fn merge(&self) -> bool { +        self.evex_data.z() +    } +    /// the `evex` `L'` bit. +    pub fn lp(&self) -> bool { +        self.evex_data.lp() +    } +    /// the `evex` `R'` bit. +    pub fn rp(&self) -> bool { +        self.evex_data.rp() +    } +} + +/// bits specified in an avx/avx2 [`vex`](https://en.wikipedia.org/wiki/VEX_prefix) prefix, `L`, `W`, `R`, `X`, and `B`. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct PrefixVex { +    bits: u8, +} + +#[allow(dead_code)] +impl PrefixVex { +    #[inline] +    fn present(&self) -> bool { (self.bits & 0x80) == 0x80 } +    #[inline] +    pub fn b(&self) -> bool { (self.bits & 0x01) == 0x01 } +    #[inline] +    pub fn x(&self) -> bool { (self.bits & 0x02) == 0x02 } +    #[inline] +    pub fn r(&self) -> bool { (self.bits & 0x04) == 0x04 } +    #[inline] +    pub fn w(&self) -> bool { (self.bits & 0x08) == 0x08 } +    #[inline] +    pub fn l(&self) -> bool { (self.bits & 0x10) == 0x10 } +    #[inline] +    fn compressed_disp(&self) -> bool { (self.bits & 0x20) == 0x20 } +} + +/// bits specified in an x86_64 +/// [`rex`](https://wiki.osdev.org/X86-64_Instruction_Encoding#REX_prefix) prefix. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct PrefixRex { +    bits: u8 +} + +impl Prefixes { +    fn new(bits: u8) -> Prefixes { +        Prefixes { +            bits: bits, +            rex: PrefixRex { bits: 0 }, +            segment: Segment::DS, +            evex_data: EvexData { bits: 0 }, +        } +    } + +    #[inline] +    pub fn rep(&self) -> bool { self.bits & 0x30 == 0x10 } +    #[inline] +    pub fn repnz(&self) -> bool { self.bits & 0x30 == 0x30 } +    #[inline] +    pub fn rep_any(&self) -> bool { self.bits & 0x30 != 0x00 } +    #[inline] +    fn operand_size(&self) -> bool { self.bits & 0x1 == 1 } +    #[inline] +    fn address_size(&self) -> bool { self.bits & 0x2 == 2 } +    #[inline] +    pub fn lock(&self) -> bool { self.bits & 0x4 == 4 } +    #[inline] +    pub fn cs(&mut self) { self.segment = Segment::CS } +    #[inline] +    pub fn ds(&self) -> bool { self.segment == Segment::DS } +    #[inline] +    pub fn es(&self) -> bool { self.segment == Segment::ES } +    #[inline] +    pub fn fs(&self) -> bool { self.segment == Segment::FS } +    #[inline] +    pub fn gs(&self) -> bool { self.segment == Segment::GS } +    #[inline] +    pub fn ss(&self) -> bool { self.segment == Segment::SS } +    #[inline] +    fn rex_unchecked(&self) -> PrefixRex { self.rex } +    #[inline] +    pub fn rex(&self) -> Option<PrefixRex> { +        let rex = self.rex_unchecked(); +        if rex.present() { +            Some(rex) +        } else { +            None +        } +    } +    #[inline] +    fn vex_unchecked(&self) -> PrefixVex { PrefixVex { bits: self.rex.bits } } +    #[inline] +    pub fn vex(&self) -> Option<PrefixVex> { +        let vex = self.vex_unchecked(); +        if vex.present() { +            Some(vex) +        } else { +            None +        } +    } +    #[inline] +    fn evex_unchecked(&self) -> PrefixEvex { PrefixEvex { vex: PrefixVex { bits: self.rex.bits }, evex_data: self.evex_data } } +    #[inline] +    pub fn evex(&self) -> Option<PrefixEvex> { +        let evex = self.evex_unchecked(); +        if evex.present() { +            Some(evex) +        } else { +            None +        } +    } +} + +impl EvexData { +    pub(crate) fn present(&self) -> bool { +        self.bits & 0b1000_0000 != 0 +    } + +    pub(crate) fn aaa(&self) -> u8 { +        self.bits & 0b111 +    } + +    pub(crate) fn b(&self) -> bool { +        (self.bits & 0b0000_1000) != 0 +    } + +    pub(crate) fn z(&self) -> bool { +        (self.bits & 0b0001_0000) != 0 +    } + +    pub(crate) fn lp(&self) -> bool { +        (self.bits & 0b0010_0000) != 0 +    } + +    pub(crate) fn rp(&self) -> bool { +        (self.bits & 0b0100_0000) != 0 +    } +} + +impl PrefixRex { +    #[inline] +    fn present(&self) -> bool { (self.bits & 0xc0) == 0x40 } +    #[inline] +    pub fn b(&self) -> bool { (self.bits & 0x01) == 0x01 } +    #[inline] +    pub fn x(&self) -> bool { (self.bits & 0x02) == 0x02 } +    #[inline] +    pub fn r(&self) -> bool { (self.bits & 0x04) == 0x04 } +    #[inline] +    pub fn w(&self) -> bool { (self.bits & 0x08) == 0x08 } +} + +#[cfg(feature = "std")] +extern crate std; +#[cfg(feature = "std")] +impl std::error::Error for DecodeError { +    fn description(&self) -> &str { +        <Self as yaxpeax_arch::DecodeError>::description(self) +    } +} + +#[allow(non_camel_case_types)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum OperandSpec { +    Nothing, +    // the register in regs[0] +    RegRRR, +    // the register in regs[0] and is EVEX-encoded (may have a mask register, is merged or +    // zeroed) +    RegRRR_maskmerge, +    // the register in regs[0] and is EVEX-encoded (may have a mask register, is merged or +    // zeroed). additionally, this instruction has exceptions suppressed with a potentially +    // custom rounding mode. +    RegRRR_maskmerge_sae, +    // the register in regs[0] and is EVEX-encoded (may have a mask register, is merged or +    // zeroed). additionally, this instruction has exceptions suppressed. +    RegRRR_maskmerge_sae_noround, +    // the register in modrm_mmm (eg modrm mod bits were 11) +    RegMMM, +    // same as `RegRRR`: the register is modrm's `mmm` bits, and may be masekd. +    RegMMM_maskmerge, +    RegMMM_maskmerge_sae_noround, +    // the register selected by vex-vvvv bits +    RegVex, +    RegVex_maskmerge, +    // the register selected by a handful of avx2 vex-coded instructions, +    // stuffed in imm4. +    Reg4, +    ImmI8, +    ImmI16, +    ImmI32, +    ImmI64, +    ImmU8, +    ImmU16, +    // ENTER is a two-immediate instruction, where the first immediate is stored in the disp field. +    // for this case, a second immediate-style operand is needed. +    // turns out `insertq` and `extrq` are also two-immediate instructions, so this is generalized +    // to cover them too. +    ImmInDispField, +    DispU32, +    DispU64, +    Deref, +    Deref_esi, +    Deref_edi, +    Deref_rsi, +    Deref_rdi, +    RegDisp, +    RegScale, +    RegScaleDisp, +    RegIndexBaseScale, +    RegIndexBaseScaleDisp, +    Deref_mask, +    RegDisp_mask, +    RegScale_mask, +    RegScaleDisp_mask, +    RegIndexBaseScale_mask, +    RegIndexBaseScaleDisp_mask, + +    // protected mode +    DispU16, +    Deref_si, +    Deref_di, +    RegIndexBase_mask, +    RegIndexBaseDisp_mask, +    // u16:u{16,32} immediate address for a far call +    AbsoluteFarAddress, + +    // real mode +    RegIndexBase, +    RegIndexBaseDisp, +} + +/// an `x86_64` instruction. +/// +/// typically an opcode will be inspected by [`Instruction::opcode()`], and an instruction has +/// [`Instruction::operand_count()`] many operands. operands are provided by +/// [`Instruction::operand()`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Instruction { +    pub prefixes: Prefixes, +    /* +    modrm_rrr: RegSpec, +    modrm_mmm: RegSpec, // doubles as sib_base +    sib_index: RegSpec, +    vex_reg: RegSpec, +    */ +    regs: [RegSpec; 4], +    scale: u8, +    length: u8, +    operand_count: u8, +    operands: [OperandSpec; 4], +    imm: u64, +    disp: u64, +    pub(crate) opcode: Opcode, +    mem_size: u8, +} + +impl yaxpeax_arch::Instruction for Instruction { +    fn well_defined(&self) -> bool { +        // TODO: this is incorrect! +        true +    } +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +#[non_exhaustive] +pub enum DecodeError { +    ExhaustedInput, +    InvalidOpcode, +    InvalidOperand, +    InvalidPrefixes, +    TooLong, +    IncompleteDecoder, +} + +impl yaxpeax_arch::DecodeError for DecodeError { +    fn data_exhausted(&self) -> bool { self == &DecodeError::ExhaustedInput } +    fn bad_opcode(&self) -> bool { self == &DecodeError::InvalidOpcode } +    fn bad_operand(&self) -> bool { self == &DecodeError::InvalidOperand } +    fn description(&self) -> &'static str { +        match self { +            DecodeError::ExhaustedInput => { "exhausted input" }, +            DecodeError::InvalidOpcode => { "invalid opcode" }, +            DecodeError::InvalidOperand => { "invalid operand" }, +            DecodeError::InvalidPrefixes => { "invalid prefixes" }, +            DecodeError::TooLong => { "too long" }, +            DecodeError::IncompleteDecoder => { "the decoder is incomplete" }, +        } +    } +} + +impl LengthedInstruction for Instruction { +    type Unit = AddressDiff<u64>; +    #[inline] +    fn len(&self) -> Self::Unit { +        AddressDiff::from_const(self.length.into()) +    } +    #[inline] +    fn min_size() -> Self::Unit { +        AddressDiff::from_const(1) +    } +} + +impl Default for InstDecoder { +    /// Instantiates an x86 decoder that probably decodes what you want. +    /// +    /// Attempts to match real processors in interpretation of undefined sequences, and decodes any +    /// instruction defined in any extension. +    fn default() -> Self { +        Self { +            flags: 0xffffffff_ffffffff, +        } +    } +} + +impl Decoder<Arch> for InstDecoder { +    fn decode<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(&self, words: &mut T) -> Result<Instruction, <Arch as yaxpeax_arch::Arch>::DecodeError> { +        let mut inst = crate::generic::Instruction::default(); + +        self.decode_into(&mut inst, words)?; + +        Ok(inst) +    } + +    fn decode_into<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>>(&self, instr: &mut Instruction, words: &mut T) -> Result<(), <Arch as yaxpeax_arch::Arch>::DecodeError> { +        let mut inst = crate::generic::Instruction::default(); +        self.decode_with_annotation(instr, words, &mut NullSink) +    } +} + +impl AnnotatingDecoder<Arch> for InstDecoder { +    type FieldDescription = FieldDescription; + +    fn decode_with_annotation< +        T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpeax_arch::Arch>::Word>, +        S: DescriptionSink<Self::FieldDescription> +    >(&self, instr: &mut Instruction, words: &mut T, sink: &mut S) -> Result<(), <Arch as yaxpeax_arch::Arch>::DecodeError> { +        // we only get one shot to read words from `&mut T`, so buffer up the max length of an +        // x86 instruct's worth of bytes... +        let mut bytes = [0u8; 15]; +        let mut available = 0; +        for i in 0..bytes.len() { +            if let Ok(word) = words.next() { +                bytes[i] = word; +                available += 1; +            } else { +                break; +            } +        } + +        // first try as a 64-bit instruction.. +        let decoder_64b = self.as_64b_best_effort(); +        let mut instr_64b = crate::long_mode::Instruction::default(); +        let mut sink_64b = NullSink; +        let res = decoder_64b.decode_with_annotation(&mut instr_64b, &mut yaxpeax_arch::U8Reader::new(&bytes), &mut sink_64b); +        let err_64b = match res { +            Ok(()) => { +                // TODO: flush sink_64b to sink +                *instr = instr_64b.to_generic(); +                return Ok(()); +            } +            Err(e) => { +                e.to_generic() +            } +        }; + +        // first try as a 32-bit instruction.. +        let decoder_32b = self.as_32b_best_effort(); +        let mut instr_32b = crate::protected_mode::Instruction::default(); +        let mut sink_32b = NullSink; +        let res = decoder_32b.decode_with_annotation(&mut instr_32b, &mut yaxpeax_arch::U8Reader::new(&bytes), &mut sink_32b); +        let err_32b = match res { +            Ok(()) => { +                // TODO: flush sink_32b to sink +                *instr = instr_32b.to_generic(); +                return Ok(()); +            } +            Err(e) => { +                e.to_generic() +            } +        }; + +        // lastly as a 16-bit instruction.. +        let decoder_16b = self.as_16b_best_effort(); +        let mut instr_16b = crate::real_mode::Instruction::default(); +        let mut sink_16b = NullSink; +        let res = decoder_16b.decode_with_annotation(&mut instr_16b, &mut yaxpeax_arch::U8Reader::new(&bytes), &mut sink_16b); +        let err_16b = match res { +            Ok(()) => { +                // TODO: flush sink_16b to sink +                *instr = instr_16b.to_generic(); +                return Ok(()); +            } +            Err(e) => { +                e.to_generic() +            } +        }; + +        // if all errors are the same, just return it. if the errors do not all agree, either: +        // * they disagree by having different strings associated (64b/32b/16b-specific text) +        // or +        // * they disagree semantically due to differences in 64b/32b/16b decoding. +        // +        // for generic decoding, we try 64-bit decoding first, falling back to 32-bit and 16-bit as +        // a last-ditch, but 16-bit decoding is *probably* not what a user of yaxpeax-x86 intends +        // to decode. for many errors, 64b and 32b versions should compare the same and get the +        // first arm here, but for some they might not quite be the same. since 32b is the +        // likely-intended fallback mode and we've fallen back (and through it) in trying to +        // decode, we'll return that error in the case of disagreements. there's no direct way to +        // access a 16-bit decode error through the generic decode interface. +        if err_64b == err_32b && err_32b == err_16b { +            Err(err_64b) +        } else { +            Err(err_32b) +        } +    } +} +impl Default for Instruction { +    fn default() -> Self { +        Instruction::invalid() +    } +} + +impl Instruction { +    /// get the `Opcode` of this instruction. +    pub fn opcode(&self) -> Opcode { +        self.opcode +    } + +    /// get the `Operand` at the provided index. +    /// +    /// panics if the index is `>= 4`. +    pub fn operand(&self, i: u8) -> Operand { +        assert!(i < 4); +        Operand::from_spec(self, self.operands[i as usize]) +    } + +    /// get the number of operands in this instruction. useful in iterating an instruction's +    /// operands generically. +    pub fn operand_count(&self) -> u8 { +        self.operand_count +    } + +    /// check if operand `i` is an actual operand or not. will be `false` for `i >= +    /// inst.operand_count()`. +    pub fn operand_present(&self, i: u8) -> bool { +        assert!(i < 4); +        if i >= self.operand_count { +            return false; +        } + +        if let OperandSpec::Nothing = self.operands[i as usize] { +            false +        } else { +            true +        } +    } + +    /// get the memory access information for this instruction, if it accesses memory. +    /// +    /// the corresponding `MemoryAccessSize` may report that the size of accessed memory is +    /// indeterminate; this is the case for `xsave/xrestor`-style instructions whose operation size +    /// varies based on physical processor. +    pub fn mem_size(&self) -> Option<MemoryAccessSize> { +        if self.mem_size != 0 { +            Some(MemoryAccessSize { size: self.mem_size }) +        } else { +            None +        } +    } + +    /// build a new instruction representing nothing in particular. this is primarily useful as a +    /// default to pass to `decode_into`. +    pub fn invalid() -> Instruction { +        Instruction { +            prefixes: Prefixes::new(0), +            opcode: Opcode::NOP, +            mem_size: 0, +            regs: [RegSpec::rax(); 4], +            scale: 0, +            length: 0, +            disp: 0, +            imm: 0, +            operand_count: 0, +            operands: [OperandSpec::Nothing; 4], +        } +    } + +    /// get the `Segment` that will *actually* be used for accessing the operand at index `i`. +    /// +    /// `stos`, `lods`, `movs`, and `cmps` specifically name some segments for use regardless of +    /// prefixes. +    pub fn segment_override_for_op(&self, op: u8) -> Option<Segment> { +        match self.opcode { +            Opcode::STOS | +            Opcode::SCAS => { +                if op == 0 { +                    Some(Segment::ES) +                } else { +                    None +                } +            } +            Opcode::LODS => { +                if op == 1 { +                    Some(self.prefixes.segment) +                } else { +                    None +                } +            } +            Opcode::MOVS => { +                if op == 0 { +                    Some(Segment::ES) +                } else if op == 1 { +                    Some(self.prefixes.segment) +                } else { +                    None +                } +            } +            Opcode::CMPS => { +                if op == 0 { +                    Some(self.prefixes.segment) +                } else if op == 1 { +                    Some(Segment::ES) +                } else { +                    None +                } +            }, +            _ => { +                // most operands are pretty simple: +                if self.operands[op as usize].is_memory() && +                    self.prefixes.segment != Segment::DS { +                    Some(self.prefixes.segment) +                } else { +                    None +                } +            } +        } +    } + +    #[cfg(feature = "fmt")] +    /// wrap a reference to this instruction with a `DisplayStyle` to format the instruction with +    /// later. see the documentation on [`display::DisplayStyle`] for more. +    /// +    /// ``` +    /// use yaxpeax_x86::long_mode::{InstDecoder, DisplayStyle}; +    /// +    /// let decoder = InstDecoder::default(); +    /// let inst = decoder.decode_slice(&[0x33, 0xc1]).unwrap(); +    /// +    /// assert_eq!("eax ^= ecx", inst.display_with(DisplayStyle::C).to_string()); +    /// assert_eq!("xor eax, ecx", inst.display_with(DisplayStyle::Intel).to_string()); +    /// ``` +    pub fn display_with<'a>(&'a self, style: display::DisplayStyle) -> display::InstructionDisplayer<'a> { +        display::InstructionDisplayer { +            style, +            instr: self, +        } +    } + +    /// does this instruction include the `xacquire` hint for hardware lock elision? +    pub fn xacquire(&self) -> bool { +        if self.prefixes.repnz() { +            // xacquire is permitted on typical `lock` instructions, OR `xchg` with memory operand, +            // regardless of `lock` prefix. +            if self.prefixes.lock() { +                true +            } else if self.opcode == Opcode::XCHG { +                self.operands[0] != OperandSpec::RegMMM && self.operands[1] != OperandSpec::RegMMM +            } else { +                false +            } +        } else { +            false +        } +    } + +    /// does this instruction include the `xrelease` hint for hardware lock elision? +    pub fn xrelease(&self) -> bool { +        if self.prefixes.rep() { +            // xrelease is permitted on typical `lock` instructions, OR `xchg` with memory operand, +            // regardless of `lock` prefix. additionally, xrelease is permitted on some forms of mov. +            if self.prefixes.lock() { +                true +            } else if self.opcode == Opcode::XCHG { +                self.operands[0] != OperandSpec::RegMMM && self.operands[1] != OperandSpec::RegMMM +            } else if self.opcode == Opcode::MOV { +                self.operands[0] != OperandSpec::RegMMM && ( +                    self.operands[1] == OperandSpec::RegRRR || +                    self.operands[1] == OperandSpec::ImmI8 || +                    self.operands[1] == OperandSpec::ImmI16 || +                    self.operands[1] == OperandSpec::ImmI32 || +                    self.operands[1] == OperandSpec::ImmI64 +                ) +            } else { +                false +            } +        } else { +            false +        } +    } +} + +#[inline] +fn width_to_gp_reg_bank(width: u8, rex: bool) -> RegisterBank { +    // transform (width, rex) into an index into an index into a LUT, instead of branching as +    // `match` would. +    let index = (width.trailing_zeros() << 1) | (rex as u32); + +    const BANK_LUT: [RegisterBank; 8] = [ +        RegisterBank::B, RegisterBank::rB, +        RegisterBank::W, RegisterBank::W, +        RegisterBank::D, RegisterBank::D, +        RegisterBank::Q, RegisterBank::Q, +    ]; + +    *BANK_LUT.get(index as usize).unwrap_or_else(|| unsafe { unreachable_unchecked() }) +} + +/// a wrapper to hide internal library implementation details. this is only useful for the inner +/// content's `Display` impl, which itself is unstable and suitable only for human consumption. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct OperandCodeWrapper { code: () } +// TODO: something with OperandCodeWrapper... + +/// the actual description for a selection of bits involved in decoding a [`generic::Instruction`]. +/// +/// TODO: adjust wording w.r.t generic instructions, this can't be entirely precise..! +/// some prefixes are only identified as an `InnerDescription::Misc` string, while some are full +/// `InnerDescription::SegmentPrefix(Segment)`. generally, strings should be considered unstable +/// and only useful for displaying for human consumption. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum InnerDescription { +    /// the literal byte read for a `rex` prefix, `0x4_`. +    RexPrefix(u8), +    /// the segment selected by a segment override prefix. this is not necessarily the actual +    /// segement used in the instruction's memory accesses, if any are made. +    SegmentPrefix(Segment), +    /// the opcode read for this instruction. this may be reported multiple times in an instruction +    /// if multiple spans of bits are necessary to determine the opcode. it is a bug if two +    /// different `Opcode` are indicated by different `InnerDescription::Opcode` reported from +    /// decoding the same instruction. this invariant is not well-tested, and may occur in +    /// practice. +    Opcode(Opcode), +    /// the operand code indicating how to read operands for this instruction. this is an internal +    /// detail of `yaxpeax-x86` but is typically named in a manner that can aid understanding the +    /// decoding process. `OperandCode` names are unstable, and this variant is only useful for +    /// displaying for human consumption. +    OperandCode(OperandCodeWrapper), +    /// a decoded register: a name for the bits used to decode it, the register number those bits +    /// specify, and the fully-constructed [`long_mode::RegSpec`] that was decoded. +    RegisterNumber(&'static str, u8, RegSpec), +    /// a miscellaneous string describing some bits of the instruction. this may describe a prefix, +    /// internal details of a prefix, error or constraints on an opcode, operand encoding details, +    /// or other items involved in an instruction. +    Misc(&'static str), +    /// a number involved in the instruction: typically either a disaplacement or immediate. the +    /// string describes which. the `i64` member is typically a sign-extended value from the +    /// appropriate original size, meaning there may be incorrect cases of a `65535u16` sign +    /// extending to `-1`. bug reports are highly encouraged for unexpected values. +    Number(&'static str, i64), +    /// a boundary between two logically distinct sections of an instruction. these typically +    /// separate the leading prefix string (if any), opcode, and operands (if any). the included +    /// string describes which boundary this is. boundary names should not be considered stable, +    /// and are useful at most for displaying for human consumption. +    Boundary(&'static str), +} + +impl InnerDescription { +    fn with_id(self, id: u32) -> FieldDescription { +        FieldDescription { +            desc: self, +            id, +        } +    } +} + +cfg_if::cfg_if! { +    if #[cfg(feature="fmt")] { +        impl fmt::Display for InnerDescription { +            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +                match self { +                    InnerDescription::RexPrefix(bits) => { +                        write!(f, "rex prefix: {}{}{}{}", +                            if bits & 0x8 != 0 { "w" } else { "-" }, +                            if bits & 0x4 != 0 { "r" } else { "-" }, +                            if bits & 0x2 != 0 { "x" } else { "-" }, +                            if bits & 0x1 != 0 { "b" } else { "-" }, +                        ) +                    } +                    InnerDescription::SegmentPrefix(segment) => { +                        write!(f, "segment override: {}", segment) +                    } +                    InnerDescription::Misc(text) => { +                        f.write_str(text) +                    } +                    InnerDescription::Number(text, num) => { +                        write!(f, "{}: {:#x}", text, num) +                    } +                    InnerDescription::Opcode(opc) => { +                        write!(f, "opcode `{}`", opc) +                    } +                    InnerDescription::OperandCode(OperandCodeWrapper { code }) => { +                        write!(f, "operand code `{:?}`", code) +                    } +                    InnerDescription::RegisterNumber(name, num, reg) => { +                        write!(f, "`{}` (`{}` selects register number {})", reg, name, num) +                    } +                    InnerDescription::Boundary(desc) => { +                        write!(f, "{}", desc) +                    } +                } +            } +        } +    } else { +        impl fmt::Display for InnerDescription { +            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +                f.write_str("non-fmt build") +            } +        } +    } +} + +#[cfg_attr(feature="fmt", derive(Debug))] +#[derive(Clone, PartialEq, Eq)] +pub struct FieldDescription { +    desc: InnerDescription, +    id: u32, +} + +impl FieldDescription { +    /// the actual description associated with this bitfield. +    pub fn desc(&self) -> &InnerDescription { +        &self.desc +    } +} + +impl yaxpeax_arch::annotation::FieldDescription for FieldDescription { +    fn id(&self) -> u32 { +        self.id +    } +    fn is_separator(&self) -> bool { +        if let InnerDescription::Boundary(_) = &self.desc { +            true +        } else { +            false +        } +    } +} + +impl fmt::Display for FieldDescription { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        fmt::Display::fmt(&self.desc, f) +    } +} | 
