/// Manual references in this crate, both figure and page numbers, are with respect to the document
/// `DDI0406C_d_armv7ar_arm.pdf`
/// `sha256: 294668ae6480133b32d85e9567cc77c5eb0e1232decdf42cac7ab480e884f6e0`

//#[cfg(feature="use-serde")]
//use serde::{Serialize, Deserialize};

use core::fmt::{self, Display, Formatter};

use yaxpeax_arch::{Arch, AddressDiff, Decoder, LengthedInstruction, Reader, ReadError};
#[allow(deprecated)]
use yaxpeax_arch::{NoColors, ShowContextual};

mod thumb;

// #[cfg(feature = "fmt")]
mod display;
#[cfg(all(feature="alloc", feature="fmt"))]
pub use display::InstructionTextBuffer;

// opcode, s, w, cond
/// an opcode bundled with everything that is printed as part of its mnemonic.
///
/// the fields are, in order: the opcode, whether the `s` suffix is shown, whether the `.w` suffix
/// is shown, and the condition code.
pub struct ConditionedOpcode(pub Opcode, pub bool, pub bool, pub ConditionCode);

impl Display for ConditionedOpcode {
    /// writes the opcode, then `s` if the second field is set, then `.w` if the third field is
    /// set, then the condition code.
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        let ConditionedOpcode(opcode, sets_flags, wide, condition) = self;
        let s_suffix = if *sets_flags { "s" } else { "" };
        let w_suffix = if *wide { ".w" } else { "" };
        write!(f, "{}{}{}{}", opcode, s_suffix, w_suffix, condition)
    }
}

/// a context impl to display `arm` instructions with no additional context (no symbol name
/// information, offset names, etc). this impl results in `some_instruction.contextualize(...)`
/// displaying an instruction the same way its `Display` impl would.
pub struct NoContext; #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[allow(non_camel_case_types)] #[allow(missing_docs)] pub enum Opcode { Invalid, /* * These two don't really have direct encodings, but are for the specific instances * where the semantics of the original instruction are the same as push (specifically * ldm/stm/mov that write to the stack and increment/decrement appropriately */ POP, PUSH, B, BL, BLX, BX, BXJ, AND, EOR, SUB, RSB, ADD, ADC, SBC, RSC, TST, TEQ, CMP, CMN, ORR, MOV, BIC, MVN, LSL, LSR, ASR, RRX, ROR, ADR, MSR, MRS, CLZ, LDREXH, STREXH, LDREXB, STREXB, LDREXD, STREXD, LDREX, STREX, LDM(bool, bool, bool, bool), STM(bool, bool, bool, bool), LDR, STR, LDRH, STRH, LDRB, STRB, LDRSH, LDRSHT, LDRSB, LDRSBT, STRD, LDRD, LDC(u8), LDCL(u8), LDC2(u8), LDC2L(u8), STC(u8), STCL(u8), STC2(u8), STC2L(u8), MCRR2(u8, u8), MCR2(u8, u8, u8), MRRC2(u8, u8), MCRR(u8, u8), MRRC(u8, u8), MRC2(u8, u8, u8), CDP2(u8, u8, u8), SRS(bool, bool), RFE(bool, bool), LDRT, STRT, LDRHT, STRHT, LDRBT, STRBT, SWP, SWPB, MUL, MLA, UMAAL, MLS, UMULL, UMLAL, SMULL, SMUL(bool, bool), SMLA(bool, bool), SMLAL, SMLAL_halfword(bool, bool), SMAL(bool, bool), SMLAW(bool), ERET, BKPT, HVC, SMC, MOVT, QDSUB, QDADD, QSUB, QADD, TBB, TBH, UDF, SVC, WFE, WFI, SEV, CSDB, YIELD, HINT, NOP, LEAVEX, ENTERX, CLREX, DSB, DMB, ISB, SXTH, UXTH, SXTB16, UXTB16, SXTB, UXTB, SXTAH, UXTAH, SXTAB16, UXTAB16, SXTAB, UXTAB, CBZ, CBNZ, SETEND, CPS(bool), CPS_modeonly, REV, REV16, REVSH, IT, PKHTB, PKHBT, ORN, SSAT, SSAT16, SBFX, USAT, USAT16, UBFX, BFI, BFC, DBG, PLD, PLI, RBIT, SEL, SADD16, QADD16, SHADD16, SASX, QASX, SHASX, SSAX, QSAX, SHSAX, SSUB16, QSUB16, SHSUB16, SADD8, QADD8, SHADD8, SSUB8, QSUB8, SHSUB8, UADD16, UQADD16, UHADD16, UASX, UQASX, UHASX, USAX, UQSAX, UHSAX, USUB16, UQSUB16, UHSUB16, UADD8, UQADD8, UHADD8, USUB8, UQSUB8, UHSUB8, SMLSD, SMMLA, SMMLS, USADA8, USAD8, SMLAD, SMUSD, SMMUL, SMULW(bool), SMUAD, SDIV, UDIV, SMLALD(bool), SMLSLD(bool), } static DATA_PROCESSING_OPCODES: 
[Opcode; 16] = [ Opcode::AND, Opcode::EOR, Opcode::SUB, Opcode::RSB, Opcode::ADD, Opcode::ADC, Opcode::SBC, Opcode::RSC, Opcode::TST, Opcode::TEQ, Opcode::CMP, Opcode::CMN, Opcode::ORR, Opcode::MOV, Opcode::BIC, Opcode::MVN ]; /// a struct describiing a shifted register operand. this is primarily interesting in that it can /// be translated to a `RegShiftStyle` for further interpretation. #[derive(Debug, PartialEq, Eq, Copy, Clone)] #[repr(transparent)] pub struct RegShift { data: u16 } impl RegShift { /// convert an instruction's `RegShift` operand into something more appropriate for /// programmatic use. pub fn into_shift(&self) -> RegShiftStyle { if self.data & 0b10000 == 0 { RegShiftStyle::RegImm(RegImmShift { data: self.data }) } else { RegShiftStyle::RegReg(RegRegShift { data: self.data }) } } /// don't use this. it's for armv7 testing only. #[doc(hidden)] pub fn from_raw(data: u16) -> Self { RegShift { data } } } /// an enum describing one of two ways a shifted register operand may be shifted. pub enum RegShiftStyle { /// a register shifted by an immediate. RegImm(RegImmShift), /// a register shifted by a register. RegReg(RegRegShift), } /// a register shifted by a register. #[repr(transparent)] pub struct RegRegShift { data: u16 } /// the way a shift operation is carried out. #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum ShiftStyle { /// left-shift the value, filling in zeroes. LSL = 0, /// right-shift the value, filling in zeroes. LSR = 1, /// arithmetic shift right, filling with the top bit of the value (sign-extending). ASR = 2, /// rotate-right, filling with bits shifted out of the value. 
ROR = 3, } impl Display for ShiftStyle { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use core::fmt::Write; let name = self.name(); f.write_char(name[0] as char)?; f.write_char(name[1] as char)?; f.write_char(name[2] as char)?; Ok(()) } } impl ShiftStyle { fn from(bits: u8) -> ShiftStyle { match bits { 0b00 => ShiftStyle::LSL, 0b01 => ShiftStyle::LSR, 0b10 => ShiftStyle::ASR, 0b11 => ShiftStyle::ROR, _ => unreachable!("bad ShiftStyle index") } } fn name(&self) -> &'static [u8; 3] { match self { ShiftStyle::LSL => &[b'l', b's', b'l'], ShiftStyle::LSR => &[b'l', b's', b'r'], ShiftStyle::ASR => &[b'a', b's', b'r'], ShiftStyle::ROR => &[b'r', b'o', b'r'], } } } impl RegRegShift { /// the general-purpose register, an amount to shift the shiftee. pub fn shifter(&self) -> Reg { Reg::from_u8((self.data >> 8) as u8 & 0b1111) } /// the way in which this register is shifted. pub fn stype(&self) -> ShiftStyle { ShiftStyle::from((self.data >> 5) as u8 & 0b11) } /// the general-purpose register to be shifted. pub fn shiftee(&self) -> Reg { Reg::from_u8(self.data as u8 & 0b1111) } } /// a register shifted by an immediate. #[repr(transparent)] pub struct RegImmShift { data: u16 } impl RegImmShift { /// the immediate this register is shifted by. pub fn imm(&self) -> u8 { (self.data >> 7) as u8 & 0b11111 } /// the way in which this register is shifted. pub fn stype(&self) -> ShiftStyle { ShiftStyle::from((self.data >> 5) as u8 & 0b11) } /// the general-purpose register to be shifted. pub fn shiftee(&self) -> Reg { Reg::from_u8(self.data as u8 & 0b1111) } } /// a struct describing an `arm` register. #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[repr(transparent)] pub struct Reg { bits: u8 } impl Reg { #[allow(non_snake_case)] fn from_sysm(R: bool, M: u8) -> Option { /* * Is one of: * • _, encoded with R==0. * • ELR_hyp, encoded with R==0. * • SPSR_, encoded with R==1. 
* For a full description of the encoding of this field, see Encoding and use of Banked register * transfer * instructions on page B9-1959. * */ if R == false { [ Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(8))), Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(9))), Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(10))), Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(11))), Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(12))), Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Usr, Reg::from_u8(14))), None, Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(8))), Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(9))), Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(10))), Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(11))), Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(12))), Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Fiq, Reg::from_u8(14))), None, Some(Operand::BankedReg(Bank::Irq, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Irq, Reg::from_u8(14))), Some(Operand::BankedReg(Bank::Svc, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Svc, Reg::from_u8(14))), Some(Operand::BankedReg(Bank::Abt, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Abt, Reg::from_u8(14))), Some(Operand::BankedReg(Bank::Und, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Und, Reg::from_u8(14))), None, None, None, None, Some(Operand::BankedReg(Bank::Mon, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Mon, Reg::from_u8(14))), Some(Operand::BankedReg(Bank::Hyp, Reg::from_u8(13))), Some(Operand::BankedReg(Bank::Hyp, Reg::from_u8(14))), ][M as usize] } else { if M == 0b01110 { Some(Operand::BankedSPSR(Bank::Fiq)) } else if M == 0b10000 { Some(Operand::BankedSPSR(Bank::Irq)) } else if M == 0b10010 { Some(Operand::BankedSPSR(Bank::Svc)) } else if M == 0b10100 { Some(Operand::BankedSPSR(Bank::Abt)) } else if M == 0b10110 { Some(Operand::BankedSPSR(Bank::Und)) } else if M == 0b11100 { 
Some(Operand::BankedSPSR(Bank::Mon)) } else if M == 0b11110 { Some(Operand::BankedSPSR(Bank::Hyp)) } else { None } } } /// create a new `Reg` with the specified number. /// /// panics if `bits` is out of range (16 or above). #[inline] pub fn from_u8(bits: u8) -> Reg { if bits > 0b1111 { panic!("register number out of range"); } Reg { bits } } /// get the number of this register. the returned value will be between 0 and 15. pub fn number(&self) -> u8 { self.bits } } /// a control register. #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[repr(transparent)] pub struct CReg { bits: u8 } impl Display for CReg { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "c{}", self.bits) } } impl CReg { /// create a new `CReg` with the specified number. /// /// panics if `bits` is out of range (16 or above). #[inline] pub fn from_u8(bits: u8) -> CReg { if bits > 0b1111 { panic!("register number out of range"); } CReg { bits } } /// get the number of this register. the returned value will be between 0 and 15. 
pub fn number(&self) -> u8 { self.bits } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[allow(non_camel_case_types)] #[allow(missing_docs)] pub enum StatusRegMask { // Note 0b0000 is unused (as is 0b10000) CPSR_C = 0b0001, CPSR_X = 0b0010, CPSR_XC = 0b0011, APSR_G = 0b0100, CPSR_SC = 0b0101, CPSR_SX = 0b0110, CPSR_SXC = 0b0111, APSR_NZCVQ = 0b1000, CPSR_FC = 0b1001, CPSR_FX = 0b1010, CPSR_FXC = 0b1011, APSR_NZCVQG = 0b1100, CPSR_FSC = 0b1101, CPSR_FSX = 0b1110, CPSR_FSXC = 0b1111, SPSR = 0b10000, SPSR_C = 0b10001, SPSR_X = 0b10010, SPSR_XC = 0b10011, SPSR_S = 0b10100, SPSR_SC = 0b10101, SPSR_SX = 0b10110, SPSR_SXC = 0b10111, SPSR_F = 0b11000, SPSR_FC = 0b11001, SPSR_FX = 0b11010, SPSR_FXC = 0b11011, SPSR_FS = 0b11100, SPSR_FSC = 0b11101, SPSR_FSX = 0b11110, SPSR_FSXC = 0b11111, } impl StatusRegMask { fn from_raw(raw: u8) -> Result { if raw == 0 { // invalid status reg mask value return Err(DecodeError::InvalidOperand); } Ok([ StatusRegMask::CPSR_C, // actually unreachable StatusRegMask::CPSR_C, StatusRegMask::CPSR_X, StatusRegMask::CPSR_XC, StatusRegMask::APSR_G, StatusRegMask::CPSR_SC, StatusRegMask::CPSR_SX, StatusRegMask::CPSR_SXC, StatusRegMask::APSR_NZCVQ, StatusRegMask::CPSR_FC, StatusRegMask::CPSR_FX, StatusRegMask::CPSR_FXC, StatusRegMask::APSR_NZCVQG, StatusRegMask::CPSR_FSC, StatusRegMask::CPSR_FSX, StatusRegMask::CPSR_FSXC, StatusRegMask::SPSR, StatusRegMask::SPSR_C, StatusRegMask::SPSR_X, StatusRegMask::SPSR_XC, StatusRegMask::SPSR_S, StatusRegMask::SPSR_SC, StatusRegMask::SPSR_SX, StatusRegMask::SPSR_SXC, StatusRegMask::SPSR_F, StatusRegMask::SPSR_FC, StatusRegMask::SPSR_FX, StatusRegMask::SPSR_FXC, StatusRegMask::SPSR_FS, StatusRegMask::SPSR_FSC, StatusRegMask::SPSR_FSX, StatusRegMask::SPSR_FSXC, ][raw as usize]) } } /// an operand in an `arm` instruction. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Operand { /// a general-purpose register. Reg(Reg), /// a general-purpose register, with writeback. 
these generally imply an increment by width of /// a memort operand, depending on the instruction. RegWBack(Reg, bool), /// a list of registers specified as a bitmask from bits 0 to 15. RegList(u16), /// a memory access, dereferencing a general-purpose register. RegDeref(Reg), /// a memory access, dereferencing a shifted general-purpose register with register or /// immediate offset. RegShift(RegShift), /// a memory access of a register, post-indexed a register shifted by register or immediate. /// the first bool indicates if the shifted-register is added or subtracted ot the base /// register, while the second bool indicates if the resulting address is written back to the /// base register. RegDerefPostindexRegShift(Reg, RegShift, bool, bool), // add/sub, wback /// a memory access of a register, pre-indexed with a register shifted by register or /// immediate. the first bool indicates if the shifted-register is added or subtracted ot the /// base register, while the second bool indicates if the resulting address is written back to /// the base register. RegDerefPreindexRegShift(Reg, RegShift, bool, bool), // add/sub, wback /// a memory access of a register, post-indexed with an immediate. the first bool indicates if /// the shifted-register is added or subtracted ot the base register, while the second bool /// indicates if the resulting address is written back to the base register. RegDerefPostindexOffset(Reg, u16, bool, bool), // add/sub, wback /// a memory access of a register, pre-indexed with an immediate. the first bool indicates if /// the shifted-register is added or subtracted ot the base register, while the second bool /// indicates if the resulting address is written back to the base register. RegDerefPreindexOffset(Reg, u16, bool, bool), // add/sub, wback /// a memory access of a register, post-indexed with a register. 
the first bool indicates if the /// shifted-register is added or subtracted ot the base register, while the second bool /// indicates if the resulting address is written back to the base register. RegDerefPostindexReg(Reg, Reg, bool, bool), // add/sub, wback /// a memory access of a register, pre-indexed with a register. the first bool indicates if the /// shifted-register is added or subtracted ot the base register, while the second bool /// indicates if the resulting address is written back to the base register. RegDerefPreindexReg(Reg, Reg, bool, bool), // add/sub, wback /// a 12-bit immediate, stored in a `u16`. Imm12(u16), /// a 32-bit immediate, stored in a `u32`. Imm32(u32), /// a pc-relative branch, with 32-bit signed offset, left-shifted by 2. BranchOffset(i32), /// a pc-relative branch, with 32-bit signed offset, left-shifted by 1. BranchThumbOffset(i32), /// a coprocessor index. #[deprecated( since = "0.3.2", note = "`Coprocessor` was prematurely added: `CoprocOption` the operand used to indicate \ coprocessor selection and has always been the variant used as such" )] Coprocessor(u8), /// a coprocessor option number. CoprocOption(u8), /// an `arm` control register. CReg(CReg), /// an `arm` banked register, either `usr` (general-purpose) bank or one of the alternate sets /// of `arm` registers. BankedReg(Bank, Reg), /// `spsr` in some `arm` register bank. BankedSPSR(Bank), /// a mask of bits for the `spsr` register. StatusRegMask(StatusRegMask), /// the `apsr` register. APSR, /// the `spsr` register. SPSR, /// the `cpsr` register. CPSR, /// "no operand". since an instruction's `operands` array is always four entries, this is used /// to fill space, if any, after recording an instruction's extant operands. 
Nothing, } /// a trait describing functions in support of processing operands /// /// this is interesting more for future implementation of `yaxpeax-arm`: operands currently are /// backed by an `Operand` enum, which means that operating on an operand involves potentially more /// copies and data management than strictly necessary. in the future, operands may be described /// more granularly, where `OperandVisitor` is the stable interface to the pre-enum constituent /// parts of operands. /// /// see `yaxpeax-x86`'s equivalent trait for examples of that direction. pub trait OperandVisitor { /// the result for successful processing of an operand. for formatting, as an example, this is /// likely `()`. type Ok; /// the result for an error in processing an operand. type Error; /// process an operand that is a simple general purpose register. fn visit_reg(&mut self, reg: Reg) -> Result; /// process an operand that is a simple general purpose register with writeback. fn visit_reg_wback(&mut self, reg: Reg, wback: bool) -> Result; /// process an operand that is a list of registers. fn visit_reglist(&mut self, list: u16) -> Result; /// process an operand that is a memory access through a general purpose register. fn visit_reg_deref(&mut self, reg: Reg) -> Result; /// process an operand that is a shifted general pupose register. fn visit_reg_shift(&mut self, reg_shift: RegShift) -> Result; /// process an operand that is the dereference of a register, afterward incremented by index. fn visit_reg_deref_postindex_reg_shift(&mut self, base: Reg, index: RegShift, add: bool, wback: bool) -> Result; /// process an operand that is the dereference of a register incremented by index. fn visit_reg_deref_preindex_reg_shift(&mut self, base: Reg, index: RegShift, add: bool, wback: bool) -> Result; /// process an operand that is the dereference of a register, afterward incremented by offset. 
fn visit_reg_deref_postindex_offset(&mut self, base: Reg, offset: u16, add: bool, wback: bool) -> Result; /// process an operand that is the dereference of a register incremented by offset. fn visit_reg_deref_preindex_offset(&mut self, base: Reg, offset: u16, add: bool, wback: bool) -> Result; /// process an operand that is the dereference of a register, afterward incremented by offset. fn visit_reg_deref_postindex_reg(&mut self, base: Reg, offset: Reg, add: bool, wback: bool) -> Result; /// process an operand that is the dereference of a register incremented by offset. fn visit_reg_deref_preindex_reg(&mut self, base: Reg, offset: Reg, add: bool, wback: bool) -> Result; /// process an operand that is a 12-bit immediate. fn visit_imm12(&mut self, imm: u16) -> Result; /// process an operand that is a 32-bit immediate. fn visit_imm32(&mut self, imm: u32) -> Result; /// process an operand that is a branch with i32 offset. fn visit_branch_offset(&mut self, offset: i32) -> Result; /// process an operand that is a branch with i32 offset that also exchanges instruction sets. /// /// this is typically rendered in the same way as `visit_branch_offset` but is a distinct /// helper due to support uses emphasizing the ISA-changing behavior. fn visit_blx_offset(&mut self, offset: i32) -> Result; /// process an operand that is a coprocessor option access. not exactly clear what this means, /// this may need to get foled into an opcode or redefinition of the operand. fn visit_coprocessor_option(&mut self, nr: u8) -> Result; /// process an operand that is a control register. fn visit_creg(&mut self, creg: CReg) -> Result; /// process an operand that is a banked register. fn visit_banked_reg(&mut self, bank: Bank, reg: u16) -> Result; /// process an operand that is a banked version of `SPSR`. fn visit_banked_spsr(&mut self, bank: Bank) -> Result; /// process an operand that is some set of bits out of a status register. 
fn visit_status_reg_mask(&mut self, mask: StatusRegMask) -> Result; /// process an operand that is `APSR`. fn visit_apsr(&mut self) -> Result; /// process an operand that is `SPSR`. fn visit_spsr(&mut self) -> Result; /// process an operand that is `CPSR`. fn visit_cpsr(&mut self) -> Result; /// process an operand that is not defined above. there are no parameters as for an unknown /// operand kind there is no appropriate tuple of values to provide. fn visit_other(&mut self) -> Result; } /// a register bank for a register in `armv7` or below. #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[allow(missing_docs)] pub enum Bank { Usr, Fiq, Irq, Svc, Abt, Und, Mon, Hyp, } /// a `armv7` or below instruction. #[derive(Debug, PartialEq, Eq)] pub struct Instruction { /// the condition code for this instruction, defaults to `AL` if the instruction is /// unconditional. pub condition: ConditionCode, /// the opcode of this instruction. pub opcode: Opcode, /// operands for the decoded instruction. operands are populated from index 0, to 1, 2, and 3. /// operands from the instruction are non-`Operand::Nothing`. pub operands: [Operand; 4], /// does this instruction update flags, while variants that do not update flags exist? pub s: bool, /// is this a 32-bit thumb instruction? pub wide: bool, /// and if it is a 32-bit thumb instruction, should the .w suffix be shown? pub thumb_w: bool, /// and generally speaking, was this just a thumb-encoded instruction? pub thumb: bool, } /// the kinds of errors possibly encountered in trying to decode an `armv7` or below instruction. #[derive(Debug, PartialEq, Copy, Clone)] pub enum DecodeError { /// the input was insufficient to decode a full instruction. for non-thumb instructions, this means /// the input was not at least four bytes long. for thumb instructions, the input was either /// not two bytes, or not four bytes, depending on how much the instruction would need. ExhaustedInput, /// the instruction encodes an opcode that is not valid. 
InvalidOpcode, /// the instruction encodes an operand that is not valid. InvalidOperand, /// `yaxpeax-arm` doesn't know how to decode this, but it may be a valid instruction. the /// instruction decoder is not complete, sorry. :( /// /// in practice this typically indicates some kinds of coprocessor instruction, or `ARMv7` SIMD /// instruction. Incomplete, /// the instruction includes reserved bits that were not set as required. Nonconforming, /// the input encodes an instruction that is explicitly undefined. Undefined, /// the input encodes an instruction with unpredictable behavior. Unpredictable, } impl fmt::Display for DecodeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use yaxpeax_arch::DecodeError; f.write_str(self.description()) } } #[cfg(feature = "std")] extern crate std; #[cfg(feature = "std")] impl std::error::Error for DecodeError { fn description(&self) -> &str { ::description(self) } } impl From for DecodeError { fn from(_e: ReadError) -> DecodeError { DecodeError::ExhaustedInput } } impl yaxpeax_arch::DecodeError for DecodeError { fn data_exhausted(&self) -> bool { self == &DecodeError::ExhaustedInput } fn bad_opcode(&self) -> bool { self == &DecodeError::InvalidOpcode } fn bad_operand(&self) -> bool { self == &DecodeError::InvalidOperand || self == &DecodeError::Unpredictable } fn description(&self) -> &'static str { match self { DecodeError::ExhaustedInput => "exhausted input", DecodeError::InvalidOpcode => "invalid opcode", DecodeError::InvalidOperand => "invalid operand", DecodeError::Incomplete => "incomplete decoder", DecodeError::Nonconforming => "invalid reserved bits", DecodeError::Undefined => "undefined encoding", DecodeError::Unpredictable => "unpredictable instruction", } } } impl yaxpeax_arch::Instruction for Instruction { // TODO: this is wrong!! 
fn well_defined(&self) -> bool { true } } impl Default for Instruction { fn default() -> Self { Instruction { condition: ConditionCode::AL, opcode: Opcode::Invalid, operands: [Operand::Nothing, Operand::Nothing, Operand::Nothing, Operand::Nothing], s: false, thumb_w: false, wide: false, thumb: false, } } } impl Instruction { fn set_s(&mut self, value: bool) { self.s = value; } /// does this instruction set status flags? pub fn s(&self) -> bool { self.s } pub(crate) fn set_w(&mut self, value: bool) { self.thumb_w = value; } /// was this instruction encoded in `thumb` mode and still 4 bytes, *and* requires a `.w` /// suffix on the opcode? pub fn w(&self) -> bool { self.thumb_w } pub(crate) fn set_wide(&mut self, value: bool) { self.wide = value; } /// was this instruction encoded in `thumb` mode and still 4 bytes? pub fn wide(&self) -> bool { self.wide } pub(crate) fn set_thumb(&mut self, value: bool) { self.thumb = value; } /// was this instruction encoded in `thumb` mode? pub fn thumb(&self) -> bool { self.thumb } } impl Display for Instruction { fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { #[allow(deprecated)] self.contextualize(&NoColors, 0, Some(&NoContext), f) } } impl LengthedInstruction for Instruction { type Unit = AddressDiff<::Address>; fn min_size() -> Self::Unit { // TODO: this is contingent on the decoder mode... AddressDiff::from_const(4) } fn len(&self) -> Self::Unit { if self.thumb && !self.wide { AddressDiff::from_const(2) } else { AddressDiff::from_const(4) } } } /// a condition code for am `armv7` or below instruction. 
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum ConditionCode {
    EQ,
    NE,
    HS,
    LO,
    MI,
    PL,
    VS,
    VC,
    HI,
    LS,
    GE,
    LT,
    GT,
    LE,
    AL,
}

impl ConditionCode {
    /// the two lowercase ASCII letters of this condition's mnemonic suffix.
    fn name(&self) -> &'static [u8; 2] {
        match self {
            ConditionCode::EQ => b"eq",
            ConditionCode::NE => b"ne",
            ConditionCode::HS => b"hs",
            ConditionCode::LO => b"lo",
            ConditionCode::MI => b"mi",
            ConditionCode::PL => b"pl",
            ConditionCode::VS => b"vs",
            ConditionCode::VC => b"vc",
            ConditionCode::HI => b"hi",
            ConditionCode::LS => b"ls",
            ConditionCode::GE => b"ge",
            ConditionCode::LT => b"lt",
            ConditionCode::GT => b"gt",
            ConditionCode::LE => b"le",
            ConditionCode::AL => b"al",
        }
    }
}

impl Display for ConditionCode {
    /// writes the two-letter suffix for every condition except `AL`, which prints nothing.
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        use core::fmt::Write;

        if *self == ConditionCode::AL {
            return Ok(());
        }

        let [first, second] = *self.name();
        f.write_char(first as char)?;
        f.write_char(second as char)
    }
}

impl ConditionCode {
    /// decode a condition field in the range 0 to 14.
    ///
    /// 15 and above hit `unreachable!()`, so callers must handle those encodings before calling
    /// this; that is also why the function is private.
    #[inline]
    fn build(value: u8) -> ConditionCode {
        // entry `i` is the condition encoded as `i`.
        const BY_ENCODING: [ConditionCode; 15] = [
            ConditionCode::EQ,
            ConditionCode::NE,
            ConditionCode::HS,
            ConditionCode::LO,
            ConditionCode::MI,
            ConditionCode::PL,
            ConditionCode::VS,
            ConditionCode::VC,
            ConditionCode::HI,
            ConditionCode::LS,
            ConditionCode::GE,
            ConditionCode::LT,
            ConditionCode::GT,
            ConditionCode::LE,
            ConditionCode::AL,
        ];

        match BY_ENCODING.get(value as usize) {
            Some(&condition) => condition,
            None => unreachable!(),
        }
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(dead_code)]
enum DecodeMode {
    User,
    FIQ,
    IRQ,
    Supervisor,
    Monitor,
    Abort,
    Hyp,
    Undefined,
    System,
    /// Catch-all mode to try decoding all ARM instructions. Some instructions are `UNDEFINED` or
    /// `UNPREDICTABLE` in some modes, but `Any` will attempt to decode all.
    Any,
}

impl Default for DecodeMode {
    fn default() -> Self {
        DecodeMode::Any
    }
}

impl DecodeMode {
    // each `is_*` predicate below, other than `is_any`, also holds for `Any`.

    fn is_user(&self) -> bool {
        *self == DecodeMode::Any || *self == DecodeMode::User
    }

    #[allow(dead_code)]
    fn is_supervisor(&self) -> bool {
        *self == DecodeMode::Any || *self == DecodeMode::Supervisor
    }

    fn is_hyp(&self) -> bool {
        *self == DecodeMode::Any || *self == DecodeMode::Hyp
    }

    fn is_system(&self) -> bool {
        *self == DecodeMode::Any || *self == DecodeMode::System
    }

    fn is_any(&self) -> bool {
        *self == DecodeMode::Any
    }
}

#[derive(Debug, PartialEq, Eq)]
#[allow(non_camel_case_types)]
enum ARMVersion {
    v4,
    v5,
    v6,
    v6t2,
    v7,
    v7ve,
    v7vese,
    Any,
}

impl Default for ARMVersion {
    fn default() -> Self {
        ARMVersion::Any
    }
}

// nothing checks/rejects by arm version yet, but.. soon....
/// decode configuration for `ARMv7` and below. one decoder type serves both `thumb` and
/// non-`thumb` modes, and both modes decode into the same instruction struct.
///
/// NOTE: the helper constructors create an `InstDecoder` for a specific revision or extension of
/// the instruction set, but `yaxpeax-arm` does not actually honor these settings yet. this means
/// any `InstDecoder` will decode all known instructions through the latest `ARMv7` extensions.
#[allow(unused)]
#[derive(Debug)]
pub struct InstDecoder {
    mode: DecodeMode,
    version: ARMVersion,
    should_is_must: bool,
    thumb: bool,
}

impl Default for InstDecoder {
    /// any mode, any version, "should" treated as "must", non-`thumb`.
    fn default() -> Self {
        Self {
            mode: DecodeMode::default(),
            version: ARMVersion::default(),
            should_is_must: true,
            thumb: false,
        }
    }
}

impl InstDecoder {
    /// choose whether this decoder decodes `thumb` instructions (`true`) or normal `arm`
    /// instructions (`false`).
    pub fn set_thumb_mode(&mut self, thumb: bool) {
        self.thumb = thumb;
    }

    /// choose whether this decoder decodes `thumb` instructions (`true`) or normal `arm`
    /// instructions (`false`).
    ///
    /// (this consumes and returns the `InstDecoder` to support use in chained calls.)
    pub fn with_thumb_mode(mut self, thumb: bool) -> Self {
        self.thumb = thumb;
        self
    }

    /// initialize a new `arm` `InstDecoder` with default ("everything") support, but in `thumb`
    /// mode.
    pub fn default_thumb() -> Self {
        Self { thumb: true, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv4`.
    pub fn armv4() -> Self {
        Self { version: ARMVersion::v4, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv5`.
    pub fn armv5() -> Self {
        Self { version: ARMVersion::v5, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv6`.
    pub fn armv6() -> Self {
        Self { version: ARMVersion::v6, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv6t2`.
    pub fn armv6t2() -> Self {
        Self { version: ARMVersion::v6t2, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv6t2` in thumb mode.
    pub fn armv6t2_thumb() -> Self {
        Self { version: ARMVersion::v6t2, thumb: true, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv7`.
    pub fn armv7() -> Self {
        Self { version: ARMVersion::v7, ..Self::default() }
    }

    /// create an `InstDecoder` that supports only instructions through to `ARMv7` in thumb mode.
pub fn armv7_thumb() -> Self { Self { mode: DecodeMode::Any, version: ARMVersion::v7, should_is_must: true, thumb: true, } } /// create an `InstDecoder` that supports only instructions through to `ARMv7ve`. pub fn armv7ve() -> Self { Self { mode: DecodeMode::Any, version: ARMVersion::v7ve, should_is_must: true, thumb: false, } } /// create an `InstDecoder` that supports only instructions through to `ARMv7ve` in thumb mode. pub fn armv7ve_thumb() -> Self { Self { mode: DecodeMode::Any, version: ARMVersion::v7ve, should_is_must: true, thumb: true, } } /// create an `InstDecoder` that supports only instructions through to `ARMv7vese`. pub fn armv7vese() -> Self { Self { mode: DecodeMode::Any, version: ARMVersion::v7vese, should_is_must: true, thumb: false, } } fn unpredictable(&self) -> Result<(), DecodeError> { if self.mode != DecodeMode::Any { Err(DecodeError::Unpredictable) } else { Ok(()) } } } #[allow(non_snake_case)] impl Decoder for InstDecoder { #[inline] fn decode_into::Address, ::Word>>(&self, inst: &mut Instruction, words: &mut T) -> Result<(), ::DecodeError> { inst.set_w(false); inst.set_wide(false); if self.thumb { return thumb::decode_into(&self, inst, words); } else { inst.set_thumb(false); } let mut word_bytes = [0u8; 4]; words.next_n(&mut word_bytes)?; let word = u32::from_le_bytes(word_bytes); let (cond, opc_upper) = { let top_byte = word >> 24; ( ((top_byte >> 4) & 0xf) as u8, ((top_byte >> 1) & 0x7) as u8 ) }; if cond == 0b1111 { // unconditional instructions, section A5.7/page A5-214 inst.condition = ConditionCode::AL; let op1 = (word >> 20) as u8; if op1 >= 0b1000_0000 { match (op1 >> 5) & 0b11 { 0b00 => { match op1 & 0b101 { 0b000 | 0b101 => { return Err(DecodeError::InvalidOpcode); } 0b100 => { // SRS (see table A5.7, op1 = 0b100xx1x0, page A5-214) if !self.mode.is_any() && self.mode.is_hyp() { return Err(DecodeError::Undefined); } if self.should_is_must { if word & 0x000fffe0 != 0x000d0500 { return Err(DecodeError::Nonconforming); } } let puxw 
= (word >> 21) & 0b1111; let P = puxw & 0b1000 != 0; let U = puxw & 0b0100 != 0; let W = puxw & 0b0001 != 0; inst.opcode = Opcode::SRS(P, U); inst.operands = [ Operand::RegWBack(Reg::from_u8(13), W), Operand::Imm32(word & 0b1111), Operand::Nothing, Operand::Nothing, ]; }, 0b001 => { // RFE (see table A5.7, op1 = 0b100xx0x1, page A5-214) if !self.mode.is_any() && self.mode.is_hyp() { return Err(DecodeError::Undefined); } if self.should_is_must { if word & 0xffff != 0x0a00 { return Err(DecodeError::Nonconforming); } } let puxw = (word >> 21) & 0b1111; let P = puxw & 0b1000 != 0; let U = puxw & 0b0100 != 0; let W = puxw & 0b0001 != 0; inst.opcode = Opcode::RFE(P, U); inst.operands = [ Operand::RegWBack(Reg::from_u8((word >> 16) as u8 & 0b1111), W), Operand::Nothing, Operand::Nothing, Operand::Nothing, ]; } _ => { unreachable!("op1 mask is 0b101 but somehow we got an invalid pattern"); } } } 0b01 => { inst.opcode = Opcode::BLX; let operand = ((word & 0xffffff) as i32) << 8 >> 7; inst.operands = [ Operand::BranchThumbOffset( operand | ( ((word >> 24) & 0b1) as i32 ) ), Operand::Nothing, Operand::Nothing, Operand::Nothing, ]; } 0b10 => { // op1=0b110xxxxx, see table A5-23 if (word >> 20) & 0b11010 == 0b00000 { // the `not 11000x0{0,1}` cases in table A5-23, MCRR or MRRC // but first check that bit 2 of op1 is in fact 1: if (word >> 20) & 0b00100 != 0 { // actually MCRR or MRRC let CRm = word as u8 & 0b1111; let opc1 = (word >> 4) as u8 & 0b1111; let coproc = (word >> 8) as u8 & 0b1111; if coproc & 0b1110 == 0b1010 { // TODO: `UNDEFINED` return Err(DecodeError::InvalidOpcode); } let Rt = (word >> 12) as u8 & 0b1111; let Rt2 = (word >> 16) as u8 & 0b1111; if Rt == 15 || Rt2 == 15 || Rt == Rt2 { // TODO: actually `UNPREDICTABLE` return Err(DecodeError::InvalidOperand); } if (word >> 20) & 0b00001 != 0 { inst.opcode = Opcode::MRRC2(coproc, opc1); } else { inst.opcode = Opcode::MCRR2(coproc, opc1); } inst.operands = [ Operand::Reg(Reg::from_u8(Rt)), 
Operand::Reg(Reg::from_u8(Rt2)), Operand::CReg(CReg::from_u8(CRm)), Operand::Nothing, ]; } else { return Err(DecodeError::InvalidOpcode); } } else { // STC or LDC let pudw = (word >> 21) as u8 & 0b1111; let Rn = (word >> 16) as u8 & 0b1111; let CRd = (word >> 12) as u8 & 0b1111; let coproc = (word >> 8) as u8 & 0b1111; let imm8 = word & 0b11111111; if coproc & 0b1110 == 0b1010 { return Err(DecodeError::InvalidOpcode); } if (word >> 20) & 0b00001 == 0 { // op=110xxxx0, STC // page A8-663 if pudw & 0b0010 != 0 { inst.opcode = Opcode::STC2L(coproc); } else { inst.opcode = Opcode::STC2(coproc); } } else { // op=110xxxx1, LDC // page A8-393 if pudw & 0b0010 != 0 { inst.opcode = Opcode::LDC2L(coproc); } else { inst.opcode = Opcode::LDC2(coproc); } } let P = pudw & 0b1000 != 0; let U = pudw & 0b0100 != 0; let W = pudw & 0b0001 != 0; inst.operands = [ Operand::CReg(CReg::from_u8(CRd)), if P { Operand::RegDerefPreindexOffset(Reg::from_u8(Rn), (imm8 << 2) as u16, U, W) } else { if W { // preindex has no wback Operand::RegDerefPostindexOffset(Reg::from_u8(Rn), (imm8 << 2) as u16, U, false) } else { Operand::RegDeref(Reg::from_u8(Rn)) } }, if !P && !W { // TODO: not sure what ldc2{l}'s