diff options
author | iximeow <me@iximeow.net> | 2022-01-03 14:06:57 -0800 |
---|---|---|
committer | iximeow <me@iximeow.net> | 2022-01-03 14:06:57 -0800 |
commit | 0759de116479c8a6319450e1e116af39e8c844c5 (patch) | |
tree | 38b806c8c3035faf99c88ada34bb7eab4309c4e3 /src | |
parent | c9a266cd62713f2ff7f5cf637adafd685ee17f16 (diff) | |
parent | fe9c41db5e4f2916439dd268a1b5e65447396ce3 (diff) |
architecture-generic bit layouts of instructions
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 396 |
1 files changed, 384 insertions, 12 deletions
diff --git a/src/main.rs b/src/main.rs index d834785..19660b3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -85,18 +85,18 @@ fn main() { match arch_str { "x86_64" | - "x86:64" => arch_02::decode_input::<yaxpeax_x86::long_mode::Arch>(&buf, &printer), + "x86:64" => arch_02::decode_input_and_annotate::<yaxpeax_x86::long_mode::Arch>(&buf, &printer), "x86_32" | - "x86:32" => arch_02::decode_input::<yaxpeax_x86::protected_mode::Arch>(&buf, &printer), + "x86:32" => arch_02::decode_input_and_annotate::<yaxpeax_x86::protected_mode::Arch>(&buf, &printer), "x86_16" | - "x86:16" => arch_02::decode_input::<yaxpeax_x86::real_mode::Arch>(&buf, &printer), + "x86:16" => arch_02::decode_input_and_annotate::<yaxpeax_x86::real_mode::Arch>(&buf, &printer), "ia64" => arch_02::decode_input::<yaxpeax_ia64::IA64>(&buf, &printer), "avr" => arch_02::decode_input::<yaxpeax_avr::AVR>(&buf, &printer), "armv7" => arch_02::decode_input::<yaxpeax_arm::armv7::ARMv7>(&buf, &printer), "armv7-t" => arch_02::decode_armv7_thumb(&buf, &printer), "armv8" => arch_02::decode_input::<yaxpeax_arm::armv8::a64::ARMv8>(&buf, &printer), "mips" => arch_02::decode_input::<yaxpeax_mips::MIPS>(&buf, &printer), - "msp430" => arch_02::decode_input::<yaxpeax_msp430::MSP430>(&buf, &printer), + "msp430" => arch_02::decode_input_and_annotate::<yaxpeax_msp430::MSP430>(&buf, &printer), "pic17" => arch_02::decode_input::<yaxpeax_pic17::PIC17>(&buf, &printer), "pic18" => arch_02::decode_input::<yaxpeax_pic18::PIC18>(&buf, &printer), "m16c" => arch_02::decode_input::<yaxpeax_m16c::M16C>(&buf, &printer), @@ -161,12 +161,6 @@ struct Printer { verbose: bool, } -struct InstDetails<I: fmt::Display + fmt::Debug> { - pub inst_len: usize, - pub well_defined: bool, - pub inst: I, -} - impl Printer { // shared generic function to keep display logic consistent regardless of yaxpeax-arch version fn print_instr<I, E>(&self, rest: &[u8], addr: usize, inst_res: Result<InstDetails<I>, E>) @@ -178,14 +172,26 @@ impl Printer { let mut stdout = self.stdout.lock(); write!(stdout, "{:#010x}: ", addr).unwrap(); match inst_res { - Ok(InstDetails { inst_len, well_defined, inst }) => { + Ok(InstDetails { inst_len, well_defined, inst, field_descriptions }) => { writeln!(stdout, "{:14}: {}", hex::encode(&rest[..inst_len]), inst) .unwrap(); if self.verbose { - writeln!(stdout, " {:?}", inst).unwrap(); if !well_defined { writeln!(stdout, " not well-defined").unwrap(); } + + // if we can show detailed information about the instruction's interpretation, + // do that. otherwise, debug impl of the instruction and hope for the best. + if let Some((mapper, fields)) = field_descriptions { + let bits_layout = fmt_field_descriptions( + &mapper, + &fields, + &rest[..inst_len] + ); + write!(stdout, "{}", bits_layout).unwrap(); + } else { + writeln!(stdout, " {:?}", inst).unwrap(); + } } } Err(e) => { @@ -195,6 +201,13 @@ impl Printer { } } +struct InstDetails<I: fmt::Debug + fmt::Display> { + inst_len: usize, + well_defined: bool, + inst: I, + field_descriptions: Option<(BitPosition, Vec<FieldRecord>)>, +} + // yaxpeax-arch, implemented by all decoders here, may be required at incompatible versions by // different decoders if/when a new version releases. implement the actual decode-and-print // behavior independent of yaxpeax-arch so decoders using different version can exist in parallel. @@ -205,6 +218,10 @@ mod arch_02 { use yaxpeax_arch_02::{ AddressBase, Arch, Decoder, Instruction, LengthedInstruction, Reader, U8Reader, }; + use yaxpeax_arch_02::annotation::{AnnotatingDecoder, FieldDescription, VecSink}; + + use crate::{FieldRecord, ItemDescription}; + pub(crate) fn decode_input<A: Arch>(buf: &[u8], printer: &Printer) where @@ -214,6 +231,15 @@ mod arch_02 { decode_input_with_decoder::<A>(A::Decoder::default(), buf, printer); } + pub(crate) fn decode_input_and_annotate<A: Arch + crate::ArchBitMapper>(buf: &[u8], printer: &Printer) + where + A::Instruction: fmt::Display, + A::Decoder: AnnotatingDecoder<A>, + for<'data> U8Reader<'data>: Reader<A::Address, A::Word>, + { + decode_input_with_annotation::<A>(A::Decoder::default(), buf, printer); + } + pub(crate) fn decode_armv7_thumb(buf: &[u8], printer: &Printer) { let decoder = yaxpeax_arm::armv7::InstDecoder::default_thumb(); decode_input_with_decoder::<yaxpeax_arm::armv7::ARMv7>(decoder, buf, printer); @@ -240,10 +266,356 @@ mod arch_02 { inst_len: A::Address::zero().wrapping_offset(inst.len()).to_linear(), well_defined: inst.well_defined(), inst, + field_descriptions: None, } }); printer.print_instr(rest, addr.to_linear(), generic_res); addr += advance_addr; } } + + fn field_descs_to_record<A: Arch + crate::ArchBitMapper>(sink: VecSink<<A::Decoder as AnnotatingDecoder<A>>::FieldDescription>) -> Vec<FieldRecord> where A::Decoder: AnnotatingDecoder<A> { + let mut fields: Vec<FieldRecord> = Vec::new(); + let bit_mapper = A::mapper(); + + use itertools::Itertools; + let mut vs = sink.records; + vs.sort_by_key(|rec| rec.2.id()); + for (id, group) in &vs.iter().group_by(|x| x.2.id()) { + let mut field = FieldRecord { + elements: Vec::new(), + id: id, + }; + + for (desc, spans) in &group.group_by(|x| x.2.to_owned()) { + let mut item = ItemDescription { + ranges: Vec::new(), + description: desc.to_string(), + separator: desc.is_separator(), + }; + + for span in spans { + item.ranges.push(crate::BitRange::across(bit_mapper, span.0, span.1)); + } + field.elements.push(item); + } + fields.push(field); + } + + fields + } + + pub(crate) fn decode_input_with_annotation<A: Arch + crate::ArchBitMapper>( + decoder: A::Decoder, + buf: &[u8], + printer: &Printer, + ) where + A::Instruction: fmt::Display, + A::Decoder: AnnotatingDecoder<A>, + for<'data> U8Reader<'data>: Reader<A::Address, A::Word>, + { + let mut addr = A::Address::zero(); + while let Some(rest) = buf.get(addr.to_linear()..).filter(|v| !v.is_empty()) { + let mut sink: VecSink<<A::Decoder as AnnotatingDecoder<A>>::FieldDescription> = VecSink::new(); + let mut reader = U8Reader::new(rest); + let mut inst = A::Instruction::default(); + let res = decoder.decode_with_annotation(&mut inst, &mut reader, &mut sink); + let advance_addr = match &res { + Ok(_) => inst.len(), + Err(_) => A::Instruction::min_size(), + }; + let generic_res = res.map(|_| { + let records = field_descs_to_record::<A>(sink); + crate::InstDetails { + inst_len: A::Address::zero().wrapping_offset(inst.len()).to_linear(), + well_defined: inst.well_defined(), + inst, + field_descriptions: Some((A::mapper(), records)), + } + }); + printer.print_instr(rest, addr.to_linear(), generic_res); + addr += advance_addr; + } + } +} + +/// any architecture with an `AnnotatingDecoder` implementation will have annotations reported at +/// positions of bits in the instruction. `yaxpeax-dis` requires some description of how to convert +/// between a column and a bit for a given architecture. +#[derive(Copy, Clone, Debug)] +struct BitPosition { + word_size: usize, +} + +impl BitPosition { + fn col2bit(&self, col: usize) -> usize { + let word = col / self.word_size; + let bit = (self.word_size - 1) - (col % self.word_size); + let bit = word * self.word_size + bit; + bit + } + + fn bit2col(&self, bit: usize) -> usize { + let word = bit / self.word_size; + let col = (self.word_size - 1) - (bit % self.word_size); + let col = word * self.word_size + col; + col + } +} + +const IA64_POSITIONS: BitPosition = BitPosition { + word_size: 128 +}; + +const WORD_POSITIONS: BitPosition = BitPosition { + word_size: 16 +}; + +const BYTE_POSITIONS: BitPosition = BitPosition { + word_size: 8 +}; + +trait ArchBitMapper { + fn mapper() -> BitPosition; +} + +impl ArchBitMapper for yaxpeax_x86::real_mode::Arch { + fn mapper() -> BitPosition { + BYTE_POSITIONS + } +} + +impl ArchBitMapper for yaxpeax_x86::protected_mode::Arch { + fn mapper() -> BitPosition { + BYTE_POSITIONS + } +} + +impl ArchBitMapper for yaxpeax_x86::long_mode::Arch { + fn mapper() -> BitPosition { + BYTE_POSITIONS + } +} + +impl ArchBitMapper for yaxpeax_msp430::MSP430 { + fn mapper() -> BitPosition { + WORD_POSITIONS + } +} + +impl ArchBitMapper for yaxpeax_ia64::IA64 { + fn mapper() -> BitPosition { + IA64_POSITIONS + } +} + +#[derive(Debug)] +struct BitRange { + start: u32, + end: u32, + lhs: u32, + rhs: u32, +} + +impl BitRange { + fn across(bit_mapper: BitPosition, start: u32, end: u32) -> BitRange { + let mut lhs = bit_mapper.bit2col(start as usize) as u32; + let mut rhs = bit_mapper.bit2col(start as usize) as u32; + for bit in start..=end { + lhs = std::cmp::min(lhs, bit_mapper.bit2col(bit as usize) as u32); + rhs = std::cmp::max(rhs, bit_mapper.bit2col(bit as usize) as u32); + } + BitRange { start, end, lhs, rhs } + } +} + +/// a representation of a decoder's `Annotation` type that does not actually reference +/// `yaxpeax_arch`. this is important so we can have a whole shared display routine reused across +/// `yaxpeax_arch` versions - there may be more than one in use at a time in `yaxpeax-dis`. +struct ItemDescription { + ranges: Vec<BitRange>, + description: String, + separator: bool, +} + +impl fmt::Debug for ItemDescription { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{{ ranges: {:?}, description: {}, separator: {} }}", &self.ranges, &self.description, self.separator) + } +} + +// spans grouped together in some decoder-specified logical structure by +// `id`. `id` is a hint that data should be considered related for display +// purposes. +struct FieldRecord { + // spans grouped together by `FieldDescription` - one field may be + // described by multiple distinct spans, so those spans are recorded + // here. elements are ordered by the lowest bit of spans describing an + // element. + elements: Vec<ItemDescription>, + id: u32, +} + +impl fmt::Debug for FieldRecord { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{{ elements: {:?}, id: {} }}", &self.elements, &self.id) + } +} + +fn fmt_field_descriptions(bit_mapper: &BitPosition, fields: &[FieldRecord], data: &[u8]) -> String { + let mut boundaries = [false; 256]; + let mut separators = [false; 256]; + let mut bits = [false; 256]; + let mut rhs = [false; 256]; + let mut lhs = [false; 256]; + let mut field_order: Vec<(usize, usize)> = Vec::new(); + let mut boundary_order: Vec<(usize, usize)> = Vec::new(); + + for (fi, field) in fields.iter().enumerate() { + for (ei, element) in field.elements.iter().enumerate() { + if element.separator { + for (_ri, range) in element.ranges.iter().enumerate() { + boundaries[range.start as usize + 1] = true; + boundary_order.push((fi, range.start as usize + 1)); + } + continue; + } + field_order.push((fi, ei)); + for (_ri, range) in element.ranges.iter().enumerate() { + for i in range.start..=range.end { + bits[i as usize] = true; + } + separators[range.start as usize] = true; + lhs[range.lhs as usize] = true; + rhs[range.rhs as usize] = true; + } + } + } + boundary_order.sort_by(|l, r| r.1.cmp(&l.1)); + + // regardless of sections, the left-hand side of the terminal is a free boundary + lhs[0] = false; + + let mut res = String::new(); + res.push_str(" \n"); + + let mut fudge_bits = [false; 160]; + + for i in 0..160 { + if (i >> 3) >= data.len() { + continue; + } + + let mut fudge = false; + + if lhs[i] { + fudge = true; + } + + if i > 0 && rhs[i - 1] { + fudge = true; + } + + if fudge { + fudge_bits[i] = true; + } + } + + let mut fudge = 0; + let mut col = [b' '; 160]; + + for i in 0..160 { + if (i >> 3) >= data.len() { + continue; + } + + let bit = bit_mapper.col2bit(i); + + if fudge_bits[i] { + fudge += 1; + } + + if data[(bit >> 3) as usize] & (1 << (bit as u8 & 7)) != 0 { + col[i + fudge] = b'1'; + } else { + col[i + fudge] = b'0'; + } + } + res.push_str(unsafe { std::str::from_utf8_unchecked(&col) }); + res.push_str("\n"); + + for (fi, ei) in field_order.iter() { + let mut col = [b' '; 160]; + + for range in &fields[*fi as usize].elements[*ei as usize].ranges { + let mut fudge = 0; + + for c in 0..128 { + let bit = bit_mapper.col2bit(c as usize); + + if boundaries[c] { + col[c + fudge] = b'|'; + } + if fudge_bits[c as usize] { + fudge += 1; + } + + if bit >= range.start as usize && bit <= range.end as usize { + let data_bit = data[(bit >> 3) as usize] & (1 << (bit as u8 & 7)) != 0; + col[c as usize + fudge] = if data_bit { b'1' } else { b'0' }; + } + } + } + + res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..(data.len() * 8 + 30)]) }); + res.push_str(" "); + res.push_str(&fields[*fi as usize].elements[*ei as usize].description); + res.push_str("\n"); + } + + let mut fudge = 0; + let mut col = [b' '; 160]; + + let mut line_end = 0; + for i in 0..160 { + if (i >> 3) > data.len() { + continue; + } + + if boundaries[i] { + col[i + fudge] = b'|'; + line_end = i + fudge + 1; + } + if fudge_bits[i] { + fudge += 1; + } + } + res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..line_end]) }); + res.push_str("\n"); + + for (field_index, bit) in boundary_order { + let mut fudge = 0; + let mut col = [b' '; 160]; + + for i in 0..160 { + if (i >> 3) > data.len() { + continue; + } + + if i == bit { + res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..i + fudge]) }); + break; + } + + if boundaries[i] { + col[i + fudge] = b'|'; + } + if fudge_bits[i] { + fudge += 1; + } + } + res.push_str("\n"); + } + + res } |