diff options
| author | iximeow <me@iximeow.net> | 2022-01-03 14:06:57 -0800 | 
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2022-01-03 14:06:57 -0800 | 
| commit | 0759de116479c8a6319450e1e116af39e8c844c5 (patch) | |
| tree | 38b806c8c3035faf99c88ada34bb7eab4309c4e3 /src | |
| parent | c9a266cd62713f2ff7f5cf637adafd685ee17f16 (diff) | |
| parent | fe9c41db5e4f2916439dd268a1b5e65447396ce3 (diff) | |
architecture-generic bit layouts of instructions
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 396 | 
1 files changed, 384 insertions, 12 deletions
| diff --git a/src/main.rs b/src/main.rs index d834785..19660b3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -85,18 +85,18 @@ fn main() {      match arch_str {          "x86_64" | -        "x86:64" => arch_02::decode_input::<yaxpeax_x86::long_mode::Arch>(&buf, &printer), +        "x86:64" => arch_02::decode_input_and_annotate::<yaxpeax_x86::long_mode::Arch>(&buf, &printer),          "x86_32" | -        "x86:32" => arch_02::decode_input::<yaxpeax_x86::protected_mode::Arch>(&buf, &printer), +        "x86:32" => arch_02::decode_input_and_annotate::<yaxpeax_x86::protected_mode::Arch>(&buf, &printer),          "x86_16" | -        "x86:16" => arch_02::decode_input::<yaxpeax_x86::real_mode::Arch>(&buf, &printer), +        "x86:16" => arch_02::decode_input_and_annotate::<yaxpeax_x86::real_mode::Arch>(&buf, &printer),          "ia64" => arch_02::decode_input::<yaxpeax_ia64::IA64>(&buf, &printer),          "avr" => arch_02::decode_input::<yaxpeax_avr::AVR>(&buf, &printer),          "armv7" => arch_02::decode_input::<yaxpeax_arm::armv7::ARMv7>(&buf, &printer),          "armv7-t" => arch_02::decode_armv7_thumb(&buf, &printer),          "armv8" => arch_02::decode_input::<yaxpeax_arm::armv8::a64::ARMv8>(&buf, &printer),          "mips" => arch_02::decode_input::<yaxpeax_mips::MIPS>(&buf, &printer), -        "msp430" => arch_02::decode_input::<yaxpeax_msp430::MSP430>(&buf, &printer), +        "msp430" => arch_02::decode_input_and_annotate::<yaxpeax_msp430::MSP430>(&buf, &printer),          "pic17" => arch_02::decode_input::<yaxpeax_pic17::PIC17>(&buf, &printer),          "pic18" => arch_02::decode_input::<yaxpeax_pic18::PIC18>(&buf, &printer),          "m16c" => arch_02::decode_input::<yaxpeax_m16c::M16C>(&buf, &printer), @@ -161,12 +161,6 @@ struct Printer {      verbose: bool,  } -struct InstDetails<I: fmt::Display + fmt::Debug> { -    pub inst_len: usize, -    pub well_defined: bool, -    pub inst: I, -} -  impl Printer {      // shared generic function to keep display logic consistent regardless of yaxpeax-arch version      fn print_instr<I, E>(&self, rest: &[u8], addr: usize, inst_res: Result<InstDetails<I>, E>) @@ -178,14 +172,26 @@ impl Printer {          let mut stdout = self.stdout.lock();          write!(stdout, "{:#010x}: ", addr).unwrap();          match inst_res { -            Ok(InstDetails { inst_len, well_defined, inst }) => { +            Ok(InstDetails { inst_len, well_defined, inst, field_descriptions }) => {                  writeln!(stdout, "{:14}: {}", hex::encode(&rest[..inst_len]), inst)                      .unwrap();                  if self.verbose { -                    writeln!(stdout, "  {:?}", inst).unwrap();                      if !well_defined {                          writeln!(stdout, "  not well-defined").unwrap();                      } + +                    // if we can show detailed information about the instruction's interpretation, +                    // do that. otherwise, debug impl of the instruction and hope for the best. +                    if let Some((mapper, fields)) = field_descriptions { +                        let bits_layout = fmt_field_descriptions( +                            &mapper, +                            &fields, +                            &rest[..inst_len] +                        ); +                        write!(stdout, "{}", bits_layout).unwrap(); +                    } else { +                        writeln!(stdout, "  {:?}", inst).unwrap(); +                    }                  }              }              Err(e) => { @@ -195,6 +201,13 @@ impl Printer {      }  } +struct InstDetails<I: fmt::Debug + fmt::Display> { +    inst_len: usize, +    well_defined: bool, +    inst: I, +    field_descriptions: Option<(BitPosition, Vec<FieldRecord>)>, +} +  // yaxpeax-arch, implemented by all decoders here, may be required at incompatible versions by  // different decoders if/when a new version releases. implement the actual decode-and-print  // behavior independent of yaxpeax-arch so decoders using different version can exist in parallel. @@ -205,6 +218,10 @@ mod arch_02 {      use yaxpeax_arch_02::{          AddressBase, Arch, Decoder, Instruction, LengthedInstruction, Reader, U8Reader,      }; +    use yaxpeax_arch_02::annotation::{AnnotatingDecoder, FieldDescription, VecSink}; + +    use crate::{FieldRecord, ItemDescription}; +      pub(crate) fn decode_input<A: Arch>(buf: &[u8], printer: &Printer)      where @@ -214,6 +231,15 @@ mod arch_02 {          decode_input_with_decoder::<A>(A::Decoder::default(), buf, printer);      } +    pub(crate) fn decode_input_and_annotate<A: Arch + crate::ArchBitMapper>(buf: &[u8], printer: &Printer) +    where +        A::Instruction: fmt::Display, +        A::Decoder: AnnotatingDecoder<A>, +        for<'data> U8Reader<'data>: Reader<A::Address, A::Word>, +    { +        decode_input_with_annotation::<A>(A::Decoder::default(), buf, printer); +    } +      pub(crate) fn decode_armv7_thumb(buf: &[u8], printer: &Printer) {          let decoder = yaxpeax_arm::armv7::InstDecoder::default_thumb();          decode_input_with_decoder::<yaxpeax_arm::armv7::ARMv7>(decoder, buf, printer); @@ -240,10 +266,356 @@ mod arch_02 {                      inst_len: A::Address::zero().wrapping_offset(inst.len()).to_linear(),                      well_defined: inst.well_defined(),                      inst, +                    field_descriptions: None,                  }              });              printer.print_instr(rest, addr.to_linear(), generic_res);              addr += advance_addr;          }      } + +    fn field_descs_to_record<A: Arch + crate::ArchBitMapper>(sink: VecSink<<A::Decoder as AnnotatingDecoder<A>>::FieldDescription>) -> Vec<FieldRecord> where A::Decoder: AnnotatingDecoder<A> { +        let mut fields: Vec<FieldRecord> = Vec::new(); +        let bit_mapper = A::mapper(); + +        use itertools::Itertools; +        let mut vs = sink.records; +        vs.sort_by_key(|rec| rec.2.id()); +        for (id, group) in &vs.iter().group_by(|x| x.2.id()) { +            let mut field = FieldRecord { +                elements: Vec::new(), +                id: id, +            }; + +            for (desc, spans) in &group.group_by(|x| x.2.to_owned()) { +                let mut item = ItemDescription { +                    ranges: Vec::new(), +                    description: desc.to_string(), +                    separator: desc.is_separator(), +                }; + +                for span in spans { +                    item.ranges.push(crate::BitRange::across(bit_mapper, span.0, span.1)); +                } +                field.elements.push(item); +            } +            fields.push(field); +        } + +        fields +    } + +    pub(crate) fn decode_input_with_annotation<A: Arch + crate::ArchBitMapper>( +        decoder: A::Decoder, +        buf: &[u8], +        printer: &Printer, +    ) where +        A::Instruction: fmt::Display, +        A::Decoder: AnnotatingDecoder<A>, +        for<'data> U8Reader<'data>: Reader<A::Address, A::Word>, +    { +        let mut addr = A::Address::zero(); +        while let Some(rest) = buf.get(addr.to_linear()..).filter(|v| !v.is_empty()) { +            let mut sink: VecSink<<A::Decoder as AnnotatingDecoder<A>>::FieldDescription> = VecSink::new(); +            let mut reader = U8Reader::new(rest); +            let mut inst = A::Instruction::default(); +            let res = decoder.decode_with_annotation(&mut inst, &mut reader, &mut sink); +            let advance_addr = match &res { +                Ok(_) => inst.len(), +                Err(_) => A::Instruction::min_size(), +            }; +            let generic_res = res.map(|_| { +                let records = field_descs_to_record::<A>(sink); +                crate::InstDetails { +                    inst_len: A::Address::zero().wrapping_offset(inst.len()).to_linear(), +                    well_defined: inst.well_defined(), +                    inst, +                    field_descriptions: Some((A::mapper(), records)), +                } +            }); +            printer.print_instr(rest, addr.to_linear(), generic_res); +            addr += advance_addr; +        } +    } +} + +/// any architecture with an `AnnotatingDecoder` implementation will have annotations reported at +/// positions of bits in the instruction. `yaxpeax-dis` requires some description of how to convert +/// between a column and a bit for a given architecture. +#[derive(Copy, Clone, Debug)] +struct BitPosition { +    word_size: usize, +} + +impl BitPosition { +    fn col2bit(&self, col: usize) -> usize { +        let word = col / self.word_size; +        let bit = (self.word_size - 1) - (col % self.word_size); +        let bit = word * self.word_size + bit; +        bit +    } + +    fn bit2col(&self, bit: usize) -> usize { +        let word = bit / self.word_size; +        let col = (self.word_size - 1) - (bit % self.word_size); +        let col = word * self.word_size + col; +        col +    } +} + +const IA64_POSITIONS: BitPosition = BitPosition { +    word_size: 128 +}; + +const WORD_POSITIONS: BitPosition = BitPosition { +    word_size: 16 +}; + +const BYTE_POSITIONS: BitPosition = BitPosition { +    word_size: 8 +}; + +trait ArchBitMapper { +    fn mapper() -> BitPosition; +} + +impl ArchBitMapper for yaxpeax_x86::real_mode::Arch { +    fn mapper() -> BitPosition { +        BYTE_POSITIONS +    } +} + +impl ArchBitMapper for yaxpeax_x86::protected_mode::Arch { +    fn mapper() -> BitPosition { +        BYTE_POSITIONS +    } +} + +impl ArchBitMapper for yaxpeax_x86::long_mode::Arch { +    fn mapper() -> BitPosition { +        BYTE_POSITIONS +    } +} + +impl ArchBitMapper for yaxpeax_msp430::MSP430 { +    fn mapper() -> BitPosition { +        WORD_POSITIONS +    } +} + +impl ArchBitMapper for yaxpeax_ia64::IA64 { +    fn mapper() -> BitPosition { +        IA64_POSITIONS +    } +} + +#[derive(Debug)] +struct BitRange { +    start: u32, +    end: u32, +    lhs: u32, +    rhs: u32, +} + +impl BitRange { +    fn across(bit_mapper: BitPosition, start: u32, end: u32) -> BitRange { +        let mut lhs = bit_mapper.bit2col(start as usize) as u32; +        let mut rhs = bit_mapper.bit2col(start as usize) as u32; +        for bit in start..=end { +            lhs = std::cmp::min(lhs, bit_mapper.bit2col(bit as usize) as u32); +            rhs = std::cmp::max(rhs, bit_mapper.bit2col(bit as usize) as u32); +        } +        BitRange { start, end, lhs, rhs } +    } +} + +/// a representation of a decoder's `Annotation` type that does not actually reference +/// `yaxpeax_arch`. this is important so we can have a whole shared display routine reused across +/// `yaxpeax_arch` versions - there may be more than one in use at a time in `yaxpeax-dis`. +struct ItemDescription { +    ranges: Vec<BitRange>, +    description: String, +    separator: bool, +} + +impl fmt::Debug for ItemDescription { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        write!(f, "{{ ranges: {:?}, description: {}, separator: {} }}", &self.ranges, &self.description, self.separator) +    } +} + +// spans grouped together in some decoder-specified logical structure by +// `id`. `id` is a hint that data should be considered related for display +// purposes. +struct FieldRecord { +    // spans grouped together by `FieldDescription` - one field may be +    // described by multiple distinct spans, so those spans are recorded +    // here. elements are ordered by the lowest bit of spans describing an +    // element. +    elements: Vec<ItemDescription>, +    id: u32, +} + +impl fmt::Debug for FieldRecord { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        write!(f, "{{ elements: {:?}, id: {} }}", &self.elements, &self.id) +    } +} + +fn fmt_field_descriptions(bit_mapper: &BitPosition, fields: &[FieldRecord], data: &[u8]) -> String { +    let mut boundaries = [false; 256]; +    let mut separators = [false; 256]; +    let mut bits = [false; 256]; +    let mut rhs = [false; 256]; +    let mut lhs = [false; 256]; +    let mut field_order: Vec<(usize, usize)> = Vec::new(); +    let mut boundary_order: Vec<(usize, usize)> = Vec::new(); + +    for (fi, field) in fields.iter().enumerate() { +        for (ei, element) in field.elements.iter().enumerate() { +            if element.separator { +                for (_ri, range) in element.ranges.iter().enumerate() { +                    boundaries[range.start as usize + 1] = true; +                    boundary_order.push((fi, range.start as usize + 1)); +                } +                continue; +            } +            field_order.push((fi, ei)); +            for (_ri, range) in element.ranges.iter().enumerate() { +                for i in range.start..=range.end { +                    bits[i as usize] = true; +                } +                separators[range.start as usize] = true; +                lhs[range.lhs as usize] = true; +                rhs[range.rhs as usize] = true; +            } +        } +    } +    boundary_order.sort_by(|l, r| r.1.cmp(&l.1)); + +    // regardless of sections, the left-hand side of the terminal is a free boundary +    lhs[0] = false; + +    let mut res = String::new(); +    res.push_str("                                \n"); + +    let mut fudge_bits = [false; 160]; + +    for i in 0..160 { +        if (i >> 3) >= data.len() { +            continue; +        } + +        let mut fudge = false; + +        if lhs[i] { +            fudge = true; +        } + +        if i > 0 && rhs[i - 1] { +            fudge = true; +        } + +        if fudge { +            fudge_bits[i] = true; +        } +    } + +    let mut fudge = 0; +    let mut col = [b' '; 160]; + +    for i in 0..160 { +        if (i >> 3) >= data.len() { +            continue; +        } + +        let bit = bit_mapper.col2bit(i); + +        if fudge_bits[i] { +            fudge += 1; +        } + +        if data[(bit >> 3) as usize] & (1 << (bit as u8 & 7)) != 0 { +            col[i + fudge] = b'1'; +        } else { +            col[i + fudge] = b'0'; +        } +    } +    res.push_str(unsafe { std::str::from_utf8_unchecked(&col) }); +    res.push_str("\n"); + +    for (fi, ei) in field_order.iter() { +        let mut col = [b' '; 160]; + +        for range in &fields[*fi as usize].elements[*ei as usize].ranges { +            let mut fudge = 0; + +            for c in 0..128 { +                let bit = bit_mapper.col2bit(c as usize); + +                if boundaries[c] { +                    col[c + fudge] = b'|'; +                } +                if fudge_bits[c as usize] { +                    fudge += 1; +                } + +                if bit >= range.start as usize && bit <= range.end as usize { +                    let data_bit = data[(bit >> 3) as usize] & (1 << (bit as u8 & 7)) != 0; +                    col[c as usize + fudge] = if data_bit { b'1' } else { b'0' }; +                } +            } +        } + +        res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..(data.len() * 8 + 30)]) }); +        res.push_str(" "); +        res.push_str(&fields[*fi as usize].elements[*ei as usize].description); +        res.push_str("\n"); +    } + +    let mut fudge = 0; +    let mut col = [b' '; 160]; + +    let mut line_end = 0; +    for i in 0..160 { +        if (i >> 3) > data.len() { +            continue; +        } + +        if boundaries[i] { +            col[i + fudge] = b'|'; +            line_end = i + fudge + 1; +        } +        if fudge_bits[i] { +            fudge += 1; +        } +    } +    res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..line_end]) }); +    res.push_str("\n"); + +    for (field_index, bit) in boundary_order { +        let mut fudge = 0; +        let mut col = [b' '; 160]; + +        for i in 0..160 { +            if (i >> 3) > data.len() { +                continue; +            } + +            if i == bit { +                res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..i + fudge]) }); +                break; +            } + +            if boundaries[i] { +                col[i + fudge] = b'|'; +            } +            if fudge_bits[i] { +                fudge += 1; +            } +        } +        res.push_str("\n"); +    } + +    res  } | 
