From d2ec91d2fa0b20fa7cb935ea279a90367f6dcc1e Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 22 Aug 2021 15:47:21 -0700 Subject: commit to x86-supporting field annotation --- src/main.rs | 313 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 310 insertions(+), 3 deletions(-) (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs index 0c6a5fc..9184601 100644 --- a/src/main.rs +++ b/src/main.rs @@ -83,16 +83,18 @@ fn main() { match arch_str { "x86_64" | - "x86:64" => crate::current_arch::decode_input::(&buf, verbose), + "x86:64" => crate::current_arch::decode_input_with_annotation::(&buf, verbose), "x86_32" | - "x86:32" => crate::current_arch::decode_input::(&buf, verbose), + "x86:32" => crate::current_arch::decode_input_with_annotation::(&buf, verbose), "x86_16" | - "x86:16" => crate::current_arch::decode_input::(&buf, verbose), + "x86:16" => crate::current_arch::decode_input_with_annotation::(&buf, verbose), + // "ia64" => crate::current_arch::decode_input_with_annotation::(&buf, verbose), "ia64" => crate::current_arch::decode_input::(&buf, verbose), "avr" => crate::current_arch::decode_input::(&buf, verbose), "armv7" => crate::current_arch::decode_input::(&buf, verbose), "armv8" => crate::current_arch::decode_input::(&buf, verbose), "mips" => crate::current_arch::decode_input::(&buf, verbose), + // "msp430" => crate::current_arch::decode_input_with_annotation::(&buf, verbose), "msp430" => crate::current_arch::decode_input::(&buf, verbose), "pic17" => crate::current_arch::decode_input::(&buf, verbose), "pic18" => crate::current_arch::decode_input::(&buf, verbose), @@ -158,9 +160,240 @@ fn with_parsed_superh( // yaxpeax-arch while older decoders are still being updated. mod current_arch { use yaxpeax_arch_02::{AddressBase, Arch, Decoder, Instruction, LengthedInstruction, Reader, U8Reader}; + use yaxpeax_arch_02::{AnnotatingDecoder, FieldDescription, VecSink}; use std::fmt; use num_traits::identities::Zero; + fn col2bit(col: usize) -> usize { + // ia64 + // 127 - col + // msp430 + /* + let word = col >> 4; + let bit = 15 - (col & 0xf); + + (word << 4) | bit + */ + // x86 + let byte = col / 8; + let bit = (7 - (col % 8)); + let bit = byte * 8 + bit; + bit + } + fn bit2col(bit: usize) -> usize { + let byte = bit / 8; + let bit = (7 - (bit % 8)); + let bit = byte * 8 + bit; + bit + } + + #[derive(Debug)] + struct BitRange { + start: u32, + end: u32, + lhs: u32, + rhs: u32, + } + + impl BitRange { + fn across(start: u32, end: u32) -> BitRange { + let mut lhs = bit2col(start as usize) as u32; + let mut rhs = bit2col(start as usize) as u32; + for bit in start..=end { + lhs = std::cmp::min(lhs, bit2col(bit as usize) as u32); + rhs = std::cmp::max(rhs, bit2col(bit as usize) as u32); + } + BitRange { start, end, lhs, rhs } + } + } + + struct ItemDescription where A::Decoder: AnnotatingDecoder { + ranges: Vec, + description: <::Decoder as AnnotatingDecoder>::FieldDescription, + } + + impl fmt::Debug for ItemDescription where A::Decoder: AnnotatingDecoder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{{ ranges: {:?}, description: {} }}", &self.ranges, &self.description) + } + } + + // spans grouped together in some decoder-specified logical structure by + // `id`. `id` is a hint that data should be considered related for display + // purposes. + struct FieldRecord where A::Decoder: AnnotatingDecoder { + // spans grouped together by `FieldDescription` - one field may be + // described by multiple distinct spans, so those spans are recorded + // here. elements are ordered by the lowest bit of spans describing an + // element. + elements: Vec>, + id: u32, + } + + impl fmt::Debug for FieldRecord where A::Decoder: AnnotatingDecoder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{{ elements: {:?}, id: {} }}", &self.elements, &self.id) + } + } + + fn show_field_descriptions(fields: &[FieldRecord], data: &[u8]) where A::Decoder: AnnotatingDecoder { + let mut boundaries = [false; 256]; + let mut separators = [false; 256]; + let mut bits = [false; 256]; + let mut rhs = [false; 256]; + let mut lhs = [false; 256]; + let mut field_order: Vec<(usize, usize)> = Vec::new(); + let mut boundary_order: Vec<(usize, usize)> = Vec::new(); + + for (fi, field) in fields.iter().enumerate() { + for (ei, element) in field.elements.iter().enumerate() { + if element.description.is_separator() { + for (_ri, range) in element.ranges.iter().enumerate() { + boundaries[range.start as usize + 1] = true; + boundary_order.push((fi, range.start as usize + 1)); + } + continue; + } + field_order.push((fi, ei)); + for (_ri, range) in element.ranges.iter().enumerate() { + for i in range.start..=range.end { + bits[i as usize] = true; + } + separators[range.start as usize] = true; + lhs[range.lhs as usize] = true; + rhs[range.rhs as usize] = true; + } + } + } + boundary_order.sort_by(|l, r| r.1.cmp(&l.1)); + + // regardless of sections, the left-hand side of the terminal is a free boundary + lhs[0] = false; + + let mut res = String::new(); + res.push_str(" \n"); + + let mut fudge_bits = [false; 160]; + + for i in 0..160 { + if (i >> 3) >= data.len() { + continue; + } + + let mut fudge = false; + + if lhs[i] { + fudge = true; + } + + if i > 0 && rhs[i - 1] { + fudge = true; + } + + if fudge { + fudge_bits[i] = true; + } + } + + let mut fudge = 0; + let mut col = [b' '; 160]; + + for i in 0..160 { + if (i >> 3) >= data.len() { + continue; + } + + let bit = col2bit(i); + + if fudge_bits[i] { + fudge += 1; + } + + if data[(bit >> 3) as usize] & (1 << (bit as u8 & 7)) != 0 { + col[i + fudge] = b'1'; + } else { + col[i + fudge] = b'0'; + } + } + res.push_str(unsafe { std::str::from_utf8_unchecked(&col) }); + res.push_str("\n"); + + for (fi, ei) in field_order.iter() { + let mut col = [b' '; 160]; + + for range in &fields[*fi as usize].elements[*ei as usize].ranges { + let mut fudge = 0; + + for c in 0..128 { + let bit = col2bit(c as usize); + + if boundaries[c] { + col[c + fudge] = b'|'; + } + if fudge_bits[c as usize] { + fudge += 1; + } + + if bit >= range.start as usize && bit <= range.end as usize { + let data_bit = data[(bit >> 3) as usize] & (1 << (bit as u8 & 7)) != 0; + col[c as usize + fudge] = if data_bit { b'1' } else { b'0' }; + } + } + } + + res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..(data.len() * 8 + 30)]) }); + res.push_str(" "); + res.push_str(&fields[*fi as usize].elements[*ei as usize].description.to_string()); + res.push_str("\n"); + } + + let mut fudge = 0; + let mut col = [b' '; 160]; + + for i in 0..160 { + if (i >> 3) >= data.len() { + continue; + } + + if boundaries[i] { + col[i + fudge] = b'|'; + } + if fudge_bits[i] { + fudge += 1; + } + } + res.push_str(unsafe { std::str::from_utf8_unchecked(&col) }); + res.push_str("\n"); + + for (field_index, bit) in boundary_order { + let mut fudge = 0; + let mut col = [b' '; 160]; + + for i in 0..160 { + if (i >> 3) >= data.len() { + continue; + } + + if i == bit { + res.push_str(unsafe { std::str::from_utf8_unchecked(&col[..i + fudge]) }); + break; + } + + if boundaries[i] { + col[i + fudge] = b'|'; + } + if fudge_bits[i] { + fudge += 1; + } + } + use std::fmt::Write; + let _ = write!(res, "{}", fields[field_index].elements[0].description); + res.push_str("\n"); + } + + println!("{}", res); + } + pub(crate) fn decode_input(buf: &[u8], verbose: bool) where A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader, @@ -168,6 +401,14 @@ mod current_arch { decode_input_with_decoder::(A::Decoder::default(), buf, verbose); } + pub(crate) fn decode_input_with_annotation(buf: &[u8], verbose: bool) + where + A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader, + A::Decoder: AnnotatingDecoder, + { + decode_input_with_decoder_and_annotation::(A::Decoder::default(), buf, verbose); + } + pub(crate) fn decode_input_with_decoder(decoder: A::Decoder, buf: &[u8], verbose: bool) where A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader, @@ -205,6 +446,72 @@ mod current_arch { } } } + + pub(crate) fn decode_input_with_decoder_and_annotation(decoder: A::Decoder, buf: &[u8], verbose: bool) + where + A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader, + A::Decoder: AnnotatingDecoder, + { + let start = A::Address::zero(); + let mut addr = start; + loop { + let mut sink: VecSink<>::FieldDescription> = VecSink::new(); + let mut reader = U8Reader::new(&buf[addr.to_linear()..]); + let mut inst = A::Instruction::default(); + match decoder.decode_with_annotation(&mut inst, &mut reader, &mut sink) { + Ok(()) => { + println!( + "{:#010x}: {:14}: {}", + addr.to_linear(), + hex::encode( + &buf[addr.to_linear()..] + [..A::Address::zero().wrapping_offset(inst.len()).to_linear()] + ), + inst + ); + if verbose { + let mut fields: Vec> = Vec::new(); + + use itertools::Itertools; + let mut vs = sink.records; + vs.sort_by_key(|rec| rec.2.id()); + for (id, group) in &vs.iter().group_by(|x| x.2.id()) { + let mut field = FieldRecord { + elements: Vec::new(), + id: id, + }; + + for (desc, spans) in &group.group_by(|x| x.2.to_owned()) { + let mut item = ItemDescription { + ranges: Vec::new(), + description: desc, + }; + + for span in spans { + item.ranges.push(BitRange::across(span.0, span.1)); + } + field.elements.push(item); + } + fields.push(field); + } + show_field_descriptions( + &fields, + &buf[addr.to_linear()..] + [..A::Address::zero().wrapping_offset(inst.len()).to_linear()] + ); + } + addr += inst.len(); + } + Err(e) => { + println!("{:#010x}: {}", addr.to_linear(), e); + addr += A::Instruction::min_size(); + } + } + if addr.to_linear() >= buf.len() { + break; + } + } + } } mod legacy_arch { -- cgit v1.1