From ca5e4d5d2bdecaebc7da7657cbda2f70a57fdad0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 6 Jul 2021 11:56:17 -0700 Subject: add ReaderBuilder to generically construct arch-required Readers also revise an `unsafe` that might be unsafe in extremely unlikely circumstances - no one should be passing yaxpeax a `&[u8]` larger than `isize::MAX`, but on 32-bit architectures we can't necessarily guarantee that it won't happen --- CHANGELOG | 8 ++++++++ Cargo.toml | 2 +- src/lib.rs | 2 +- src/reader.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index e12c93e..acbe679 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +## 0.2.2 + +added `ReaderBuilder` trait and impls for `U8Reader` on various address and word types. + +added documentation for `Reader`, `U8Reader`, and `ReaderBuilder`. + +avoid an unlikely violation of `core::ptr::offset` safety rules on 32-bit architectures. + ## 0.2.1 updated architecture matrix diff --git a/Cargo.toml b/Cargo.toml index ae4dbe7..1fc3aff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["disassembly", "disassembler"] license = "0BSD" name = "yaxpeax-arch" repository = "https://git.iximeow.net/yaxpeax-arch/" -version = "0.2.1" +version = "0.2.2" [dependencies] "num-traits" = { version = "0.2", default-features = false } diff --git a/src/lib.rs b/src/lib.rs index 455b5e2..edc0742 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ pub use color::ColorSettings; pub mod display; mod reader; -pub use reader::{Reader, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be}; +pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be}; /// the minimum set of errors a `yaxpeax-arch` disassembler may produce.
/// diff --git a/src/reader.rs b/src/reader.rs index b263f22..afadea0 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -11,14 +11,42 @@ pub enum ReadError { IOError(&'static str), } +/// a trait defining how `Item`-sized words are read at `Address`-positioned offsets into some +/// stream of data. for *most* uses, [`yaxpeax_arch::U8Reader`] probably is sufficient. when +/// reading from data sources that aren't `&[u8]`, `Address` isn't a multiple of `u8`, or `Item` +/// isn't a multiple of 8 bits, `U8Reader` won't be sufficient. pub trait Reader { fn next(&mut self) -> Result; + /// read `buf`-many items from this reader in bulk. if `Reader` cannot read `buf`-many items, + /// return `ReadError::ExhaustedInput`. fn next_n(&mut self, buf: &mut [Item]) -> Result<(), ReadError>; + /// mark the current position as where to measure `offset` against. fn mark(&mut self); + /// the difference, in `Address`, between the current `Reader` position and its last `mark`. + /// when created, a `Reader`'s initial position is `mark`ed, so creating a `Reader` and + /// immediately calling `offset()` must return `Address::zero()`. fn offset(&mut self) -> Address; + /// the difference, in `Address`, between the current `Reader` position and the initial offset + /// when constructed. fn total_offset(&mut self) -> Address; } +/// a trait defining how to build a `Reader` from some data source (`Self`). +/// definitions of `ReaderBuilder` are provided for `U8Reader` on `Address` and `Word` types that +/// `yaxpeax_arch` provides - external decoder implementations should also provide `ReaderBuilder` +/// impls if they use custom `Reader` types. +pub trait ReaderBuilder where Self: Sized { + type Result: Reader; + + /// construct a reader from `data` beginning at `addr` from its beginning. + fn read_at(data: Self, addr: Address) -> Self::Result; + /// construct a reader from `data` beginning at the start of `data`. 
+ fn read_from(data: Self) -> Self::Result { + Self::read_at(data, Address::zero()) + } +} + +/// a struct for `Reader` impls that can operate on units of `u8`. pub struct U8Reader<'a> { start: *const u8, data: *const u8, @@ -29,10 +57,22 @@ pub struct U8Reader<'a> { impl<'a> U8Reader<'a> { pub fn new(data: &'a [u8]) -> U8Reader<'a> { + + // WHY: either on <64b systems we panic on `data.len() > isize::MAX`, or we compute end + // without `offset` (which would be UB for such huge slices) + #[cfg(not(target_pointer_width = "64"))] + let end = data.as_ptr().wrapping_add(data.len()); + + // SAFETY: the slice was valid, so data + data.len() does not overflow. at the moment, + // there aren't 64-bit systems with 63 bits of virtual address space, so it's not possible + // to have a slice length larger than 64-bit isize::MAX. + #[cfg(target_pointer_width = "64")] + let end = unsafe { data.as_ptr().offset(data.len() as isize) }; + U8Reader { start: data.as_ptr(), data: data.as_ptr(), - end: unsafe { data.as_ptr().offset(data.len() as isize) }, + end, mark: data.as_ptr(), _lifetime: core::marker::PhantomData, } @@ -135,6 +175,13 @@ macro_rules! u8reader_reader_impl { } } + impl<'data> ReaderBuilder<$addr_size, $word> for &'data [u8] { + type Result = U8Reader<'data>; + + fn read_at(data: Self, addr: $addr_size) -> Self::Result { + U8Reader::new(&data[(addr as usize)..]) + } + } } } -- cgit v1.1