From b49987bdbd2b5a08163a45ef3dc1868754d84165 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 3 Jul 2021 21:10:37 -0700 Subject: document yaxpeax_arch traits and add an AddressDiff::to_const --- src/address/mod.rs | 3 +++ src/lib.rs | 69 ++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 62 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/address/mod.rs b/src/address/mod.rs index e44ca13..31f2079 100644 --- a/src/address/mod.rs +++ b/src/address/mod.rs @@ -68,6 +68,9 @@ impl AddressDiff { pub fn from_const(amount: T::Diff) -> Self { AddressDiff { amount } } + pub fn to_const(&self) -> T::Diff { + self.amount + } } impl fmt::Debug for AddressDiff where T::Diff: fmt::Debug { diff --git a/src/lib.rs b/src/lib.rs index 1862993..121cd1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,10 @@ #![no_std] -#[cfg(feature = "std")] -#[macro_use] -extern crate std; - use core::fmt::{self, Debug, Display}; use core::hash::Hash; -extern crate num_traits; -#[cfg(feature="use-serde")] -extern crate serde; #[cfg(feature="use-serde")] #[macro_use] extern crate serde_derive; -#[cfg(feature="colors")] -extern crate crossterm; #[cfg(feature="use-serde")] use serde::{Serialize, Deserialize}; @@ -32,14 +23,33 @@ pub use color::ColorSettings; pub mod display; mod reader; -pub use reader::{Reader, ReadError, U8Reader}; //, U16le, U16be, U32le, U32be, U64le, U64be}; +pub use reader::{Reader, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be}; +/// the minimum set of errors a `yaxpeax-arch` disassembler may produce. +/// +/// it is permissible for an implementor of `DecodeError` to have items that return `false` for +/// all these functions; decoders are permitted to error in a way that `yaxpeax-arch` does not know +/// about. pub trait DecodeError { + /// did the decoder fail because it reached the end of input? fn data_exhausted(&self) -> bool; + /// did the decoder error because the instruction's opcode is invalid? 
+ /// + /// this may not be a sensical question for some instruction sets - `bad_opcode` should + /// generally indicate an issue with the instruction itself. this is in contrast to one + /// specific operand being invalid for the instruction, or some other issue to do with decoding + /// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy + /// and left as best-effort for decoder implementors. fn bad_opcode(&self) -> bool; + /// did the decoder error because an operand of the instruction to decode is invalid? + /// + /// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on + /// the part of implementors. fn bad_operand(&self) -> bool; } +/// a minimal enum implementing `DecodeError`. this is intended to be enough for a low effort, +/// low-fidelity error taxonomy, without boilerplate of a `DecodeError` implementation. #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum StandardDecodeError { ExhaustedInput, @@ -63,15 +73,43 @@ impl DecodeError for StandardDecodeError { fn bad_operand(&self) -> bool { *self == StandardDecodeError::InvalidOperand } } +/// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are +/// the architecture-defined [`DecodeError`] implementation. pub trait Decoder { + /// decode one instruction for this architecture from the [`yaxpeax_arch::Reader`] of this + /// architecture's `Word`. fn decode>(&self, words: &mut T) -> Result { let mut inst = A::Instruction::default(); self.decode_into(&mut inst, words).map(|_: ()| inst) } + /// decode one instruction for this architecture from the [`yaxpeax_arch::Reader`] of this + /// architecture's `Word`, writing into the provided `inst`. + /// + /// SAFETY: + /// + /// while `inst` MUST be left in a state that does not violate Rust's safety guarantees, + /// implementors are NOT obligated to leave `inst` in a semantically meaningful state if + /// decoding fails. 
if `decode_into` returns an error, callers may find contradictory and + /// useless information in `inst`, as well as *stale data* from whatever was passed in. fn decode_into>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>; } +/// a collection of associated type parameters that constitute the definitions for an instruction +/// set. `Arch` provides an `Instruction` and its associated `Operand`s, which is guaranteed to be +/// decodable by this `Arch::Decoder`. `Arch::Decoder` can always be constructed with a `Default` +/// implementation, and decodes from a `Reader`. +/// +/// `Arch` is suitable as the foundational trait to implement more complex logic on top of; for +/// example, it would be entirely expected to have a +/// ```text +/// pub fn emulate>( +/// reader: &mut Reader, +/// emu: &mut E +/// ) -> Result; +/// ``` +/// +/// in some library built on top of `yaxpeax-arch`. #[cfg(feature="use-serde")] pub trait Arch { type Word: Debug + Display + PartialEq + Eq; @@ -92,9 +130,20 @@ pub trait Arch { type Operand; } +/// instructions have lengths, and minimum possible sizes for advancing a decoder on error. +/// +/// unfortunately, this means calling `x.len()` for some `Arch::Instruction` requires importing +/// this trait. sorry. pub trait LengthedInstruction { type Unit; + /// the length, in terms of `Unit`, of this instruction. because `Unit` will be a diff of an + /// architecture's `Address` type, this almost always is a number of bytes. implementations + /// should indicate if this is ever not the case. fn len(&self) -> Self::Unit; + /// the length, in terms of `Unit`, of the shortest possible instruction in a given + /// architecture. because `Unit` will be a diff of an architecture's `Address` type, this + /// almost always is a number of bytes. implementations should indicate if this is ever not the + /// case. fn min_size() -> Self::Unit; } -- cgit v1.1