aboutsummaryrefslogtreecommitdiff
path: root/src/lib.rs
blob: 9825fd2efe4eebb1a48c7ae357dab94aae48db36 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
//! # `yaxpeax-x86`, a decoder for x86-family instruction sets
//!
//! `yaxpeax-x86` provides x86 decoders, for 64-, 32-, and 16-bit modes. `yaxpeax-x86` also
//! implements traits defined by `yaxpeax_arch`, making it suitable for interchangeable use with
//! other `yaxpeax`-family instruction decoders.
//!
//! ## usage
//!
//! the fastest way to decode an x86 instruction is through [`amd64::InstDecoder::decode_slice()`]:
//! ```
//! let decoder = yaxpeax_x86::amd64::InstDecoder::default();
//!
//! let inst = decoder.decode_slice(&[0x33, 0xc0]).unwrap();
//!
//! #[cfg(feature="fmt")]
//! assert_eq!("xor eax, eax", inst.to_string());
//! ```
//!
//! instructions, operands, registers, and generally all decoding structures, are in their mode's
//! respective submodule:
//! * `x86_64`/`amd64` decoding is under [`long_mode`]
//! * `x86_32`/`x86` decoding is under [`protected_mode`]
//! * `x86_16`/`8086` decoding is under [`real_mode`]
//!
//! all modes have equivalent data available in a decoded instruction. for example, all modes have
//! library-friendly `Operand` and `RegSpec` types:
//!
//! ```
//! use yaxpeax_x86::amd64::{InstDecoder, Operand, RegSpec};
//!
//! let decoder = yaxpeax_x86::amd64::InstDecoder::default();
//!
//! let inst = decoder.decode_slice(&[0x33, 0x01]).unwrap();
//!
//! #[cfg(feature="fmt")]
//! assert_eq!("xor eax, dword [rcx]", inst.to_string());
//!
//! assert_eq!(Operand::Register(RegSpec::eax()), inst.operand(0));
//! #[cfg(feature="fmt")]
//! assert_eq!("eax", inst.operand(0).to_string());
//! assert_eq!(Operand::RegDeref(RegSpec::rcx()), inst.operand(1));
//!
//! // an operand in isolation does not know the size of memory it references, if any
//! #[cfg(feature="fmt")]
//! assert_eq!("[rcx]", inst.operand(1).to_string());
//!
//! // and for memory operands, the size must be read from the instruction itself:
//! let mem_size: yaxpeax_x86::amd64::MemoryAccessSize = inst.mem_size().unwrap();
//! assert_eq!("dword", mem_size.size_name());
//!
//! // `MemoryAccessSize::size_name()` is how its `Display` impl works, as well:
//! #[cfg(feature="fmt")]
//! assert_eq!("dword", mem_size.to_string());
//! ```
//!
//! `yaxpeax-x86` can also be used to decode instructions generically through the `yaxpeax-arch`
//! traits:
//! ```
//! mod decoder {
//!     use yaxpeax_arch::{Arch, AddressDisplay, Decoder, Reader, ReaderBuilder};
//!
//!     // have to play some games so this example works right even without `fmt` enabled!
//!     #[cfg(feature="fmt")]
//!     trait InstBound: std::fmt::Display {}
//!     #[cfg(not(feature="fmt"))]
//!     trait InstBound {}
//!
//!     #[cfg(feature="fmt")]
//!     impl <T: std::fmt::Display> InstBound for T {}
//!     #[cfg(not(feature="fmt"))]
//!     impl <T> InstBound for T {}
//!
//!     pub fn decode_stream<
//!         'data,
//!         A: yaxpeax_arch::Arch,
//!         U: ReaderBuilder<A::Address, A::Word>,
//!     >(data: U) where
//!         A::Instruction: InstBound,
//!     {
//!         let mut reader = ReaderBuilder::read_from(data);
//!         let mut address: A::Address = reader.total_offset();
//!
//!         let decoder = A::Decoder::default();
//!         let mut decode_res = decoder.decode(&mut reader);
//!         loop {
//!             match decode_res {
//!                 Ok(ref inst) => {
//!                     #[cfg(feature="fmt")]
//!                     println!("{}: {}", address.show(), inst);
//!                     decode_res = decoder.decode(&mut reader);
//!                     address = reader.total_offset();
//!                 }
//!                 Err(e) => {
//!                     println!("{}: decode error: {}", address.show(), e);
//!                     break;
//!                 }
//!             }
//!         }
//!     }
//! }
//!
//! use yaxpeax_x86::amd64::{Arch as x86_64};
//! use yaxpeax_arch::{ReaderBuilder, U8Reader};
//! let data: &[u8] = &[0x55, 0x33, 0xc0, 0x48, 0x8b, 0x02, 0x5d, 0xc3];
//! decoder::decode_stream::<x86_64, _>(data);
//! ```
//!
//! ## `#![no_std]`
//!
//! `yaxpeax-x86` supports `no_std` usage. to be built `no_std`, `yaxpeax-x86` only needs
//! `default-features = false` in the corresponding `Cargo.toml` dependency. if formatting is
//! needed with `std` disabled, it can be re-enabled by explicitly requesting the `fmt` feature
//! like:
//! ```text
//! yaxpeax-x86 = { version = "*", default-features = false, features = ["fmt"] }
//! ```
//!
//! this is how the `.so` and `.a` packaging in
//! [`ffi/`](https://github.com/iximeow/yaxpeax-x86/tree/no-gods-no-/ffi) is performed.

#![no_std]

#[cfg(feature="use-serde")]
#[macro_use] extern crate serde_derive;
#[cfg(feature="use-serde")]
extern crate serde;

#[cfg(feature="std")]
extern crate alloc;

pub mod long_mode;
pub use long_mode as amd64;
pub use long_mode::Arch as x86_64;

pub mod protected_mode;
pub use protected_mode::Arch as x86_32;

pub mod real_mode;
pub use real_mode::Arch as x86_16;

mod safer_unchecked;

/// human-friendly names for memory access sizes.
///
/// the table is indexed by `size - 1`, where `size` is the raw value held by a
/// `MemoryAccessSize`: index 0 names a size of 1 (`byte`), index 63 names a size of 64
/// (`zmmword`). sizes with no assigned name map to the placeholder `"BUG"`.
const MEM_SIZE_STRINGS: [&str; 64] = [
    "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword",
    "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ymmword",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ptr", "zmmword",
];

/// the size of a memory access performed by an instruction.
///
/// the size of an instruction's memory operand is read from the instruction itself (see
/// `mem_size()` in the crate-level examples), not from the operand.
pub struct MemoryAccessSize {
    /// the access size in bytes, or the sentinel `63` for accesses whose size cannot be known
    /// from the instruction alone.
    size: u8,
}
impl MemoryAccessSize {
    /// return the number of bytes referenced by this memory access.
    ///
    /// if the number of bytes cannot be confidently known by the instruction in isolation (as is
    /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this
    /// function will return `None`.
    pub fn bytes_size(&self) -> Option<u8> {
        // 63 is the sentinel for "variable-sized" accesses; every other value is a byte count.
        match self.size {
            63 => None,
            bytes => Some(bytes),
        }
    }

    /// a human-friendly label for the number of bytes this memory access references.
    ///
    /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86`
    /// prefers to use consistent names for a width even if the way those bytes are used varies.
    ///
    /// the sizes `yaxpeax-x86` knows are as follows:
    /// | size (bytes) | name       |
    /// |--------------|------------|
    /// | 1            | `byte`     |
    /// | 2            | `word`     |
    /// | 4            | `dword`    |
    /// | 6            | `far`      |
    /// | 8            | `qword`    |
    /// | 10           | `mword`    |
    /// | 16           | `xmmword`  |
    /// | 32           | `ymmword`  |
    /// | 64           | `zmmword`  |
    /// | variable     | `ptr`      |
    ///
    /// "mword" refers to an mmx-sized access - 80 bits, or 10 bytes. `mword` is also used for
    /// 64-bit far calls, because they reference a contiguous ten bytes; two bytes of segment
    /// selector and eight bytes of address.
    ///
    /// "variable" accesses access a number of bytes dependent on the physical processor and its
    /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions.
    ///
    /// a size with no assigned name yields the placeholder `"BUG"`. this includes a size of zero
    /// and sizes larger than 64, so this function never panics.
    pub fn size_name(&self) -> &'static str {
        // the table is indexed by `size - 1`. a size of zero has no slot (and would underflow the
        // subtraction), and sizes above 64 fall past the end of the table; both get the same
        // placeholder the table uses for unnamed sizes instead of panicking.
        match usize::from(self.size).checked_sub(1) {
            Some(idx) if idx < MEM_SIZE_STRINGS.len() => MEM_SIZE_STRINGS[idx],
            _ => "BUG",
        }
    }
}

/// displays a `MemoryAccessSize` as its size name (the same text as `size_name()`), such as
/// `dword`.
///
/// only compiled when the `fmt` feature is enabled.
#[cfg(feature = "fmt")]
impl core::fmt::Display for MemoryAccessSize {
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // the label is written verbatim; no padding or alignment is applied.
        let label: &'static str = self.size_name();
        out.write_str(label)
    }
}

/// debug-formats a `MemoryAccessSize` exactly as its `Display` impl does.
///
/// only compiled when the `fmt` feature is enabled.
#[cfg(feature = "fmt")]
impl core::fmt::Debug for MemoryAccessSize {
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // delegate to `Display` so both formats stay in lockstep.
        <Self as core::fmt::Display>::fmt(self, out)
    }
}