aboutsummaryrefslogtreecommitdiff
path: root/src/lib.rs
blob: 93274f91e2acfb8eabbca04b963418c0a6e54fb8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
//! # `yaxpeax-x86`, a decoder for x86-family instruction sets
//!
//! `yaxpeax-x86` provides x86 decoders, for 64-, 32-, and 16-bit modes. `yaxpeax-x86` also
//! implements traits defined by `yaxpeax_arch`, making it suitable for interchangeable use with
//! other `yaxpeax`-family instruction decoders.
//!
//! ## usage
//!
//! the fastest way to decode an x86 instruction is through [`amd64::InstDecoder::decode_slice()`]:
//! ```
//! let decoder = yaxpeax_x86::amd64::InstDecoder::default();
//!
//! let inst = decoder.decode_slice(&[0x33, 0xc0]).unwrap();
//!
//! #[cfg(feature="fmt")]
//! assert_eq!("xor eax, eax", inst.to_string());
//! ```
//!
//! instructions, operands, registers, and generally all decoding structures, are in their mode's
//! respective submodule:
//! * `x86_64`/`amd64` decoding is under [`long_mode`]
//! * `x86_32`/`x86` decoding is under [`protected_mode`]
//! * `x86_16`/`8086` decoding is under [`real_mode`]
//!
//! all modes have equivalent data available in a decoded instruction. for example, all modes have
//! library-friendly `Operand` and `RegSpec` types:
//!
//! ```
//! use yaxpeax_x86::amd64::{InstDecoder, Operand, RegSpec};
//!
//! let decoder = yaxpeax_x86::amd64::InstDecoder::default();
//!
//! let inst = decoder.decode_slice(&[0x33, 0x01]).unwrap();
//!
//! #[cfg(feature="fmt")]
//! assert_eq!("xor eax, dword [rcx]", inst.to_string());
//!
//! assert_eq!(Operand::Register { reg: RegSpec::eax() }, inst.operand(0));
//! #[cfg(feature="fmt")]
//! assert_eq!("eax", inst.operand(0).to_string());
//! assert_eq!(Operand::MemDeref { base: RegSpec::rcx() }, inst.operand(1));
//!
//! // an operand in isolation does not know the size of memory it references, if any
//! #[cfg(feature="fmt")]
//! assert_eq!("[rcx]", inst.operand(1).to_string());
//!
//! // and for memory operands, the size must be read from the instruction itself:
//! let mem_size: yaxpeax_x86::amd64::MemoryAccessSize = inst.mem_size().unwrap();
//! assert_eq!("dword", mem_size.size_name());
//!
//! // `MemoryAccessSize::size_name()` is how its `Display` impl works, as well:
//! #[cfg(feature="fmt")]
//! assert_eq!("dword", mem_size.to_string());
//! ```
//!
//! `yaxpeax-x86` can also be used to decode instructions generically through the `yaxpeax-arch`
//! traits:
//! ```
//! mod decoder {
//!     use yaxpeax_arch::{Arch, AddressDisplay, Decoder, Reader, ReaderBuilder};
//!
//!     // have to play some games so this example works right even without `fmt` enabled!
//!     #[cfg(feature="fmt")]
//!     trait InstBound: std::fmt::Display {}
//!     #[cfg(not(feature="fmt"))]
//!     trait InstBound {}
//!
//!     #[cfg(feature="fmt")]
//!     impl <T: std::fmt::Display> InstBound for T {}
//!     #[cfg(not(feature="fmt"))]
//!     impl <T> InstBound for T {}
//!
//!     pub fn decode_stream<
//!         'data,
//!         A: yaxpeax_arch::Arch,
//!         U: ReaderBuilder<A::Address, A::Word>,
//!     >(data: U) where
//!         A::Instruction: InstBound,
//!     {
//!         let mut reader = ReaderBuilder::read_from(data);
//!         let mut address: A::Address = reader.total_offset();
//!
//!         let decoder = A::Decoder::default();
//!         let mut decode_res = decoder.decode(&mut reader);
//!         loop {
//!             match decode_res {
//!                 Ok(ref inst) => {
//!                     #[cfg(feature="fmt")]
//!                     println!("{}: {}", address.show(), inst);
//!                     decode_res = decoder.decode(&mut reader);
//!                     address = reader.total_offset();
//!                 }
//!                 Err(e) => {
//!                     println!("{}: decode error: {}", address.show(), e);
//!                     break;
//!                 }
//!             }
//!         }
//!     }
//! }
//!
//! use yaxpeax_x86::amd64::{Arch as x86_64};
//! use yaxpeax_arch::{ReaderBuilder, U8Reader};
//! let data: &[u8] = &[0x55, 0x33, 0xc0, 0x48, 0x8b, 0x02, 0x5d, 0xc3];
//! decoder::decode_stream::<x86_64, _>(data);
//! ```
//!
//! ## `#![no_std]`
//!
//! `yaxpeax-x86` supports `no_std` usage. to be built `no_std`, `yaxpeax-x86` only needs
//! `default-features = false` in the corresponding `Cargo.toml` dependency. if formatting is
//! needed with `std` disabled, it can be re-enabled by explicitly requesting the `fmt` feature,
//! like:
//! ```text
//! yaxpeax-x86 = { version = "*", default-features = false, features = ["fmt"] }
//! ```
//!
//! this is how the `.so` and `.a` packaging in
//! [`ffi/`](https://github.com/iximeow/yaxpeax-x86/tree/no-gods-no-/ffi) is performed.

#![no_std]

#[cfg(feature="use-serde")]
#[macro_use] extern crate serde_derive;
#[cfg(feature="use-serde")]
extern crate serde;

#[cfg(feature="std")]
extern crate alloc;

// 64-bit (`x86_64`/`amd64`) decoding. `amd64` is a second name for the same module, and the
// mode's `Arch` is re-exported at the crate root as `x86_64`.
pub mod long_mode;
pub use long_mode as amd64;
pub use long_mode::Arch as x86_64;

// 32-bit (`x86_32`/`x86`) decoding, with its `Arch` re-exported as `x86_32`.
pub mod protected_mode;
pub use protected_mode::Arch as x86_32;

// 16-bit (`x86_16`/`8086`) decoding, with its `Arch` re-exported as `x86_16`.
pub mod real_mode;
pub use real_mode::Arch as x86_16;

// upper bound, in bytes, on the text of one formatted instruction.
//
// this exists to size `InstructionTextBuffer`'s buffer. it ideally would come from an `Arch`
// impl, or something related to `Arch`, but i'm not yet sure how to wire that up into
// yaxpeax-arch. so instead calculate an appropriate max size for all of 16-bit/32-bit/64-bit
// instruction printing that `InstructionTextBuffer` can be used for.
//
// `InstructionTextBuffer` prints an `InstructionDisplayer`, which means either intel syntax or
// pseudo-C. in the future, probably at&t as well.
//
// the pseudo-C syntax's max length would be something like:
// ```
// xacquire xrelease lock { repnz qword if /* signed */ greater_or_equal(rflags) then jmp gs:[xmm31 +
// xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae} }
// ```
// (which is nonsensical) or for an unknown opcode,
// ```
// xacquire xrelease lock { op0 = op(op0, op1, op2, op3) }
// ```
// where `opN` is an operand. the longest operand, same as above, would be something like
// ```
// gs:[xmm31 + xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae}
// ```
// for a length like 262 bytes of operand, 55 bytes of prefixes and syntax, and another up-to-20
// bytes of opcode.
//
// so the longest contextualize_c might write is around 337 bytes (262 + 55 + 20). round up to
// 512 because it's.. not much extra.
//
// the same reasoning for intel syntax yields a smaller instruction:
// ```
// xacquire xrelease lock op op1, op2, op3, op4
// ```
// where the longest operands are the same as above. this comes out to closer to 307 bytes, so
// the 512 bytes chosen for pseudo-C is enough for either syntax.
#[allow(dead_code)] // can be an unused constant in some library configurations
const MAX_INSTRUCTION_LEN: usize = 512;

// names for memory access sizes, indexed by `MemoryAccessSize::size`. for most entries the index
// is the access width in bytes. index 63 is not a width: it marks a variable-size access (see
// `MemoryAccessSize::VARIABLE`) and is named `ptr`. indices with no name of their own hold the
// placeholder "BUG".
//
// NOTE(review): index 5 is also named `ptr`; the reason is not evident from this file.
const MEM_SIZE_STRINGS: [&str; 65] = [
    "BUG",
    "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword",
    "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ymmword",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "m384b",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG",
    "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "ptr", "zmmword",
];

/// the size of a memory access made by an instruction.
///
/// [`MemoryAccessSize::bytes_size`] reports the width in bytes when it is known, and
/// [`MemoryAccessSize::size_name`] gives a printable name for it.
pub struct MemoryAccessSize {
    // the access width in bytes, or `MemoryAccessSize::VARIABLE` when the width is not fixed.
    size: u8,
}
impl MemoryAccessSize {
    // the `size` value that stands for "width cannot be known from the instruction alone".
    const VARIABLE: u8 = 63;

    /// return the number of bytes referenced by this memory access.
    ///
    /// if the number of bytes cannot be confidently known by the instruction in isolation (as is
    /// the case for `xsave`/`xrstor`-style "operate on all processor state" instructions), this
    /// function will return `None`.
    pub fn bytes_size(&self) -> Option<u8> {
        match self.size {
            Self::VARIABLE => None,
            bytes => Some(bytes),
        }
    }

    /// a human-friendly label for the number of bytes this memory access references.
    ///
    /// there are some differences from size names that may be expected elsewhere; `yaxpeax-x86`
    /// prefers to use consistent names for a width even if the way those bytes are used varies.
    ///
    /// the sizes `yaxpeax-x86` knows are as follows:
    /// | size (bytes) | name       |
    /// |--------------|------------|
    /// | 1            | `byte`     |
    /// | 2            | `word`     |
    /// | 4            | `dword`    |
    /// | 6            | `far`      |
    /// | 8            | `qword`    |
    /// | 10           | `mword`    |
    /// | 16           | `xmmword`  |
    /// | 32           | `ymmword`  |
    /// | 48           | `m384b`    |
    /// | 64           | `zmmword`  |
    /// | variable     | `ptr`      |
    ///
    /// "mword" refers to an 80-bit, or 10-byte, access - the width of an x87 extended-precision
    /// value (mmx registers themselves are only 64 bits wide). `mword` is also used for 64-bit
    /// far calls, because they reference a contiguous ten bytes; two bytes of segment selector
    /// and eight bytes of address.
    ///
    /// "variable" accesses access a number of bytes dependent on the physical processor and its
    /// operating mode. this is particularly relevant for `xsave`/`xrstor`-style instructions.
    ///
    /// a size with no name of its own is reported as `"BUG"`. this function does not panic, even
    /// for a size larger than any entry in the name table.
    pub fn size_name(&self) -> &'static str {
        // `size` is a plain `u8`, so it can exceed the 65-entry table. do a checked lookup and
        // give out-of-range values the same placeholder the table uses for unnamed sizes.
        match MEM_SIZE_STRINGS.get(usize::from(self.size)) {
            Some(&name) => name,
            None => "BUG",
        }
    }
}

// displaying a `MemoryAccessSize` writes its `size_name()` and nothing else.
#[cfg(feature = "fmt")]
impl core::fmt::Display for MemoryAccessSize {
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let name = self.size_name();
        out.write_str(name)
    }
}

// `Debug` forwards to the `Display` impl, so both produce the same text.
#[cfg(feature = "fmt")]
impl core::fmt::Debug for MemoryAccessSize {
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        <Self as core::fmt::Display>::fmt(self, out)
    }
}