1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
|
//! common types used for instruction behavior analysis across `yaxpeax-x86`
//!
//! these types are, in particular, used in [`yaxpeax_x86::long_mode::behavior`],
//! [`yaxpeax_x86::protected_mode::behavior`], and [`yaxpeax_x86::real_mode::behavior`]. specifics
//! like `Operand` are still mode-dependent, in part because `RegSpec` is different across modes.
//! likewise, there is no generic method to go from an `Instruction` to these kinds of accessors
//! yet. that said, `Instruction::behavior()` returns an `InstBehavior` that works effectively the
//! same way in all modes.
// a lot of people have told me that we don't need to read code anymore, just like we "never look at
// machine code anymore". sit with me and have a sad laugh for a moment. if we weren't here, reading
// and thinking and talking about how to model the computer, where would the training data come
// from? "you're not being left behind, it's a personal choice!", i've heard. a year later this has
// evolved into "it is an abdication of your responsibility as an engineer to not pay Anthropic".
// it is our ~moral responsibility~ to build the highest quality software in service of furthering
// the rot? it is an abdication of ethics to claim all works are good.
/// a collection of possible exceptions an instruction can raise. this covers the handful of
/// well-defined exception vectors with bits matching to the exception vectors listed in SDM
/// chapter 6.5.1 "Call and Return Operation for Interrupt or Exception Handling Procedures"
/// specifically "Table 6-1. Exceptions and Interrupts".
pub struct ExceptionInfo {
possible_vectors: u32,
}
/// an individual exception vector. these are just a tiny wrapper around `u8` to have some
/// associated constant definitions.
///
/// the associated constants on this type are named according to the Intel SDM chapter 7.3 "SOURCES
/// OF INTERRUPTS" table 7-1 "Protected-Mode Exceptions and Interrupts". similar descriptions can
/// be found in the AMD APM.
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct Exception {
vector: u8,
}
impl Exception {
/// Divide Error
pub const DE: Exception = Exception::vector(0);
/// Debug
pub const DB: Exception = Exception::vector(1);
/// Non-Maskable Interrupt
pub const NMI: Exception = Exception::vector(2);
/// Breakpoint
pub const BP: Exception = Exception::vector(3);
/// Overflow
pub const OF: Exception = Exception::vector(4);
/// BOUND Range Exceeded
pub const BR: Exception = Exception::vector(5);
/// Invalid Opcode (Undefined Opcode)
pub const UD: Exception = Exception::vector(6);
/// Device Not Available (No Math Coprocessor)
pub const NM: Exception = Exception::vector(7);
/// Double Fault
pub const DF: Exception = Exception::vector(8);
// CoProcessor Segment Overrun (reserved)
// from the SDM:
// > IA-32 processors after the Intel386 processor do not generate this exception.
//
// and as the mnemonic has since been reused for exception vector 16,
// `Floating-Point Error (Math Fault)`, we won't bother giving vector 9 a nice symbolic name.
// const MF: Exception = Exception::vector(9);
/// Invalid TSS
pub const TS: Exception = Exception::vector(10);
/// Segment Not Present
pub const NP: Exception = Exception::vector(11);
/// Stack Segment Fault
pub const SS: Exception = Exception::vector(12);
/// General Protection
pub const GP: Exception = Exception::vector(13);
/// Page Fault
pub const PF: Exception = Exception::vector(14);
// vector 15 is reserved
/// Floating-Point Error (Math Fault)
pub const MF: Exception = Exception::vector(16);
/// Alignment Check
pub const AC: Exception = Exception::vector(17);
/// Machine Check
pub const MC: Exception = Exception::vector(18);
/// SIMD Floating-Point Exception
pub const XM: Exception = Exception::vector(19);
/// Virtualization Exception
pub const VE: Exception = Exception::vector(20);
/// Control Protection Exception
pub const CP: Exception = Exception::vector(21);
/// construct an `Exception` for the provided exception vector number.
///
/// this is provided for convenience when converting (for example) the number in an x86
/// exception handler to the kinds of `Exception` in this library.
pub const fn vector(vector: u8) -> Self {
Self { vector }
}
/// convert this `Exception` to an index into an x86 IDT.
pub const fn to_u8(&self) -> u8 {
self.vector
}
#[cfg(any(doc, feature = "fmt"))]
/// get the typical mnemonic for this `Exception`, if one is documented.
///
/// the names returned by helper do not include a leading `#`. they come from the Intel SDM
/// chapter 7.3 "SOURCES OF INTERRUPTS" table 7-1 "Protected-Mode Exceptions and Interrupts".
/// similar descriptions can be found in the AMD APM.
pub fn name(&self) -> Option<&'static str> {
static NAMES: [Option<&'static str>; 22] = [
Some("DE"), Some("DB"), Some("NMI"), Some("BP"),
Some("OF"), Some("BR"), Some("UD"), Some("NM"),
Some("DF"), None, Some("TS"), Some("NP"),
Some("SS"), Some("GP"), Some("PF"), None,
Some("MF"), Some("AC"), Some("MC"), Some("XM"),
Some("VE"), Some("CP")
];
if let Some(maybe_name) = NAMES.get(self.vector as usize) {
*maybe_name
} else {
None
}
}
}
#[cfg(feature = "fmt")]
use core::fmt;
#[cfg(feature = "fmt")]
impl fmt::Debug for Exception {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(name) = self.name() {
write!(f, "#{}", name)
} else {
write!(f, "#Int{}", self.to_u8())
}
}
}
impl ExceptionInfo {
/// construct an empty set of possible exception vectors.
pub fn empty() -> Self {
Self {
possible_vectors: 0,
}
}
/// test if this `ExceptionInfo` has any possible vector set.
pub fn any(&self) -> bool {
self.possible_vectors != 0
}
/// test if this `ExceptionInfo` has no vector set.
pub fn none(&self) -> bool {
!self.any()
}
/// test if this `ExceptionInfo` indicates that exception `e` may be raised.
pub fn may(&self, e: Exception) -> bool {
(self.possible_vectors & (1 << e.vector)) != 0
}
/// record that exception `e` is or is not (`b`) possible in this `ExceptionInfo` record.
pub const fn set(&mut self, e: Exception, b: bool) {
let offset = e.vector;
assert!(offset < 32);
let mask = !(1 << offset);
let bit = (b as u32) << offset;
self.possible_vectors &= mask;
self.possible_vectors |= bit;
}
/// record that exception `e` is or is not (`b`) possible in this `ExceptionInfo` record, but
/// in a more chaining-friendly way.
pub const fn with(mut self, e: Exception, b: bool) -> Self {
self.set(e, b);
self
}
}
#[test]
fn test_exception_info() {
let mut info = ExceptionInfo::empty();
info.set(Exception::MF, true);
assert_eq!(info.possible_vectors, 0x10000);
info.set(Exception::MF, true);
assert_eq!(info.possible_vectors, 0x10000);
info.set(Exception::MF, false);
assert_eq!(info.possible_vectors, 0x00000);
info.set(Exception::GP, false);
assert_eq!(info.possible_vectors, 0x00000);
info.set(Exception::GP, true);
assert_eq!(info.possible_vectors, 0x02000);
info.set(Exception::MF, true);
assert_eq!(info.possible_vectors, 0x12000);
}
/// a description of the privilege level (that is, value of `CPL` in the current code selector)
/// that allows executing the corresponding instruction.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrivilegeLevel {
/// the corresponding instruction can run at any privilege level.
Any = 0b00,
/// the corresponding instruction can only run when `CPL=0` (aka "in ring 0").
PL0 = 0b01,
/// the corresponding instruction has more complex rule for when it is allowed.
///
/// this may mean the instruction is either "Any" or "PL0" depending on other processor state
/// (such as `rdtsc`), or it may mean the instruction simply does not relate directly to
/// `CPL=3`/`CPL=0` (such as for `iret`).
Special = 0b10,
}
/// a description of how an operand is used.
///
/// `Access::ReadWrite` can be processed in the same manner as that operand listed as
/// `Access::Read` followed by that same operand listed as `Access::Write`.
///
/// **important**: the meaning of `Access` is different for `flags`/`eflags`/`rflags` than other
/// operands! these differences are documented on enum variants below.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Access {
/// the corresponding operand is read.
///
/// for memory operands, this describes the referenced memory; implicitly the registers used in
/// the operand's address calculation are also read.
Read = 0b01,
/// the corresponding operand is written.
///
/// for memory operands, this describes the referenced memory; implicitly the registers used in
/// the operand's address calculation are also read.
///
/// for flags/eflags/rflags, "write" refers to some subset of flag bits as appropriate for the
/// instruction, and implies that the instruction does not depend on the initial state of those
/// bits. this is in contrast to `Write` for other operands, where it implies a full write of
/// the corresponding operand. as a concrete example, `add` reports the flags register as a
/// `Write` since the resulting flag bits are purely a function of the `add` register/memory
/// operands.
Write = 0b10,
/// the corresponding operand is read and written.
///
/// in some cases `Access::ReadWrite` is chosen in particular to represent a parital-write;
/// this is especially true with SIMD instructions as `yaxpeax-x86` does not currently have the
/// ability to express individual SIMD lane read/write operations. the `vmov{h,l}{ps,pd}`
/// instructions are more common examples of this access form. this kind of partial-write
/// access is reported as `Access::Write` for flags/eflags/rflags.
///
/// for flags/eflags/rflags, "read-write" refers to some subset of flag bits as appropriate for the
/// instruction, and implies that the instruction does depends on the initial state of those
/// bits as well as modifying some (possibly different) bits in flags as a result.
/// as a concrete example, `adc` reports the flags register as a `ReadWrite` because the
/// initial state of `cf` is an input to the addition, and the normal arithmetic flags are
/// written based on the result.
///
/// for memory operands, this describes the referenced memory; implicitly the registers used in
/// the operand's address calculation are also read.
ReadWrite = 0b11,
/// the corresponding operand is not actually accessed for reading or writing.
///
/// this is only used to describe the operand of `nop` or `ud1` instructions.
None = 0b00,
}
impl Access {
// translate two bits to an `Access`. panics if the bit pattern has anything other than the low
// two bits set. don't do that.
pub(crate) fn from_bits(bits: u8) -> Option<Access> {
const LUT: [Option<Access>; 4] = [
Some(Access::None), Some(Access::Read),
Some(Access::Write), Some(Access::ReadWrite),
];
assert!(bits <= 0b11);
LUT[bits as usize]
}
/// is this access a read?
///
/// if it is `ReadWrite`, this will be `true` as will `is_write`.
pub fn is_read(&self) -> bool {
*self as u8 & 0b01 != 0
}
/// is this access a write?
///
/// if it is `ReadWrite`, this will be `true` as will `is_read`.
pub fn is_write(&self) -> bool {
*self as u8 & 0b10 != 0
}
}
|