aboutsummaryrefslogtreecommitdiff
path: root/src/long_mode/behavior.rs
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2026-05-17 17:44:31 +0000
committeriximeow <me@iximeow.net>2026-05-17 17:45:59 +0000
commit50120b69498333f11d57cddd5a4f91192ac69dc2 (patch)
treeb53cc7a9dfe28a76742e21aa99c336dfb0c34a44 /src/long_mode/behavior.rs
parentbeb67cce2eab45e29617814ee68833d7c0f20d36 (diff)
docs, cleanupinst-behavior
Diffstat (limited to 'src/long_mode/behavior.rs')
-rw-r--r--src/long_mode/behavior.rs193
1 files changed, 172 insertions, 21 deletions
diff --git a/src/long_mode/behavior.rs b/src/long_mode/behavior.rs
index dd9ec3e..97ae3fd 100644
--- a/src/long_mode/behavior.rs
+++ b/src/long_mode/behavior.rs
@@ -1,9 +1,41 @@
+//! behavior information for x86-64 instructions.
+//!
+//! this module allows users of yaxpeax-x86 to collect operand read/write information about
+//! instructions ([`InstBehavior::get_operand()`]), as well as allowed execution level
+//! ([`InstBehavior::privilege_level()`]), potential exceptions ([`InstBehavior::exceptions()`]),
+//! and iterating all explicit and implicit operands ([`InstBehavior::all_operands()`]).
+//!
+//! additionally, [`ComplexOp`] enumerates instructions that may be considered "complex" - either
+//! involving architectural state not expressed in yaxpeax-x86's API or otherwise affecting machine
+//! state in a way that simply considering the operands as-presented would be inaccurate. where
+//! possible, `ComplexOp` tries to guide users towards how to handle such instructions.
+//!
+//! some behavior information in this module is "unstable", meaning it must be opted into with
+//! `feature = ["unstable"]` on yaxpeax-x86; information from "unstable" interfaces may be
+//! less-tested and change across semver-compatible releases! if you want to use unstable
+//! interfaces here, first: thank you!! please report any issues, and second: consider pinning to a
+//! specific minor version while setting `feature = ["unstable"]` if instruction behavior becoming
+//! more correct might present an issue in your application.
+
+#![deny(missing_docs)]
+
+// a lot of people have told me that we don't need to read code anymore, just like we "never look at
+// machine code anymore". sit with me and have a sad laugh for a moment. if we weren't here, reading
+// and thinking and talking about how to model the computer, where would the training data come
+// from? "you're not being left behind, it's a personal choice!", i've heard. a year later this has
+// evolved into "it is an abdication of your responsibility as an engineer to not pay Anthropic".
+// it is our moral responsibility to build the highest quality software in service of furthering
+// the rot? it is an abdication of ethics to claim all works are good.
+
use super::{Instruction, Opcode, Operand, OperandSpec};
use super::RegSpec;
/// an accessor for run-time characteristics of instructions.
///
-/// ... TODO words ...
+/// generally, behavior accessors across architectures are expected to have a `behavior()`
+/// entrypoint on a decoded instruction. it is not clear which properties of `behavior()`
+/// generalize across architectures (yet!) but presumably something like `all_operands()` and
+/// `Access` do.
///
/// additionally, of note for x86:
///
@@ -15,11 +47,10 @@ use super::RegSpec;
/// operation (consider `call qword [rcx]`; `qword [rcx]` is one memory access, but the implied
/// push of a return address is a second memory operation).
/// * `{,e,r}flags` is often written and sometimes read, but almost never as an explicit source or
-/// destination operand. this can be queried with `flags_access()`, in addition to its inclusion
-/// as an implicit operand.
+/// destination operand. this can be queried with [`InstBehavior::flags_access()`].
///
/// it's also useful to know if implicit and explicit operands are reads, writes, or both, such as
-/// when diagnosing a run-time fault. to iterate over this information, `operands().accesses()`. or
+/// when diagnosing a run-time fault. to iterate over this information, use `all_operands().iter()`, or
/// `visit_accesses(&mut ..)` to collect all operand/access information for this instruction.
#[derive(Copy, Clone)]
pub struct InstBehavior<'inst> {
@@ -28,6 +59,14 @@ pub struct InstBehavior<'inst> {
}
impl Instruction {
+ /// get a struct to query behaviors of an instruction.
+ ///
+ /// "behaviors" is broad! as of writing, "behavior" covers "implicit and explicit operand
+ /// reads/writes", "possible exceptions", "allowed privilege levels", and "instruction has
+ /// additional semantics not easily expressed by this library".
+ ///
+ /// see the documentation for [`InstBehavior`] as well as the
+ /// [`behavior`][crate::long_mode::behavior] module for more information.
pub fn behavior<'inst>(&'inst self) -> InstBehavior<'inst> {
let mut behavior = opcode2behavior(&self.opcode);
@@ -252,6 +291,10 @@ impl Exception {
/// Control Protection Exception
pub const CP: Exception = Exception::vector(21);
+ /// construct an `Exception` for the provided exception vector number.
+ ///
+ /// this is provided for convenience when converting (for example) the number in an x86
+ /// exception handler to the kinds of `Exception` in this library.
pub const fn vector(vector: u8) -> Self {
Self { vector }
}
@@ -262,6 +305,11 @@ impl Exception {
}
#[cfg(feature = "fmt")]
+ /// get the typical mnemonic for this `Exception`, if one is documented.
+ ///
+ /// the names returned by this helper do not include a leading `#`. they come from the Intel SDM
+ /// chapter 7.3 "SOURCES OF INTERRUPTS" table 7-1 "Protected-Mode Exceptions and Interrupts".
+ /// similar descriptions can be found in the AMD APM.
pub fn name(&self) -> Option<&'static str> {
static NAMES: [Option<&'static str>; 22] = [
Some("DE"), Some("DB"), Some("NMI"), Some("BP"),
@@ -294,20 +342,24 @@ impl fmt::Debug for Exception {
}
impl ExceptionInfo {
+ /// construct an empty set of possible exception vectors.
pub fn empty() -> Self {
Self {
possible_vectors: 0,
}
}
+ /// test if this `ExceptionInfo` has any possible vector set.
pub fn any(&self) -> bool {
self.possible_vectors != 0
}
+ /// test if this `ExceptionInfo` has no vector set.
pub fn none(&self) -> bool {
!self.any()
}
+ /// test if this `ExceptionInfo` indicates that exception `e` may be raised.
pub fn may(&self, e: Exception) -> bool {
(self.possible_vectors & (1 << e.vector)) != 0
}
@@ -323,6 +375,8 @@ impl ExceptionInfo {
self.possible_vectors |= bit;
}
+ /// record that exception `e` is or is not (`b`) possible in this `ExceptionInfo` record, but
+ /// in a more chaining-friendly way.
pub const fn with(mut self, e: Exception, b: bool) -> Self {
self.set(e, b);
self
@@ -351,13 +405,25 @@ fn test_exception_info() {
assert_eq!(info.possible_vectors, 0x12000);
}
+/// a description of the privilege level (that is, value of `CPL` in the current code selector)
+/// that allows executing the corresponding instruction.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrivilegeLevel {
+ /// the corresponding instruction can run at any privilege level.
Any = 0b00,
+ /// the corresponding instruction can only run when `CPL=0` (aka "in ring 0").
PL0 = 0b01,
+ /// the corresponding instruction has a more complex rule for when it is allowed.
+ ///
+ /// this may mean the instruction is either "Any" or "PL0" depending on other processor state
+ /// (such as `rdtsc`), or it may mean the instruction simply does not relate directly to
+ /// `CPL=3`/`CPL=0` (such as for `iret`).
Special = 0b10,
}
+/// a handle for an instruction, its behavior, and any related implicit operands.
+///
+/// this is only useful for [`InstOperands::iter()`].
#[derive(Copy, Clone)]
pub struct InstOperands<'inst> {
inst: InstBehavior<'inst>,
@@ -365,11 +431,17 @@ pub struct InstOperands<'inst> {
}
impl<'inst> InstOperands<'inst> {
+ /// establish an iterator over the operands described in this `InstOperands`.
pub fn iter(self) -> AccessIter<'inst> {
AccessIter::new(self)
}
}
+/// this struct implements [`Iterator`] to allow library users to walk all explicit and implicit
+/// operands for the corresponding instruction, along with if they are used for reading or for
+/// writing.
+///
+/// implicit operands are always walked first, explicit operands are walked last.
pub struct AccessIter<'inst> {
operands: InstOperands<'inst>,
explicit: bool,
@@ -385,6 +457,8 @@ impl<'inst> AccessIter<'inst> {
}
}
+ /// weaken this iterator to only returning the operands corresponding to this instruction,
+ /// without specific access information.
pub fn operands(self) -> OperandIter<'inst> {
OperandIter { inner: self }
}
@@ -437,7 +511,8 @@ impl<'inst> Iterator for AccessIter<'inst> {
if self.next < self.operands.inst.inst.operand_count() {
let op = self.operands.inst.inst.operand(self.next);
- let access = self.operands.inst.operand_access(self.next).expect("defined operand has defined access");
+ let access = self.operands.inst.operand_access(self.next)
+ .expect("defined operand has defined access");
debug_assert!(
access != Access::None || (
self.operands.inst.inst.opcode == Opcode::NOP ||
@@ -454,6 +529,11 @@ impl<'inst> Iterator for AccessIter<'inst> {
}
}
+/// a reduced-strength iterator of an instruction's implicit and explicit operands.
+///
+/// unlike `AccessIter`, this iterator does not provide read/write information, simply that
+/// operands are or are not present. this is more likely useful for some kinds of instruction
+/// printing than automated instruction analysis.
pub struct OperandIter<'inst> {
inner: AccessIter<'inst>,
}
@@ -512,6 +592,11 @@ impl<'inst> Iterator for OperandIter<'inst> {
}
impl<'inst> InstBehavior<'inst> {
+ #[cfg(feature = "unstable")]
+ /// get the [`PrivilegeLevel`] for this instruction.
+ ///
+ /// returns `None` if no privilege level information is recorded for the instruction. such
+ /// cases are a bug, please report if you see them.
pub fn privilege_level(&self) -> Option<PrivilegeLevel> {
let pl_bits = self.behavior.behavior & 0b11;
const LUT: [Option<PrivilegeLevel>; 4] = [
@@ -522,8 +607,11 @@ impl<'inst> InstBehavior<'inst> {
LUT[pl_bits as usize]
}
- ///
-// #[cfg(feature = "unstable")]
+ #[cfg(feature = "unstable")]
+ /// get the [`ExceptionInfo`] for this instruction.
+ ///
+ /// this is very much best-effort and poorly tested. it is behind the `unstable` feature for a
+ /// reason!
pub fn exceptions(&self) -> ExceptionInfo {
let mut exceptions = ExceptionInfo::empty();
if self.privilege_level() != Some(PrivilegeLevel::Any) {
@@ -545,15 +633,21 @@ impl<'inst> InstBehavior<'inst> {
exceptions
}
- fn as_complex_op(&self) -> Option<ComplexOp> {
- // if the behavior is not complex, it is *definitely* not complex. if the behavior is
+ /// transform this instruction's [`Opcode`] into a [`ComplexOp`], if the instruction is
+ /// "complex".
+ ///
+ /// documentation on [`ComplexOp`] covers what instructions are considered "complex" by
+ /// yaxpeax-x86 and why in more detail. correct analysis of a function (or program!) in the
+ /// presence of complex instructions may require consulting the Intel Software Developer's
+ /// Manual or AMD Architecture Programmer's Manual.
+ pub fn as_complex_op(&self) -> Option<ComplexOp> {
+ // if the behavior is not complex, it is *definitely* not a complex op. if the behavior is
// complex, it's really a "depending on the specific instruction and operands it might
// be"...
if !self.behavior.is_complex() {
return None;
}
- // TODO: all of these should be a `set_complex` bit.
if self.inst.opcode == Opcode::BT {
if self.inst.operands[0] != OperandSpec::RegMMM {
Some(ComplexOp::BT)
@@ -614,12 +708,17 @@ impl<'inst> InstBehavior<'inst> {
})
}
+ /// get the `Access` behavior this instruction has for `rflags`.
+ ///
+ /// note that as the documentation for [`Access`] describes, "read" and "write" have slightly
+ /// different meanings for the flags register than other locations.
+ // this implies that `rflags` must never appear in an implicit operand list.
pub fn flags_access(&self) -> Option<Access> {
let flag_acc = (self.behavior.behavior >> 2) & 0b11;
Access::from_bits(flag_acc)
}
- pub fn implicit_oplist(&self) -> Option<&'static [ImplicitOperand]> {
+ fn implicit_oplist(&self) -> Option<&'static [ImplicitOperand]> {
let ops_idx = self.behavior.extra;
if ops_idx == 0 {
return None;
@@ -629,8 +728,13 @@ impl<'inst> InstBehavior<'inst> {
Some(&IMPLICIT_OPS_LIST[ops_idx as usize])
}
+ /// get the `Access` behavior for an explicit operand of this instruction.
+ ///
+ /// `None` means that there is no operand at the given index, while `Some(Access::None)` means
+ /// there is an operand, and the instruction does not actually access it (as for `nop`, `ud0`,
+ /// and `ud1`)
pub fn operand_access(&self, idx: u8) -> Option<Access> {
- if idx >= 4 {
+ if idx >= self.inst.operand_count {
return None;
}
@@ -638,13 +742,24 @@ impl<'inst> InstBehavior<'inst> {
Access::from_bits(op_acc)
}
+ /// iterate all operands in the instruction and report them to the provided `AccessVisitor`.
+ ///
+ /// this is a more informative, but somewhat more specialized, interface than simply iterating
+ /// [`InstBehavior::all_operands()`]. for memory operands, address calculations are reported to
+ /// the access visitor as reads of the relevant registers. if all dependent values are
+ /// available, the resulting effective address is computed and reported as part of the memory
+ /// access.
+ ///
+ /// `visit_accesses()` is slightly more efficient in this than iterating `all_operands()` as
+ /// well, as it uses unstable internal representations directly, rather than converting to API
+ /// types and back for every operand.
pub fn visit_accesses<T: AccessVisitor>(&self, v: &mut T) -> Result<(), ComplexOp> {
if let Some(op) = self.as_complex_op() {
return Err(op);
}
fn compute_addr<T: AccessVisitor>(v: &mut T, inst: &Instruction, op_spec: OperandSpec) -> Option<u64> {
- // TODO: test assertions feature?
+ #[cfg(feature = "_debug_internal_asserts")]
if !op_spec.is_memory() {
panic!("expected memory operand but got {:?}", op_spec);
}
@@ -753,6 +868,8 @@ impl<'inst> InstBehavior<'inst> {
Some(inst.disp as u32 as u64)
}
other => {
+ // this could be `_debug_internal_asserts`-gated, but i'm not quite that
+ // confident yet..
panic!("not-yet-handled memory operand: {:?}", other);
}
}
@@ -776,7 +893,11 @@ impl<'inst> InstBehavior<'inst> {
OperandSpec::Deref_edi => RegSpec::edi(),
OperandSpec::Deref_esi => RegSpec::esi(),
OperandSpec::Deref => self.inst.regs[1],
- other => { panic!("TODO: unreachable {:?}", other); }
+ other => {
+ // this could be `_debug_internal_asserts`-gated, but i'm not quite
+ // that confident yet..
+ panic!("TODO: unreachable {:?}", other);
+ }
};
if op.write {
v.register_write(reg);
@@ -796,7 +917,9 @@ impl<'inst> InstBehavior<'inst> {
}
}
OperandSpec::MemIndexScale => {
- // HACK HACK HACK
+ // HACK HACK HACK this is just how i've decided to interpret
+ // `MemIndexScale` as an operand spec; it's only for xlat. adding
+ // another field to implicit operands just for this is a little silly..
let base = v.get_register(op.reg);
let index = v.get_register(RegSpec::al());
if let (Some(base), Some(index)) = (base, index) {
@@ -806,6 +929,8 @@ impl<'inst> InstBehavior<'inst> {
}
}
other => {
+ // this could be `_debug_internal_asserts`-gated, but i'm not quite
+ // that confident yet..
panic!("impossible operand spec {:?}", other);
}
};
@@ -969,7 +1094,7 @@ impl<'inst> InstBehavior<'inst> {
if other.is_masked() && self.inst.prefixes.evex_unchecked().mask_reg() != 0 {
v.register_read(RegSpec::mask(self.inst.prefixes.evex_unchecked().mask_reg()));
}
- // no lea check necessary: the memory access is coded as a read and no
+ // no lea check necessary: its memory access is coded as a read and no
// instruction has a similar "fake" memory write.
v.memory_write(addr, size as u32);
}
@@ -1045,17 +1170,23 @@ impl Access {
LUT[bits as usize]
}
+ /// is this access a read?
+ ///
+ /// if it is `ReadWrite`, this will be `true` as will `is_write`.
pub fn is_read(&self) -> bool {
*self as u8 & 0b01 != 0
}
+ /// is this access a write?
+ ///
+ /// if it is `ReadWrite`, this will be `true` as will `is_read`.
pub fn is_write(&self) -> bool {
*self as u8 & 0b10 != 0
}
}
#[derive(Copy, Clone, PartialEq, Debug)]
-pub struct BehaviorDigest {
+struct BehaviorDigest {
// laid out like:
//
// |7 6|5 4|3 2|1 0|
@@ -1081,7 +1212,6 @@ pub struct BehaviorDigest {
extra: u16,
}
-// TODO: the various `set_pl*()` are not actually used yet..
#[allow(dead_code)]
impl BehaviorDigest {
const fn empty() -> BehaviorDigest {
@@ -1457,9 +1587,13 @@ impl BehaviorDigest {
///
/// TDX-related instructions are considered complex because they are not more precisely tested and
/// are assumed as-complex-as-VMX in the first place.
+// TODO: this could be declared through a macro that does something like:
+// "declare_opcode_subset! { }" which gets a list of identifiers and generates the
+// `Opcode::<ident> as u32` rhs. but a vim macro will do for now.
#[non_exhaustive]
#[repr(u32)] // same repr as `Opcode`
#[derive(Copy, Clone, Debug)]
+#[allow(missing_docs)]
pub enum ComplexOp {
/// rdmsr/wrmsr are considered "complex" for reasons in the enum doc comment.
RDMSR = (Opcode::RDMSR as u32),
@@ -1765,13 +1899,30 @@ pub trait AccessVisitor {
/// if any `get_register()` returns `None` in an address calculation, the subsequent
/// `memory_read()` or `memory_write()` for that operand will be given an `address` of `None`.
///
- /// if `get_register` is implemented withhout calling `register_read`, the
- /// if `get_register()` is given a custom implementation, be sure to either call `
+ /// `get_register()` may be implemented without calling `register_read()`, in which case when
+ /// used with `visit_accesses` the register/memory read/writes will all correspond directly to
+ /// implicit and explicit operands.
fn get_register(&mut self, reg: RegSpec) -> Option<u64> {
self.register_read(reg);
None
}
+ /// record that the instruction reads a memory location.
+ ///
+ /// when used with `visit_accesses`, an address is only provided when yaxpeax-x86 can calculate
+ /// an effective address (i.e. `get_register()` calls for all dependent registers return a
+ /// value). all non-`ComplexOp` instructions have a known memory access size, so this is always
+ /// reported even when the address itself is not known.
+ ///
+ /// some instructions can both read and write memory (consider `call [addr]`).
fn memory_read(&mut self, address: Option<u64>, size: u32);
+ /// record that the instruction writes a memory location.
+ ///
+ /// when used with `visit_accesses`, an address is only provided when yaxpeax-x86 can calculate
+ /// an effective address (i.e. `get_register()` calls for all dependent registers return a
+ /// value). all non-`ComplexOp` instructions have a known memory access size, so this is always
+ /// reported even when the address itself is not known.
+ ///
+ /// some instructions can both read and write memory (consider `call [addr]`).
fn memory_write(&mut self, address: Option<u64>, size: u32);
}
@@ -4038,7 +4189,7 @@ static TABLE: [BehaviorDigest; 1413] = [
.set_operand(0, Access::Read)
.set_complex(true),
/* VERR => */ GENERAL_R_FLAGWRITE,
- /* VERW => */ GENERAL_R_FLAGWRITE,
+ /* VERW => */ GENERAL_R_FLAGWRITE,
/* CMC => */ GENERAL_FLAGRW,
/* CLC => */ GENERAL_FLAGRW,
/* STC => */ GENERAL_FLAGRW,