aboutsummaryrefslogtreecommitdiff
path: root/src/lib.rs
diff options
context:
space:
mode:
author iximeow <me@iximeow.net> 2024-06-24 14:06:22 -0700
committer iximeow <me@iximeow.net> 2024-06-24 14:27:25 -0700
commit dd8bd5ce0772b08c271205508e48e98ef1c58ea8 (patch)
tree 946630c89a554843dd33a9988a36bb43db48d539 /src/lib.rs
parent ddde47c4c8c2058379b448894bebb3e099ea0585 (diff)
justify the current max instruction length
this is also checked by a new fuzz target
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- src/lib.rs 36
1 files changed, 36 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 7ab6cb8..93274f9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -138,6 +138,42 @@ pub use protected_mode::Arch as x86_32;
pub mod real_mode;
pub use real_mode::Arch as x86_16;
+// this exists to size `InstructionTextBuffer`'s buffer. it ideally would come from an `Arch`
+// impl, or something related to `Arch`, but i'm not yet sure how to wire that up into
+// yaxpeax-arch. so instead calculate an appropriate max size for all of 16-bit/32-bit/64-bit
+// instruction printing that `InstructionTextBuffer` can be used for.
+//
+// `InstructionTextBuffer` prints an `InstructionDisplayer`, which means either intel syntax or
+// pseudo-C. in the future, at&t probably, as well.
+//
+// the pseudo-C syntax's max length would be something like:
+// ```
+// xacquire xrelease lock { repnz qword if /* signed */ greater_or_equal(rflags) then jmp gs:[xmm31 +
+// xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae} }
+// ```
+// (which is nonsensical) or for an unknown opcode,
+// ```
+// xacquire xrelease lock { op0 = op(op0, op1, op2, op3) }
+// ```
+// where `opN` is an operand. the longest operand, same as above, would be something like
+// ```
+// gs:[xmm31 + xmm31 * 8 + 0x12345678]{k7}{z}{rne-sae}
+// ```
+// for a length like 262 bytes of operand, 55 bytes of prefixes and syntax, and another up-to-20
+// bytes of opcode.
+//
+// the longest contextualize_c might write is around 337 bytes. round up to 512 because it's.. not
+// much extra.
+//
+// the same reasoning for intel syntax yields a smaller instruction:
+// ```
+// xacquire xrelease lock op op1, op2, op3, op4
+// ```
+// where the longest operands are the same as above. this comes out to closer to 307 bytes, so
+// 512 bytes still covers the longer of the two syntaxes.
+#[allow(dead_code)] // can be an unused constant in some library configurations
+const MAX_INSTRUCTION_LEN: usize = 512;
+
const MEM_SIZE_STRINGS: [&'static str; 65] = [
"BUG",
"byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword",