From bbdf78c061b6e685d1992dcdeac692fc2f8f0d34 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 1 Apr 2024 23:38:57 -0700 Subject: display opt: mem size labels and minor segment reporting changes for mem size labels: add one new "BUG" entry at the start of the array so `mem_size` does not need to be adjusted before being used to look up a string from the `MEM_SIZE_STRINGS` array. it's hard to measure the direct benefit of this, but it shrinks codegen size by a bit and simplifies a bit of assembly.... for segment reporting changes: stos/scas/lods do not actually need special segment override logic. instead, set their use of `es` when decoded, if appropriate. this is potentially ambiguous; in non-64bit modes the sequence `26aa` would decode as `stos` with explicit `es` prefix. this is now identical to simply decoding `aa`, which now also reports that there is an explicit `es` prefix even though there is no prefix on the instruction. on the other hand, the prefix-reported segment now more accurately describes the memory selector through which memory accesses will happen. seems ok? --- src/lib.rs | 5 +++-- src/long_mode/display.rs | 8 ++++++-- src/long_mode/mod.rs | 17 ++--------------- src/protected_mode/display.rs | 4 ++-- src/real_mode/display.rs | 4 ++-- test/long_mode/mod.rs | 2 +- 6 files changed, 16 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 188a37a..a7b8531 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,7 +140,8 @@ pub use real_mode::Arch as x86_16; mod safer_unchecked; -const MEM_SIZE_STRINGS: [&'static str; 64] = [ +const MEM_SIZE_STRINGS: [&'static str; 65] = [ + "BUG", "byte", "word", "BUG", "dword", "ptr", "far", "BUG", "qword", "BUG", "mword", "BUG", "BUG", "BUG", "BUG", "BUG", "xmmword", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", "BUG", @@ -194,7 +195,7 @@ impl MemoryAccessSize { /// "variable" accesses access a number of bytes dependent on the physical processor and its /// operating mode. 
this is particularly relevant for `xsave`/`xrstor`-style instructions. pub fn size_name(&self) -> &'static str { - MEM_SIZE_STRINGS[self.size as usize - 1] + MEM_SIZE_STRINGS[self.size as usize] } } diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index 9c6795e..c1c6c65 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -132,6 +132,10 @@ pub(crate) fn regspec_label(spec: &RegSpec) -> &'static str { unsafe { REG_NAMES.get_kinda_unchecked((spec.num as u16 + ((spec.bank as u16) << 3)) as usize) } } +pub(crate) fn mem_size_label(size: u8) -> &'static str { + unsafe { MEM_SIZE_STRINGS.get_kinda_unchecked(size as usize) } +} + impl fmt::Display for RegSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(regspec_label(self)) @@ -3415,7 +3419,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } @@ -3435,7 +3439,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(", ")?; let x = Operand::from_spec(instr, instr.operands[i as usize]); if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } if let Some(prefix) = instr.segment_override_for_op(i) { diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs index 9bc9f0b..f58976a 100644 --- a/src/long_mode/mod.rs +++ b/src/long_mode/mod.rs @@ -4380,21 +4380,6 @@ impl Instruction { /// prefixes. 
pub fn segment_override_for_op(&self, op: u8) -> Option { match self.opcode { - Opcode::STOS | - Opcode::SCAS => { - if op == 0 { - Some(Segment::ES) - } else { - None - } - } - Opcode::LODS => { - if op == 1 { - Some(self.prefixes.segment) - } else { - None - } - } Opcode::MOVS => { if op == 0 { Some(Segment::ES) @@ -9002,6 +8987,7 @@ fn read_operands< } OperandCase::Yb_AL => { instruction.regs[0] = RegSpec::al(); + instruction.prefixes.segment = Segment::ES; if instruction.prefixes.address_size() { instruction.regs[1] = RegSpec::edi(); } else { @@ -9028,6 +9014,7 @@ fn read_operands< let bank = bank_from_prefixes_64(SizeCode::vqp, instruction.prefixes); instruction.regs[0].num = 0; instruction.regs[0].bank = bank; + instruction.prefixes.segment = Segment::ES; if instruction.prefixes.address_size() { instruction.regs[1] = RegSpec::edi(); } else { diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 55cab9e..89b7565 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -3436,7 +3436,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } @@ -3456,7 +3456,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(", ")?; let x = Operand::from_spec(instr, instr.operands[i as usize]); if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } if let Some(prefix) = instr.segment_override_for_op(i) { diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index 9450a39..6472c6c 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -3436,7 +3436,7 @@ fn contextualize_intel(instr: &Instruction, colors: } if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + 
out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } @@ -3456,7 +3456,7 @@ fn contextualize_intel(instr: &Instruction, colors: out.write_str(", ")?; let x = Operand::from_spec(instr, instr.operands[i as usize]); if x.is_memory() { - out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize - 1])?; + out.write_str(MEM_SIZE_STRINGS[instr.mem_size as usize])?; out.write_str(" ")?; } if let Some(prefix) = instr.segment_override_for_op(i) { diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 8cb032c..b725287 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -2751,7 +2751,7 @@ fn only_64bit() { test_display(&[0xae], "scas byte es:[rdi], al"); test_display(&[0xaf], "scas dword es:[rdi], eax"); test_display(&[0x67, 0xaf], "scas dword es:[edi], eax"); - test_display(&[0x67, 0xac], "lods al, byte ds:[esi]"); + test_display(&[0x67, 0xac], "lods al, byte [esi]"); test_display(&[0x67, 0xaa], "stos byte es:[edi], al"); // note that rax.b does *not* change the register test_display(&[0x4f, 0xa0, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34], "mov al, byte [0x3412341234123412]"); -- cgit v1.1