From cef4feeaf9c64e03a6728f267750ac2fb32eb9ff Mon Sep 17 00:00:00 2001
From: iximeow <me@iximeow.net>
Date: Sat, 21 Aug 2021 12:13:01 -0700
Subject: report memory sizes for push, pop, call, ret

these instructions had memory sizes reported for the operand, if it was
a memory operand, but for versions with non-memory operands the decoded
`Instruction` would imply that no memory access would happen at all.
now, decoded instructions in these cases will report a more useful
memory size.
---
 src/lib.rs                |  7 +++++++
 src/long_mode/mod.rs      | 32 +++++++++++++++++++++++++++++++-
 src/protected_mode/mod.rs | 28 +++++++++++++++++++++++++++-
 src/real_mode/mod.rs      | 29 +++++++++++++++++++++++++++--
 4 files changed, 92 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/lib.rs b/src/lib.rs
index 84353ba..46bebdb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -185,3 +185,10 @@ impl core::fmt::Display for MemoryAccessSize {
         f.write_str(self.size_name())
     }
 }
+
+#[cfg(feature = "fmt")]
+impl core::fmt::Debug for MemoryAccessSize {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        core::fmt::Display::fmt(self, f)
+    }
+}
diff --git a/src/long_mode/mod.rs b/src/long_mode/mod.rs
index 039d550..a01e854 100644
--- a/src/long_mode/mod.rs
+++ b/src/long_mode/mod.rs
@@ -7396,6 +7396,9 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if immsz == 0 {
                 instruction.operands[0] = OperandSpec::ImmI8;
             } else {
+                if instruction.opcode == Opcode::CALL {
+                    instruction.mem_size = 8;
+                }
                 instruction.operands[0] = OperandSpec::ImmI32;
             }
             instruction.operand_count = 1;
@@ -7420,6 +7423,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
                         };
                         instruction.regs[0] =
                             RegSpec::from_parts(reg, instruction.prefixes.rex_unchecked().b(), bank);
+                        instruction.mem_size = 8;
                         instruction.operand_count = 1;
                     }
                     1 => {
@@ -7615,12 +7619,27 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if instruction.operands[0] == OperandSpec::RegMMM {
                 if opcode == Opcode::CALL || opcode == Opcode::JMP {
                     instruction.regs[1].bank = RegisterBank::Q;
+                    if opcode == Opcode::CALL {
+                        instruction.mem_size = 8;
+                    }
+                } else if opcode == Opcode::PUSH || opcode == Opcode::POP {
+                    if instruction.prefixes.operand_size() {
+                        instruction.mem_size = 2;
+                    } else {
+                        instruction.mem_size = 8;
+                    }
                 } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF {
                     return Err(DecodeError::InvalidOperand);
                 }
             } else {
-                if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP {
+                if opcode == Opcode::CALL || opcode == Opcode::JMP {
                     instruction.mem_size = 8;
+                } else if opcode == Opcode::PUSH || opcode == Opcode::POP {
+                    if instruction.prefixes.operand_size() {
+                        instruction.mem_size = 2;
+                    } else {
+                        instruction.mem_size = 8;
+                    }
                 } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF {
                     instruction.mem_size = 10;
                 }
@@ -7749,6 +7768,12 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if instruction.opcode == Opcode::Invalid {
                 return Err(DecodeError::InvalidOpcode);
             }
+            if instruction.opcode == Opcode::RETURN {
+                instruction.mem_size = 8;
+            } else if instruction.opcode == Opcode::RETF {
+                instruction.mem_size = 10;
+            }
+            // TODO: leave?
             instruction.operands[0] = OperandSpec::Nothing;
             instruction.operand_count = 0;
             return Ok(());
@@ -9273,6 +9298,11 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y
             instruction.imm =
                 read_imm_unsigned(words, 2)?;
             instruction.operands[0] = OperandSpec::ImmU16;
+            if instruction.opcode == Opcode::RETURN {
+                instruction.mem_size = 8;
+            } else {
+                instruction.mem_size = 10;
+            }
             instruction.operand_count = 1;
         }
         OperandCode::ModRM_0x0f00 => {
diff --git a/src/protected_mode/mod.rs b/src/protected_mode/mod.rs
index a06af4c..8381d68 100644
--- a/src/protected_mode/mod.rs
+++ b/src/protected_mode/mod.rs
@@ -7235,6 +7235,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
                         };
                         instruction.regs[0] =
                             RegSpec::from_parts(reg, bank);
+                        instruction.mem_size = 4;
                         instruction.operand_count = 1;
                     }
                     1 => {
@@ -7328,6 +7329,9 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if immsz == 0 {
                 instruction.operands[0] = OperandSpec::ImmI8;
             } else {
+                if instruction.opcode == Opcode::CALL {
+                    instruction.mem_size = 4;
+                }
                 instruction.operands[0] = OperandSpec::ImmI32;
             }
             instruction.operand_count = 1;
@@ -7505,12 +7509,21 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if instruction.operands[0] == OperandSpec::RegMMM {
                 if opcode == Opcode::CALL || opcode == Opcode::JMP {
                     instruction.regs[1].bank = RegisterBank::D;
+                    if opcode == Opcode::CALL {
+                        instruction.mem_size = 4;
+                    }
                 } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF {
                     return Err(DecodeError::InvalidOperand);
                 }
             } else {
-                if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP {
+                if opcode == Opcode::CALL || opcode == Opcode::JMP {
                     instruction.mem_size = 4;
+                } else if opcode == Opcode::PUSH || opcode == Opcode::POP {
+                    if instruction.prefixes.operand_size() {
+                        instruction.mem_size = 2;
+                    } else {
+                        instruction.mem_size = 4;
+                    }
                 } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF {
                     instruction.mem_size = 6;
                 }
@@ -7653,6 +7666,14 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             instruction.operand_count = 1;
         }
         28 => {
+            if instruction.opcode == Opcode::Invalid {
+                return Err(DecodeError::InvalidOpcode);
+            }
+            if instruction.opcode == Opcode::RETURN {
+                instruction.mem_size = 4;
+            } else {
+                instruction.mem_size = 6;
+            }
             instruction.operands[0] = OperandSpec::Nothing;
             instruction.operand_count = 0;
             return Ok(());
@@ -9132,6 +9153,11 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y
             instruction.imm =
                 read_imm_unsigned(words, 2)?;
             instruction.operands[0] = OperandSpec::ImmU16;
+            if instruction.opcode == Opcode::RETURN {
+                instruction.mem_size = 4;
+            } else {
+                instruction.mem_size = 6;
+            }
             instruction.operand_count = 1;
         }
         OperandCode::ModRM_0x0f00 => {
diff --git a/src/real_mode/mod.rs b/src/real_mode/mod.rs
index fbfc687..548c42e 100644
--- a/src/real_mode/mod.rs
+++ b/src/real_mode/mod.rs
@@ -7236,6 +7236,7 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
                         };
                         instruction.regs[0] =
                             RegSpec::from_parts(reg, bank);
+                        instruction.mem_size = 2;
                         instruction.operand_count = 1;
                     }
                     1 => {
@@ -7329,6 +7330,9 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if immsz == 0 {
                 instruction.operands[0] = OperandSpec::ImmI8;
             } else {
+                if instruction.opcode == Opcode::CALL {
+                    instruction.mem_size = 2;
+                }
                 instruction.operands[0] = OperandSpec::ImmI32;
             }
             instruction.operand_count = 1;
@@ -7506,11 +7510,19 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             if instruction.operands[0] == OperandSpec::RegMMM {
                 // in real mode, `xed` reports that operand-size does in fact override from word to
                 // dword. unlikely larger modes, operand-size can't shrink the call operand down.
-                if opcode == Opcode::CALLF || opcode == Opcode::JMPF {
+                if opcode == Opcode::CALL {
+                    instruction.mem_size = 2;
+                } else if opcode == Opcode::CALLF || opcode == Opcode::JMPF {
                     return Err(DecodeError::InvalidOperand);
                 }
             } else {
-                if opcode == Opcode::CALL || opcode == Opcode::JMP || opcode == Opcode::PUSH || opcode == Opcode::POP {
+                if opcode == Opcode::CALL || opcode == Opcode::JMP {
+                    if instruction.prefixes.operand_size() {
+                        instruction.mem_size = 4;
+                    } else {
+                        instruction.mem_size = 2;
+                    }
+                } else if opcode == Opcode::PUSH || opcode == Opcode::POP {
                     if instruction.prefixes.operand_size() {
                         instruction.mem_size = 4;
                     } else {
@@ -7658,6 +7670,14 @@ fn read_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as yaxpe
             instruction.operand_count = 1;
         }
         28 => {
+            if instruction.opcode == Opcode::Invalid {
+                return Err(DecodeError::InvalidOpcode);
+            }
+            if instruction.opcode == Opcode::RETURN {
+                instruction.mem_size = 2;
+            } else {
+                instruction.mem_size = 4;
+            }
             instruction.operands[0] = OperandSpec::Nothing;
             instruction.operand_count = 0;
             return Ok(());
@@ -9140,6 +9160,11 @@ fn unlikely_operands<T: Reader<<Arch as yaxpeax_arch::Arch>::Address, <Arch as y
             instruction.imm =
                 read_imm_unsigned(words, 2)?;
             instruction.operands[0] = OperandSpec::ImmU16;
+            if instruction.opcode == Opcode::RETURN {
+                instruction.mem_size = 2;
+            } else {
+                instruction.mem_size = 4;
+            }
             instruction.operand_count = 1;
         }
         OperandCode::ModRM_0x0f00 => {
-- 
cgit v1.1