From f2a2a09688421f2c532ab6f02527bf68f095407a Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 25 May 2026 17:32:57 +0000 Subject: j*cxz/pusha/popa alternate size forms these all existed since forever but the library did not distinguish them and did not provide prefix information for users to tell which had been decoded. --- CHANGELOG | 7 +++++++ src/long_mode/behavior.rs | 12 ++++++++---- src/long_mode/display.rs | 12 ++++++++++-- src/long_mode/mod.rs | 22 +++++++++++++++++++++- src/protected_mode/behavior.rs | 38 ++++++++++++++++---------------------- src/protected_mode/display.rs | 13 +++++++++++-- src/protected_mode/mod.rs | 40 +++++++++++++++++++++++++++++++++++++--- src/real_mode/behavior.rs | 38 ++++++++++++++++---------------------- src/real_mode/display.rs | 13 +++++++++++-- src/real_mode/mod.rs | 40 +++++++++++++++++++++++++++++++++++++--- test/long_mode/mod.rs | 2 ++ test/protected_mode/mod.rs | 8 ++++++-- test/real_mode/mod.rs | 4 ++++ 13 files changed, 186 insertions(+), 63 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 732b850..1059267 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -45,6 +45,13 @@ * some instructions (such as invept, invvpid) were accepted by uarch-specific deocders when they should not have been. * disallow 66-prefixed `sha1rnds4`. +* jrcxz/jecxz/jcxz are overridden based on address-size prefix to select their + mode-appropriate alternate forms: jecxz/jcxz/jecxz. + - thank you Stephen for the report! +* 32-bit: pusha/popa default to pushad/popad to reflect the default operand + size. an operand-size prefix now correctly overrides to pusha/popa. +* 16-bit: an operand-size prefix now correctly overrides to pushad/popad. + - likewise, thank you Stephen for spotting these issues! ## 2.0.0 diff --git a/src/long_mode/behavior.rs b/src/long_mode/behavior.rs index 3e1b4a7..478c07a 100644 --- a/src/long_mode/behavior.rs +++ b/src/long_mode/behavior.rs @@ -229,8 +229,7 @@ impl Instruction { } } else if self.opcode() == Opcode::LOOPNZ || self.opcode() == Opcode::LOOPZ - || self.opcode() == Opcode::LOOP - || self.opcode() == Opcode::JRCXZ { + || self.opcode() == Opcode::LOOP { if self.prefixes.rex_unchecked().w() { behavior = behavior .set_implicit_ops(RW_RCX_IDX); @@ -3559,7 +3558,7 @@ fn behavior_table_size_is_right() { } /// this table MUST line up with Opcode declaration order in `mod.rs`. -static TABLE: [BehaviorDigest; 1413] = [ +static TABLE: [BehaviorDigest; 1414] = [ /* ADD => */ GENERAL_RW_R_FLAGWRITE, /* OR => */ GENERAL_RW_R_FLAGWRITE, /* ADC => */ GENERAL_RW_R_FLAGRW, @@ -5167,7 +5166,7 @@ static TABLE: [BehaviorDigest; 1413] = [ /* JRCXZ => */ BehaviorDigest::empty() .set_pl_any() .set_operand(0, Access::Read) - .set_nontrivial(true), + .set_implicit_ops(RW_RCX_IDX), // started shipping in Tremont, 2020 sept 23 // while this instruction is marked "write, read", the written first operand is a register @@ -5871,4 +5870,9 @@ static TABLE: [BehaviorDigest; 1413] = [ .set_pl0() .set_flags_access(Access::Write) .set_complex(true), + + /* JECXZ => */ BehaviorDigest::empty() + .set_pl_any() + .set_operand(0, Access::Read) + .set_implicit_ops(RW_ECX_IDX), ]; diff --git a/src/long_mode/display.rs b/src/long_mode/display.rs index f215d07..a00dd22 100644 --- a/src/long_mode/display.rs +++ b/src/long_mode/display.rs @@ -2100,6 +2100,8 @@ const MNEMONICS: &[&'static str] = &[ "pvalidate", "rmpadjust", "rmpupdate", + + "jecxz", ]; impl Opcode { @@ -2699,6 +2701,7 @@ impl Colorize for Opcode { Opcode::LOOPZ | Opcode::LOOP | Opcode::JRCXZ | + Opcode::JECXZ | Opcode::CALL | Opcode::CALLF | Opcode::JMP | @@ -4018,6 +4021,10 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) out.write_str("if rcx == 0 then jmp ")?; write_jmp_operand(instr.operand(0), out)?; }, + Opcode::JECXZ => { + out.write_str("if ecx == 0 then jmp ")?; + write_jmp_operand(instr.operand(0), out)?; + }, Opcode::LOOP => { out.write_str("rcx--; if rcx != 0 then jmp ")?; write_jmp_operand(instr.operand(0), out)?; @@ -4216,8 +4223,9 @@ impl ShowContextual Opcode { @@ -4684,7 +4689,7 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOPNZ), OperandCode::Ibs), OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOPZ), OperandCode::Ibs), OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOP), OperandCode::Ibs), - OpcodeRecord::new(Interpretation::Instruction(Opcode::JRCXZ), OperandCode::Ibs), + OpcodeRecord::new(Interpretation::Instruction(Opcode::JRCXZ), OperandCode::CXZ), OpcodeRecord::new(Interpretation::Instruction(Opcode::IN), OperandCode::AL_Ib), OpcodeRecord::new(Interpretation::Instruction(Opcode::IN), OperandCode::AX_Ib), OpcodeRecord::new(Interpretation::Instruction(Opcode::OUT), OperandCode::Ib_AL), @@ -9117,6 +9122,21 @@ fn read_operands< instruction.regs[0].bank = RegisterBank::Q; }; } + OperandCase::CXZ => { + if instruction.prefixes.address_size() { + // address-size overridden from 64-bit to 32-bit + instruction.opcode = Opcode::JECXZ; + } + instruction.imm = + read_imm_signed(words, 1)? as u64; + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("1-byte immediate", instruction.imm as i64) + .with_id(words.offset() as u32 * 8), + ); + instruction.operands[0] = OperandSpec::ImmI8; + }, }; Ok(()) } diff --git a/src/protected_mode/behavior.rs b/src/protected_mode/behavior.rs index 43bace0..22d1f9e 100644 --- a/src/protected_mode/behavior.rs +++ b/src/protected_mode/behavior.rs @@ -129,22 +129,6 @@ impl Instruction { .set_operand(1, Access::Read) .set_operand(2, Access::Read); } - } else if self.opcode == Opcode::PUSHA { - if !self.prefixes.operand_size() { - behavior = behavior - .set_implicit_ops(PUSHAD_IDX); - } else { - behavior = behavior - .set_implicit_ops(PUSHA_IDX); - } - } else if self.opcode == Opcode::POPA { - if !self.prefixes.operand_size() { - behavior = behavior - .set_implicit_ops(POPAD_IDX); - } else { - behavior = behavior - .set_implicit_ops(POPA_IDX); - } } else if self.opcode == Opcode::DIV || self.opcode == Opcode::IDIV { let op_width = if self.operands[0] == OperandSpec::RegMMM { self.regs[1].width() @@ -247,8 +231,7 @@ impl Instruction { } } else if self.opcode() == Opcode::LOOPNZ || self.opcode() == Opcode::LOOPZ - || self.opcode() == Opcode::LOOP - || self.opcode() == Opcode::JECXZ { + || self.opcode() == Opcode::LOOP { if self.prefixes.operand_size() { behavior = behavior .set_implicit_ops(RW_CX_IDX); @@ -3840,7 +3823,7 @@ fn behavior_table_size_is_right() { } /// this table MUST line up with Opcode declaration order in `mod.rs`. -static TABLE: [BehaviorDigest; 1425] = [ +static TABLE: [BehaviorDigest; 1428] = [ /* ADD => */ GENERAL_RW_R_FLAGWRITE, /* OR => */ GENERAL_RW_R_FLAGWRITE, /* ADC => */ GENERAL_RW_R_FLAGRW, @@ -5456,14 +5439,14 @@ static TABLE: [BehaviorDigest; 1425] = [ /* JECXZ => */ BehaviorDigest::empty() .set_pl_any() .set_operand(0, Access::Read) - .set_nontrivial(true), + .set_implicit_ops(RW_ECX_IDX), /* PUSHA => */ BehaviorDigest::empty() .set_pl_any() - .set_nontrivial(true), // 66 prefix adjusts size of pushed registers + .set_implicit_ops(PUSHA_IDX), /* POPA => */ BehaviorDigest::empty() .set_pl_any() - .set_nontrivial(true), // 66 prefix adjusts size of popped registers + .set_implicit_ops(POPA_IDX), /* BOUND => */ BehaviorDigest::empty() .set_pl_any() .set_operand(0, Access::Read) @@ -6202,4 +6185,15 @@ static TABLE: [BehaviorDigest; 1425] = [ .set_pl0() .set_flags_access(Access::Write) .set_complex(true), + + /* PUSHAD => */ BehaviorDigest::empty() + .set_pl_any() + .set_implicit_ops(PUSHAD_IDX), + /* POPAD => */ BehaviorDigest::empty() + .set_pl_any() + .set_implicit_ops(POPAD_IDX), + /* JCXZ => */ BehaviorDigest::empty() + .set_pl_any() + .set_operand(0, Access::Read) + .set_implicit_ops(RW_CX_IDX), ]; diff --git a/src/protected_mode/display.rs b/src/protected_mode/display.rs index 46449d1..bbb43ab 100644 --- a/src/protected_mode/display.rs +++ b/src/protected_mode/display.rs @@ -2092,6 +2092,10 @@ const MNEMONICS: &[&'static str] = &[ "pvalidate", "rmpadjust", "rmpupdate", + + "pushad", + "popad", + "jcxz", ]; impl Opcode { @@ -2574,6 +2578,10 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) out.write_str("if ecx == 0 then jmp ")?; write_jmp_operand(instr.operand(0), out)?; }, + Opcode::JCXZ => { + out.write_str("if cx == 0 then jmp ")?; + write_jmp_operand(instr.operand(0), out)?; + }, Opcode::LOOP => { out.write_str("ecx--; if ecx != 0 then jmp ")?; write_jmp_operand(instr.operand(0), out)?; @@ -2764,8 +2772,9 @@ impl ShowContextual Opcode { @@ -4595,7 +4604,7 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOPNZ), OperandCode::Ibs), OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOPZ), OperandCode::Ibs), OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOP), OperandCode::Ibs), - OpcodeRecord::new(Interpretation::Instruction(Opcode::JECXZ), OperandCode::Ibs), + OpcodeRecord::new(Interpretation::Instruction(Opcode::JECXZ), OperandCode::CXZ), OpcodeRecord::new(Interpretation::Instruction(Opcode::IN), OperandCode::AL_Ib), OpcodeRecord::new(Interpretation::Instruction(Opcode::IN), OperandCode::AX_Ib), OpcodeRecord::new(Interpretation::Instruction(Opcode::OUT), OperandCode::Ib_AL), @@ -6476,9 +6485,19 @@ fn read_operands< } else if instruction.opcode == Opcode::XLAT { instruction.mem_size = 1; } else if instruction.opcode == Opcode::PUSHA { - instruction.mem_size = 4 * 8; + if instruction.prefixes.operand_size() { + instruction.mem_size = 2 * 8; + } else { + instruction.opcode = Opcode::PUSHAD; + instruction.mem_size = 4 * 8; + } } else if instruction.opcode == Opcode::POPA { - instruction.mem_size = 4 * 8; + if instruction.prefixes.operand_size() { + instruction.mem_size = 2 * 8; + } else { + instruction.opcode = Opcode::POPAD; + instruction.mem_size = 4 * 8; + } } instruction.operands[0] = OperandSpec::Nothing; instruction.operand_count = 0; @@ -9011,6 +9030,21 @@ fn read_operands< instruction.mem_size = 2; instruction.operand_count = 2; }, + OperandCase::CXZ => { + if instruction.prefixes.address_size() { + // address-size overridden from 32-bit to 16-bit + instruction.opcode = Opcode::JCXZ; + } + instruction.imm = + read_imm_signed(words, 1)? as u32; + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("1-byte immediate", instruction.imm as i64) + .with_id(words.offset() as u32 * 8), + ); + instruction.operands[0] = OperandSpec::ImmI8; + }, }; Ok(()) } diff --git a/src/real_mode/behavior.rs b/src/real_mode/behavior.rs index cc7f705..8941ee5 100644 --- a/src/real_mode/behavior.rs +++ b/src/real_mode/behavior.rs @@ -140,22 +140,6 @@ impl Instruction { .set_operand(1, Access::Read) .set_operand(2, Access::Read); } - } else if self.opcode == Opcode::PUSHA { - if self.prefixes.operand_size() { - behavior = behavior - .set_implicit_ops(PUSHAD_IDX); - } else { - behavior = behavior - .set_implicit_ops(PUSHA_IDX); - } - } else if self.opcode == Opcode::POPA { - if self.prefixes.operand_size() { - behavior = behavior - .set_implicit_ops(POPAD_IDX); - } else { - behavior = behavior - .set_implicit_ops(POPA_IDX); - } } else if self.opcode == Opcode::DIV || self.opcode == Opcode::IDIV { let op_width = if self.operands[0] == OperandSpec::RegMMM { self.regs[1].width() @@ -257,8 +241,7 @@ impl Instruction { } } else if self.opcode() == Opcode::LOOPNZ || self.opcode() == Opcode::LOOPZ - || self.opcode() == Opcode::LOOP - || self.opcode() == Opcode::JCXZ { + || self.opcode() == Opcode::LOOP { if !self.prefixes.operand_size() { behavior = behavior .set_implicit_ops(RW_CX_IDX); @@ -3808,7 +3791,7 @@ fn behavior_table_size_is_right() { } /// this table MUST line up with Opcode declaration order in `mod.rs`. -static TABLE: [BehaviorDigest; 1425] = [ +static TABLE: [BehaviorDigest; 1428] = [ /* ADD => */ GENERAL_RW_R_FLAGWRITE, /* OR => */ GENERAL_RW_R_FLAGWRITE, /* ADC => */ GENERAL_RW_R_FLAGRW, @@ -5424,14 +5407,14 @@ static TABLE: [BehaviorDigest; 1425] = [ /* JCXZ => */ BehaviorDigest::empty() .set_pl_any() .set_operand(0, Access::Read) - .set_nontrivial(true), + .set_implicit_ops(RW_CX_IDX), /* PUSHA => */ BehaviorDigest::empty() .set_pl_any() - .set_nontrivial(true), + .set_implicit_ops(PUSHA_IDX), /* POPA => */ BehaviorDigest::empty() .set_pl_any() - .set_nontrivial(true), + .set_implicit_ops(POPA_IDX), /* BOUND => */ BehaviorDigest::empty() .set_pl_any() .set_operand(0, Access::Read) @@ -6170,4 +6153,15 @@ static TABLE: [BehaviorDigest; 1425] = [ .set_pl0() .set_flags_access(Access::Write) .set_complex(true), + + /* PUSHAD => */ BehaviorDigest::empty() + .set_pl_any() + .set_implicit_ops(PUSHAD_IDX), + /* POPAD => */ BehaviorDigest::empty() + .set_pl_any() + .set_implicit_ops(POPAD_IDX), + /* JECXZ => */ BehaviorDigest::empty() + .set_pl_any() + .set_operand(0, Access::Read) + .set_implicit_ops(RW_ECX_IDX), ]; diff --git a/src/real_mode/display.rs b/src/real_mode/display.rs index e686b0a..888b0b1 100644 --- a/src/real_mode/display.rs +++ b/src/real_mode/display.rs @@ -2094,6 +2094,10 @@ const MNEMONICS: &[&'static str] = &[ "pvalidate", "rmpadjust", "rmpupdate", + + "pushad", + "popad", + "jecxz", ]; impl Opcode { @@ -2572,6 +2576,10 @@ pub(crate) fn contextualize_c(instr: &Instruction, out: &mut T) out.write_str("jmp ")?; write_jmp_operand(instr.operand(0), out)?; }, + Opcode::JECXZ => { + out.write_str("if ecx == 0 then jmp ")?; + write_jmp_operand(instr.operand(0), out)?; + }, Opcode::JCXZ => { out.write_str("if cx == 0 then jmp ")?; write_jmp_operand(instr.operand(0), out)?; @@ -2766,8 +2774,9 @@ impl ShowContextual Opcode { @@ -4622,7 +4631,7 @@ const OPCODES: [OpcodeRecord; 256] = [ OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOPNZ), OperandCode::Ibs), OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOPZ), OperandCode::Ibs), OpcodeRecord::new(Interpretation::Instruction(Opcode::LOOP), OperandCode::Ibs), - OpcodeRecord::new(Interpretation::Instruction(Opcode::JCXZ), OperandCode::Ibs), + OpcodeRecord::new(Interpretation::Instruction(Opcode::JCXZ), OperandCode::CXZ), OpcodeRecord::new(Interpretation::Instruction(Opcode::IN), OperandCode::AL_Ib), OpcodeRecord::new(Interpretation::Instruction(Opcode::IN), OperandCode::AX_Ib), OpcodeRecord::new(Interpretation::Instruction(Opcode::OUT), OperandCode::Ib_AL), @@ -6510,9 +6519,19 @@ fn read_operands< } else if instruction.opcode == Opcode::XLAT { instruction.mem_size = 1; } else if instruction.opcode == Opcode::PUSHA { - instruction.mem_size = 2 * 8; + if !instruction.prefixes.operand_size() { + instruction.mem_size = 2 * 8; + } else { + instruction.opcode = Opcode::PUSHAD; + instruction.mem_size = 4 * 8; + } } else if instruction.opcode == Opcode::POPA { - instruction.mem_size = 2 * 8; + if !instruction.prefixes.operand_size() { + instruction.mem_size = 2 * 8; + } else { + instruction.opcode = Opcode::POPAD; + instruction.mem_size = 4 * 8; + } } instruction.operands[0] = OperandSpec::Nothing; instruction.operand_count = 0; @@ -9050,6 +9069,21 @@ fn read_operands< instruction.mem_size = 2; instruction.operand_count = 2; }, + OperandCase::CXZ => { + if instruction.prefixes.address_size() { + // address-size overridden from 16-bit to 32-bit + instruction.opcode = Opcode::JECXZ; + } + instruction.imm = + read_imm_signed(words, 1)? as u32; + sink.record( + words.offset() as u32 * 8 - 8, + words.offset() as u32 * 8 - 1, + InnerDescription::Number("1-byte immediate", instruction.imm as i64) + .with_id(words.offset() as u32 * 8), + ); + instruction.operands[0] = OperandSpec::ImmI8; + }, }; Ok(()) } diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index eaca39d..c7fb2be 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -1341,6 +1341,8 @@ fn test_control_flow() { test_display(&[0xe2, 0x12], "loop $+0x12"); test_display(&[0xe3, 0x12], "jrcxz $+0x12"); test_display(&[0xe3, 0xf0], "jrcxz $-0x10"); + test_display(&[0x67, 0xe3, 0x12], "jecxz $+0x12"); + test_display(&[0x67, 0xe3, 0xf0], "jecxz $-0x10"); test_display(&[0xc3], "ret"); } diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs index 50b4887..f6e69cf 100644 --- a/test/protected_mode/mod.rs +++ b/test/protected_mode/mod.rs @@ -1160,6 +1160,8 @@ fn test_control_flow() { test_display(&[0xe2, 0x12], "loop $+0x12"); test_display(&[0xe3, 0x12], "jecxz $+0x12"); test_display(&[0xe3, 0xf0], "jecxz $-0x10"); + test_display(&[0x67, 0xe3, 0x12], "jcxz $+0x12"); + test_display(&[0x67, 0xe3, 0xf0], "jcxz $-0x10"); test_display(&[0xc3], "ret"); } @@ -2528,8 +2530,10 @@ fn only_32bit() { test_display(&[0x67, 0xa1, 0xc0, 0xb0], "mov eax, dword [0xb0c0]"); test_display(&[0x66, 0x67, 0xa1, 0xc0, 0xb0], "mov ax, word [0xb0c0]"); - test_display(&[0x60], "pusha"); - test_display(&[0x61], "popa"); + test_display(&[0x60], "pushad"); + test_display(&[0x61], "popad"); + test_display(&[0x66, 0x60], "pusha"); + test_display(&[0x66, 0x61], "popa"); test_display(&[0xce], "into"); test_display(&[0x06], "push es"); test_display(&[0x07], "pop es"); diff --git a/test/real_mode/mod.rs b/test/real_mode/mod.rs index 67798eb..bce199b 100644 --- a/test/real_mode/mod.rs +++ b/test/real_mode/mod.rs @@ -440,6 +440,8 @@ fn test_real_mode() { test_display(&[0x5e], "pop si"); test_display(&[0x60], "pusha"); test_display(&[0x61], "popa"); + test_display(&[0x66, 0x60], "pushad"); + test_display(&[0x66, 0x61], "popad"); test_display(&[0x62, 0xf1, 0x7c, 0x08, 0x10, 0x0a], "vmovups xmm1, xmmword [bp + si * 1]"); test_display(&[0x62, 0xf1, 0x7c, 0x08, 0x10, 0x4a, 0x01], "vmovups xmm1, xmmword [bp + si * 1 + 0x10]"); test_display(&[0x62, 0xf1, 0x7c, 0x08, 0x10, 0xca], "vmovups xmm1, xmm2"); @@ -17974,6 +17976,8 @@ fn test_real_mode() { test_display(&[0xe2, 0x12], "loop $+0x12"); test_display(&[0xe3, 0x12], "jcxz $+0x12"); test_display(&[0xe3, 0xf0], "jcxz $-0x10"); + test_display(&[0x67, 0xe3, 0x12], "jecxz $+0x12"); + test_display(&[0x67, 0xe3, 0xf0], "jecxz $-0x10"); test_display(&[0xe4, 0x99], "in al, 0x99"); test_display(&[0xe5, 0x99], "in ax, 0x99"); test_display(&[0xe6, 0x99], "out 0x99, al"); -- cgit v1.1