diff options
| author | iximeow <me@iximeow.net> | 2026-06-08 07:20:41 +0000 |
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2026-07-05 00:09:22 +0000 |
| commit | 15b8817b2eda4c2f58fc098591677f77c34c454d (patch) | |
| tree | a80541deccd81c4791a45a20bd62dedcac612a21 | |
| parent | 9b24ada2c3a7afa42448fff7ee441ad983530d88 (diff) | |
fix vpbroadcast* memory size and source register bank
| -rw-r--r-- | CHANGELOG | 6 | ||||
| -rw-r--r-- | src/long_mode/vex.rs | 38 | ||||
| -rw-r--r-- | src/protected_mode/vex.rs | 38 | ||||
| -rw-r--r-- | src/real_mode/vex.rs | 38 | ||||
| -rw-r--r-- | test/long_mode/mod.rs | 8 | ||||
| -rw-r--r-- | test/protected_mode/mod.rs | 8 | ||||
| -rw-r--r-- | test/real_mode/mod.rs | 8 |
7 files changed, 120 insertions, 24 deletions
@@ -11,6 +11,12 @@ nop dates back to initial versions of the library attempting to never return instructions that did not reflect a decoded x86 instruction. it has long passed its time. thank you for the patch, @Grond66! +* fix vpbroadcast* with a SIMD register source being able to claim ymm as a source. the source + register according to manuals and every assembler is xmm-size, if a register. semantically this + has little effect: the broadcasted value is the low lane of the source register in these cases. +* fix vpbroadcast* with a memory source reporting incorrect memory sizes. the memory address being + broadcast indicates the size, which is one byte/word/dword/qword. it is unrelated to the + broadcasted-to vector length. testing instruction round-tripping through `masm` found a few bugs, which are also fixed in this release: diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index 6ae12ad..b787598 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -76,6 +76,7 @@ enum VEXOperandCode { G_E_xyLmm, E_G_xyLmm, G_E_xyLmm_imm8, + G_Lmm_E_xmm, G_V_E_xyLmm_imm8, G_V_E_xmm, G_V_E_xmm_imm8, @@ -1200,6 +1201,35 @@ fn read_vex_operands< instruction.operand_count = 2; Ok(()) } + VEXOperandCode::G_Lmm_E_xmm => { + if instruction.regs[3].num != 0 { + return Err(DecodeError::InvalidOperand); + } + // the name of this bit is `L` in the documentation, so use the same name here. + #[allow(non_snake_case)] + let L = instruction.prefixes.vex_unchecked().l(); + let bank = if L { RegisterBank::Y } else { RegisterBank::X }; + + let modrm = read_modrm(words)?; + instruction.regs[0] = + RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex_unchecked().r(), bank); + let mem_oper = read_E(words, instruction, modrm, RegisterBank::X, sink)?; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = mem_oper; + if mem_oper != OperandSpec::RegMMM { + if instruction.opcode == Opcode::VPBROADCASTB { + instruction.mem_size = 1; + } else if instruction.opcode == Opcode::VPBROADCASTW { + instruction.mem_size = 2; + } else if instruction.opcode == Opcode::VPBROADCASTD { + instruction.mem_size = 4; + } else if instruction.opcode == Opcode::VPBROADCASTQ { + instruction.mem_size = 8; + }; + } + instruction.operand_count = 2; + Ok(()) + } VEXOperandCode::G_V_E_xyLmm => { let modrm = read_modrm(words)?; // the name of this bit is `L` in the documentation, so use the same name here. @@ -2301,12 +2331,12 @@ fn read_vex_instruction< 0x58 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTD, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTD, VEXOperandCode::G_Lmm_E_xmm) }, 0x59 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTQ, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTQ, VEXOperandCode::G_Lmm_E_xmm) }, 0x5A => (Opcode::VBROADCASTI128, if L { if instruction.prefixes.vex_unchecked().w() { @@ -2319,12 +2349,12 @@ fn read_vex_instruction< 0x78 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTB, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTB, VEXOperandCode::G_Lmm_E_xmm) }, 0x79 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTW, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTW, VEXOperandCode::G_Lmm_E_xmm) }, 0x8C => { if instruction.prefixes.vex_unchecked().w() { diff --git a/src/protected_mode/vex.rs b/src/protected_mode/vex.rs index 875f010..429ad23 100644 --- a/src/protected_mode/vex.rs +++ b/src/protected_mode/vex.rs @@ -74,6 +74,7 @@ enum VEXOperandCode { G_V_E_xyLmm, G_E_xyLmm, E_G_xyLmm, + G_Lmm_E_xmm, G_E_xyLmm_imm8, G_V_E_xyLmm_imm8, G_V_E_xmm, @@ -1127,6 +1128,35 @@ fn read_vex_operands< instruction.operand_count = 2; Ok(()) } + VEXOperandCode::G_Lmm_E_xmm => { + if instruction.regs[3].num != 0 { + return Err(DecodeError::InvalidOperand); + } + // the name of this bit is `L` in the documentation, so use the same name here. + #[allow(non_snake_case)] + let L = instruction.prefixes.vex_unchecked().l(); + let bank = if L { RegisterBank::Y } else { RegisterBank::X }; + + let modrm = read_modrm(words)?; + instruction.regs[0] = + RegSpec::from_parts((modrm >> 3) & 7, bank); + let mem_oper = read_E(words, instruction, modrm, RegisterBank::X, sink)?; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = mem_oper; + if mem_oper != OperandSpec::RegMMM { + if instruction.opcode == Opcode::VPBROADCASTB { + instruction.mem_size = 1; + } else if instruction.opcode == Opcode::VPBROADCASTW { + instruction.mem_size = 2; + } else if instruction.opcode == Opcode::VPBROADCASTD { + instruction.mem_size = 4; + } else if instruction.opcode == Opcode::VPBROADCASTQ { + instruction.mem_size = 8; + }; + } + instruction.operand_count = 2; + Ok(()) + } VEXOperandCode::G_V_E_xyLmm => { let modrm = read_modrm(words)?; // the name of this bit is `L` in the documentation, so use the same name here. @@ -2172,12 +2202,12 @@ fn read_vex_instruction< 0x58 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTD, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTD, VEXOperandCode::G_Lmm_E_xmm) }, 0x59 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTQ, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTQ, VEXOperandCode::G_Lmm_E_xmm) }, 0x5A => (Opcode::VBROADCASTI128, if L { if instruction.prefixes.vex_unchecked().w() { @@ -2190,12 +2220,12 @@ fn read_vex_instruction< 0x78 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTB, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTB, VEXOperandCode::G_Lmm_E_xmm) }, 0x79 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTW, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTW, VEXOperandCode::G_Lmm_E_xmm) }, 0x8C => { if instruction.prefixes.vex_unchecked().w() { diff --git a/src/real_mode/vex.rs b/src/real_mode/vex.rs index 1c1470a..3a7fbe3 100644 --- a/src/real_mode/vex.rs +++ b/src/real_mode/vex.rs @@ -74,6 +74,7 @@ enum VEXOperandCode { G_V_E_xyLmm, G_E_xyLmm, E_G_xyLmm, + G_Lmm_E_xmm, G_E_xyLmm_imm8, G_V_E_xyLmm_imm8, G_V_E_xmm, @@ -1127,6 +1128,35 @@ fn read_vex_operands< instruction.operand_count = 2; Ok(()) } + VEXOperandCode::G_Lmm_E_xmm => { + if instruction.regs[3].num != 0 { + return Err(DecodeError::InvalidOperand); + } + // the name of this bit is `L` in the documentation, so use the same name here. + #[allow(non_snake_case)] + let L = instruction.prefixes.vex_unchecked().l(); + let bank = if L { RegisterBank::Y } else { RegisterBank::X }; + + let modrm = read_modrm(words)?; + instruction.regs[0] = + RegSpec::from_parts((modrm >> 3) & 7, bank); + let mem_oper = read_E(words, instruction, modrm, RegisterBank::X, sink)?; + instruction.operands[0] = OperandSpec::RegRRR; + instruction.operands[1] = mem_oper; + if mem_oper != OperandSpec::RegMMM { + if instruction.opcode == Opcode::VPBROADCASTB { + instruction.mem_size = 1; + } else if instruction.opcode == Opcode::VPBROADCASTW { + instruction.mem_size = 2; + } else if instruction.opcode == Opcode::VPBROADCASTD { + instruction.mem_size = 4; + } else if instruction.opcode == Opcode::VPBROADCASTQ { + instruction.mem_size = 8; + }; + } + instruction.operand_count = 2; + Ok(()) + } VEXOperandCode::G_V_E_xyLmm => { let modrm = read_modrm(words)?; // the name of this bit is `L` in the documentation, so use the same name here. @@ -2172,12 +2202,12 @@ fn read_vex_instruction< 0x58 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTD, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTD, VEXOperandCode::G_Lmm_E_xmm) }, 0x59 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTQ, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTQ, VEXOperandCode::G_Lmm_E_xmm) }, 0x5A => (Opcode::VBROADCASTI128, if L { if instruction.prefixes.vex_unchecked().w() { @@ -2190,12 +2220,12 @@ fn read_vex_instruction< 0x78 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTB, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTB, VEXOperandCode::G_Lmm_E_xmm) }, 0x79 => if instruction.prefixes.vex_unchecked().w() { return Err(DecodeError::InvalidOpcode); } else { - (Opcode::VPBROADCASTW, VEXOperandCode::G_E_xyLmm) + (Opcode::VPBROADCASTW, VEXOperandCode::G_Lmm_E_xmm) }, 0x8C => { if instruction.prefixes.vex_unchecked().w() { diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 216463e..478a1fc 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -2884,17 +2884,17 @@ mod vex { testcase!(features { AVX2: true } &[0xc4, 0b000_00010, 0b1_1111_101, 0x47, 0b11_001_010], "vpsllvq ymm9, ymm0, ymm10"), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x58, 0b11_000_001], "vpbroadcastd xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x58, 0b11_000_001], "vpbroadcastd ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x58, 0b11_000_001], "vpbroadcastd ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x58, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x59, 0b11_000_001], "vpbroadcastq xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x59, 0b11_000_001], "vpbroadcastq ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x59, 0b11_000_001], "vpbroadcastq ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x59, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x78, 0b11_000_001], "vpbroadcastb xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x78, 0b11_000_001], "vpbroadcastb ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x78, 0b11_000_001], "vpbroadcastb ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x78, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x79, 0b11_000_001], "vpbroadcastw xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x79, 0b11_000_001], "vpbroadcastw ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x79, 0b11_000_001], "vpbroadcastw ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x79, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b000_00010, 0b0_1111_001, 0x8c, 0b00_001_010], "vpmaskmovd xmm9, xmm0, xmmword [r10]"), diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs index d19c432..b0e6772 100644 --- a/test/protected_mode/mod.rs +++ b/test/protected_mode/mod.rs @@ -2766,17 +2766,17 @@ mod vex { testcase!(features { AVX2: true } &[0xc4, 0b110_00010, 0b1_1111_101, 0x47, 0b11_001_010], "vpsllvq ymm1, ymm0, ymm2"), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x58, 0b11_000_001], "vpbroadcastd xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x58, 0b11_000_001], "vpbroadcastd ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x58, 0b11_000_001], "vpbroadcastd ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x58, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x59, 0b11_000_001], "vpbroadcastq xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x59, 0b11_000_001], "vpbroadcastq ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x59, 0b11_000_001], "vpbroadcastq ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x59, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x78, 0b11_000_001], "vpbroadcastb xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x78, 0b11_000_001], "vpbroadcastb ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x78, 0b11_000_001], "vpbroadcastb ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x78, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_001, 0x79, 0b11_000_001], "vpbroadcastw xmm0, xmm1"), - testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x79, 0b11_000_001], "vpbroadcastw ymm0, ymm1"), + testcase!(features { AVX2: true } &[0xc4, 0b111_00010, 0b0_1111_101, 0x79, 0b11_000_001], "vpbroadcastw ymm0, xmm1"), testcase!(invalid: &[0xc4, 0b111_00010, 0b1_1111_001, 0x79, 0b11_000_001]), testcase!(features { AVX2: true } &[0xc4, 0b110_00010, 0b0_1111_001, 0x8c, 0b00_001_010], "vpmaskmovd xmm1, xmm0, xmmword [edx]"), diff --git a/test/real_mode/mod.rs b/test/real_mode/mod.rs index 119cff3..e7be755 100644 --- a/test/real_mode/mod.rs +++ b/test/real_mode/mod.rs @@ -18145,10 +18145,10 @@ mod real_mode { testcase!(&[0xc4, 0xe2, 0x79, 0x59, 0xc1], "vpbroadcastq xmm0, xmm1"), testcase!(&[0xc4, 0xe2, 0x79, 0x78, 0xc1], "vpbroadcastb xmm0, xmm1"), testcase!(&[0xc4, 0xe2, 0x79, 0x79, 0xc1], "vpbroadcastw xmm0, xmm1"), - testcase!(&[0xc4, 0xe2, 0x7d, 0x58, 0xc1], "vpbroadcastd ymm0, ymm1"), - testcase!(&[0xc4, 0xe2, 0x7d, 0x59, 0xc1], "vpbroadcastq ymm0, ymm1"), - testcase!(&[0xc4, 0xe2, 0x7d, 0x78, 0xc1], "vpbroadcastb ymm0, ymm1"), - testcase!(&[0xc4, 0xe2, 0x7d, 0x79, 0xc1], "vpbroadcastw ymm0, ymm1"), + testcase!(&[0xc4, 0xe2, 0x7d, 0x58, 0xc1], "vpbroadcastd ymm0, xmm1"), + testcase!(&[0xc4, 0xe2, 0x7d, 0x59, 0xc1], "vpbroadcastq ymm0, xmm1"), + testcase!(&[0xc4, 0xe2, 0x7d, 0x78, 0xc1], "vpbroadcastb ymm0, xmm1"), + testcase!(&[0xc4, 0xe2, 0x7d, 0x79, 0xc1], "vpbroadcastw ymm0, xmm1"), testcase!(&[0xc4, 0xe2, 0xe0, 0xf2, 0x01], "andn eax, ebx, dword [bx + di * 1]"), testcase!(&[0xc4, 0xe2, 0xe0, 0xf5, 0x07], "bzhi eax, dword [bx], ebx"), testcase!(&[0xc4, 0xe2, 0xe0, 0xf7, 0x01], "bextr eax, dword [bx + di * 1], ebx"), |
