diff options
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | CHANGELOG | 8 |
| -rw-r--r-- | src/long_mode/vex.rs | 59 |
| -rw-r--r-- | test/long_mode/mod.rs | 16 |
3 files changed, 62 insertions, 21 deletions
| @@ -10,6 +10,14 @@      become `jbe` and `jae` in 2.x.  * fix incorrect decode of a0/a1/a2/a3 mov register when rex.b is set    (rex.b would select register 8, but the register is unconditionally A) +* fix incorrect handling of some variants of gather instructions, +  vpgatherdq, vpgatherqq, vgatherdps, vgatherdpd, vgatherqps, vgatherqpd. +  errors were any of: +  * reporting qword loads when loads are dword-wide +  * reporting dword loads when loads are qword-wide +  * reporting ymm register sizes when sizes are actually xmm +  * reporting xmm register sizes when sizes are actually ymm +  * reporting index register as ymm when it is actually xmm  ## 1.1.5  * fix several typos across crate docs - thank you Bruce! (aka github user waywardmonkeys) diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index aaf6402..e83c735 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -676,7 +676,11 @@ fn read_vex_operands<                  if instruction.opcode == Opcode::VMOVLPD || instruction.opcode == Opcode::VMOVHPD || instruction.opcode == Opcode::VMOVHPS {                      instruction.mem_size = 8;                  } else { -                    instruction.mem_size = 16; +                    if L { +                        instruction.mem_size = 32; +                    } else { +                        instruction.mem_size = 16; +                    }                  }              }              instruction.operands[0] = mem_oper; @@ -1076,7 +1080,11 @@ fn read_vex_operands<                  if instruction.opcode == Opcode::VMOVLPD || instruction.opcode == Opcode::VMOVHPD || instruction.opcode == Opcode::VMOVHPS {                      instruction.mem_size = 8;                  } else { -                    instruction.mem_size = 16; +                    if L { +                        instruction.mem_size = 32; +                    } else { +                        instruction.mem_size = 16; +                    }                  }              } 
             instruction.operands[0] = mem_oper; @@ -1102,7 +1110,11 @@ fn read_vex_operands<              instruction.imm = read_imm_unsigned(words, 1)?;              instruction.operands[3] = OperandSpec::ImmU8;              if mem_oper != OperandSpec::RegMMM { -                instruction.mem_size = 16; +                if L { +                    instruction.mem_size = 32; +                } else { +                    instruction.mem_size = 16; +                }              }              instruction.operand_count = 4;              Ok(()) @@ -1123,12 +1135,18 @@ fn read_vex_operands<              instruction.operands[0] = OperandSpec::RegRRR;              instruction.operands[1] = mem_oper;              if mem_oper != OperandSpec::RegMMM { -                if [Opcode::VBROADCASTSS, Opcode::VUCOMISS, Opcode::VCOMISS].contains(&instruction.opcode)  { -                    instruction.mem_size = 4; -                } else if [Opcode::VMOVDDUP, Opcode::VUCOMISD, Opcode::VCOMISD, Opcode::VCVTPS2PD, Opcode::VMOVQ].contains(&instruction.opcode)  { -                    instruction.mem_size = 8; +                if L { +                    instruction.mem_size = 32;                  } else {                      instruction.mem_size = 16; +                } + +                if instruction.opcode == Opcode::VMOVDDUP && !L { +                    instruction.mem_size = 8; +                } else if [Opcode::VBROADCASTSS, Opcode::VUCOMISS, Opcode::VCOMISS].contains(&instruction.opcode)  { +                    instruction.mem_size = 4; +                } else if [Opcode::VUCOMISD, Opcode::VCOMISD, Opcode::VCVTPS2PD, Opcode::VMOVQ].contains(&instruction.opcode)  { +                    instruction.mem_size = 8;                  };              }              instruction.operand_count = 2; @@ -1154,7 +1172,11 @@ fn read_vex_operands<                  } else if [Opcode::VSQRTSD, Opcode::VADDSD, Opcode::VMULSD, Opcode::VSUBSD, Opcode::VMINSD, Opcode::VDIVSD, 
Opcode::VMAXSD].contains(&instruction.opcode) {                      instruction.mem_size = 8;                  } else { -                    instruction.mem_size = 16; +                    if L { +                        instruction.mem_size = 32; +                    } else { +                        instruction.mem_size = 16; +                    }                  }              }              instruction.operand_count = 3; @@ -1264,7 +1286,13 @@ fn read_vex_operands<              #[allow(non_snake_case)]              let L = instruction.prefixes.vex_unchecked().l(); -            let bank = if L { +            let bank = if L && instruction.opcode != Opcode::VGATHERQPS && instruction.opcode != Opcode::VPGATHERQD { +                RegisterBank::Y +            } else { +                RegisterBank::X +            }; + +            let index_bank = if L {                  RegisterBank::Y              } else {                  RegisterBank::X @@ -1274,12 +1302,17 @@ fn read_vex_operands<              instruction.regs[0] =                  RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex_unchecked().r(), bank);              let mem_oper = read_E(words, instruction, modrm, bank, sink)?; -            instruction.regs[2].bank = bank; +            if instruction.opcode == Opcode::VPGATHERDQ { +                instruction.regs[2].bank = RegisterBank::X; +            } else { +                instruction.regs[2].bank = index_bank; +            } +            instruction.regs[3].bank = bank;              instruction.operands[0] = OperandSpec::RegRRR;              instruction.operands[1] = mem_oper;              instruction.operands[2] = OperandSpec::RegVex;              if mem_oper != OperandSpec::RegMMM { -                if instruction.opcode == Opcode::VPGATHERDD { +                if instruction.opcode == Opcode::VPGATHERDD || instruction.opcode == Opcode::VPGATHERQD || instruction.opcode == Opcode::VGATHERDPS || instruction.opcode == Opcode::VGATHERQPS 
{                      instruction.mem_size = 4;                  } else {                      instruction.mem_size = 8; @@ -2529,8 +2562,8 @@ fn read_vex_instruction<                      }),                      0x08 => (Opcode::VROUNDPS, VEXOperandCode::G_E_xyLmm_imm8),                      0x09 => (Opcode::VROUNDPD, VEXOperandCode::G_E_xyLmm_imm8), -                    0x0A => (Opcode::VROUNDSS, VEXOperandCode::G_V_E_xyLmm_imm8), -                    0x0B => (Opcode::VROUNDSD, VEXOperandCode::G_V_E_xyLmm_imm8), +                    0x0A => (Opcode::VROUNDSS, VEXOperandCode::G_V_E_xmm_imm8), +                    0x0B => (Opcode::VROUNDSD, VEXOperandCode::G_V_E_xmm_imm8),                      0x0C => (Opcode::VBLENDPS, VEXOperandCode::G_V_E_xyLmm_imm8),                      0x0D => (Opcode::VBLENDPD, VEXOperandCode::G_V_E_xyLmm_imm8),                      0x0E => (Opcode::VPBLENDW, VEXOperandCode::G_V_E_xyLmm_imm8), diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index dd3d3d3..3c53fce 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -1836,22 +1836,22 @@ fn test_vex() {      test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdd xmm8, dword [r9 + xmm12 * 4], xmm0");      test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdd ymm8, dword [r9 + ymm12 * 4], ymm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdq xmm8, dword [r9 + xmm12 * 4], xmm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdq ymm8, qword [r9 + ymm12 * 4], ymm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdq xmm8, qword [r9 + xmm12 * 4], xmm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdq ymm8, qword [r9 + xmm12 * 4], ymm0");      test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqd xmm8, dword [r9 + xmm12 * 4], 
xmm0");      test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x91, 0b00_000_100, 0xa1], "vpgatherqd xmm8, dword [r9 + ymm12 * 4], xmm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqq xmm8, dword [r9 + xmm12 * 4], xmm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqq xmm8, qword [r9 + xmm12 * 4], xmm0");      test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x91, 0b00_000_100, 0xa1], "vpgatherqq ymm8, qword [r9 + ymm12 * 4], ymm0");      test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdps xmm8, dword [r9 + xmm12 * 4], xmm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdps ymm8, qword [r9 + ymm12 * 4], ymm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdpd xmm8, dword [r9 + xmm12 * 4], xmm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdps ymm8, dword [r9 + ymm12 * 4], ymm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdpd xmm8, qword [r9 + xmm12 * 4], xmm0");      test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdpd ymm8, qword [r9 + ymm12 * 4], ymm0");      test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqps xmm8, dword [r9 + xmm12 * 4], xmm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqps ymm8, qword [r9 + ymm12 * 4], ymm0"); -    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqpd xmm8, dword [r9 + xmm12 * 4], xmm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqps xmm8, dword [r9 + ymm12 * 4], xmm0"); +    test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqpd xmm8, qword [r9 + xmm12 * 4], xmm0");      test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x93, 0b00_000_100, 0xa1], 
"vgatherqpd ymm8, qword [r9 + ymm12 * 4], ymm0");      test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b0_1111_001, 0xdb, 0b11_001_010], "vaesimc xmm9, xmm10"); @@ -2492,7 +2492,7 @@ fn test_vex() {      test_instr(&[0xc4, 0x02, 0x79, 0x0f, 0xcd], "vtestpd xmm9, xmm13");      test_instr(&[0xc4, 0x02, 0x7d, 0x0f, 0xcd], "vtestpd ymm9, ymm13");      test_instr(&[0xc4, 0xe2, 0x65, 0x90, 0x04, 0x51], "vpgatherdd ymm0, dword [rcx + ymm2 * 2], ymm3"); -    test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, qword [rcx + ymm2 * 2], ymm3"); +    test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, qword [rcx + xmm2 * 2], ymm3");      test_instr(&[0xc4, 0xe2, 0x65, 0x91, 0x04, 0x51], "vpgatherqd xmm0, dword [rcx + ymm2 * 2], xmm3");      test_instr(&[0xc4, 0xe2, 0xe5, 0x91, 0x04, 0x51], "vpgatherqq ymm0, qword [rcx + ymm2 * 2], ymm3");      test_instr(&[0xc4, 0x02, 0x09, 0x9d, 0xcd], "vfnmadd132ss xmm9, xmm14, xmm13"); | 
