From b8649428e2b176d283800da5f1fcd3613e9e4abc Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 8 Jul 2023 11:04:55 -0700 Subject: fix v(p)gather situations, get vex tests passing again --- CHANGELOG | 8 +++++++ src/long_mode/vex.rs | 59 +++++++++++++++++++++++++++++++++++++++------------ test/long_mode/mod.rs | 16 +++++++------- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 6651281..52f761f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,14 @@ become `jbe` and `jae` in 2.x. * fix incorrect decode of a0/a1/a2/a3 mov register when rex.b is set (rex.b would select register 8, but the register is unconditionally A) +* fix incorrect handling of some variants of gather instructions, + vpgatherdq, vpgatherqq, vgatherdps, vgatherdpd, vgatherqps, vgatherqpd. + errors were any of: + * reporting qword loads when loads are dword-wide + * reporting dword loads when loads are qword-wide + * reporting ymm register sizes when sizes are actually xmm + * reporting xmm register sizes when sizes are actually ymm + * reporting index register as ymm when it is actually xmm ## 1.1.5 * fix several typos across crate docs - thank you Bruce! (aka github user waywardmonkeys) diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index aaf6402..e83c735 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -676,7 +676,11 @@ fn read_vex_operands< if instruction.opcode == Opcode::VMOVLPD || instruction.opcode == Opcode::VMOVHPD || instruction.opcode == Opcode::VMOVHPS { instruction.mem_size = 8; } else { - instruction.mem_size = 16; + if L { + instruction.mem_size = 32; + } else { + instruction.mem_size = 16; + } } } instruction.operands[0] = mem_oper; @@ -1076,7 +1080,11 @@ fn read_vex_operands< if instruction.opcode == Opcode::VMOVLPD || instruction.opcode == Opcode::VMOVHPD || instruction.opcode == Opcode::VMOVHPS { instruction.mem_size = 8; } else { - instruction.mem_size = 16; + if L { + instruction.mem_size = 32; + } else { + instruction.mem_size = 16; + } } } instruction.operands[0] = mem_oper; @@ -1102,7 +1110,11 @@ fn read_vex_operands< instruction.imm = read_imm_unsigned(words, 1)?; instruction.operands[3] = OperandSpec::ImmU8; if mem_oper != OperandSpec::RegMMM { - instruction.mem_size = 16; + if L { + instruction.mem_size = 32; + } else { + instruction.mem_size = 16; + } } instruction.operand_count = 4; Ok(()) @@ -1123,12 +1135,18 @@ fn read_vex_operands< instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = mem_oper; if mem_oper != OperandSpec::RegMMM { - if [Opcode::VBROADCASTSS, Opcode::VUCOMISS, Opcode::VCOMISS].contains(&instruction.opcode) { - instruction.mem_size = 4; - } else if [Opcode::VMOVDDUP, Opcode::VUCOMISD, Opcode::VCOMISD, Opcode::VCVTPS2PD, Opcode::VMOVQ].contains(&instruction.opcode) { - instruction.mem_size = 8; + if L { + instruction.mem_size = 32; } else { instruction.mem_size = 16; + } + + if instruction.opcode == Opcode::VMOVDDUP && !L { + instruction.mem_size = 8; + } else if [Opcode::VBROADCASTSS, Opcode::VUCOMISS, Opcode::VCOMISS].contains(&instruction.opcode) { + instruction.mem_size = 4; + } else if [Opcode::VUCOMISD, Opcode::VCOMISD, Opcode::VCVTPS2PD, Opcode::VMOVQ].contains(&instruction.opcode) { + instruction.mem_size = 8; }; } instruction.operand_count = 2; @@ -1154,7 +1172,11 @@ fn read_vex_operands< } else if [Opcode::VSQRTSD, Opcode::VADDSD, Opcode::VMULSD, Opcode::VSUBSD, Opcode::VMINSD, Opcode::VDIVSD, Opcode::VMAXSD].contains(&instruction.opcode) { instruction.mem_size = 8; } else { - instruction.mem_size = 16; + if L { + instruction.mem_size = 32; + } else { + instruction.mem_size = 16; + } } } instruction.operand_count = 3; @@ -1264,7 +1286,13 @@ fn read_vex_operands< #[allow(non_snake_case)] let L = instruction.prefixes.vex_unchecked().l(); - let bank = if L { + let bank = if L && instruction.opcode != Opcode::VGATHERQPS && instruction.opcode != Opcode::VPGATHERQD { + RegisterBank::Y + } else { + RegisterBank::X + }; + + let index_bank = if L { RegisterBank::Y } else { RegisterBank::X @@ -1274,12 +1302,17 @@ fn read_vex_operands< instruction.regs[0] = RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex_unchecked().r(), bank); let mem_oper = read_E(words, instruction, modrm, bank, sink)?; - instruction.regs[2].bank = bank; + if instruction.opcode == Opcode::VPGATHERDQ { + instruction.regs[2].bank = RegisterBank::X; + } else { + instruction.regs[2].bank = index_bank; + } + instruction.regs[3].bank = bank; instruction.operands[0] = OperandSpec::RegRRR; instruction.operands[1] = mem_oper; instruction.operands[2] = OperandSpec::RegVex; if mem_oper != OperandSpec::RegMMM { - if instruction.opcode == Opcode::VPGATHERDD { + if instruction.opcode == Opcode::VPGATHERDD || instruction.opcode == Opcode::VPGATHERQD || instruction.opcode == Opcode::VGATHERDPS || instruction.opcode == Opcode::VGATHERQPS { instruction.mem_size = 4; } else { instruction.mem_size = 8; @@ -2529,8 +2562,8 @@ fn read_vex_instruction< }), 0x08 => (Opcode::VROUNDPS, VEXOperandCode::G_E_xyLmm_imm8), 0x09 => (Opcode::VROUNDPD, VEXOperandCode::G_E_xyLmm_imm8), - 0x0A => (Opcode::VROUNDSS, VEXOperandCode::G_V_E_xyLmm_imm8), - 0x0B => (Opcode::VROUNDSD, VEXOperandCode::G_V_E_xyLmm_imm8), + 0x0A => (Opcode::VROUNDSS, VEXOperandCode::G_V_E_xmm_imm8), + 0x0B => (Opcode::VROUNDSD, VEXOperandCode::G_V_E_xmm_imm8), 0x0C => (Opcode::VBLENDPS, VEXOperandCode::G_V_E_xyLmm_imm8), 0x0D => (Opcode::VBLENDPD, VEXOperandCode::G_V_E_xyLmm_imm8), 0x0E => (Opcode::VPBLENDW, VEXOperandCode::G_V_E_xyLmm_imm8), diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index dd3d3d3..3c53fce 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -1836,22 +1836,22 @@ fn test_vex() { test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdd xmm8, dword [r9 + xmm12 * 4], xmm0"); test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdd ymm8, dword [r9 + ymm12 * 4], ymm0"); - test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdq xmm8, dword [r9 + xmm12 * 4], xmm0"); - test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdq ymm8, qword [r9 + ymm12 * 4], ymm0"); + test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdq xmm8, qword [r9 + xmm12 * 4], xmm0"); + test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdq ymm8, qword [r9 + xmm12 * 4], ymm0"); test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqd xmm8, dword [r9 + xmm12 * 4], xmm0"); test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x91, 0b00_000_100, 0xa1], "vpgatherqd xmm8, dword [r9 + ymm12 * 4], xmm0"); - test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqq xmm8, dword [r9 + xmm12 * 4], xmm0"); + test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqq xmm8, qword [r9 + xmm12 * 4], xmm0"); test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x91, 0b00_000_100, 0xa1], "vpgatherqq ymm8, qword [r9 + ymm12 * 4], ymm0"); test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdps xmm8, dword [r9 + xmm12 * 4], xmm0"); - test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdps ymm8, qword [r9 + ymm12 * 4], ymm0"); - test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdpd xmm8, dword [r9 + xmm12 * 4], xmm0"); + test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdps ymm8, dword [r9 + ymm12 * 4], ymm0"); + test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdpd xmm8, qword [r9 + xmm12 * 4], xmm0"); test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdpd ymm8, qword [r9 + ymm12 * 4], ymm0"); test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqps xmm8, dword [r9 + xmm12 * 4], xmm0"); - test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqps ymm8, qword [r9 + ymm12 * 4], ymm0"); - test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqpd xmm8, dword [r9 + xmm12 * 4], xmm0"); + test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqps xmm8, dword [r9 + ymm12 * 4], xmm0"); + test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqpd xmm8, qword [r9 + xmm12 * 4], xmm0"); test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqpd ymm8, qword [r9 + ymm12 * 4], ymm0"); test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b0_1111_001, 0xdb, 0b11_001_010], "vaesimc xmm9, xmm10"); @@ -2492,7 +2492,7 @@ fn test_vex() { test_instr(&[0xc4, 0x02, 0x79, 0x0f, 0xcd], "vtestpd xmm9, xmm13"); test_instr(&[0xc4, 0x02, 0x7d, 0x0f, 0xcd], "vtestpd ymm9, ymm13"); test_instr(&[0xc4, 0xe2, 0x65, 0x90, 0x04, 0x51], "vpgatherdd ymm0, dword [rcx + ymm2 * 2], ymm3"); - test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, qword [rcx + ymm2 * 2], ymm3"); + test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, qword [rcx + xmm2 * 2], ymm3"); test_instr(&[0xc4, 0xe2, 0x65, 0x91, 0x04, 0x51], "vpgatherqd xmm0, dword [rcx + ymm2 * 2], xmm3"); test_instr(&[0xc4, 0xe2, 0xe5, 0x91, 0x04, 0x51], "vpgatherqq ymm0, qword [rcx + ymm2 * 2], ymm3"); test_instr(&[0xc4, 0x02, 0x09, 0x9d, 0xcd], "vfnmadd132ss xmm9, xmm14, xmm13"); -- cgit v1.1