aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2023-07-08 11:04:55 -0700
committeriximeow <me@iximeow.net>2023-07-08 11:04:55 -0700
commitb8649428e2b176d283800da5f1fcd3613e9e4abc (patch)
treea44447d8e081f3a2894805af6f7468a940768be5
parentf7cd4b8279e1f38237cc43c383a19cc4a99aed6f (diff)
fix v(p)gather situations, get vex tests passing again
-rw-r--r--CHANGELOG8
-rw-r--r--src/long_mode/vex.rs59
-rw-r--r--test/long_mode/mod.rs16
3 files changed, 62 insertions, 21 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 6651281..52f761f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,14 @@
become `jbe` and `jae` in 2.x.
* fix incorrect decode of a0/a1/a2/a3 mov register when rex.b is set
(rex.b would select register 8, but the register is unconditionally A)
+* fix incorrect handling of some variants of gather instructions,
+ vpgatherdq, vpgatherqq, vgatherdps, vgatherdpd, vgatherqps, vgatherqpd.
+ errors were any of:
+ * reporting qword loads when loads are dword-wide
+ * reporting dword loads when loads are qword-wide
+ * reporting ymm register sizes when sizes are actually xmm
+ * reporting xmm register sizes when sizes are actually ymm
+ * reporting index register as ymm when it is actually xmm
## 1.1.5
* fix several typos across crate docs - thank you Bruce! (aka github user waywardmonkeys)
diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs
index aaf6402..e83c735 100644
--- a/src/long_mode/vex.rs
+++ b/src/long_mode/vex.rs
@@ -676,7 +676,11 @@ fn read_vex_operands<
if instruction.opcode == Opcode::VMOVLPD || instruction.opcode == Opcode::VMOVHPD || instruction.opcode == Opcode::VMOVHPS {
instruction.mem_size = 8;
} else {
- instruction.mem_size = 16;
+ if L {
+ instruction.mem_size = 32;
+ } else {
+ instruction.mem_size = 16;
+ }
}
}
instruction.operands[0] = mem_oper;
@@ -1076,7 +1080,11 @@ fn read_vex_operands<
if instruction.opcode == Opcode::VMOVLPD || instruction.opcode == Opcode::VMOVHPD || instruction.opcode == Opcode::VMOVHPS {
instruction.mem_size = 8;
} else {
- instruction.mem_size = 16;
+ if L {
+ instruction.mem_size = 32;
+ } else {
+ instruction.mem_size = 16;
+ }
}
}
instruction.operands[0] = mem_oper;
@@ -1102,7 +1110,11 @@ fn read_vex_operands<
instruction.imm = read_imm_unsigned(words, 1)?;
instruction.operands[3] = OperandSpec::ImmU8;
if mem_oper != OperandSpec::RegMMM {
- instruction.mem_size = 16;
+ if L {
+ instruction.mem_size = 32;
+ } else {
+ instruction.mem_size = 16;
+ }
}
instruction.operand_count = 4;
Ok(())
@@ -1123,12 +1135,18 @@ fn read_vex_operands<
instruction.operands[0] = OperandSpec::RegRRR;
instruction.operands[1] = mem_oper;
if mem_oper != OperandSpec::RegMMM {
- if [Opcode::VBROADCASTSS, Opcode::VUCOMISS, Opcode::VCOMISS].contains(&instruction.opcode) {
- instruction.mem_size = 4;
- } else if [Opcode::VMOVDDUP, Opcode::VUCOMISD, Opcode::VCOMISD, Opcode::VCVTPS2PD, Opcode::VMOVQ].contains(&instruction.opcode) {
- instruction.mem_size = 8;
+ if L {
+ instruction.mem_size = 32;
} else {
instruction.mem_size = 16;
+ }
+
+ if instruction.opcode == Opcode::VMOVDDUP && !L {
+ instruction.mem_size = 8;
+ } else if [Opcode::VBROADCASTSS, Opcode::VUCOMISS, Opcode::VCOMISS].contains(&instruction.opcode) {
+ instruction.mem_size = 4;
+ } else if [Opcode::VUCOMISD, Opcode::VCOMISD, Opcode::VCVTPS2PD, Opcode::VMOVQ].contains(&instruction.opcode) {
+ instruction.mem_size = 8;
};
}
instruction.operand_count = 2;
@@ -1154,7 +1172,11 @@ fn read_vex_operands<
} else if [Opcode::VSQRTSD, Opcode::VADDSD, Opcode::VMULSD, Opcode::VSUBSD, Opcode::VMINSD, Opcode::VDIVSD, Opcode::VMAXSD].contains(&instruction.opcode) {
instruction.mem_size = 8;
} else {
- instruction.mem_size = 16;
+ if L {
+ instruction.mem_size = 32;
+ } else {
+ instruction.mem_size = 16;
+ }
}
}
instruction.operand_count = 3;
@@ -1264,7 +1286,13 @@ fn read_vex_operands<
#[allow(non_snake_case)]
let L = instruction.prefixes.vex_unchecked().l();
- let bank = if L {
+ let bank = if L && instruction.opcode != Opcode::VGATHERQPS && instruction.opcode != Opcode::VPGATHERQD {
+ RegisterBank::Y
+ } else {
+ RegisterBank::X
+ };
+
+ let index_bank = if L {
RegisterBank::Y
} else {
RegisterBank::X
@@ -1274,12 +1302,17 @@ fn read_vex_operands<
instruction.regs[0] =
RegSpec::from_parts((modrm >> 3) & 7, instruction.prefixes.vex_unchecked().r(), bank);
let mem_oper = read_E(words, instruction, modrm, bank, sink)?;
- instruction.regs[2].bank = bank;
+ if instruction.opcode == Opcode::VPGATHERDQ {
+ instruction.regs[2].bank = RegisterBank::X;
+ } else {
+ instruction.regs[2].bank = index_bank;
+ }
+ instruction.regs[3].bank = bank;
instruction.operands[0] = OperandSpec::RegRRR;
instruction.operands[1] = mem_oper;
instruction.operands[2] = OperandSpec::RegVex;
if mem_oper != OperandSpec::RegMMM {
- if instruction.opcode == Opcode::VPGATHERDD {
+ if instruction.opcode == Opcode::VPGATHERDD || instruction.opcode == Opcode::VPGATHERQD || instruction.opcode == Opcode::VGATHERDPS || instruction.opcode == Opcode::VGATHERQPS {
instruction.mem_size = 4;
} else {
instruction.mem_size = 8;
@@ -2529,8 +2562,8 @@ fn read_vex_instruction<
}),
0x08 => (Opcode::VROUNDPS, VEXOperandCode::G_E_xyLmm_imm8),
0x09 => (Opcode::VROUNDPD, VEXOperandCode::G_E_xyLmm_imm8),
- 0x0A => (Opcode::VROUNDSS, VEXOperandCode::G_V_E_xyLmm_imm8),
- 0x0B => (Opcode::VROUNDSD, VEXOperandCode::G_V_E_xyLmm_imm8),
+ 0x0A => (Opcode::VROUNDSS, VEXOperandCode::G_V_E_xmm_imm8),
+ 0x0B => (Opcode::VROUNDSD, VEXOperandCode::G_V_E_xmm_imm8),
0x0C => (Opcode::VBLENDPS, VEXOperandCode::G_V_E_xyLmm_imm8),
0x0D => (Opcode::VBLENDPD, VEXOperandCode::G_V_E_xyLmm_imm8),
0x0E => (Opcode::VPBLENDW, VEXOperandCode::G_V_E_xyLmm_imm8),
diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs
index dd3d3d3..3c53fce 100644
--- a/test/long_mode/mod.rs
+++ b/test/long_mode/mod.rs
@@ -1836,22 +1836,22 @@ fn test_vex() {
test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdd xmm8, dword [r9 + xmm12 * 4], xmm0");
test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdd ymm8, dword [r9 + ymm12 * 4], ymm0");
- test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdq xmm8, dword [r9 + xmm12 * 4], xmm0");
- test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdq ymm8, qword [r9 + ymm12 * 4], ymm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x90, 0b00_000_100, 0xa1], "vpgatherdq xmm8, qword [r9 + xmm12 * 4], xmm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x90, 0b00_000_100, 0xa1], "vpgatherdq ymm8, qword [r9 + xmm12 * 4], ymm0");
test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqd xmm8, dword [r9 + xmm12 * 4], xmm0");
test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x91, 0b00_000_100, 0xa1], "vpgatherqd xmm8, dword [r9 + ymm12 * 4], xmm0");
- test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqq xmm8, dword [r9 + xmm12 * 4], xmm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x91, 0b00_000_100, 0xa1], "vpgatherqq xmm8, qword [r9 + xmm12 * 4], xmm0");
test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x91, 0b00_000_100, 0xa1], "vpgatherqq ymm8, qword [r9 + ymm12 * 4], ymm0");
test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdps xmm8, dword [r9 + xmm12 * 4], xmm0");
- test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdps ymm8, qword [r9 + ymm12 * 4], ymm0");
- test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdpd xmm8, dword [r9 + xmm12 * 4], xmm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdps ymm8, dword [r9 + ymm12 * 4], ymm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x92, 0b00_000_100, 0xa1], "vgatherdpd xmm8, qword [r9 + xmm12 * 4], xmm0");
test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x92, 0b00_000_100, 0xa1], "vgatherdpd ymm8, qword [r9 + ymm12 * 4], ymm0");
test_avx2(&[0xc4, 0b000_00010, 0b0_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqps xmm8, dword [r9 + xmm12 * 4], xmm0");
- test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqps ymm8, qword [r9 + ymm12 * 4], ymm0");
- test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqpd xmm8, dword [r9 + xmm12 * 4], xmm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b0_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqps xmm8, dword [r9 + ymm12 * 4], xmm0");
+ test_avx2(&[0xc4, 0b000_00010, 0b1_1111_001, 0x93, 0b00_000_100, 0xa1], "vgatherqpd xmm8, qword [r9 + xmm12 * 4], xmm0");
test_avx2(&[0xc4, 0b000_00010, 0b1_1111_101, 0x93, 0b00_000_100, 0xa1], "vgatherqpd ymm8, qword [r9 + ymm12 * 4], ymm0");
test_instr_vex_aesni(&[0xc4, 0b000_00010, 0b0_1111_001, 0xdb, 0b11_001_010], "vaesimc xmm9, xmm10");
@@ -2492,7 +2492,7 @@ fn test_vex() {
test_instr(&[0xc4, 0x02, 0x79, 0x0f, 0xcd], "vtestpd xmm9, xmm13");
test_instr(&[0xc4, 0x02, 0x7d, 0x0f, 0xcd], "vtestpd ymm9, ymm13");
test_instr(&[0xc4, 0xe2, 0x65, 0x90, 0x04, 0x51], "vpgatherdd ymm0, dword [rcx + ymm2 * 2], ymm3");
- test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, qword [rcx + ymm2 * 2], ymm3");
+ test_instr(&[0xc4, 0xe2, 0xe5, 0x90, 0x04, 0x51], "vpgatherdq ymm0, qword [rcx + xmm2 * 2], ymm3");
test_instr(&[0xc4, 0xe2, 0x65, 0x91, 0x04, 0x51], "vpgatherqd xmm0, dword [rcx + ymm2 * 2], xmm3");
test_instr(&[0xc4, 0xe2, 0xe5, 0x91, 0x04, 0x51], "vpgatherqq ymm0, qword [rcx + ymm2 * 2], ymm3");
test_instr(&[0xc4, 0x02, 0x09, 0x9d, 0xcd], "vfnmadd132ss xmm9, xmm14, xmm13");