From 6eb2af6a3aba7bfad21775319c3a200c5c723918 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 3 May 2026 17:48:14 +0000 Subject: actually support avx/f16c in per-uarch decoding --- src/long_mode/uarch.rs | 2 ++ src/long_mode/vex.rs | 12 ++++++++---- src/protected_mode/uarch.rs | 2 ++ src/real_mode/uarch.rs | 2 ++ test/long_mode/mod.rs | 9 ++++++++- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs index 63fa972..9cfc9de 100644 --- a/src/long_mode/uarch.rs +++ b/src/long_mode/uarch.rs @@ -145,6 +145,7 @@ pub mod amd { .with_rdrand() .with_rdseed() .with_fma3() + .with_f16c() .with_xsavec() .with_xsaves() @@ -272,6 +273,7 @@ pub mod intel { .with_abm() .with_fma3() .with_avx2() + .with_f16c() } /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores diff --git a/src/long_mode/vex.rs b/src/long_mode/vex.rs index 1d94b3c..a847fc0 100644 --- a/src/long_mode/vex.rs +++ b/src/long_mode/vex.rs @@ -2755,11 +2755,15 @@ fn read_vex_instruction< return Err(DecodeError::InvalidOpcode); }) }, - 0x1D => (Opcode::VCVTPS2PH, if L { - VEXOperandCode::E_xmm_G_ymm_imm8 + 0x1D => if instruction.prefixes.vex_unchecked().w() { + return Err(DecodeError::InvalidOpcode); } else { - VEXOperandCode::E_G_xmm_imm8 - }), + (Opcode::VCVTPS2PH, if L { + VEXOperandCode::E_xmm_G_ymm_imm8 + } else { + VEXOperandCode::E_G_xmm_imm8 + }) + }, 0x20 => (Opcode::VPINSRB, if L { return Err(DecodeError::InvalidOpcode); } else { diff --git a/src/protected_mode/uarch.rs b/src/protected_mode/uarch.rs index 6914348..7221747 100644 --- a/src/protected_mode/uarch.rs +++ b/src/protected_mode/uarch.rs @@ -145,6 +145,7 @@ pub mod amd { .with_rdrand() .with_rdseed() .with_fma3() + .with_f16c() .with_xsavec() .with_xsaves() @@ -271,6 +272,7 @@ pub mod intel { .with_abm() .with_fma3() .with_avx2() + .with_f16c() } /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores diff --git a/src/real_mode/uarch.rs b/src/real_mode/uarch.rs index 8df4213..32dd04a 100644 --- a/src/real_mode/uarch.rs +++ b/src/real_mode/uarch.rs @@ -145,6 +145,7 @@ pub mod amd { .with_rdrand() .with_rdseed() .with_fma3() + .with_f16c() .with_xsavec() .with_xsaves() @@ -271,6 +272,7 @@ pub mod intel { .with_abm() .with_fma3() .with_avx2() + .with_f16c() } /// `Haswell-EX` was a variant of `Haswell` launched in 2015 with functional TSX. these cores diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs index 1524868..30ca3cd 100644 --- a/test/long_mode/mod.rs +++ b/test/long_mode/mod.rs @@ -1536,6 +1536,7 @@ fn test_misc() { test_invalid(&[0x43, 0x0f, 0x38, 0x82, 0x2f]); test_display(&[0x66, 0x0f, 0xae, 0xf1], "tpause ecx"); test_display(&[0x66, 0x4f, 0x0f, 0xae, 0xf1], "tpause r9"); + test_display(&[0xc4, 0b000_00011, 0b0_1111_101, 0x1d, 0b11_001_010, 0x77], "vcvtps2ph xmm10, ymm9, 0x77"); } #[test] @@ -1591,6 +1592,8 @@ fn test_vex() { fn test_instr_vex_f16c(bytes: &[u8], text: &'static str) { test_display_under(&InstDecoder::minimal().with_avx().with_f16c(), bytes, text); test_display_under(&InstDecoder::default(), bytes, text); + test_invalid_under(&InstDecoder::minimal().with_avx(), bytes); + test_invalid_under(&InstDecoder::minimal().with_f16c(), bytes); test_invalid_under(&InstDecoder::minimal(), bytes); } @@ -1666,7 +1669,7 @@ fn test_vex() { test_invalid(&[0xc4, 0b000_00011, 0b1_0111_001, 0x18, 0b11_001_010, 0x77]); test_instr(&[0xc4, 0b000_00011, 0b0_0111_101, 0x18, 0b11_001_010, 0x77], "vinsertf128 ymm9, ymm8, xmm10, 0x77"); - test_invalid(&[0xc4, 0b000_00011, 0b1_0111_101, 0x18, 0b11_001_010, 0x77]); + test_invalid(&[0xc4, 0b000_00011, 0b1_0111_101, 0x19, 0b11_001_010, 0x77]); test_instr(&[0xc4, 0b000_00011, 0b0_1111_101, 0x19, 0b11_001_010, 0x77], "vextractf128 xmm10, ymm9, 0x77"); test_invalid(&[0xc4, 0b000_00011, 0b0_1111_001, 0x19, 0b11_001_010, 0x77]); test_invalid(&[0xc4, 0b000_00011, 0b1_1111_101, 0x19, 0b11_001_010, 0x77]); @@ -1678,6 +1681,10 @@ fn test_vex() { test_invalid(&[0xc4, 0b000_00011, 0b0_1111_001, 0x19, 0b11_001_010, 0x77]); test_invalid(&[0xc4, 0b000_00011, 0b1_1111_101, 0x19, 0b11_001_010, 0x77]); + test_instr_vex_f16c(&[0xc4, 0b000_00011, 0b0_1111_101, 0x1d, 0b11_001_010, 0x77], "vcvtps2ph xmm10, ymm9, 0x77"); + test_instr_vex_f16c(&[0xc4, 0b000_00011, 0b0_1111_101, 0x1d, 0b11_001_010, 0x77], "vcvtps2ph xmm10, ymm9, 0x77"); + test_invalid(&[0xc4, 0b000_00011, 0b1_1111_101, 0x1d, 0b11_001_010, 0x77]); + test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x20, 0b11_001_010, 0x77], "vpinsrb xmm9, xmm8, r10d, 0x77"); test_instr(&[0xc4, 0b000_00011, 0b0_0111_001, 0x20, 0b00_001_010, 0x77], "vpinsrb xmm9, xmm8, byte [r10], 0x77"); test_invalid(&[0xc4, 0b000_00011, 0b0_0111_101, 0x20, 0b00_001_010, 0x77]); -- cgit v1.1