diff options
| author | iximeow <me@iximeow.net> | 2024-08-16 21:21:16 -0700 |
|---|---|---|
| committer | iximeow <me@iximeow.net> | 2025-06-01 09:21:37 +0000 |
| commit | 08eed360fea81ab9328fd0859b813ee01937b5b1 (patch) | |
| tree | c379847e969d479c13c29f46864465cf2969d9cb /src/long_mode/uarch.rs | |
| parent | 681262f4472ba4f452446e86012ce629b849d8d9 (diff) | |
expand isa feature selection to more bits
this is backed by the new IsaSettings trait. the existing InstDecoders
are unchanged, except that they implement this new trait.
also add new `DecodeEverything` structs with `IsaSettings` impls that
are unconditionally set to permit anything the decoder can be configured
to conditionally accept or reject.
in the process, add new `_3dnow` flag and stop accepting 3dnow
instructions in uarch-specific decoder settings that would not have
3dnow instructions.
update AMD microarchitectures and cross-ref chip directory
Diffstat (limited to 'src/long_mode/uarch.rs')
| -rw-r--r-- | src/long_mode/uarch.rs | 100 |
1 file changed, 87 insertions, 13 deletions
diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs index bfd4887..5af0175 100644 --- a/src/long_mode/uarch.rs +++ b/src/long_mode/uarch.rs @@ -1,12 +1,24 @@ +//! information for AMD and Intel microarchitectures in the modules below is sourced from a +//! combination of Wikipedia (especially for dates), one-off research for particular +//! microarchitectures, and `InstLatx64`'s CPUID dumps via [chip directory](https://github.com/iximeow/chip_directory). +//! +//! these microarchitecture-specific decoders are relatively rarely used, but generally should be +//! accurate. + pub mod amd { - //! most information about instruction set extensions for microarchitectures here was sourced - //! from - //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html) + //! initial information for the microarchitecture (families) described here came from a + //! combination of the Wikipedia pages + //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview) //! and - //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html). - //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards - //! these decoders rejecting instructions they should not, or incorrectly accepting - //! instructions. + //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features). + //! it has since been "augmented" by the CPUID dumps from InstLatx64, via [chip + //! directory](https://github.com/iximeow/chip_directory/tree/no-gods-no-/x86). scare quotes + //! because in several cases CPUID measurement error adds, rather than removes, ambiguity. + //! 
additionally, for some CPU features, InstLatx64 has CPUID dumps of early engineering + //! samples where features are not present. later production steppings of those parts do + //! universally have the corresponding feature, which makes it less obvious which features are + //! universally present in a family, standardized in a following architecture, unevenly present + //! due to market segmentation, and so on. //! //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension @@ -35,9 +47,6 @@ pub mod amd { .with_abm() .with_lahfsahf() .with_sse3() - .with_ssse3() - .with_sse4() - .with_sse4_2() .with_sse4a() } @@ -45,6 +54,9 @@ pub mod amd { /// support among other extensions, and are notable for including `AESNI`. pub fn bulldozer() -> InstDecoder { k10() + .with_ssse3() + .with_sse4() + .with_sse4_2() .with_bmi1() .with_aesni() .with_pclmulqdq() @@ -52,6 +64,8 @@ pub mod amd { .with_avx() .with_fma4() .with_xop() + .with_xsave() + .with_skinit() } /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. @@ -86,21 +100,81 @@ pub mod amd { /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, /// SHA, RDSEED, and other extensions. pub fn zen() -> InstDecoder { + // no nice way to *un*set feature bits, but several extensions were dropped. + // so, start again from K10. k10() + // first, bundle all the K10->Bulldozer features.. + .with_ssse3() + .with_sse4() + .with_sse4_2() + .with_bmi1() + .with_aesni() + .with_pclmulqdq() + .with_f16c() .with_avx() + .with_xsave() + .with_skinit() + // now all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features .with_avx2() - .with_bmi1() .with_aesni() .with_pclmulqdq() .with_f16c() .with_movbe() .with_bmi2() - .with_rdrand() .with_adx() .with_sha() + .with_rdrand() .with_rdseed() .with_fma3() - // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? 
+ + .with_xsavec() + .with_xsaves() + .with_xsaveopt() + .with_clflushopt() + .with_clwb() + .with_fsgsbase() + .with_monitorx() + } + + /// `Zen 2`, launched in 2019, succeeded `Zen`/`Zen+`. there aren't many instruction set + /// extensions here, but `clwb`, `rdpid`, and `wbnoinvd` show up here. + pub fn zen2() -> InstDecoder { + zen() + .with_clwb() + .with_rdpid() + .with_wbnoinvd() + } + + /// `Zen 3`, launched in 2020, succeeded `Zen 2`. like `Zen 2`, there aren't many instruction + /// set extensions here. + pub fn zen3() -> InstDecoder { + zen2() + .with_invpcid() + .with_vaes() + .with_vpclmulqdq() + } + + /// `Zen 4`, launched in 2022, succeeded `Zen 3`. `Zen 4` is notable for being the first AMD + /// processor family supporting AVX-512. + pub fn zen4() -> InstDecoder { + zen3() + .with_avx512_f() + .with_avx512_vl() + .with_avx512_bw() + .with_avx512_cd() + .with_avx512_cd() + .with_avx512_vbmi() + .with_avx512_vbmi2() + .with_avx512_vpopcntdq() + .with_gfni() + } + + /// `Zen 5`, launched in 2024, succeeded `Zen 4`. `Zen 5` adds only a few additional + /// instructions; some AVX-512 features, `enqcmd`, and `movdir64b`. + pub fn zen5() -> InstDecoder { + zen4() + .with_movdir64b() + .with_enqcmd() } } |
