diff options
Diffstat (limited to 'src/protected_mode/uarch.rs')
| -rw-r--r-- | src/protected_mode/uarch.rs | 129 |
1 files changed, 111 insertions, 18 deletions
diff --git a/src/protected_mode/uarch.rs b/src/protected_mode/uarch.rs index cbe3e89..6914348 100644 --- a/src/protected_mode/uarch.rs +++ b/src/protected_mode/uarch.rs @@ -1,12 +1,24 @@ +//! information for AMD and Intel microarchitectures in the modules below is sourced from a +//! combination of Wikipedia (especially for dates), one-off research for particular +//! microarchitectures, and `InstLatx64`'s CPUID dumps via [chip directory](https://github.com/iximeow/chip_directory). +//! +//! these microarchitecture-specific decoders are relatively rarely used, but generally should be +//! accurate. + pub mod amd { - //! most information about instruction set extensions for microarchitectures here was sourced - //! from - //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html) + //! initial information for the microarchitecture (families) described here came from a + //! combination of the Wikipedia pages + //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview) //! and - //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html). - //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards - //! these decoders rejecting instructions they should not, or incorrectly accepting - //! instructions. + //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features). + //! it has since been "augmented" by the CPUID dumps from InstLatx64, via [chip + //! directory](https://github.com/iximeow/chip_directory/tree/no-gods-no-/x86). scare quotes + //! because in several cases CPUID measurement error adds, rather than removes, ambiguity. + //! 
additionally, for some CPU features, InstLatx64 has CPUID dumps of early engineering + //! samples where features are not present. later production steppings of those parts do + //! universally have the corresponding feature, which makes it less obvious which features are + //! universally present in a family, standardized in a following architecture, unevenly present + //! due to market segmentation, and so on. //! //! microarchitectures as defined here are with respect to flags reported by CPUID. notably, //! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension @@ -23,28 +35,41 @@ pub mod amd { /// support - SSE2 and no later. pub fn k8() -> InstDecoder { InstDecoder::minimal() + .with_3dnow() + .with_3dnowprefetch() + .with_cmov() } /// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through /// to SSE4.2a, as well as consistent `cmov` support, among other features. pub fn k10() -> InstDecoder { k8() - .with_cmov() .with_cmpxchg16b() .with_svm() .with_abm() .with_lahfsahf() .with_sse3() - .with_ssse3() - .with_sse4() - .with_sse4_2() .with_sse4a() } /// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX - /// support among other extensions, and are notable for including `AESNI`. + /// support among other extensions, and are notable for including `AESNI`. `Bulldozer` was also + /// the first microarchitecture to *remove* support for 3DNow instructions. pub fn bulldozer() -> InstDecoder { - k10() + InstDecoder::minimal() + // first, apply all the K8 extensions again, sans 3DNow + // .. 
should be sse, sse2 + // then the K10 + .with_cmpxchg16b() + .with_svm() + .with_abm() + .with_lahfsahf() + .with_sse3() + .with_sse4a() + // now the new extensions + .with_ssse3() + .with_sse4() + .with_sse4_2() .with_bmi1() .with_aesni() .with_pclmulqdq() @@ -52,6 +77,8 @@ pub mod amd { .with_avx() .with_fma4() .with_xop() + .with_xsave() + .with_skinit() } /// `Piledriver` was the successor to `Bulldozer`, launched in 2012. @@ -86,21 +113,87 @@ pub mod amd { /// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX, /// SHA, RDSEED, and other extensions. pub fn zen() -> InstDecoder { - k10() - .with_avx() - .with_avx2() + // no nice way to *un*set feature bits, but several extensions were dropped. + // so, start again. + InstDecoder::minimal() + // first, apply all the K8 extensions again, sans 3DNow + // .. should be sse, sse2 + // then the K10 + .with_cmpxchg16b() + .with_svm() + .with_abm() + .with_lahfsahf() + .with_sse3() + .with_sse4a() + // now, bundle all the K10->Bulldozer features.. + .with_ssse3() + .with_sse4() + .with_sse4_2() .with_bmi1() .with_aesni() .with_pclmulqdq() .with_f16c() + .with_avx() + .with_xsave() + .with_skinit() + // finally all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features + .with_avx2() .with_movbe() .with_bmi2() - .with_rdrand() .with_adx() .with_sha() + .with_rdrand() .with_rdseed() .with_fma3() - // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO? + + .with_xsavec() + .with_xsaves() + .with_xsaveopt() + .with_clflushopt() + .with_clwb() + .with_fsgsbase() + .with_monitorx() + } + + /// `Zen 2`, launched in 2019, succeeded `Zen`/`Zen+`. there aren't many instruction set + /// extensions here, but `clwb`, `rdpid`, and `wbnoinvd` show up here. + pub fn zen2() -> InstDecoder { + zen() + .with_clwb() + .with_rdpid() + .with_wbnoinvd() + } + + /// `Zen 3`, launched in 2020, succeeded `Zen 2`. like `Zen 2`, there aren't many instruction + /// set extensions here. 
+ pub fn zen3() -> InstDecoder { + zen2() + .with_invpcid() + .with_vaes() + .with_vpclmulqdq() + } + + /// `Zen 4`, launched in 2022, succeeded `Zen 3`. `Zen 4` is notable for being the first AMD + /// processor family supporting AVX-512. + pub fn zen4() -> InstDecoder { + zen3() + .with_avx512_f() + .with_avx512_vl() + .with_avx512_bw() + .with_avx512_cd() + .with_avx512_cd() + .with_avx512_vbmi() + .with_avx512_vbmi2() + .with_avx512_vpopcntdq() + .with_gfni() + } + + /// `Zen 5`, launched in 2024, succeeded `Zen 4`. `Zen 5` adds only a few additional + /// instructions; some AVX-512 features, `enqcmd`, and `movdir64b`. + pub fn zen5() -> InstDecoder { + zen4() + .with_movdir64b() + .with_enqcmd() } } |
