aboutsummaryrefslogtreecommitdiff
path: root/src/long_mode/uarch.rs
diff options
context:
space:
mode:
authoriximeow <me@iximeow.net>2024-08-16 21:21:16 -0700
committeriximeow <me@iximeow.net>2025-06-01 09:21:37 +0000
commit08eed360fea81ab9328fd0859b813ee01937b5b1 (patch)
treec379847e969d479c13c29f46864465cf2969d9cb /src/long_mode/uarch.rs
parent681262f4472ba4f452446e86012ce629b849d8d9 (diff)
expand isa feature selection to more bits
this is backed by the new IsaSettings trait. the existing InstDecoders are unchanged, except that they implement this new trait. also add new `DecodeEverything` structs with `IsaSettings` impls that are unconditionally set to permit anything the decoder can be configured to conditionally accept or reject. in the process, add new `_3dnow` flag and stop accepting 3dnow instructions in uarch-specific decoder settings that would not have 3dnow instructions. update AMD microarchitectures and cross-ref chip directory
Diffstat (limited to 'src/long_mode/uarch.rs')
-rw-r--r--src/long_mode/uarch.rs100
1 files changed, 87 insertions, 13 deletions
diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs
index bfd4887..5af0175 100644
--- a/src/long_mode/uarch.rs
+++ b/src/long_mode/uarch.rs
@@ -1,12 +1,24 @@
+//! information for AMD and Intel microarchitectures in the modules below is sourced from a
+//! combination of Wikipedia (especially for dates), one-off research for particular
+//! microarchitectures, and `InstLatx64`'s CPUID dumps via [chip directory](https://github.com/iximeow/chip_directory).
+//!
+//! these microarchitecture-specific decoders are relatively rarely used, but generally should be
+//! accurate.
+
pub mod amd {
- //! most information about instruction set extensions for microarchitectures here was sourced
- //! from
- //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html)
+ //! initial information for the microarchitecture (families) described here came from a
+ //! combination of the Wikipedia pages
+ //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview)
//! and
- //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html).
- //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards
- //! these decoders rejecting instructions they should not, or incorrectly accepting
- //! instructions.
+ //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features).
+ //! it has since been "augmented" by the CPUID dumps from InstLatx64, via [chip
+ //! directory](https://github.com/iximeow/chip_directory/tree/no-gods-no-/x86). scare quotes
+ //! because in several cases CPUID measurement error adds, rather than removes, ambiguity.
+ //! additionally, for some CPU features, InstLatx64 has CPUID dumps of early engineering
+ //! samples where features are not present. later production steppings of those parts do
+ //! universally have the corresponding feature, which makes it less obvious which features are
+ //! universally present in a family, standardized in a following architecture, unevenly present
+ //! due to market segmentation, and so on.
//!
//! microarchitectures as defined here are with respect to flags reported by CPUID. notably,
//! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension
@@ -35,9 +47,6 @@ pub mod amd {
.with_abm()
.with_lahfsahf()
.with_sse3()
- .with_ssse3()
- .with_sse4()
- .with_sse4_2()
.with_sse4a()
}
@@ -45,6 +54,9 @@ pub mod amd {
/// support among other extensions, and are notable for including `AESNI`.
pub fn bulldozer() -> InstDecoder {
k10()
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
.with_bmi1()
.with_aesni()
.with_pclmulqdq()
@@ -52,6 +64,8 @@ pub mod amd {
.with_avx()
.with_fma4()
.with_xop()
+ .with_xsave()
+ .with_skinit()
}
/// `Piledriver` was the successor to `Bulldozer`, launched in 2012.
@@ -86,21 +100,81 @@ pub mod amd {
/// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX,
/// SHA, RDSEED, and other extensions.
pub fn zen() -> InstDecoder {
+ // no nice way to *un*set feature bits, but several extensions were dropped.
+ // so, start again from K10.
k10()
+ // first, bundle all the K10->Bulldozer features..
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
+ .with_bmi1()
+ .with_aesni()
+ .with_pclmulqdq()
+ .with_f16c()
.with_avx()
+ .with_xsave()
+ .with_skinit()
+ // now all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features
.with_avx2()
- .with_bmi1()
.with_aesni()
.with_pclmulqdq()
.with_f16c()
.with_movbe()
.with_bmi2()
- .with_rdrand()
.with_adx()
.with_sha()
+ .with_rdrand()
.with_rdseed()
.with_fma3()
- // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO?
+
+ .with_xsavec()
+ .with_xsaves()
+ .with_xsaveopt()
+ .with_clflushopt()
+ // `clwb` is intentionally not enabled for Zen: it first appears in Zen 2 (see `zen2()`).
+ .with_fsgsbase()
+ .with_monitorx()
+ }
+
+ /// `Zen 2`, launched in 2019, succeeded `Zen`/`Zen+`. there aren't many instruction set
+ /// extensions in this generation; `clwb`, `rdpid`, and `wbnoinvd` are enabled on top of `zen()`.
+ pub fn zen2() -> InstDecoder {
+ zen()
+ .with_clwb()
+ .with_rdpid()
+ .with_wbnoinvd()
+ }
+
+ /// `Zen 3`, launched in 2020, succeeded `Zen 2`. like `Zen 2`, there aren't many instruction
+ /// set extensions here: this decoder adds `invpcid`, `vaes`, and `vpclmulqdq` to `zen2()`.
+ pub fn zen3() -> InstDecoder {
+ zen2()
+ .with_invpcid()
+ .with_vaes()
+ .with_vpclmulqdq()
+ }
+
+ /// `Zen 4`, launched in 2022, succeeded `Zen 3`. `Zen 4` is notable for being the first AMD
+ /// processor family supporting AVX-512; the AVX-512 subsets and `gfni` are added to `zen3()`.
+ pub fn zen4() -> InstDecoder {
+ zen3()
+ .with_avx512_f()
+ .with_avx512_vl()
+ .with_avx512_bw()
+ .with_avx512_cd()
+ .with_avx512_dq()
+ .with_avx512_vbmi()
+ .with_avx512_vbmi2()
+ .with_avx512_vpopcntdq()
+ .with_gfni()
+ }
+
+ /// `Zen 5`, launched in 2024, succeeded `Zen 4`. only `movdir64b` and `enqcmd` are added on top
+ /// of `zen4()` here; no additional AVX-512 feature flags are set by this function.
+ pub fn zen5() -> InstDecoder {
+ zen4()
+ .with_movdir64b()
+ .with_enqcmd()
}
}