aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author iximeow <me@iximeow.net> 2025-06-01 23:51:42 +0000
committer iximeow <me@iximeow.net> 2025-06-01 23:51:42 +0000
commit 016cb77445857b63b3c5ba3ea095c5a36a357fbd (patch)
tree 4110c29afa61c45234f8fd6e095fb6928feb9c00
parent cf9ab8fcd04608c565da4fd1aff5bff47a5fe385 (diff)
3dnow was still supported on K8, K10. 32-bit mode should learn about uarch tweaks too
-rw-r--r-- src/long_mode/uarch.rs 39
-rw-r--r-- src/protected_mode/uarch.rs 129
-rw-r--r-- src/real_mode/uarch.rs 129
-rw-r--r-- test/long_mode/mod.rs 3
-rw-r--r-- test/protected_mode/mod.rs 25
5 files changed, 271 insertions, 54 deletions
diff --git a/src/long_mode/uarch.rs b/src/long_mode/uarch.rs
index 5af0175..63fa972 100644
--- a/src/long_mode/uarch.rs
+++ b/src/long_mode/uarch.rs
@@ -35,13 +35,15 @@ pub mod amd {
/// support - SSE2 and no later.
pub fn k8() -> InstDecoder {
InstDecoder::minimal()
+ .with_3dnow()
+ .with_3dnowprefetch()
+ .with_cmov()
}
/// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through
/// to SSE4.2a, as well as consistent `cmov` support, among other features.
pub fn k10() -> InstDecoder {
k8()
- .with_cmov()
.with_cmpxchg16b()
.with_svm()
.with_abm()
@@ -51,9 +53,20 @@ pub mod amd {
}
/// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX
- /// support among other extensions, and are notable for including `AESNI`.
+ /// support among other extensions, and are notable for including `AESNI`. `Bulldozer` was also
+ /// the first microarchitecture to *remove* support for 3DNow instructions.
pub fn bulldozer() -> InstDecoder {
- k10()
+ InstDecoder::minimal()
+ // first, apply all the K8 extensions again, sans 3DNow
+ // .. should be sse, sse2
+ // then the K10
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_sse4a()
+ // now the new extensions
.with_ssse3()
.with_sse4()
.with_sse4_2()
@@ -101,9 +114,18 @@ pub mod amd {
/// SHA, RDSEED, and other extensions.
pub fn zen() -> InstDecoder {
// no nice way to *un*set feature bits, but several extensions were dropped.
- // so, start again from K10.
- k10()
- // first, bundle all the K10->Bulldozer features..
+ // so, start again.
+ InstDecoder::minimal()
+ // first, apply all the K8 extensions again, sans 3DNow
+ // .. should be sse, sse2
+ // then the K10
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_sse4a()
+ // now, bundle all the K10->Bulldozer features..
.with_ssse3()
.with_sse4()
.with_sse4_2()
@@ -114,11 +136,8 @@ pub mod amd {
.with_avx()
.with_xsave()
.with_skinit()
- // now all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features
+ // finally all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features
.with_avx2()
- .with_aesni()
- .with_pclmulqdq()
- .with_f16c()
.with_movbe()
.with_bmi2()
.with_adx()
diff --git a/src/protected_mode/uarch.rs b/src/protected_mode/uarch.rs
index cbe3e89..6914348 100644
--- a/src/protected_mode/uarch.rs
+++ b/src/protected_mode/uarch.rs
@@ -1,12 +1,24 @@
+//! information for AMD and Intel microarchitectures in the modules below is sourced from a
+//! combination of Wikipedia (especially for dates), one-off research for particular
+//! microarchitectures, and `InstLatx64`'s CPUID dumps via [chip directory](https://github.com/iximeow/chip_directory).
+//!
+//! these microarchitecture-specific decoders are relatively rarely used, but generally should be
+//! accurate.
+
pub mod amd {
- //! most information about instruction set extensions for microarchitectures here was sourced
- //! from
- //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html)
+ //! initial information for the microarchitecture (families) described here came from a
+ //! combination of the Wikipedia pages
+ //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview)
//! and
- //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/protected_mode/uarch/intel/index.html).
- //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards
- //! these decoders rejecting instructions they should not, or incorrectly accepting
- //! instructions.
+ //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features).
+ //! it has since been "augmented" by the CPUID dumps from InstLatx64, via [chip
+ //! directory](https://github.com/iximeow/chip_directory/tree/no-gods-no-/x86). scare quotes
+ //! because in several cases CPUID measurement error adds, rather than removes, ambiguity.
+ //! additionally, for some CPU features, InstLatx64 has CPUID dumps of early engineering
+ //! samples where features are not present. later production steppings of those parts do
+ //! universally have the corresponding feature, which makes it less obvious which features are
+ //! universally present in a family, standardized in a following architecture, unevenly present
+ //! due to market segmentation, and so on.
//!
//! microarchitectures as defined here are with respect to flags reported by CPUID. notably,
//! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension
@@ -23,28 +35,41 @@ pub mod amd {
/// support - SSE2 and no later.
pub fn k8() -> InstDecoder {
InstDecoder::minimal()
+ .with_3dnow()
+ .with_3dnowprefetch()
+ .with_cmov()
}
/// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through
/// to SSE4.2a, as well as consistent `cmov` support, among other features.
pub fn k10() -> InstDecoder {
k8()
- .with_cmov()
.with_cmpxchg16b()
.with_svm()
.with_abm()
.with_lahfsahf()
.with_sse3()
- .with_ssse3()
- .with_sse4()
- .with_sse4_2()
.with_sse4a()
}
/// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX
- /// support among other extensions, and are notable for including `AESNI`.
+ /// support among other extensions, and are notable for including `AESNI`. `Bulldozer` was also
+ /// the first microarchitecture to *remove* support for 3DNow instructions.
pub fn bulldozer() -> InstDecoder {
- k10()
+ InstDecoder::minimal()
+ // first, apply all the K8 extensions again, sans 3DNow
+ // .. should be sse, sse2
+ // then the K10
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_sse4a()
+ // now the new extensions
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
.with_bmi1()
.with_aesni()
.with_pclmulqdq()
@@ -52,6 +77,8 @@ pub mod amd {
.with_avx()
.with_fma4()
.with_xop()
+ .with_xsave()
+ .with_skinit()
}
/// `Piledriver` was the successor to `Bulldozer`, launched in 2012.
@@ -86,21 +113,87 @@ pub mod amd {
/// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX,
/// SHA, RDSEED, and other extensions.
pub fn zen() -> InstDecoder {
- k10()
- .with_avx()
- .with_avx2()
+ // no nice way to *un*set feature bits, but several extensions were dropped.
+ // so, start again.
+ InstDecoder::minimal()
+ // first, apply all the K8 extensions again, sans 3DNow
+ // .. should be sse, sse2
+ // then the K10
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_sse4a()
+ // now, bundle all the K10->Bulldozer features..
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
.with_bmi1()
.with_aesni()
.with_pclmulqdq()
.with_f16c()
+ .with_avx()
+ .with_xsave()
+ .with_skinit()
+ // finally all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features
+ .with_avx2()
.with_movbe()
.with_bmi2()
- .with_rdrand()
.with_adx()
.with_sha()
+ .with_rdrand()
.with_rdseed()
.with_fma3()
- // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO?
+
+ .with_xsavec()
+ .with_xsaves()
+ .with_xsaveopt()
+ .with_clflushopt()
+ .with_clwb()
+ .with_fsgsbase()
+ .with_monitorx()
+ }
+
+ /// `Zen 2`, launched in 2019, succeeded `Zen`/`Zen+`. there aren't many instruction set
+ /// extensions here, but `clwb`, `rdpid`, and `wbnoinvd` show up here.
+ pub fn zen2() -> InstDecoder {
+ zen()
+ .with_clwb()
+ .with_rdpid()
+ .with_wbnoinvd()
+ }
+
+ /// `Zen 3`, launched in 2020, succeeded `Zen 2`. like `Zen 2`, there aren't many instruction
+ /// set extensions here.
+ pub fn zen3() -> InstDecoder {
+ zen2()
+ .with_invpcid()
+ .with_vaes()
+ .with_vpclmulqdq()
+ }
+
+ /// `Zen 4`, launched in 2022, succeeded `Zen 3`. `Zen 4` is notable for being the first AMD
+ /// processor family supporting AVX-512.
+ pub fn zen4() -> InstDecoder {
+ zen3()
+ .with_avx512_f()
+ .with_avx512_vl()
+ .with_avx512_bw()
+ .with_avx512_cd()
+ .with_avx512_cd()
+ .with_avx512_vbmi()
+ .with_avx512_vbmi2()
+ .with_avx512_vpopcntdq()
+ .with_gfni()
+ }
+
+ /// `Zen 5`, launched in 2024, succeeded `Zen 4`. `Zen 5` adds only a few additional
+ /// instructions: some AVX-512 features, `enqcmd`, and `movdir64b`.
+ pub fn zen5() -> InstDecoder {
+ zen4()
+ .with_movdir64b()
+ .with_enqcmd()
}
}
diff --git a/src/real_mode/uarch.rs b/src/real_mode/uarch.rs
index 60bf168..8df4213 100644
--- a/src/real_mode/uarch.rs
+++ b/src/real_mode/uarch.rs
@@ -1,12 +1,24 @@
+//! information for AMD and Intel microarchitectures in the modules below is sourced from a
+//! combination of Wikipedia (especially for dates), one-off research for particular
+//! microarchitectures, and `InstLatx64`'s CPUID dumps via [chip directory](https://github.com/iximeow/chip_directory).
+//!
+//! these microarchitecture-specific decoders are relatively rarely used, but generally should be
+//! accurate.
+
pub mod amd {
- //! most information about instruction set extensions for microarchitectures here was sourced
- //! from
- //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/real_mode/uarch/intel/index.html)
+ //! initial information for the microarchitecture (families) described here came from a
+ //! combination of the Wikipedia pages
+ //! [https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview](https://en.wikipedia.org/wiki/AMD_Accelerated_Processing_Unit#Feature_overview)
//! and
- //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://docs.rs/yaxpeax-x86/0.0.12/yaxpeax_x86/real_mode/uarch/intel/index.html).
- //! these mappings are best-effort but fairly unused, so a critical eye should be kept towards
- //! these decoders rejecting instructions they should not, or incorrectly accepting
- //! instructions.
+ //! [https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features](https://en.wikipedia.org/wiki/Template:AMD_x86_CPU_features).
+ //! it has since been "augmented" by the CPUID dumps from InstLatx64, via [chip
+ //! directory](https://github.com/iximeow/chip_directory/tree/no-gods-no-/x86). scare quotes
+ //! because in several cases CPUID measurement error adds, rather than removes, ambiguity.
+ //! additionally, for some CPU features, InstLatx64 has CPUID dumps of early engineering
+ //! samples where features are not present. later production steppings of those parts do
+ //! universally have the corresponding feature, which makes it less obvious which features are
+ //! universally present in a family, standardized in a following architecture, unevenly present
+ //! due to market segmentation, and so on.
//!
//! microarchitectures as defined here are with respect to flags reported by CPUID. notably,
//! `Zen` does not report `FMA4` support by `CPUID`, but instructions in that extension
@@ -23,28 +35,41 @@ pub mod amd {
/// support - SSE2 and no later.
pub fn k8() -> InstDecoder {
InstDecoder::minimal()
+ .with_3dnow()
+ .with_3dnowprefetch()
+ .with_cmov()
}
/// `k10` was the successor to `k8`, launched in 2007. `k10` cores extended SSE support through
/// to SSE4.2a, as well as consistent `cmov` support, among other features.
pub fn k10() -> InstDecoder {
k8()
- .with_cmov()
.with_cmpxchg16b()
.with_svm()
.with_abm()
.with_lahfsahf()
.with_sse3()
- .with_ssse3()
- .with_sse4()
- .with_sse4_2()
.with_sse4a()
}
/// `Bulldozer` was the successor to `K10`, launched in 2011. `Bulldozer` cores include AVX
- /// support among other extensions, and are notable for including `AESNI`.
+ /// support among other extensions, and are notable for including `AESNI`. `Bulldozer` was also
+ /// the first microarchitecture to *remove* support for 3DNow instructions.
pub fn bulldozer() -> InstDecoder {
- k10()
+ InstDecoder::minimal()
+ // first, apply all the K8 extensions again, sans 3DNow
+ // .. should be sse, sse2
+ // then the K10
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_sse4a()
+ // now the new extensions
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
.with_bmi1()
.with_aesni()
.with_pclmulqdq()
@@ -52,6 +77,8 @@ pub mod amd {
.with_avx()
.with_fma4()
.with_xop()
+ .with_xsave()
+ .with_skinit()
}
/// `Piledriver` was the successor to `Bulldozer`, launched in 2012.
@@ -86,21 +113,87 @@ pub mod amd {
/// instructions to AVX2 and discarded FMA4, TBM, and XOP extensions. they also gained ADX,
/// SHA, RDSEED, and other extensions.
pub fn zen() -> InstDecoder {
- k10()
- .with_avx()
- .with_avx2()
+ // no nice way to *un*set feature bits, but several extensions were dropped.
+ // so, start again.
+ InstDecoder::minimal()
+ // first, apply all the K8 extensions again, sans 3DNow
+ // .. should be sse, sse2
+ // then the K10
+ .with_cmpxchg16b()
+ .with_svm()
+ .with_abm()
+ .with_lahfsahf()
+ .with_sse3()
+ .with_sse4a()
+ // now, bundle all the K10->Bulldozer features..
+ .with_ssse3()
+ .with_sse4()
+ .with_sse4_2()
.with_bmi1()
.with_aesni()
.with_pclmulqdq()
.with_f16c()
+ .with_avx()
+ .with_xsave()
+ .with_skinit()
+ // finally all the Bulldozer (/Piledriver/Steamroller/Excavator)->Zen features
+ .with_avx2()
.with_movbe()
.with_bmi2()
- .with_rdrand()
.with_adx()
.with_sha()
+ .with_rdrand()
.with_rdseed()
.with_fma3()
- // TODO: XSAVEC, XSAVES, XRSTORS, CLFLUSHOPT, CLZERO?
+
+ .with_xsavec()
+ .with_xsaves()
+ .with_xsaveopt()
+ .with_clflushopt()
+ .with_clwb()
+ .with_fsgsbase()
+ .with_monitorx()
+ }
+
+ /// `Zen 2`, launched in 2019, succeeded `Zen`/`Zen+`. there aren't many instruction set
+ /// extensions here, but `clwb`, `rdpid`, and `wbnoinvd` show up here.
+ pub fn zen2() -> InstDecoder {
+ zen()
+ .with_clwb()
+ .with_rdpid()
+ .with_wbnoinvd()
+ }
+
+ /// `Zen 3`, launched in 2020, succeeded `Zen 2`. like `Zen 2`, there aren't many instruction
+ /// set extensions here.
+ pub fn zen3() -> InstDecoder {
+ zen2()
+ .with_invpcid()
+ .with_vaes()
+ .with_vpclmulqdq()
+ }
+
+ /// `Zen 4`, launched in 2022, succeeded `Zen 3`. `Zen 4` is notable for being the first AMD
+ /// processor family supporting AVX-512.
+ pub fn zen4() -> InstDecoder {
+ zen3()
+ .with_avx512_f()
+ .with_avx512_vl()
+ .with_avx512_bw()
+ .with_avx512_cd()
+ .with_avx512_cd()
+ .with_avx512_vbmi()
+ .with_avx512_vbmi2()
+ .with_avx512_vpopcntdq()
+ .with_gfni()
+ }
+
+ /// `Zen 5`, launched in 2024, succeeded `Zen 4`. `Zen 5` adds only a few additional
+ /// instructions: some AVX-512 features, `enqcmd`, and `movdir64b`.
+ pub fn zen5() -> InstDecoder {
+ zen4()
+ .with_movdir64b()
+ .with_enqcmd()
}
}
diff --git a/test/long_mode/mod.rs b/test/long_mode/mod.rs
index 9493a63..2c7771c 100644
--- a/test/long_mode/mod.rs
+++ b/test/long_mode/mod.rs
@@ -3454,7 +3454,8 @@ fn test_3dnow() {
test_display_under(&InstDecoder::default(), bytes, text);
test_invalid_under(&InstDecoder::minimal(), bytes);
test_invalid_under(&InstDecoder::minimal(), bytes);
- test_invalid_under(&yaxpeax_x86::long_mode::uarch::amd::k8(), bytes);
+ test_display_under(&yaxpeax_x86::long_mode::uarch::amd::k8(), bytes, text);
+ test_invalid_under(&yaxpeax_x86::long_mode::uarch::amd::bulldozer(), bytes);
test_invalid_under(&yaxpeax_x86::long_mode::uarch::intel::netburst(), bytes);
}
diff --git a/test/protected_mode/mod.rs b/test/protected_mode/mod.rs
index 8fecdab..29e9ec4 100644
--- a/test/protected_mode/mod.rs
+++ b/test/protected_mode/mod.rs
@@ -3068,13 +3068,24 @@ fn test_sse4a() {
#[test]
fn test_3dnow() {
- test_display(&[0x0f, 0x0f, 0xe0, 0x8a], "pfnacc mm4, mm0");
- test_display(&[0x0f, 0x0f, 0x38, 0x8e], "pfpnacc mm7, qword [eax]");
- test_display(&[0x65, 0x67, 0x65, 0x65, 0x0f, 0x0e], "femms");
- test_display(&[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4], "pfmax mm4, qword cs:[ebx + edx * 4 - 0x6d]");
- test_display(&[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c], "pi2fw mm6, qword ss:[eax - 0x5]");
- test_display(&[0x66, 0x0f, 0x0f, 0xc6, 0xb7], "pmulhrw mm0, mm6");
- test_display(&[0x0f, 0x0f, 0xc6, 0xb7], "pmulhrw mm0, mm6");
+ fn test_instr(bytes: &[u8], text: &'static str) {
+ test_display_under(&InstDecoder::minimal().with_3dnow(), bytes, text);
+ test_display_under(&InstDecoder::default(), bytes, text);
+ test_invalid_under(&InstDecoder::minimal(), bytes);
+ test_invalid_under(&InstDecoder::minimal(), bytes);
+ test_display_under(&yaxpeax_x86::protected_mode::uarch::amd::k8(), bytes, text);
+ test_invalid_under(&yaxpeax_x86::protected_mode::uarch::amd::bulldozer(), bytes);
+ test_invalid_under(&yaxpeax_x86::protected_mode::uarch::intel::netburst(), bytes);
+ }
+
+ test_instr(&[0x0f, 0x0f, 0xe0, 0x8a], "pfnacc mm4, mm0");
+ test_instr(&[0x0f, 0x0f, 0x38, 0x8e], "pfpnacc mm7, qword [eax]");
+ test_instr(&[0x65, 0x67, 0x65, 0x65, 0x0f, 0x0e], "femms");
+ test_instr(&[0x3e, 0xf3, 0x2e, 0xf2, 0x0f, 0x0f, 0x64, 0x93, 0x93, 0xa4], "pfmax mm4, qword cs:[ebx + edx * 4 - 0x6d]");
+ test_instr(&[0x26, 0x36, 0x0f, 0x0f, 0x70, 0xfb, 0x0c], "pi2fw mm6, qword ss:[eax - 0x5]");
+ test_instr(&[0x66, 0x0f, 0x0f, 0xc6, 0xb7], "pmulhrw mm0, mm6");
+ test_instr(&[0x0f, 0x0f, 0xc6, 0xb7], "pmulhrw mm0, mm6");
+ test_instr(&[0x0f, 0x0e], "femms");
}
// first appeared in tremont