std_detect/detect/os/
x86.rs

1//! x86 run-time feature detection is OS independent.
2
3#[cfg(target_arch = "x86")]
4use core::arch::x86::*;
5#[cfg(target_arch = "x86_64")]
6use core::arch::x86_64::*;
7
8use core::mem;
9
10use crate::detect::{Feature, bit, cache};
11
12/// Run-time feature detection on x86 works by using the CPUID instruction.
13///
14/// The [CPUID Wikipedia page][wiki_cpuid] contains
15/// all the information about which flags to set to query which values, and in
16/// which registers these are reported.
17///
18/// The definitive references are:
19/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
20///   Instruction Set Reference, A-Z][intel64_ref].
21/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
22///   System Instructions][amd64_ref].
23///
24/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
25/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
26/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
27#[allow(clippy::similar_names)]
28pub(crate) fn detect_features() -> cache::Initializer {
29    let mut value = cache::Initializer::default();
30
31    if cfg!(target_env = "sgx") {
32        // doesn't support this because it is untrusted data
33        return value;
34    }
35
36    // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
37    // has `cpuid` support.
38
39    // 0. EAX = 0: Basic Information:
40    // - EAX returns the "Highest Function Parameter", that is, the maximum
41    // leaf value for subsequent calls of `cpuinfo` in range [0,
42    // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars,
43    // returned in EBX, EDX, and   ECX (in that order):
44    let (max_basic_leaf, vendor_id) = unsafe {
45        let CpuidResult {
46            eax: max_basic_leaf,
47            ebx,
48            ecx,
49            edx,
50        } = __cpuid(0);
51        let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()];
52        let vendor_id: [u8; 12] = mem::transmute(vendor_id);
53        (max_basic_leaf, vendor_id)
54    };
55
56    if max_basic_leaf < 1 {
57        // Earlier Intel 486, CPUID not implemented
58        return value;
59    }
60
61    // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
62    // Contains information about most x86 features.
63    let CpuidResult {
64        ecx: proc_info_ecx,
65        edx: proc_info_edx,
66        ..
67    } = unsafe { __cpuid(0x0000_0001_u32) };
68
69    // EAX = 7: Queries "Extended Features";
70    // Contains information about bmi,bmi2, and avx2 support.
71    let (
72        extended_features_ebx,
73        extended_features_ecx,
74        extended_features_edx,
75        extended_features_eax_leaf_1,
76        extended_features_edx_leaf_1,
77    ) = if max_basic_leaf >= 7 {
78        let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
79        let CpuidResult {
80            eax: eax_1,
81            edx: edx_1,
82            ..
83        } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) };
84        (ebx, ecx, edx, eax_1, edx_1)
85    } else {
86        (0, 0, 0, 0, 0) // CPUID does not support "Extended Features"
87    };
88
89    // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
90    // - EAX returns the max leaf value for extended information, that is,
91    // `cpuid` calls in range [0x8000_0000; u32::MAX]:
92    let CpuidResult {
93        eax: extended_max_basic_leaf,
94        ..
95    } = unsafe { __cpuid(0x8000_0000_u32) };
96
97    // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
98    // Bits"
99    let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
100        let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
101        ecx
102    } else {
103        0
104    };
105
106    {
107        // borrows value till the end of this scope:
108        let mut enable = |r, rb, f| {
109            let present = bit::test(r as usize, rb);
110            if present {
111                value.set(f as u32);
112            }
113            present
114        };
115
116        enable(proc_info_ecx, 0, Feature::sse3);
117        enable(proc_info_ecx, 1, Feature::pclmulqdq);
118        enable(proc_info_ecx, 9, Feature::ssse3);
119        enable(proc_info_ecx, 13, Feature::cmpxchg16b);
120        enable(proc_info_ecx, 19, Feature::sse4_1);
121        enable(proc_info_ecx, 20, Feature::sse4_2);
122        enable(proc_info_ecx, 22, Feature::movbe);
123        enable(proc_info_ecx, 23, Feature::popcnt);
124        enable(proc_info_ecx, 25, Feature::aes);
125        let f16c = enable(proc_info_ecx, 29, Feature::f16c);
126        enable(proc_info_ecx, 30, Feature::rdrand);
127        enable(extended_features_ebx, 18, Feature::rdseed);
128        enable(extended_features_ebx, 19, Feature::adx);
129        enable(extended_features_ebx, 11, Feature::rtm);
130        enable(proc_info_edx, 4, Feature::tsc);
131        enable(proc_info_edx, 23, Feature::mmx);
132        enable(proc_info_edx, 24, Feature::fxsr);
133        enable(proc_info_edx, 25, Feature::sse);
134        enable(proc_info_edx, 26, Feature::sse2);
135        enable(extended_features_ebx, 29, Feature::sha);
136
137        enable(extended_features_ecx, 8, Feature::gfni);
138        enable(extended_features_ecx, 9, Feature::vaes);
139        enable(extended_features_ecx, 10, Feature::vpclmulqdq);
140
141        enable(extended_features_ebx, 3, Feature::bmi1);
142        enable(extended_features_ebx, 8, Feature::bmi2);
143
144        enable(extended_features_ebx, 9, Feature::ermsb);
145
146        enable(extended_features_eax_leaf_1, 31, Feature::movrs);
147
148        // Detect if CPUID.19h available
149        if bit::test(extended_features_ecx as usize, 23) {
150            let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) };
151            enable(ebx, 0, Feature::kl);
152            enable(ebx, 2, Feature::widekl);
153        }
154
155        // `XSAVE` and `AVX` support:
156        let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
157        if cpu_xsave {
158            // 0. Here the CPU supports `XSAVE`.
159
160            // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
161            // supports saving the state of the AVX/AVX2 vector registers on
162            // context-switches, see:
163            //
164            // - [intel: is avx enabled?][is_avx_enabled],
165            // - [mozilla: sse.cpp][mozilla_sse_cpp].
166            //
167            // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
168            // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
169            let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
170
171            if cpu_osxsave {
172                // 2. The OS must have signaled the CPU that it supports saving and
173                // restoring the:
174                //
175                // * SSE -> `XCR0.SSE[1]`
176                // * AVX -> `XCR0.AVX[2]`
177                // * AVX-512 -> `XCR0.AVX-512[7:5]`.
178                // * AMX -> `XCR0.AMX[18:17]`
179                //
180                // by setting the corresponding bits of `XCR0` to `1`.
181                //
182                // This is safe because the CPU supports `xsave`
183                // and the OS has set `osxsave`.
184                let xcr0 = unsafe { _xgetbv(0) };
185                // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
186                let os_avx_support = xcr0 & 6 == 6;
187                // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`:
188                let os_avx512_support = xcr0 & 0xe0 == 0xe0;
189                // Test `XCR0.AMX[18:17]` with the mask `0b110_0000_0000_0000_0000 == 0x60000`
190                let os_amx_support = xcr0 & 0x60000 == 0x60000;
191
192                // Only if the OS and the CPU support saving/restoring the AVX
193                // registers we enable `xsave` support:
194                if os_avx_support {
195                    // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
196                    // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
197                    // Developer’s Manual, Volume 1: Basic Architecture":
198                    //
199                    // "Software enables the XSAVE feature set by setting
200                    // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
201                    // instruction). If this bit is 0, execution of any of XGETBV,
202                    // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
203                    // causes an invalid-opcode exception (#UD)"
204                    //
205                    enable(proc_info_ecx, 26, Feature::xsave);
206
207                    // For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
208                    // Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
209                    // ECX = 1):
210                    if max_basic_leaf >= 0xd {
211                        let CpuidResult {
212                            eax: proc_extended_state1_eax,
213                            ..
214                        } = unsafe { __cpuid_count(0xd_u32, 1) };
215                        enable(proc_extended_state1_eax, 0, Feature::xsaveopt);
216                        enable(proc_extended_state1_eax, 1, Feature::xsavec);
217                        enable(proc_extended_state1_eax, 3, Feature::xsaves);
218                    }
219
220                    // FMA (uses 256-bit wide registers):
221                    let fma = enable(proc_info_ecx, 12, Feature::fma);
222
223                    // And AVX/AVX2:
224                    enable(proc_info_ecx, 28, Feature::avx);
225                    enable(extended_features_ebx, 5, Feature::avx2);
226
227                    // "Short" versions of AVX512 instructions
228                    enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
229                    enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
230                    enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
231                    enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert);
232                    enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16);
233
234                    enable(extended_features_eax_leaf_1, 0, Feature::sha512);
235                    enable(extended_features_eax_leaf_1, 1, Feature::sm3);
236                    enable(extended_features_eax_leaf_1, 2, Feature::sm4);
237
238                    // For AVX-512 the OS also needs to support saving/restoring
239                    // the extended state, only then we enable AVX-512 support:
240                    // Also, Rust makes `avx512f` imply `fma` and `f16c`, because
241                    // otherwise the assembler is broken. But Intel doesn't guarantee
242                    // that `fma` and `f16c` are available with `avx512f`, so we
243                    // need to check for them separately.
244                    if os_avx512_support && f16c && fma {
245                        enable(extended_features_ebx, 16, Feature::avx512f);
246                        enable(extended_features_ebx, 17, Feature::avx512dq);
247                        enable(extended_features_ebx, 21, Feature::avx512ifma);
248                        enable(extended_features_ebx, 26, Feature::avx512pf);
249                        enable(extended_features_ebx, 27, Feature::avx512er);
250                        enable(extended_features_ebx, 28, Feature::avx512cd);
251                        enable(extended_features_ebx, 30, Feature::avx512bw);
252                        enable(extended_features_ebx, 31, Feature::avx512vl);
253                        enable(extended_features_ecx, 1, Feature::avx512vbmi);
254                        enable(extended_features_ecx, 6, Feature::avx512vbmi2);
255                        enable(extended_features_ecx, 11, Feature::avx512vnni);
256                        enable(extended_features_ecx, 12, Feature::avx512bitalg);
257                        enable(extended_features_ecx, 14, Feature::avx512vpopcntdq);
258                        enable(extended_features_edx, 8, Feature::avx512vp2intersect);
259                        enable(extended_features_edx, 23, Feature::avx512fp16);
260                        enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
261                    }
262                }
263
264                if os_amx_support {
265                    enable(extended_features_edx, 24, Feature::amx_tile);
266                    enable(extended_features_edx, 25, Feature::amx_int8);
267                    enable(extended_features_edx, 22, Feature::amx_bf16);
268                    enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
269                    enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
270
271                    if max_basic_leaf >= 0x1e {
272                        let CpuidResult {
273                            eax: amx_feature_flags_eax,
274                            ..
275                        } = unsafe { __cpuid_count(0x1e_u32, 1) };
276
277                        enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
278                        enable(amx_feature_flags_eax, 5, Feature::amx_transpose);
279                        enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
280                        enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
281                        enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
282                    }
283                }
284            }
285        }
286
287        // This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
288        // On intel CPUs with popcnt, lzcnt implements the
289        // "missing part" of ABM, so we map both to the same
290        // internal feature.
291        //
292        // The `is_x86_feature_detected!("lzcnt")` macro then
293        // internally maps to Feature::abm.
294        enable(extended_proc_info_ecx, 5, Feature::lzcnt);
295
296        // As Hygon Dhyana originates from AMD technology and shares most of the architecture with
297        // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series
298        // number(Family 18h).
299        //
300        // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD
301        // family 17h.
302        //
303        // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf.
304        // Related Hygon kernel patch can be found on
305        // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
306        if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
307            // These features are available on AMD arch CPUs:
308            enable(extended_proc_info_ecx, 6, Feature::sse4a);
309            enable(extended_proc_info_ecx, 21, Feature::tbm);
310            enable(extended_proc_info_ecx, 11, Feature::xop);
311        }
312    }
313
314    // Unfortunately, some Skylake chips erroneously report support for BMI1 and
315    // BMI2 without actual support. These chips don't support AVX, and it seems
316    // that all Intel chips with non-erroneous support BMI do (I didn't check
317    // other vendors), so we can disable these flags for chips that don't also
318    // report support for AVX.
319    //
320    // It's possible this will pessimize future chips that do support BMI and
321    // not AVX, but this seems minor compared to a hard crash you get when
322    // executing an unsupported instruction (to put it another way, it's safe
323    // for us to under-report CPU features, but not to over-report them). Still,
324    // to limit any impact this may have in the future, we only do this for
325    // Intel chips, as it's a bug only present in their chips.
326    //
327    // This bug is documented as `SKL052` in the errata section of this document:
328    // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf
329    if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) {
330        value.unset(Feature::bmi1 as u32);
331        value.unset(Feature::bmi2 as u32);
332    }
333
334    value
335}