std_detect/detect/cache.rs

//! Caches run-time feature detection so that it only needs to be computed
//! once.

#![allow(dead_code)] // not used on all platforms

use core::sync::atomic::Ordering;

use core::sync::atomic::AtomicUsize;

/// Sets the `bit` of `x`.
#[inline]
const fn set_bit(x: u128, bit: u32) -> u128 {
    x | 1 << bit
}

/// Tests the `bit` of `x`.
#[inline]
const fn test_bit(x: u128, bit: u32) -> bool {
    x & (1 << bit) != 0
}

/// Unsets the `bit` of `x`.
#[inline]
const fn unset_bit(x: u128, bit: u32) -> u128 {
    x & !(1 << bit)
}
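
// For example: set_bit(0, 3) == 0b1000, test_bit(0b1000, 3) == true, and
// unset_bit(0b1000, 3) == 0.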

/// Maximum number of features that can be cached.
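/// (93 = 3 * 31: each of the three cache words below reserves one bit for its
/// "initialized" flag, leaving at least 31 feature bits per word even on
/// 32-bit targets.)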
const CACHE_CAPACITY: u32 = 93;

/// This type is used to initialize the cache.
// The derived `Default` implementation will initialize the field to zero,
// which is what we want.
#[derive(Copy, Clone, Default, PartialEq, Eq)]
pub(crate) struct Initializer(u128);

// NOTE: the `debug_assert!`s below catch attempts to use more features than
// fit in the cache.
impl Initializer {
    /// Tests the `bit` of the cache.
    #[inline]
    pub(crate) fn test(self, bit: u32) -> bool {
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        test_bit(self.0, bit)
    }

    /// Sets the `bit` of the cache.
    #[inline]
    pub(crate) fn set(&mut self, bit: u32) {
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        let v = self.0;
        self.0 = set_bit(v, bit);
    }

    /// Unsets the `bit` of the cache.
    #[inline]
    pub(crate) fn unset(&mut self, bit: u32) {
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        let v = self.0;
        self.0 = unset_bit(v, bit);
    }
}
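
// Illustrative usage of `Initializer` (bit 5 chosen only for exposition):
//
//     let mut init = Initializer::default();
//     init.set(5);
//     assert!(init.test(5));
//     init.unset(5);
//     assert!(!init.test(5));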

/// This global variable is a cache of the features supported by the CPU.
// Note: the third slot is only used on x86.
// Another slot can be added if needed without any change to `Initializer`.
static CACHE: [Cache; 3] = [
    Cache::uninitialized(),
    Cache::uninitialized(),
    Cache::uninitialized(),
];
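
// Feature bit `b` lives in `CACHE[b / Cache::CAPACITY]` at position
// `b % Cache::CAPACITY`; see `test()` at the bottom of this file.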

/// Feature cache with capacity for `size_of::<usize>() * 8 - 1` features.
///
/// Note: 0 is used to represent an uninitialized cache, and (at least) the most
/// significant bit is set on any cache which has been initialized.
///
/// Note: we use `Relaxed` atomic operations, because we are only interested in
/// the effects of operations on a single memory location. That is, we only need
/// "modification order", and not the full-blown "happens before".
struct Cache(AtomicUsize);

impl Cache {
    const CAPACITY: u32 = (core::mem::size_of::<usize>() * 8 - 1) as u32;
    const MASK: usize = (1 << Cache::CAPACITY) - 1;
    const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY;

    /// Creates an uninitialized cache.
    #[allow(clippy::declare_interior_mutable_const)]
    const fn uninitialized() -> Self {
        Cache(AtomicUsize::new(0))
    }

    /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized.
    #[inline]
    pub(crate) fn test(&self, bit: u32) -> Option<bool> {
        let cached = self.0.load(Ordering::Relaxed);
        if cached == 0 {
            None
        } else {
            Some(test_bit(cached as u128, bit))
        }
    }

    /// Initializes the cache.
    #[inline]
    fn initialize(&self, value: usize) -> usize {
        debug_assert_eq!((value & !Cache::MASK), 0);
        self.0
            .store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed);
        value
    }
}
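
// With the "initialized" flag set and no feature bits, a 64-bit cache word is
// exactly `1 << 63`; a raw value of 0 therefore always means "not yet
// initialized".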

#[inline]
fn initialize(value: Initializer) -> Initializer {
    CACHE[0].initialize((value.0) as usize & Cache::MASK);
    CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK);
    CACHE[2].initialize((value.0 >> (2 * Cache::CAPACITY)) as usize & Cache::MASK);
    value
}
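
// For example, on a 64-bit target `Cache::CAPACITY` is 63, so bits 0..63 of the
// `Initializer` land in `CACHE[0]`, bits 63..93 in `CACHE[1]`, and `CACHE[2]`
// carries nothing beyond its "initialized" flag (CACHE_CAPACITY is 93).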

// We only have to detect features once, and it's fairly costly, so hint to LLVM
// that it should assume that cache hits are more common than misses (which is
// the point of caching). It's possibly unfortunate that this function needs to
// reach across modules like this to call `os::detect_features`, but it produces
// the best code out of several attempted variants.
//
// The `Initializer` that the cache was initialized with is returned, so that
// the caller can call `test()` on it without having to load the value from the
// cache again.
#[cold]
fn detect_and_initialize() -> Initializer {
    initialize(super::os::detect_features())
}

/// Tests the `bit` of the storage. If the storage has not been initialized,
/// initializes it with the result of `os::detect_features()`.
///
/// On its first invocation, it detects the CPU features and caches them in the
/// `CACHE` global variable, an array of `AtomicUsize` words.
///
/// It uses the `Feature` variant to index into this variable as a bitset. If
/// the bit is set, the feature is enabled, and otherwise it is disabled.
#[inline]
pub(crate) fn test(bit: u32) -> bool {
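    // Split the absolute feature bit into a word index and the bit's position
    // within that word.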
    let (relative_bit, idx) = if bit < Cache::CAPACITY {
        (bit, 0)
    } else if bit < 2 * Cache::CAPACITY {
        (bit - Cache::CAPACITY, 1)
    } else {
        (bit - 2 * Cache::CAPACITY, 2)
    };
    CACHE[idx]
        .test(relative_bit)
        .unwrap_or_else(|| detect_and_initialize().test(bit))
}