std/sys/pal/unix/stack_overflow/
thread_info.rs

1//! TLS, but async-signal-safe.
2//!
3//! Unfortunately, because thread local storage isn't async-signal-safe, we
4//! cannot soundly use it in our stack overflow handler. While this works
5//! without problems on most platforms, it can lead to undefined behaviour
6//! on others (such as GNU/Linux). Luckily, the POSIX specification documents
7//! two thread-specific values that can be accessed in asynchronous signal
8//! handlers: the value of `pthread_self()` and the address of `errno`. As
9//! `pthread_t` is an opaque platform-specific type, we use the address of
10//! `errno` here. As it is thread-specific and does not change over the
11//! lifetime of a thread, we can use `&errno` as a key for a `BTreeMap`
12//! that stores thread-specific data.
13//!
14//! Concurrent access to this map is synchronized by two locks – an outer
15//! [`Mutex`] and an inner spin lock that also remembers the identity of
16//! the lock owner:
17//! * The spin lock is the primary means of synchronization: since it only
//!   uses native atomics, it can be soundly used inside the signal handler
19//!   as opposed to [`Mutex`], which might not be async-signal-safe.
20//! * The [`Mutex`] prevents busy-waiting in the setup logic, as all accesses
21//!   there are performed with the [`Mutex`] held, which makes the spin-lock
22//!   redundant in the common case.
23//! * Finally, by using the `errno` address as the locked value of the spin
24//!   lock, we can detect cases where a SIGSEGV occurred while the thread
25//!   info is being modified.
26
27use crate::collections::BTreeMap;
28use crate::hint::spin_loop;
29use crate::ops::Range;
30use crate::sync::Mutex;
31use crate::sync::atomic::{AtomicUsize, Ordering};
32use crate::sys::os::errno_location;
33
/// Per-thread data stored for use by the stack overflow signal handler,
/// keyed in `THREAD_INFO` by the thread's `errno` address.
pub struct ThreadInfo {
    /// Address range of this thread's stack guard page.
    // NOTE(review): presumably used by the SIGSEGV handler to classify a fault
    // as a stack overflow — the consumer is not visible in this file; confirm.
    pub guard_page_range: Range<usize>,
    /// The thread's name, if one was set.
    pub thread_name: Option<Box<str>>,
}
38
// Outer lock: serializes the setup/teardown paths (`set_current_info` /
// `delete_current_info`) so contenders there park on a real mutex instead of
// busy-waiting on the spin lock (see the module docs).
static LOCK: Mutex<()> = Mutex::new(());
// Inner, async-signal-safe spin lock guarding `THREAD_INFO`. Holds 0 when
// free; while held it stores the owner's `errno` address, which lets the
// signal handler detect that the faulting thread itself owns the lock.
static SPIN_LOCK: AtomicUsize = AtomicUsize::new(0);
// This uses a `BTreeMap` instead of a hashmap since it supports constant
// initialization and automatically reduces the amount of memory used when
// items are removed.
//
// Keyed by each thread's `errno` address. Only ever accessed while
// `SPIN_LOCK` is held, which is what makes the `static mut` sound.
static mut THREAD_INFO: BTreeMap<usize, ThreadInfo> = BTreeMap::new();
45
/// RAII-style guard that releases `SPIN_LOCK` when dropped.
struct UnlockOnDrop;

impl Drop for UnlockOnDrop {
    fn drop(&mut self) {
        // Storing 0 marks the lock as free; `Release` ordering publishes all
        // `THREAD_INFO` modifications made while the lock was held to the
        // next acquirer.
        SPIN_LOCK.store(0, Ordering::Release);
    }
}
53
/// Get the current thread's information, if available.
///
/// Calling this function might freeze other threads if they attempt to modify
/// their thread information. Thus, the caller should ensure that the process
/// is aborted shortly after this function is called.
///
/// This function is guaranteed to be async-signal-safe if `f` is too.
pub fn with_current_info<R>(f: impl FnOnce(Option<&ThreadInfo>) -> R) -> R {
    // The `errno` address doubles as this thread's identity — one of the few
    // thread-specific values usable in a signal handler (see module docs).
    let this = errno_location().addr();
    let mut attempt = 0;
    let _guard = loop {
        // If we are just spinning endlessly, it's very likely that the thread
        // modifying the thread info map has a lower priority than us and will
        // not continue until we stop running. Just give up in that case.
        if attempt == 10_000_000 {
            rtprintpanic!("deadlock in SIGSEGV handler");
            return f(None);
        }

        // Try to acquire the spin lock by swapping in our own identity.
        match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
            Ok(_) => break UnlockOnDrop,
            // The lock value is our own `errno` address: this very thread
            // faulted while modifying the map. Recovery is impossible.
            Err(owner) if owner == this => {
                rtabort!("a thread received SIGSEGV while modifying its stack overflow information")
            }
            // Spin until the lock can be acquired – there is nothing better to
            // do. This is unfortunately a priority hole, but a stack overflow
            // is a fatal error anyway.
            Err(_) => {
                spin_loop();
                attempt += 1;
            }
        }
    };

    // SAFETY: we own the spin lock, so `THREAD_INFO` cannot be aliased.
    let thread_info = unsafe { &*(&raw const THREAD_INFO) };
    f(thread_info.get(&this))
}
92
/// Acquire `SPIN_LOCK` on the setup/teardown path, with `this` (the current
/// thread's `errno` address) recorded as the owner identity.
///
/// Precondition: must only be called while the outer `LOCK` is held — the
/// contended-case comment below relies on it.
fn spin_lock_in_setup(this: usize) -> UnlockOnDrop {
    loop {
        match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
            Ok(_) => return UnlockOnDrop,
            // Seeing our own identity here would mean re-entrant locking,
            // which the setup path never does.
            Err(owner) if owner == this => {
                unreachable!("the thread info setup logic isn't recursive")
            }
            // This function is always called with the outer lock held,
            // meaning the only time locking can fail is if another thread has
            // encountered a stack overflow. Since that will abort the process,
            // we just stop the current thread until that time. We use `pause`
            // instead of spinning to avoid priority inversion.
            // SAFETY: this doesn't have any safety preconditions.
            Err(_) => drop(unsafe { libc::pause() }),
        }
    }
}
110
111pub fn set_current_info(guard_page_range: Range<usize>, thread_name: Option<Box<str>>) {
112    let this = errno_location().addr();
113    let _lock_guard = LOCK.lock();
114    let _spin_guard = spin_lock_in_setup(this);
115
116    // SAFETY: we own the spin lock, so `THREAD_INFO` cannot be aliased.
117    let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
118    thread_info.insert(this, ThreadInfo { guard_page_range, thread_name });
119}
120
121pub fn delete_current_info() {
122    let this = errno_location().addr();
123    let _lock_guard = LOCK.lock();
124    let _spin_guard = spin_lock_in_setup(this);
125
126    // SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
127    let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
128    thread_info.remove(&this);
129}