// compiler_builtins/probestack.rs

// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This module defines the `__rust_probestack` intrinsic which is used in the
//! implementation of "stack probes" on certain platforms.
//!
//! The purpose of a stack probe is to provide a static guarantee that if a
//! thread has a guard page then a stack overflow is guaranteed to hit that
//! guard page. If a function did not have a stack probe then there's a risk of
//! having a stack frame *larger* than the guard page, so a function call could
//! skip over the guard page entirely and then later hit maybe the heap or
//! another thread, possibly leading to security vulnerabilities such as [The
//! Stack Clash], for example.
//!
//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
//!
//! The `__rust_probestack` is called in the prologue of functions whose stack
//! size is larger than the guard page, for example larger than 4096 bytes on
//! x86. This function is then responsible for "touching" all pages relevant to
//! the stack to ensure that if any of them is the guard page we're guaranteed
//! to hit it.
//!
//! The precise ABI for how this function operates is defined by LLVM. There's
//! no real documentation as to what this is, so you'd basically need to read
//! the LLVM source code for reference. Often though the test cases can be
//! illuminating as to the ABI that's generated, or just looking at the output
//! of `llc`.
//!
//! Note that `#[naked]` is typically used here for the stack probe because the
//! ABI corresponds to no actual ABI.
//!
//! Finally it's worth noting that at the time of this writing LLVM only has
//! support for stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture like ARM or PowerPC64. I'm sure LLVM would
//! be more than happy to accept such a change!

// The whole module is compiled only when every predicate below holds.
#![cfg(not(feature = "mangled-names"))]
// Windows and Cygwin already have builtins to do this.
#![cfg(not(any(windows, target_os = "cygwin")))]
// All these builtins require assembly
#![cfg(not(feature = "no-asm"))]
// We only define stack probing for these architectures today.
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]

extern "C" {
    /// Declaration of the stack-probe symbol whose body is provided by the
    /// `global_asm!` blocks in this module.
    ///
    /// The routine does not follow the C calling convention: the requested
    /// frame size is passed in `%rax`/`%eax` (see the comments on the
    /// assembly definitions), so it is not meant to be called from Rust code.
    pub fn __rust_probestack();
}

// A wrapper for our implementation of __rust_probestack, which allows us to
// keep the assembly inline while controlling all CFI directives in the assembly
// emitted for the function.
//
// `$body` is the assembly text of the function; the macro expands (via
// `concat!`) to a single string consisting of a prologue, the body and an
// epilogue.
//
// This is the ELF version: the symbol is placed in its own
// `.text.__rust_probestack` section, exported with `.globl`, marked `.hidden`,
// typed as a function and given a `.size`.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .pushsection .text.__rust_probestack
            .globl __rust_probestack
            .type  __rust_probestack, @function
            .hidden __rust_probestack
        __rust_probestack:
            ",
            $body,
            "
            .size __rust_probestack, . - __rust_probestack
            .popsection
            "
        )
    };
}

// UEFI x86_64 version of the wrapper: only exports the symbol with `.globl`
// and emits the label, followed by `$body`. No section, type, visibility or
// size directives are emitted here.
#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl __rust_probestack
        __rust_probestack:
            ",
            $body
        )
    };
}

// Mach-O version of the wrapper: exports the symbol with `.globl` and emits
// the label, followed by `$body`. Note that the triple underscore
// is deliberate.
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

// UEFI x86 (32-bit) version of the wrapper: exports the symbol with `.globl`
// and emits the label, followed by `$body`. In UEFI x86 arch, the triple
// underscore is deliberate.
#[cfg(all(target_os = "uefi", target_arch = "x86"))]
macro_rules! define_rust_probestack {
    ($body: expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

123// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
124// ensuring that if any pages are unmapped we'll make a page fault.
125//
126// The ABI here is that the stack frame size is located in `%rax`. Upon
127// return we're not supposed to modify `%rsp` or `%rax`.
128//
129// Any changes to this function should be replicated to the SGX version below.
130#[cfg(all(
131    target_arch = "x86_64",
132    not(all(target_env = "sgx", target_vendor = "fortanix"))
133))]
134core::arch::global_asm!(
135    define_rust_probestack!(
136        "
137    .cfi_startproc
138    pushq  %rbp
139    .cfi_adjust_cfa_offset 8
140    .cfi_offset %rbp, -16
141    movq   %rsp, %rbp
142    .cfi_def_cfa_register %rbp
143
144    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
145
146    // Main loop, taken in one page increments. We're decrementing rsp by
147    // a page each time until there's less than a page remaining. We're
148    // guaranteed that this function isn't called unless there's more than a
149    // page needed.
150    //
151    // Note that we're also testing against `8(%rsp)` to account for the 8
152    // bytes pushed on the stack orginally with our return address. Using
153    // `8(%rsp)` simulates us testing the stack pointer in the caller's
154    // context.
155
156    // It's usually called when %rax >= 0x1000, but that's not always true.
157    // Dynamic stack allocation, which is needed to implement unsized
158    // rvalues, triggers stackprobe even if %rax < 0x1000.
159    // Thus we have to check %r11 first to avoid segfault.
160    cmp    $0x1000,%r11
161    jna    3f
1622:
163    sub    $0x1000,%rsp
164    test   %rsp,8(%rsp)
165    sub    $0x1000,%r11
166    cmp    $0x1000,%r11
167    ja     2b
168
1693:
170    // Finish up the last remaining stack space requested, getting the last
171    // bits out of r11
172    sub    %r11,%rsp
173    test   %rsp,8(%rsp)
174
175    // Restore the stack pointer to what it previously was when entering
176    // this function. The caller will readjust the stack pointer after we
177    // return.
178    add    %rax,%rsp
179
180    leave
181    .cfi_def_cfa_register %rsp
182    .cfi_adjust_cfa_offset -8
183    ret
184    .cfi_endproc
185    "
186    ),
187    options(att_syntax)
188);
189
// x86_64 stack probe for the Fortanix SGX target.
//
// This function is the same as the non-SGX x86_64 version, except that some
// instructions are [manually patched for LVI]: instead of a plain `ret`, the
// return address is popped into `%r11`, followed by an `lfence` and an
// indirect `jmp *%r11`.
//
// The frame size is read from `%rax`, `%r11` is used as scratch, and `%rsp`
// is restored (`add %rax,%rsp`) before returning.
//
// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
#[cfg(all(
    target_arch = "x86_64",
    all(target_env = "sgx", target_vendor = "fortanix")
))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack orginally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    pop %r11
    lfence
    jmp *%r11
    .cfi_endproc
    "
    ),
    options(att_syntax)
);

#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
// that on Unix we're expected to restore everything as it was, this
// function basically can't tamper with anything.
//
// The ABI here is the same as x86_64, except everything is 32-bits large.
//
// Outline of the assembly below:
//   * `%ebp` is saved with `push` and restored by `leave`.
//   * `%ecx` is the scratch counter, so it is pushed before use and popped
//     again before returning; it starts as a copy of `%eax`.
//   * `%esp` is lowered in 0x1000-byte steps (then by the remainder) with a
//     `test %esp,8(%esp)` probe after each step; `test` only reads memory.
//   * `add %eax,%esp` undoes all the decrements before `%ecx` is popped.
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)

    add    %eax,%esp
    pop    %ecx
    leave
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
    ),
    options(att_syntax)
);

#[cfg(all(target_arch = "x86", target_os = "uefi"))]
// UEFI target is windows like target. LLVM will do _chkstk things like windows.
// probestack function will also do things like _chkstk in MSVC.
// So on x86 the probe itself has to subtract %eax from %esp before returning.
//
// REF: Rust commit(74e80468347)
// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
// Comments in LLVM:
//   MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
//   MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
//   themselves.
//
// Outline of the assembly below:
//   * `%ebp` is saved with `push`/`leave`; `%ecx` (scratch counter) and
//     `%edx` (scratch for the return address) are pushed and popped.
//   * The probing loop lowers `%esp` in 0x1000-byte steps, then by the
//     remainder, issuing `test %esp,8(%esp)` after each step.
//   * With `%esp` at its lowest point, the return address is loaded from
//     `4(%ebp)` and stored to `12(%esp)`, i.e. just above the three pushed
//     registers, which is the entry `%esp` minus `%eax`.
//   * After the registers are restored, `sub %eax, %esp` leaves `%esp`
//     pointing at that copied return address, so `ret` returns to the caller
//     with the frame already allocated.
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    push   %edx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)
    mov    4(%ebp),%edx
    mov    %edx, 12(%esp)
    add    %eax,%esp
    pop    %edx
    pop    %ecx
    leave

    sub   %eax, %esp
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
    ),
    options(att_syntax)
);