compiler_builtins/math/libm_math/arch/x86/
fma.rs

1//! Use assembly fma if the `fma` or `fma4` feature is detected at runtime.
2
3use core::arch::asm;
4
5use super::super::super::generic;
6use super::detect::{cpu_flags, get_cpu_features};
7use crate::support::Round;
8use crate::support::feature_detect::select_once;
9
10pub fn fma(x: f64, y: f64, z: f64) -> f64 {
11    select_once! {
12        sig: fn(x: f64, y: f64, z: f64) -> f64,
13        init: || {
14            let features = get_cpu_features();
15            if features.contains(cpu_flags::FMA) {
16                fma_with_fma
17            } else if features.contains(cpu_flags::FMA4) {
18               fma_with_fma4
19            } else {
20                fma_fallback as Func
21            }
22        },
23        // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked.
24        call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
25    }
26}
27
28pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
29    select_once! {
30        sig: fn(x: f32, y: f32, z: f32) -> f32,
31        init: || {
32            let features = get_cpu_features();
33            if features.contains(cpu_flags::FMA) {
34                fmaf_with_fma
35            } else if features.contains(cpu_flags::FMA4) {
36                fmaf_with_fma4
37            } else {
38                fmaf_fallback as Func
39            }
40        },
41        // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked.
42        call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
43    }
44}
45
46/// # Safety
47///
48/// Must have +fma available.
49unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 {
50    debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
51
52    // SAFETY: fma is asserted available by precondition, which provides the instruction. No
53    // memory access or side effects.
54    unsafe {
55        asm!(
56            "vfmadd213sd {x}, {y}, {z}",
57            x = inout(xmm_reg) x,
58            y = in(xmm_reg) y,
59            z = in(xmm_reg) z,
60            options(nostack, nomem, pure),
61        );
62    }
63    x
64}
65
66/// # Safety
67///
68/// Must have +fma available.
69unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 {
70    debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
71
72    // SAFETY: fma is asserted available by precondition, which provides the instruction. No
73    // memory access or side effects.
74    unsafe {
75        asm!(
76            "vfmadd213ss {x}, {y}, {z}",
77            x = inout(xmm_reg) x,
78            y = in(xmm_reg) y,
79            z = in(xmm_reg) z,
80            options(nostack, nomem, pure),
81        );
82    }
83    x
84}
85
86/// # Safety
87///
88/// Must have +fma4 available.
89unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 {
90    debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
91
92    // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No
93    // memory access or side effects.
94    unsafe {
95        asm!(
96            "vfmaddsd {x}, {x}, {y}, {z}",
97            x = inout(xmm_reg) x,
98            y = in(xmm_reg) y,
99            z = in(xmm_reg) z,
100            options(nostack, nomem, pure),
101        );
102    }
103    x
104}
105
106/// # Safety
107///
108/// Must have +fma4 available.
109unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 {
110    debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
111
112    // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No
113    // memory access or side effects.
114    unsafe {
115        asm!(
116            "vfmaddss {x}, {x}, {y}, {z}",
117            x = inout(xmm_reg) x,
118            y = in(xmm_reg) y,
119            z = in(xmm_reg) z,
120            options(nostack, nomem, pure),
121        );
122    }
123    x
124}
125
126// FIXME: the `select_implementation` macro should handle arch implementations that want
127// to use the fallback, so we don't need to recreate the body.
128
129fn fma_fallback(x: f64, y: f64, z: f64) -> f64 {
130    generic::fma_round(x, y, z, Round::Nearest).val
131}
132
133fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 {
134    generic::fma_wide_round(x, y, z, Round::Nearest).val
135}