// compiler_builtins/math/libm_math/arch/x86/fma.rs

use core::arch::asm;

use super::super::super::generic;
use super::detect::{cpu_flags, get_cpu_features};
use crate::support::Round;
use crate::support::feature_detect::select_once;

10pub fn fma(x: f64, y: f64, z: f64) -> f64 {
11 select_once! {
12 sig: fn(x: f64, y: f64, z: f64) -> f64,
13 init: || {
14 let features = get_cpu_features();
15 if features.contains(cpu_flags::FMA) {
16 fma_with_fma
17 } else if features.contains(cpu_flags::FMA4) {
18 fma_with_fma4
19 } else {
20 fma_fallback as Func
21 }
22 },
23 call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
25 }
26}
27
28pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
29 select_once! {
30 sig: fn(x: f32, y: f32, z: f32) -> f32,
31 init: || {
32 let features = get_cpu_features();
33 if features.contains(cpu_flags::FMA) {
34 fmaf_with_fma
35 } else if features.contains(cpu_flags::FMA4) {
36 fmaf_with_fma4
37 } else {
38 fmaf_fallback as Func
39 }
40 },
41 call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
43 }
44}
45
46unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 {
50 debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
51
52 unsafe {
55 asm!(
56 "vfmadd213sd {x}, {y}, {z}",
57 x = inout(xmm_reg) x,
58 y = in(xmm_reg) y,
59 z = in(xmm_reg) z,
60 options(nostack, nomem, pure),
61 );
62 }
63 x
64}
65
66unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 {
70 debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
71
72 unsafe {
75 asm!(
76 "vfmadd213ss {x}, {y}, {z}",
77 x = inout(xmm_reg) x,
78 y = in(xmm_reg) y,
79 z = in(xmm_reg) z,
80 options(nostack, nomem, pure),
81 );
82 }
83 x
84}
85
86unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 {
90 debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
91
92 unsafe {
95 asm!(
96 "vfmaddsd {x}, {x}, {y}, {z}",
97 x = inout(xmm_reg) x,
98 y = in(xmm_reg) y,
99 z = in(xmm_reg) z,
100 options(nostack, nomem, pure),
101 );
102 }
103 x
104}
105
106unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 {
110 debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
111
112 unsafe {
115 asm!(
116 "vfmaddss {x}, {x}, {y}, {z}",
117 x = inout(xmm_reg) x,
118 y = in(xmm_reg) y,
119 z = in(xmm_reg) z,
120 options(nostack, nomem, pure),
121 );
122 }
123 x
124}
125
126fn fma_fallback(x: f64, y: f64, z: f64) -> f64 {
130 generic::fma_round(x, y, z, Round::Nearest).val
131}
132
133fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 {
134 generic::fma_wide_round(x, y, z, Round::Nearest).val
135}