1use core::arch::asm;
20use core::{intrinsics, mem};
21
/// Copies `count` bytes from `src` to `dest` in ascending address order with a single
/// repeated byte-move instruction.
///
/// This variant is compiled only when the `ermsb` target feature is enabled.
///
/// # Safety
///
/// `src` must be valid for reads of `count` bytes and `dest` must be valid for writes of
/// `count` bytes. Bytes are moved one at a time from the lowest address upwards.
#[cfg(target_feature = "ermsb")]
#[inline(always)]
pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
    // The string instruction consumes all three registers, so their final values are
    // discarded.
    asm!(
        "repe movsb (%rsi), (%rdi)",
        inout("rsi") src => _,
        inout("rdi") dest => _,
        inout("rcx") count => _,
        options(att_syntax, nostack, preserves_flags)
    );
}
34
35#[inline(always)]
36#[cfg(not(target_feature = "ermsb"))]
37pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) {
38 let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count);
39 asm!(
41 "rep movsb",
42 inout("ecx") pre_byte_count => _,
43 inout("rdi") dest => dest,
44 inout("rsi") src => src,
45 options(att_syntax, nostack, preserves_flags)
46 );
47 asm!(
48 "rep movsq",
49 inout("rcx") qword_count => _,
50 inout("rdi") dest => dest,
51 inout("rsi") src => src,
52 options(att_syntax, nostack, preserves_flags)
53 );
54 asm!(
55 "rep movsb",
56 inout("ecx") byte_count => _,
57 inout("rdi") dest => _,
58 inout("rsi") src => _,
59 options(att_syntax, nostack, preserves_flags)
60 );
61}
62
63#[inline(always)]
64pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
65 let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count);
66 asm!(
68 "std",
69 "rep movsb",
70 "sub $7, %rsi",
71 "sub $7, %rdi",
72 "mov {qword_count}, %rcx",
73 "rep movsq",
74 "test {pre_byte_count:e}, {pre_byte_count:e}",
75 "add $7, %rsi",
76 "add $7, %rdi",
77 "mov {pre_byte_count:e}, %ecx",
78 "rep movsb",
79 "cld",
80 pre_byte_count = in(reg) pre_byte_count,
81 qword_count = in(reg) qword_count,
82 inout("ecx") byte_count => _,
83 inout("rdi") dest.add(count - 1) => _,
84 inout("rsi") src.add(count - 1) => _,
85 options(att_syntax, nostack, preserves_flags)
87 );
88}
89
/// Fills `count` bytes starting at `dest` with the value `c` using a single repeated
/// byte-store instruction.
///
/// This variant is compiled only when the `ermsb` target feature is enabled.
///
/// # Safety
///
/// `dest` must be valid for writes of `count` bytes.
#[cfg(target_feature = "ermsb")]
#[inline(always)]
pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
    // `rcx` and `rdi` are consumed by the string instruction; their final values (and
    // that of `al`) are discarded.
    asm!(
        "repe stosb %al, (%rdi)",
        inout("al") c => _,
        inout("rdi") dest => _,
        inout("rcx") count => _,
        options(att_syntax, nostack, preserves_flags)
    );
}
102
103#[inline(always)]
104#[cfg(not(target_feature = "ermsb"))]
105pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) {
106 let c = c as u64 * 0x0101_0101_0101_0101;
107 let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count);
108 asm!(
110 "rep stosb",
111 inout("ecx") pre_byte_count => _,
112 inout("rdi") dest => dest,
113 in("rax") c,
114 options(att_syntax, nostack, preserves_flags)
115 );
116 asm!(
117 "rep stosq",
118 inout("rcx") qword_count => _,
119 inout("rdi") dest => dest,
120 in("rax") c,
121 options(att_syntax, nostack, preserves_flags)
122 );
123 asm!(
124 "rep stosb",
125 inout("ecx") byte_count => _,
126 inout("rdi") dest => _,
127 in("rax") c,
128 options(att_syntax, nostack, preserves_flags)
129 );
130}
131
132#[inline(always)]
133pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
134 #[inline(always)]
135 unsafe fn cmp<T, U, F>(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32
136 where
137 T: Clone + Copy + Eq,
138 U: Clone + Copy + Eq,
139 F: FnOnce(*const U, *const U, usize) -> i32,
140 {
141 const { assert!(mem::size_of::<T>() != 0) };
143
144 let end = a.add(intrinsics::unchecked_div(n, mem::size_of::<T>()));
145 while a != end {
146 if a.read_unaligned() != b.read_unaligned() {
147 return f(a.cast(), b.cast(), mem::size_of::<T>());
148 }
149 a = a.add(1);
150 b = b.add(1);
151 }
152 f(
153 a.cast(),
154 b.cast(),
155 intrinsics::unchecked_rem(n, mem::size_of::<T>()),
156 )
157 }
158 let c1 = |mut a: *const u8, mut b: *const u8, n| {
159 for _ in 0..n {
160 if a.read() != b.read() {
161 return i32::from(a.read()) - i32::from(b.read());
162 }
163 a = a.add(1);
164 b = b.add(1);
165 }
166 0
167 };
168 let c2 = |a: *const u16, b, n| cmp(a, b, n, c1);
169 let c4 = |a: *const u32, b, n| cmp(a, b, n, c2);
170 let c8 = |a: *const u64, b, n| cmp(a, b, n, c4);
171 let c16 = |a: *const u128, b, n| cmp(a, b, n, c8);
172 c16(a.cast(), b.cast(), n)
173}
174
/// Returns the number of bytes before the first zero byte of the string at `s`.
///
/// This variant is compiled only when the `sse2` target feature is enabled. The first
/// four bytes are checked one at a time; after that the string is scanned in 16-byte
/// aligned chunks, each compared against zero in one step.
///
/// The chunk loads may cover bytes outside the string (before the current position in
/// the first chunk, after the terminator in the last one). They are issued through
/// inline assembly rather than through Rust pointer reads.
///
/// # Safety
///
/// `s` must point to a sequence of bytes that contains a zero terminator, and every
/// byte up to and including that terminator must be readable.
#[cfg(target_feature = "sse2")]
#[inline(always)]
pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
    use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8};

    /// Loads the 16 bytes at the 16-byte-aligned address `chunk`.
    #[inline(always)]
    unsafe fn load_aligned(chunk: *const __m128i) -> __m128i {
        let value;
        asm!(
            "movdqa ({addr}), {dest}",
            addr = in(reg) chunk,
            dest = out(xmm_reg) value,
            options(att_syntax, nostack),
        );
        value
    }

    // Short strings: look at the first four bytes individually.
    let mut len = 0;
    while len < 4 {
        if *s == 0 {
            return len;
        }
        len += 1;
        s = s.add(1);
    }

    // Round the address down to a 16-byte boundary; `skip` is how many bytes of the
    // first chunk lie before the current position.
    let addr = s as usize;
    let skip = addr & 15;
    let mut chunk = (addr - skip) as *const __m128i;
    let zero = _mm_set1_epi8(0);

    // First chunk: shift the per-byte match mask so bit 0 corresponds to `s`.
    let first = _mm_movemask_epi8(_mm_cmpeq_epi8(load_aligned(chunk), zero)) >> skip;
    if first != 0 {
        return len + first.trailing_zeros() as usize;
    }
    len += 16 - skip;

    // Remaining chunks are fully inside the scanned range.
    loop {
        chunk = chunk.add(1);
        let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(load_aligned(chunk), zero)) as u32;
        if mask != 0 {
            return len + mask.trailing_zeros() as usize;
        }
        len += 16;
    }
}
251
/// Returns the number of bytes before the first zero byte of the string at `s`.
///
/// This variant is compiled when the `sse2` target feature is not enabled. Bytes are
/// checked one at a time until the address is 8-byte aligned; from there the string is
/// scanned in aligned 8-byte words.
///
/// The word loads may cover bytes after the terminator. They are issued through inline
/// assembly rather than through Rust pointer reads.
///
/// # Safety
///
/// `s` must point to a sequence of bytes that contains a zero terminator, and every
/// byte up to and including that terminator must be readable.
#[cfg(not(target_feature = "sse2"))]
#[inline(always)]
pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
    const LOW_BITS: u64 = 0x0101010101010101;
    const HIGH_BITS: u64 = 0x8080808080808080;

    /// Loads the 8 bytes at `word`.
    #[inline(always)]
    unsafe fn load_word(word: *const u64) -> u64 {
        let value: u64;
        asm!(
            "mov ({addr}), {dest}",
            addr = in(reg) word,
            dest = out(reg) value,
            options(att_syntax, nostack),
        );
        value
    }

    // Byte-wise prologue up to the next 8-byte boundary.
    let mut len = 0;
    while s as usize & 7 != 0 {
        if *s == 0 {
            return len;
        }
        len += 1;
        s = s.add(1);
    }

    let mut word = s as *const u64;
    loop {
        let mut bytes = load_word(word);
        // Non-zero exactly when at least one byte of the word is zero.
        if bytes.wrapping_sub(LOW_BITS) & !bytes & HIGH_BITS != 0 {
            // Count the non-zero bytes in front of the terminator, lowest byte first.
            while bytes & 255 != 0 {
                bytes >>= 8;
                len += 1;
            }
            return len;
        }
        len += 8;
        word = word.add(1);
    }
}
304
/// Splits a `count`-byte operation on `dest` into three parts for the `rep` string
/// instructions.
///
/// Returns `(pre_byte_count, qword_count, byte_count)`:
/// * `pre_byte_count` — bytes needed to bring `dest` up to an 8-byte boundary, capped at
///   `count`;
/// * `qword_count` — number of whole 8-byte words in what remains;
/// * `byte_count` — bytes left over after those words (0 to 7).
///
/// The three parts always add up to `count`.
fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) {
    // Distance from `dest` to the next multiple of 8 (zero when already aligned).
    let to_boundary = (dest as usize).wrapping_neg() & 0b111;
    let pre_byte_count = if to_boundary < count { to_boundary } else { count };
    count -= pre_byte_count;
    (pre_byte_count, count / 8, count % 8)
}