1 //===-- Elementary operations for x86 -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
11 
12 #include "src/__support/CPP/Bit.h"
13 #include "src/__support/architectures.h"
14 
15 #if defined(LLVM_LIBC_ARCH_X86)
16 
17 #include <stddef.h> // size_t
18 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
19 
20 #ifdef __SSE2__
21 #include <immintrin.h>
22 #endif //  __SSE2__
23 
24 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar
25 
26 // Fixed-size Vector Operations
27 // ----------------------------
28 
29 namespace __llvm_libc {
30 namespace x86 {
31 
32 #ifdef __SSE2__
// Elementary operations on a single fixed-size vector.
// `Base` supplies the vector primitives used below: `load`, `store`,
// `not_equal_mask` and `get_splatted_value`. Each operation here touches
// exactly one `Base` vector worth of bytes.
template <typename Base> struct Vector : public Base {
  // Loads one vector from `src` and stores it to `dst`. Both pointers are
  // declared `__restrict`, i.e. the caller promises they do not alias.
  static void copy(char *__restrict dst, const char *__restrict src) {
    const auto loaded = Base::load(src);
    Base::store(dst, loaded);
  }

  // Same as `copy` but without the no-alias promise. The whole vector is
  // loaded before anything is stored.
  static void move(char *dst, const char *src) {
    const auto loaded = Base::load(src);
    Base::store(dst, loaded);
  }

  // Returns true when the vectors at `a` and `b` hold the same bytes.
  static bool equals(const char *a, const char *b) {
    const auto difference = Base::not_equal_mask(Base::load(a), Base::load(b));
    return difference == 0;
  }

  // Returns 0 when the vectors at `a` and `b` are identical, otherwise the
  // difference between the first pair of differing bytes (see `char_diff`).
  static int three_way_compare(const char *a, const char *b) {
    const auto difference = Base::not_equal_mask(Base::load(a), Base::load(b));
    return difference ? char_diff(a, b, difference) : 0;
  }

  // Fills one vector at `dst` with copies of `value`.
  static void splat_set(char *dst, const unsigned char value) {
    const auto pattern = Base::get_splatted_value(value);
    Base::store(dst, pattern);
  }

  // Returns `a[i] - b[i]`, both read as unsigned char, where `i` is the index
  // of the lowest set bit of `mask`. `mask` must be non-zero: the result of
  // `__builtin_ctzll(0)` is undefined.
  static int char_diff(const char *a, const char *b, uint64_t mask) {
    const size_t first_mismatch = __builtin_ctzll(mask);
    const int lhs = static_cast<unsigned char>(a[first_mismatch]);
    const int rhs = static_cast<unsigned char>(b[first_mismatch]);
    return lhs - rhs;
  }
};
64 
65 struct M128 {
66   static constexpr size_t SIZE = 16;
67   using T = char __attribute__((__vector_size__(SIZE)));
maskM12868   static uint16_t mask(T value) {
69     // NOLINTNEXTLINE(llvmlibc-callee-namespace)
70     return static_cast<uint16_t>(
71         _mm_movemask_epi8(__llvm_libc::bit_cast<__m128i>(value)));
72   }
not_equal_maskM12873   static uint16_t not_equal_mask(T a, T b) { return mask(a != b); }
loadM12874   static T load(const char *ptr) {
75     // NOLINTNEXTLINE(llvmlibc-callee-namespace)
76     return __llvm_libc::bit_cast<T>(
77         _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)));
78   }
storeM12879   static void store(char *ptr, T value) {
80     // NOLINTNEXTLINE(llvmlibc-callee-namespace)
81     return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr),
82                             __llvm_libc::bit_cast<__m128i>(value));
83   }
get_splatted_valueM12884   static T get_splatted_value(const char v) {
85     const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
86     return splatted;
87   }
88 };
89 
90 using Vector128 = Vector<M128>; // 16 Bytes
91 
92 #ifdef __AVX2__
93 struct M256 {
94   static constexpr size_t SIZE = 32;
95   using T = char __attribute__((__vector_size__(SIZE)));
maskM25696   static uint32_t mask(T value) {
97     // NOLINTNEXTLINE(llvmlibc-callee-namespace)
98     return _mm256_movemask_epi8(__llvm_libc::bit_cast<__m256i>(value));
99   }
not_equal_maskM256100   static uint32_t not_equal_mask(T a, T b) { return mask(a != b); }
loadM256101   static T load(const char *ptr) {
102     // NOLINTNEXTLINE(llvmlibc-callee-namespace)
103     return __llvm_libc::bit_cast<T>(
104         _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)));
105   }
storeM256106   static void store(char *ptr, T value) {
107     // NOLINTNEXTLINE(llvmlibc-callee-namespace)
108     return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr),
109                                __llvm_libc::bit_cast<__m256i>(value));
110   }
get_splatted_valueM256111   static T get_splatted_value(const char v) {
112     const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
113                         v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
114     return splatted;
115   }
116 };
117 
118 using Vector256 = Vector<M256>; // 32 Bytes
119 
#if defined(__AVX512F__) && defined(__AVX512BW__)
// 512-bit vector primitives built on AVX-512 (F + BW) intrinsics: 64 bytes
// per operation.
struct M512 {
  static constexpr size_t SIZE = 64;
  using T = char __attribute__((__vector_size__(SIZE)));

  // Returns a mask whose bit i is set when byte i of `a` and `b` differ.
  static uint64_t not_equal_mask(T a, T b) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm512_cmpneq_epi8_mask(__llvm_libc::bit_cast<__m512i>(a),
                                   __llvm_libc::bit_cast<__m512i>(b));
  }

  // Reads 64 bytes starting at `ptr`; no alignment is required.
  static T load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return __llvm_libc::bit_cast<T>(_mm512_loadu_epi8(ptr));
  }

  // Writes the 64 bytes of `value` starting at `ptr`; no alignment is
  // required.
  static void store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm512_storeu_epi8(ptr, __llvm_libc::bit_cast<__m512i>(value));
  }

  // Returns a vector whose 64 bytes all equal `v`.
  static T get_splatted_value(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};
using Vector512 = Vector<M512>; // 64 Bytes

#endif // defined(__AVX512F__) && defined(__AVX512BW__)
148 #endif // __AVX2__
149 #endif // __SSE2__
150 
151 using _1 = __llvm_libc::scalar::_1;
152 using _2 = __llvm_libc::scalar::_2;
153 using _3 = __llvm_libc::scalar::_3;
154 using _4 = __llvm_libc::scalar::_4;
155 using _8 = __llvm_libc::scalar::_8;
156 #if defined(__AVX512F__) && defined(__AVX512BW__)
157 using _16 = __llvm_libc::x86::Vector128;
158 using _32 = __llvm_libc::x86::Vector256;
159 using _64 = __llvm_libc::x86::Vector512;
160 using _128 = __llvm_libc::Repeated<_64, 2>;
161 #elif defined(__AVX2__)
162 using _16 = __llvm_libc::x86::Vector128;
163 using _32 = __llvm_libc::x86::Vector256;
164 using _64 = __llvm_libc::Repeated<_32, 2>;
165 using _128 = __llvm_libc::Repeated<_32, 4>;
166 #elif defined(__SSE2__)
167 using _16 = __llvm_libc::x86::Vector128;
168 using _32 = __llvm_libc::Repeated<_16, 2>;
169 using _64 = __llvm_libc::Repeated<_16, 4>;
170 using _128 = __llvm_libc::Repeated<_16, 8>;
171 #else
172 using _16 = __llvm_libc::Repeated<_8, 2>;
173 using _32 = __llvm_libc::Repeated<_8, 4>;
174 using _64 = __llvm_libc::Repeated<_8, 8>;
175 using _128 = __llvm_libc::Repeated<_8, 16>;
176 #endif
177 
// Variable-size copy implemented with the x86 string-move instruction.
struct Accelerator {
  // Copies `count` bytes from `src` to `dst` with a single `rep movsb`.
  static void copy(char *dst, const char *src, size_t count) {
    // `rep movsb` reads its destination, source and byte count from the
    // DI, SI and CX registers and updates all three as it runs, so each is a
    // read-write ("+") operand. The "memory" clobber tells the compiler that
    // memory is read and written behind its back.
    asm volatile("rep movsb"
                 : [dst] "+D"(dst), [src] "+S"(src), [count] "+c"(count)
                 :
                 : "memory");
  }
};
183 
184 } // namespace x86
185 } // namespace __llvm_libc
186 
187 #endif // defined(LLVM_LIBC_ARCH_X86)
188 
189 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
190