//===-- Elementary operations for x86 -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H

#include "src/__support/CPP/Bit.h"
#include "src/__support/architectures.h"

#if defined(LLVM_LIBC_ARCH_X86)

#include <stddef.h> // size_t
#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t

#ifdef __SSE2__
#include <immintrin.h>
#endif // __SSE2__

#include "src/string/memory_utils/elements.h" // __llvm_libc::scalar

// Fixed-size Vector Operations
// ----------------------------

namespace __llvm_libc {
namespace x86 {

#ifdef __SSE2__
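// Adapts a platform-specific vector description (Base) to the element
// interface used by the higher-level memory routines. Every operation is
// expressed through Base::load, Base::store, Base::not_equal_mask and
// Base::get_splatted_value. Note that move() loads the whole element before
// storing it, so source and destination may overlap within a single element.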
template <typename Base> struct Vector : public Base {
  static void copy(char *__restrict dst, const char *__restrict src) {
    Base::store(dst, Base::load(src));
  }

  static void move(char *dst, const char *src) {
    Base::store(dst, Base::load(src));
  }

  static bool equals(const char *a, const char *b) {
    return Base::not_equal_mask(Base::load(a), Base::load(b)) == 0;
  }

  static int three_way_compare(const char *a, const char *b) {
    const auto mask = Base::not_equal_mask(Base::load(a), Base::load(b));
    if (!mask)
      return 0;
    return char_diff(a, b, mask);
  }

  static void splat_set(char *dst, const unsigned char value) {
    Base::store(dst, Base::get_splatted_value(value));
  }

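  // Returns the signed difference between the first mismatching bytes of 'a'
  // and 'b'. 'mask' has one bit set per mismatching byte, so the index of the
  // first difference is the number of trailing zeros in the mask.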
  static int char_diff(const char *a, const char *b, uint64_t mask) {
    const size_t diff_index = __builtin_ctzll(mask);
    const int ca = (unsigned char)a[diff_index];
    const int cb = (unsigned char)b[diff_index];
    return ca - cb;
  }
};

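// 16-byte SSE2 vector. Inequality is detected lane by lane and condensed into
// a 16-bit mask with _mm_movemask_epi8.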
struct M128 {
  static constexpr size_t SIZE = 16;
  using T = char __attribute__((__vector_size__(SIZE)));
  static uint16_t mask(T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm_movemask_epi8(__llvm_libc::bit_cast<__m128i>(value));
  }
  static uint16_t not_equal_mask(T a, T b) { return mask(a != b); }
  static T load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return __llvm_libc::bit_cast<T>(
        _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)));
  }
  static void store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr),
                            __llvm_libc::bit_cast<__m128i>(value));
  }
  static T get_splatted_value(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};

using Vector128 = Vector<M128>; // 16 Bytes

#ifdef __AVX2__
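// 32-byte AVX2 vector, following the same movemask-based strategy as M128.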
struct M256 {
  static constexpr size_t SIZE = 32;
  using T = char __attribute__((__vector_size__(SIZE)));
  static uint32_t mask(T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm256_movemask_epi8(__llvm_libc::bit_cast<__m256i>(value));
  }
  static uint32_t not_equal_mask(T a, T b) { return mask(a != b); }
  static T load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return __llvm_libc::bit_cast<T>(
        _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)));
  }
  static void store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr),
                               __llvm_libc::bit_cast<__m256i>(value));
  }
  static T get_splatted_value(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};

using Vector256 = Vector<M256>; // 32 Bytes

#if defined(__AVX512F__) && defined(__AVX512BW__)
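// 64-byte AVX512BW vector. _mm512_cmpneq_epi8_mask compares all bytes and
// produces the 64-bit mismatch mask directly, so no separate movemask step is
// needed.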
struct M512 {
  static constexpr size_t SIZE = 64;
  using T = char __attribute__((__vector_size__(SIZE)));
  static uint64_t not_equal_mask(T a, T b) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm512_cmpneq_epi8_mask(__llvm_libc::bit_cast<__m512i>(a),
                                   __llvm_libc::bit_cast<__m512i>(b));
  }
  static T load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return __llvm_libc::bit_cast<T>(_mm512_loadu_epi8(ptr));
  }
  static void store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm512_storeu_epi8(ptr, __llvm_libc::bit_cast<__m512i>(value));
  }
  static T get_splatted_value(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};
using Vector512 = Vector<M512>;

#endif // defined(__AVX512F__) && defined(__AVX512BW__)
#endif // __AVX2__
#endif // __SSE2__

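// Size aliases used to compose the memory operations. Sizes up to the widest
// vector available at compile time map to a native vector; larger sizes are
// built by repeating that vector, and without SSE2 everything falls back to
// repeated scalar elements.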
using _1 = __llvm_libc::scalar::_1;
using _2 = __llvm_libc::scalar::_2;
using _3 = __llvm_libc::scalar::_3;
using _4 = __llvm_libc::scalar::_4;
using _8 = __llvm_libc::scalar::_8;
#if defined(__AVX512F__) && defined(__AVX512BW__)
using _16 = __llvm_libc::x86::Vector128;
using _32 = __llvm_libc::x86::Vector256;
using _64 = __llvm_libc::x86::Vector512;
using _128 = __llvm_libc::Repeated<_64, 2>;
#elif defined(__AVX2__)
using _16 = __llvm_libc::x86::Vector128;
using _32 = __llvm_libc::x86::Vector256;
using _64 = __llvm_libc::Repeated<_32, 2>;
using _128 = __llvm_libc::Repeated<_32, 4>;
#elif defined(__SSE2__)
using _16 = __llvm_libc::x86::Vector128;
using _32 = __llvm_libc::Repeated<_16, 2>;
using _64 = __llvm_libc::Repeated<_16, 4>;
using _128 = __llvm_libc::Repeated<_16, 8>;
#else
using _16 = __llvm_libc::Repeated<_8, 2>;
using _32 = __llvm_libc::Repeated<_8, 4>;
using _64 = __llvm_libc::Repeated<_8, 8>;
using _128 = __llvm_libc::Repeated<_8, 16>;
#endif

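// Copies 'count' bytes with the 'rep movsb' string instruction, which many
// recent x86 processors accelerate for bulk copies (ERMSB).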
struct Accelerator {
  static void copy(char *dst, const char *src, size_t count) {
    asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory");
  }
};

} // namespace x86
} // namespace __llvm_libc

#endif // defined(LLVM_LIBC_ARCH_X86)

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H