1 //===-- Elementary operations for x86 -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
11 
12 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
13     defined(_M_X64)
14 
15 #include <stddef.h> // size_t
16 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
17 
18 #ifdef __SSE2__
19 #include <immintrin.h>
20 #endif //  __SSE2__
21 
22 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar
23 
24 // Fixed-size Vector Operations
25 // ----------------------------
26 
27 namespace __llvm_libc {
28 namespace x86 {
29 
30 #ifdef __SSE2__
// Adapts a fixed-size register description `Base` to the element interface
// (Copy / Equals / ThreeWayCompare / SplatSet). Every operation works on
// exactly one register worth of bytes, as defined by `Base`.
//
// `Base` is expected to provide:
//   T Load(const char *)           - reads one register from memory.
//   void Store(char *, T)          - writes one register to memory.
//   <unsigned> NotEqualMask(T, T)  - bit i is set iff byte i differs.
//   T GetSplattedValue(char)       - register with every byte set to a value.
template <typename Base> struct Vector : public Base {
  // Copies one register worth of bytes from `src` to `dst`.
  static void Copy(char *dst, const char *src) {
    Base::Store(dst, Base::Load(src));
  }

  // Returns true iff the register-sized byte ranges at `a` and `b` match.
  static bool Equals(const char *a, const char *b) {
    return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0;
  }

  // Returns 0 when both ranges match, otherwise the difference between the
  // first mismatching bytes, each read as `unsigned char`.
  static int ThreeWayCompare(const char *a, const char *b) {
    // CharDiff maps an empty mask to 0, so no separate check is needed here.
    return CharDiff(a, b, Base::NotEqualMask(Base::Load(a), Base::Load(b)));
  }

  // Fills one register worth of bytes at `dst` with `value`.
  static void SplatSet(char *dst, const unsigned char value) {
    Base::Store(dst, Base::GetSplattedValue(value));
  }

  // Given a mask whose bit i flags a mismatch at byte i, returns
  // `a[i] - b[i]` (bytes read as `unsigned char`) for the lowest set bit, or
  // 0 when `mask` is empty.
  static int CharDiff(const char *a, const char *b, uint64_t mask) {
    // __builtin_ctzll has an undefined result for 0; an empty mask simply
    // means that there is no difference to report.
    if (mask == 0)
      return 0;
    const size_t diff_index = static_cast<size_t>(__builtin_ctzll(mask));
    const int ca = static_cast<unsigned char>(a[diff_index]);
    const int cb = static_cast<unsigned char>(b[diff_index]);
    return ca - cb;
  }
};
58 
// Primitives for one 128-bit (16 byte) register, built on SSE2 intrinsics.
//
// The register is modelled as a vector of 16 `char` (`T`) whereas the
// intrinsics work on `__m128i`. Every conversion between the two is spelled
// out so that the code does not depend on the compiler accepting implicit
// ("lax") vector conversions.
struct M128 {
  static constexpr size_t kSize = 16;
  using T = char __attribute__((__vector_size__(kSize)));

  // Packs the most significant bit of each of the 16 bytes of `value` into
  // the low 16 bits of the result (bit i comes from byte i).
  static uint16_t mask(T value) {
    const __m128i bits = reinterpret_cast<__m128i>(value);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return static_cast<uint16_t>(_mm_movemask_epi8(bits));
  }

  // Returns a mask in which bit i is set iff byte i of `a` and `b` differ.
  static uint16_t NotEqualMask(T a, T b) {
    // The lane-wise comparison yields all-ones bytes where the inputs differ;
    // its element type may differ from `T`'s, hence the cast.
    return mask(reinterpret_cast<T>(a != b));
  }

  // Reads 16 bytes from `ptr`; no alignment is required.
  static T Load(const char *ptr) {
    const auto *vec_ptr = reinterpret_cast<__m128i_u const *>(ptr);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm_loadu_si128(vec_ptr));
  }

  // Writes the 16 bytes of `value` to `ptr`; no alignment is required.
  static void Store(char *ptr, T value) {
    auto *vec_ptr = reinterpret_cast<__m128i_u *>(ptr);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm_storeu_si128(vec_ptr, reinterpret_cast<__m128i>(value));
  }

  // Returns a register whose 16 bytes all equal `v`.
  static T GetSplattedValue(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};
80 
// Element operations (see Vector) over one 128-bit register.
using Vector128 = Vector<M128>; // 16 Bytes
82 
83 #ifdef __AVX2__
// Primitives for one 256-bit (32 byte) register, built on AVX2 intrinsics.
//
// The register is modelled as a vector of 32 `char` (`T`) whereas the
// intrinsics work on `__m256i`. Every conversion between the two is spelled
// out so that the code does not depend on the compiler accepting implicit
// ("lax") vector conversions.
struct M256 {
  static constexpr size_t kSize = 32;
  using T = char __attribute__((__vector_size__(kSize)));

  // Packs the most significant bit of each of the 32 bytes of `value` into
  // the result (bit i comes from byte i).
  static uint32_t mask(T value) {
    const __m256i bits = reinterpret_cast<__m256i>(value);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return static_cast<uint32_t>(_mm256_movemask_epi8(bits));
  }

  // Returns a mask in which bit i is set iff byte i of `a` and `b` differ.
  static uint32_t NotEqualMask(T a, T b) {
    // The lane-wise comparison yields all-ones bytes where the inputs differ;
    // its element type may differ from `T`'s, hence the cast.
    return mask(reinterpret_cast<T>(a != b));
  }

  // Reads 32 bytes from `ptr`; no alignment is required.
  static T Load(const char *ptr) {
    const auto *vec_ptr = reinterpret_cast<__m256i const *>(ptr);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm256_loadu_si256(vec_ptr));
  }

  // Writes the 32 bytes of `value` to `ptr`; no alignment is required.
  static void Store(char *ptr, T value) {
    auto *vec_ptr = reinterpret_cast<__m256i *>(ptr);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm256_storeu_si256(vec_ptr, reinterpret_cast<__m256i>(value));
  }

  // Returns a register whose 32 bytes all equal `v`.
  static T GetSplattedValue(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};
106 
// Element operations (see Vector) over one 256-bit register.
using Vector256 = Vector<M256>; // 32 Bytes
108 
#if defined(__AVX512F__) && defined(__AVX512BW__)
// Primitives for one 512-bit (64 byte) register, built on AVX-512 (F + BW)
// intrinsics.
//
// The register is modelled as a vector of 64 `char` (`T`) whereas the
// intrinsics work on `__m512i`. Every conversion between the two is spelled
// out so that the code does not depend on the compiler accepting implicit
// ("lax") vector conversions.
struct M512 {
  static constexpr size_t kSize = 64;
  using T = char __attribute__((__vector_size__(kSize)));

  // Returns a mask in which bit i is set iff byte i of `a` and `b` differ.
  // The comparison intrinsic produces the bit mask directly, so no separate
  // `mask` helper is needed for this width.
  static uint64_t NotEqualMask(T a, T b) {
    const __m512i lhs = reinterpret_cast<__m512i>(a);
    const __m512i rhs = reinterpret_cast<__m512i>(b);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return _mm512_cmpneq_epi8_mask(lhs, rhs);
  }

  // Reads 64 bytes from `ptr`; no alignment is required.
  static T Load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm512_loadu_epi8(ptr));
  }

  // Writes the 64 bytes of `value` to `ptr`; no alignment is required.
  static void Store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm512_storeu_epi8(ptr, reinterpret_cast<__m512i>(value));
  }

  // Returns a register whose 64 bytes all equal `v`.
  static T GetSplattedValue(const char v) {
    const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
                        v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
    return splatted;
  }
};

// Element operations (see Vector) over one 512-bit register.
using Vector512 = Vector<M512>; // 64 Bytes

#endif // defined(__AVX512F__) && defined(__AVX512BW__)
136 #endif // __AVX2__
137 #endif // __SSE2__
138 
// Size-named element types for x86: `_N` is the type used to process N bytes
// at once.
//
// Sizes of up to 8 bytes always reuse the scalar implementations.
using _1 = __llvm_libc::scalar::_1;
using _2 = __llvm_libc::scalar::_2;
using _3 = __llvm_libc::scalar::_3;
using _4 = __llvm_libc::scalar::_4;
using _8 = __llvm_libc::scalar::_8;
// From 16 bytes upwards, the widest vector type made available by the enabled
// instruction sets is used directly, and the remaining sizes are assembled
// with `Repeated`.
// NOTE(review): `Repeated` is declared outside this file; presumably
// `Repeated<E, N>` applies element `E` N times back to back - confirm in
// elements.h.
#if defined(__AVX512F__) && defined(__AVX512BW__)
// AVX-512 (F + BW): native 16, 32 and 64 byte vectors.
using _16 = __llvm_libc::x86::Vector128;
using _32 = __llvm_libc::x86::Vector256;
using _64 = __llvm_libc::x86::Vector512;
using _128 = __llvm_libc::Repeated<_64, 2>;
#elif defined(__AVX2__)
// AVX2: native 16 and 32 byte vectors.
using _16 = __llvm_libc::x86::Vector128;
using _32 = __llvm_libc::x86::Vector256;
using _64 = __llvm_libc::Repeated<_32, 2>;
using _128 = __llvm_libc::Repeated<_32, 4>;
#elif defined(__SSE2__)
// SSE2: native 16 byte vectors only.
using _16 = __llvm_libc::x86::Vector128;
using _32 = __llvm_libc::Repeated<_16, 2>;
using _64 = __llvm_libc::Repeated<_16, 4>;
using _128 = __llvm_libc::Repeated<_16, 8>;
#else
// No vector support: everything is built from the 8 byte scalar type.
using _16 = __llvm_libc::Repeated<_8, 2>;
using _32 = __llvm_libc::Repeated<_8, 4>;
using _64 = __llvm_libc::Repeated<_8, 8>;
using _128 = __llvm_libc::Repeated<_8, 16>;
#endif
165 
// Operations implemented with x86 string instructions.
struct Accelerator {
  // Copies `count` bytes from `src` to `dst` using a single `rep movsb`.
  static void Copy(char *dst, const char *src, size_t count) {
    // All three operands are read and updated by the instruction, so each is
    // a read-write operand pinned to its register: destination in DI ("D"),
    // source in SI ("S") and remaining byte count in CX ("c"). The "memory"
    // clobber tells the compiler that memory is accessed behind its back.
    __asm__ __volatile__("rep movsb"
                         : [to] "+D"(dst), [from] "+S"(src), [len] "+c"(count)
                         : /* no input-only operands */
                         : "memory");
  }
};
171 
172 } // namespace x86
173 } // namespace __llvm_libc
174 
175 #endif // defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||
176        // defined(_M_X64)
177 
178 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
179