1 //===-- Elementary operations for x86 -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
11 
12 #include "src/__support/architectures.h"
13 
14 #if defined(LLVM_LIBC_ARCH_X86)
15 
16 #include <stddef.h> // size_t
17 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
18 
19 #ifdef __SSE2__
20 #include <immintrin.h>
21 #endif //  __SSE2__
22 
23 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar
24 
25 // Fixed-size Vector Operations
26 // ----------------------------
27 
28 namespace __llvm_libc {
29 namespace x86 {
30 
31 #ifdef __SSE2__
// Generic fixed-size operations built on top of the primitives supplied by
// `Base`: `Load`, `Store`, `NotEqualMask` and `GetSplattedValue`.
// Every operation processes exactly one `Base` vector worth of bytes.
template <typename Base> struct Vector : public Base {
  // Reads one vector from `src` and writes it to `dst`.
  static void Copy(char *dst, const char *src) {
    const auto loaded = Base::Load(src);
    Base::Store(dst, loaded);
  }

  // Returns true when the vectors read from `a` and `b` have no differing
  // byte, i.e. when the not-equal mask is empty.
  static bool Equals(const char *a, const char *b) {
    const auto lhs = Base::Load(a);
    const auto rhs = Base::Load(b);
    return !Base::NotEqualMask(lhs, rhs);
  }

  // Returns 0 when both vectors are identical, otherwise the difference of
  // the first mismatching bytes (see `CharDiff`).
  static int ThreeWayCompare(const char *a, const char *b) {
    const auto lhs = Base::Load(a);
    const auto rhs = Base::Load(b);
    const auto differing = Base::NotEqualMask(lhs, rhs);
    return differing ? CharDiff(a, b, differing) : 0;
  }

  // Fills one vector worth of bytes at `dst` with `value`.
  static void SplatSet(char *dst, const unsigned char value) {
    const auto pattern = Base::GetSplattedValue(value);
    Base::Store(dst, pattern);
  }

  // Returns `a[i] - b[i]`, both read as unsigned char, where `i` is the index
  // of the lowest set bit of `mask`.
  // `mask` must be non-zero: counting trailing zeros of 0 is undefined.
  static int CharDiff(const char *a, const char *b, uint64_t mask) {
    const int offset = __builtin_ctzll(mask);
    return static_cast<unsigned char>(a[offset]) -
           static_cast<unsigned char>(b[offset]);
  }
};
59 
// 16-byte primitives backed by SSE2 intrinsics.
//
// `T` is a generic `char` vector so that element-wise operators (e.g. `!=`)
// are available. It is bit-cast explicitly to and from `__m128i` at every
// intrinsic boundary instead of relying on implicit (lax) vector conversions,
// which are not accepted by every compiler configuration.
struct M128 {
  static constexpr size_t kSize = 16;
  using T = char __attribute__((__vector_size__(kSize)));

  // Packs the most significant bit of each of the 16 bytes of `value` into
  // the low 16 bits of the result (byte i -> bit i).
  static uint16_t mask(T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    const int bits = _mm_movemask_epi8(reinterpret_cast<__m128i>(value));
    // Only the low 16 bits can be set, the narrowing is lossless.
    return static_cast<uint16_t>(bits);
  }

  // Returns a mask with bit i set iff byte i of `a` differs from byte i of `b`.
  static uint16_t NotEqualMask(T a, T b) { return mask(a != b); }

  // Loads 16 bytes from `ptr`; no alignment is required.
  static T Load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(
        _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)));
  }

  // Stores the 16 bytes of `value` at `ptr`; no alignment is required.
  static void Store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr),
                     reinterpret_cast<__m128i>(value));
  }

  // Returns a vector whose 16 bytes are all equal to `v`.
  static T GetSplattedValue(const char v) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm_set1_epi8(v));
  }
};
81 
// The generic `Vector` operations instantiated with the `M128` primitives.
82 using Vector128 = Vector<M128>; // 16 Bytes
83 
84 #ifdef __AVX2__
// 32-byte primitives backed by AVX / AVX2 intrinsics.
//
// `T` is a generic `char` vector so that element-wise operators (e.g. `!=`)
// are available. It is bit-cast explicitly to and from `__m256i` at every
// intrinsic boundary instead of relying on implicit (lax) vector conversions,
// which are not accepted by every compiler configuration.
struct M256 {
  static constexpr size_t kSize = 32;
  using T = char __attribute__((__vector_size__(kSize)));

  // Packs the most significant bit of each of the 32 bytes of `value` into
  // the result (byte i -> bit i).
  static uint32_t mask(T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    const int bits = _mm256_movemask_epi8(reinterpret_cast<__m256i>(value));
    // Bit 31 may be set, so the intrinsic's `int` result can be negative;
    // reinterpret it as the unsigned 32-bit pattern.
    return static_cast<uint32_t>(bits);
  }

  // Returns a mask with bit i set iff byte i of `a` differs from byte i of `b`.
  static uint32_t NotEqualMask(T a, T b) { return mask(a != b); }

  // Loads 32 bytes from `ptr`; no alignment is required. The unaligned
  // `__m256i_u` pointee type is used so that no over-aligned pointer is
  // formed from the byte pointer.
  static T Load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(
        _mm256_loadu_si256(reinterpret_cast<__m256i_u const *>(ptr)));
  }

  // Stores the 32 bytes of `value` at `ptr`; no alignment is required.
  static void Store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm256_storeu_si256(reinterpret_cast<__m256i_u *>(ptr),
                        reinterpret_cast<__m256i>(value));
  }

  // Returns a vector whose 32 bytes are all equal to `v`.
  static T GetSplattedValue(const char v) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm256_set1_epi8(v));
  }
};
107 
// The generic `Vector` operations instantiated with the `M256` primitives.
108 using Vector256 = Vector<M256>; // 32 Bytes
109 
110 #if defined(__AVX512F__) and defined(__AVX512BW__)
// 64-byte primitives backed by AVX-512 (F + BW) intrinsics.
//
// `T` is a generic `char` vector. It is bit-cast explicitly to and from
// `__m512i` at every intrinsic boundary instead of relying on implicit (lax)
// vector conversions, which are not accepted by every compiler configuration.
struct M512 {
  static constexpr size_t kSize = 64;
  using T = char __attribute__((__vector_size__(kSize)));

  // Returns a mask with bit i set iff byte i of `a` differs from byte i of `b`.
  static uint64_t NotEqualMask(T a, T b) {
    const __m512i lhs = reinterpret_cast<__m512i>(a);
    const __m512i rhs = reinterpret_cast<__m512i>(b);
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return static_cast<uint64_t>(_mm512_cmpneq_epi8_mask(lhs, rhs));
  }

  // Loads 64 bytes from `ptr`; no alignment is required.
  static T Load(const char *ptr) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm512_loadu_epi8(ptr));
  }

  // Stores the 64 bytes of `value` at `ptr`; no alignment is required.
  static void Store(char *ptr, T value) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    _mm512_storeu_epi8(ptr, reinterpret_cast<__m512i>(value));
  }

  // Returns a vector whose 64 bytes are all equal to `v`.
  static T GetSplattedValue(const char v) {
    // NOLINTNEXTLINE(llvmlibc-callee-namespace)
    return reinterpret_cast<T>(_mm512_set1_epi8(v));
  }
};
// The generic `Vector` operations instantiated with the `M512` primitives.
134 using Vector512 = Vector<M512>; // 64 Bytes
135 
136 #endif // defined(__AVX512F__) and defined(__AVX512BW__)
137 #endif // __AVX2__
138 #endif // __SSE2__
139 
// Element aliases for x86: `_N` names the element type used for N bytes.
//
// Sizes 1, 2, 3, 4 and 8 always reuse the scalar implementations, whatever
// vector extensions are enabled.
140 using _1 = __llvm_libc::scalar::_1;
141 using _2 = __llvm_libc::scalar::_2;
142 using _3 = __llvm_libc::scalar::_3;
143 using _4 = __llvm_libc::scalar::_4;
144 using _8 = __llvm_libc::scalar::_8;
// Sizes 16 to 128 are selected at compile time. The branches are tested from
// the widest vector extension to the narrowest, and the first match wins.
// Sizes wider than the widest available element are built with `Repeated`.
//
// AVX-512 (F and BW): native 16, 32 and 64 byte vectors; 128 is 2 x 64.
145 #if defined(__AVX512F__) && defined(__AVX512BW__)
146 using _16 = __llvm_libc::x86::Vector128;
147 using _32 = __llvm_libc::x86::Vector256;
148 using _64 = __llvm_libc::x86::Vector512;
149 using _128 = __llvm_libc::Repeated<_64, 2>;
// AVX2: native 16 and 32 byte vectors; 64 is 2 x 32 and 128 is 4 x 32.
150 #elif defined(__AVX2__)
151 using _16 = __llvm_libc::x86::Vector128;
152 using _32 = __llvm_libc::x86::Vector256;
153 using _64 = __llvm_libc::Repeated<_32, 2>;
154 using _128 = __llvm_libc::Repeated<_32, 4>;
// SSE2: native 16 byte vector; 32, 64 and 128 are 2, 4 and 8 x 16.
155 #elif defined(__SSE2__)
156 using _16 = __llvm_libc::x86::Vector128;
157 using _32 = __llvm_libc::Repeated<_16, 2>;
158 using _64 = __llvm_libc::Repeated<_16, 4>;
159 using _128 = __llvm_libc::Repeated<_16, 8>;
// No vector extension: everything is a repetition of the scalar `_8`.
160 #else
161 using _16 = __llvm_libc::Repeated<_8, 2>;
162 using _32 = __llvm_libc::Repeated<_8, 4>;
163 using _64 = __llvm_libc::Repeated<_8, 8>;
164 using _128 = __llvm_libc::Repeated<_8, 16>;
165 #endif
166 
// Runtime-sized operations implemented with dedicated x86 instructions.
struct Accelerator {
  // Copies `count` bytes from `src` to `dst` with a single `rep movsb`.
  static void Copy(char *dst, const char *src, size_t count) {
    __asm__ __volatile__(
        "rep movsb"
        // Read-write operands: `dst` in the DI register ("D"), `src` in the
        // SI register ("S") and `count` in the CX register ("c").
        : "+D"(dst), "+S"(src), "+c"(count)
        // No input-only operands.
        :
        // The instruction writes to memory behind the compiler's back.
        : "memory");
  }
};
172 
173 } // namespace x86
174 } // namespace __llvm_libc
175 
176 #endif // defined(LLVM_LIBC_ARCH_X86)
177 
178 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
179