1 //===-- Elementary operations for x86 -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 11 12 #include "src/__support/architectures.h" 13 14 #if defined(LLVM_LIBC_ARCH_X86) 15 16 #include <stddef.h> // size_t 17 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 18 19 #ifdef __SSE2__ 20 #include <immintrin.h> 21 #endif // __SSE2__ 22 23 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar 24 25 // Fixed-size Vector Operations 26 // ---------------------------- 27 28 namespace __llvm_libc { 29 namespace x86 { 30 31 #ifdef __SSE2__ 32 template <typename Base> struct Vector : public Base { 33 static void Copy(char *__restrict dst, const char *__restrict src) { 34 Base::Store(dst, Base::Load(src)); 35 } 36 37 static void Move(char *dst, const char *src) { 38 Base::Store(dst, Base::Load(src)); 39 } 40 41 static bool Equals(const char *a, const char *b) { 42 return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0; 43 } 44 45 static int ThreeWayCompare(const char *a, const char *b) { 46 const auto mask = Base::NotEqualMask(Base::Load(a), Base::Load(b)); 47 if (!mask) 48 return 0; 49 return CharDiff(a, b, mask); 50 } 51 52 static void SplatSet(char *dst, const unsigned char value) { 53 Base::Store(dst, Base::GetSplattedValue(value)); 54 } 55 56 static int CharDiff(const char *a, const char *b, uint64_t mask) { 57 const size_t diff_index = __builtin_ctzll(mask); 58 const int ca = (unsigned char)a[diff_index]; 59 const int cb = (unsigned char)b[diff_index]; 60 return ca - cb; 61 } 62 }; 63 64 struct M128 { 65 static constexpr size_t kSize = 16; 66 using T = char __attribute__((__vector_size__(kSize))); 67 static uint16_t mask(T value) { 68 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 69 return _mm_movemask_epi8(value); 70 } 71 static uint16_t NotEqualMask(T a, T b) { return mask(a != b); } 72 static T Load(const char *ptr) { 73 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 74 return _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)); 75 } 76 static void Store(char *ptr, T value) { 77 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 78 return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), value); 79 } 80 static T GetSplattedValue(const char v) { 81 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 82 return splatted; 83 } 84 }; 85 86 using Vector128 = Vector<M128>; // 16 Bytes 87 88 #ifdef __AVX2__ 89 struct M256 { 90 static constexpr size_t kSize = 32; 91 using T = char __attribute__((__vector_size__(kSize))); 92 static uint32_t mask(T value) { 93 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 94 return _mm256_movemask_epi8(value); 95 } 96 static uint32_t NotEqualMask(T a, T b) { return mask(a != b); } 97 static T Load(const char *ptr) { 98 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 99 return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)); 100 } 101 static void Store(char *ptr, T value) { 102 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 103 return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value); 104 } 105 static T GetSplattedValue(const char v) { 106 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 107 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 108 return splatted; 109 } 110 }; 111 112 using Vector256 = Vector<M256>; // 32 Bytes 113 114 #if defined(__AVX512F__) and defined(__AVX512BW__) 115 struct M512 { 116 static constexpr size_t kSize = 64; 117 using T = char __attribute__((__vector_size__(kSize))); 118 static uint64_t NotEqualMask(T a, T b) { 119 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 120 return _mm512_cmpneq_epi8_mask(a, b); 121 } 122 static T Load(const char *ptr) { 123 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 124 return _mm512_loadu_epi8(ptr); 125 } 126 static void Store(char *ptr, T value) { 127 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 128 return _mm512_storeu_epi8(ptr, value); 129 } 130 static T GetSplattedValue(const char v) { 131 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 132 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 133 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 134 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 135 return splatted; 136 } 137 }; 138 using Vector512 = Vector<M512>; 139 140 #endif // defined(__AVX512F__) and defined(__AVX512BW__) 141 #endif // __AVX2__ 142 #endif // __SSE2__ 143 144 using _1 = __llvm_libc::scalar::_1; 145 using _2 = __llvm_libc::scalar::_2; 146 using _3 = __llvm_libc::scalar::_3; 147 using _4 = __llvm_libc::scalar::_4; 148 using _8 = __llvm_libc::scalar::_8; 149 #if defined(__AVX512F__) && defined(__AVX512BW__) 150 using _16 = __llvm_libc::x86::Vector128; 151 using _32 = __llvm_libc::x86::Vector256; 152 using _64 = __llvm_libc::x86::Vector512; 153 using _128 = __llvm_libc::Repeated<_64, 2>; 154 #elif defined(__AVX2__) 155 using _16 = __llvm_libc::x86::Vector128; 156 using _32 = __llvm_libc::x86::Vector256; 157 using _64 = __llvm_libc::Repeated<_32, 2>; 158 using _128 = __llvm_libc::Repeated<_32, 4>; 159 #elif defined(__SSE2__) 160 using _16 = __llvm_libc::x86::Vector128; 161 using _32 = __llvm_libc::Repeated<_16, 2>; 162 using _64 = __llvm_libc::Repeated<_16, 4>; 163 using _128 = __llvm_libc::Repeated<_16, 8>; 164 #else 165 using _16 = __llvm_libc::Repeated<_8, 2>; 166 using _32 = __llvm_libc::Repeated<_8, 4>; 167 using _64 = __llvm_libc::Repeated<_8, 8>; 168 using _128 = __llvm_libc::Repeated<_8, 16>; 169 #endif 170 171 struct Accelerator { 172 static void Copy(char *dst, const char *src, size_t count) { 173 asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); 174 } 175 }; 176 177 } // namespace x86 178 } // namespace __llvm_libc 179 180 #endif // defined(LLVM_LIBC_ARCH_X86) 181 182 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 183