1 //===-- Elementary operations for x86 -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 11 12 #include <stddef.h> // size_t 13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 14 15 #ifdef __SSE2__ 16 #include <immintrin.h> 17 #endif // __SSE2__ 18 19 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar 20 21 // Fixed-size Vector Operations 22 // ---------------------------- 23 24 namespace __llvm_libc { 25 namespace x86 { 26 27 #ifdef __SSE2__ 28 template <typename Base> struct Vector : public Base { 29 static void Copy(char *dst, const char *src) { 30 Base::Store(dst, Base::Load(src)); 31 } 32 33 static bool Equals(const char *a, const char *b) { 34 return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0; 35 } 36 37 static int ThreeWayCompare(const char *a, const char *b) { 38 const auto mask = Base::NotEqualMask(Base::Load(a), Base::Load(b)); 39 if (!mask) 40 return 0; 41 return CharDiff(a, b, mask); 42 } 43 44 static void SplatSet(char *dst, const unsigned char value) { 45 Base::Store(dst, Base::GetSplattedValue(value)); 46 } 47 48 static int CharDiff(const char *a, const char *b, uint64_t mask) { 49 const size_t diff_index = __builtin_ctzll(mask); 50 const int ca = (unsigned char)a[diff_index]; 51 const int cb = (unsigned char)b[diff_index]; 52 return ca - cb; 53 } 54 }; 55 56 struct M128 { 57 static constexpr size_t kSize = 16; 58 using T = char __attribute__((__vector_size__(kSize))); 59 static uint16_t mask(T value) { 60 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 61 return _mm_movemask_epi8(value); 62 } 63 static uint16_t NotEqualMask(T a, T b) { return mask(a != b); } 64 static T Load(const char *ptr) { 65 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 66 return _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)); 67 } 68 static void Store(char *ptr, T value) { 69 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 70 return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), value); 71 } 72 static T GetSplattedValue(const char v) { 73 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 74 return splatted; 75 } 76 }; 77 78 using Vector128 = Vector<M128>; // 16 Bytes 79 80 #ifdef __AVX2__ 81 struct M256 { 82 static constexpr size_t kSize = 32; 83 using T = char __attribute__((__vector_size__(kSize))); 84 static uint32_t mask(T value) { 85 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 86 return _mm256_movemask_epi8(value); 87 } 88 static uint32_t NotEqualMask(T a, T b) { return mask(a != b); } 89 static T Load(const char *ptr) { 90 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 91 return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)); 92 } 93 static void Store(char *ptr, T value) { 94 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 95 return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value); 96 } 97 static T GetSplattedValue(const char v) { 98 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 99 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 100 return splatted; 101 } 102 }; 103 104 using Vector256 = Vector<M256>; // 32 Bytes 105 106 #if defined(__AVX512F__) and defined(__AVX512BW__) 107 struct M512 { 108 static constexpr size_t kSize = 64; 109 using T = char __attribute__((__vector_size__(kSize))); 110 static uint64_t NotEqualMask(T a, T b) { 111 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 112 return _mm512_cmpneq_epi8_mask(a, b); 113 } 114 static T Load(const char *ptr) { 115 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 116 return _mm512_loadu_epi8(ptr); 117 } 118 static void Store(char *ptr, T value) { 119 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 120 return _mm512_storeu_epi8(ptr, value); 121 } 122 static T GetSplattedValue(const char v) { 123 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 124 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 125 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 126 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 127 return splatted; 128 } 129 }; 130 using Vector512 = Vector<M512>; 131 132 #endif // defined(__AVX512F__) and defined(__AVX512BW__) 133 #endif // __AVX2__ 134 #endif // __SSE2__ 135 136 using _1 = __llvm_libc::scalar::_1; 137 using _2 = __llvm_libc::scalar::_2; 138 using _3 = __llvm_libc::scalar::_3; 139 using _4 = __llvm_libc::scalar::_4; 140 using _8 = __llvm_libc::scalar::_8; 141 #if defined(__AVX512F__) && defined(__AVX512BW__) 142 using _16 = __llvm_libc::x86::Vector128; 143 using _32 = __llvm_libc::x86::Vector256; 144 using _64 = __llvm_libc::x86::Vector512; 145 using _128 = __llvm_libc::Repeated<_64, 2>; 146 #elif defined(__AVX2__) 147 using _16 = __llvm_libc::x86::Vector128; 148 using _32 = __llvm_libc::x86::Vector256; 149 using _64 = __llvm_libc::Repeated<_32, 2>; 150 using _128 = __llvm_libc::Repeated<_32, 4>; 151 #elif defined(__SSE2__) 152 using _16 = __llvm_libc::x86::Vector128; 153 using _32 = __llvm_libc::Repeated<_16, 2>; 154 using _64 = __llvm_libc::Repeated<_16, 4>; 155 using _128 = __llvm_libc::Repeated<_16, 8>; 156 #else 157 using _16 = __llvm_libc::Repeated<_8, 2>; 158 using _32 = __llvm_libc::Repeated<_8, 4>; 159 using _64 = __llvm_libc::Repeated<_8, 8>; 160 using _128 = __llvm_libc::Repeated<_8, 16>; 161 #endif 162 163 } // namespace x86 164 } // namespace __llvm_libc 165 166 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 167