1 //===-- Elementary operations for x86 -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 11 12 #include "src/__support/CPP/Bit.h" 13 #include "src/__support/architectures.h" 14 15 #if defined(LLVM_LIBC_ARCH_X86) 16 17 #include <stddef.h> // size_t 18 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 19 20 #ifdef __SSE2__ 21 #include <immintrin.h> 22 #endif // __SSE2__ 23 24 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar 25 26 // Fixed-size Vector Operations 27 // ---------------------------- 28 29 namespace __llvm_libc { 30 namespace x86 { 31 32 #ifdef __SSE2__ 33 template <typename Base> struct Vector : public Base { copyVector34 static void copy(char *__restrict dst, const char *__restrict src) { 35 Base::store(dst, Base::load(src)); 36 } 37 moveVector38 static void move(char *dst, const char *src) { 39 Base::store(dst, Base::load(src)); 40 } 41 equalsVector42 static bool equals(const char *a, const char *b) { 43 return Base::not_equal_mask(Base::load(a), Base::load(b)) == 0; 44 } 45 three_way_compareVector46 static int three_way_compare(const char *a, const char *b) { 47 const auto mask = Base::not_equal_mask(Base::load(a), Base::load(b)); 48 if (!mask) 49 return 0; 50 return char_diff(a, b, mask); 51 } 52 splat_setVector53 static void splat_set(char *dst, const unsigned char value) { 54 Base::store(dst, Base::get_splatted_value(value)); 55 } 56 char_diffVector57 static int char_diff(const char *a, const char *b, uint64_t mask) { 58 const size_t diff_index = __builtin_ctzll(mask); 59 const int ca = (unsigned char)a[diff_index]; 60 const int cb = (unsigned char)b[diff_index]; 61 return ca - cb; 62 } 63 }; 64 65 struct M128 { 66 static constexpr size_t SIZE = 16; 67 using T = char __attribute__((__vector_size__(SIZE))); maskM12868 static uint16_t mask(T value) { 69 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 70 return static_cast<uint16_t>( 71 _mm_movemask_epi8(__llvm_libc::bit_cast<__m128i>(value))); 72 } not_equal_maskM12873 static uint16_t not_equal_mask(T a, T b) { return mask(a != b); } loadM12874 static T load(const char *ptr) { 75 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 76 return __llvm_libc::bit_cast<T>( 77 _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr))); 78 } storeM12879 static void store(char *ptr, T value) { 80 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 81 return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), 82 __llvm_libc::bit_cast<__m128i>(value)); 83 } get_splatted_valueM12884 static T get_splatted_value(const char v) { 85 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 86 return splatted; 87 } 88 }; 89 90 using Vector128 = Vector<M128>; // 16 Bytes 91 92 #ifdef __AVX2__ 93 struct M256 { 94 static constexpr size_t SIZE = 32; 95 using T = char __attribute__((__vector_size__(SIZE))); maskM25696 static uint32_t mask(T value) { 97 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 98 return _mm256_movemask_epi8(__llvm_libc::bit_cast<__m256i>(value)); 99 } not_equal_maskM256100 static uint32_t not_equal_mask(T a, T b) { return mask(a != b); } loadM256101 static T load(const char *ptr) { 102 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 103 return __llvm_libc::bit_cast<T>( 104 _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr))); 105 } storeM256106 static void store(char *ptr, T value) { 107 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 108 return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), 109 __llvm_libc::bit_cast<__m256i>(value)); 110 } get_splatted_valueM256111 static T get_splatted_value(const char v) { 112 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 113 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 114 return splatted; 115 } 116 }; 117 118 using Vector256 = Vector<M256>; // 32 Bytes 119 120 #if defined(__AVX512F__) and defined(__AVX512BW__) 121 struct M512 { 122 static constexpr size_t SIZE = 64; 123 using T = char __attribute__((__vector_size__(SIZE))); not_equal_maskM512124 static uint64_t not_equal_mask(T a, T b) { 125 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 126 return _mm512_cmpneq_epi8_mask(__llvm_libc::bit_cast<__m512i>(a), 127 __llvm_libc::bit_cast<__m512i>(b)); 128 } loadM512129 static T load(const char *ptr) { 130 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 131 return __llvm_libc::bit_cast<T>(_mm512_loadu_epi8(ptr)); 132 } storeM512133 static void store(char *ptr, T value) { 134 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 135 return _mm512_storeu_epi8(ptr, __llvm_libc::bit_cast<__m512i>(value)); 136 } get_splatted_valueM512137 static T get_splatted_value(const char v) { 138 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 139 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 140 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 141 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 142 return splatted; 143 } 144 }; 145 using Vector512 = Vector<M512>; 146 147 #endif // defined(__AVX512F__) and defined(__AVX512BW__) 148 #endif // __AVX2__ 149 #endif // __SSE2__ 150 151 using _1 = __llvm_libc::scalar::_1; 152 using _2 = __llvm_libc::scalar::_2; 153 using _3 = __llvm_libc::scalar::_3; 154 using _4 = __llvm_libc::scalar::_4; 155 using _8 = __llvm_libc::scalar::_8; 156 #if defined(__AVX512F__) && defined(__AVX512BW__) 157 using _16 = __llvm_libc::x86::Vector128; 158 using _32 = __llvm_libc::x86::Vector256; 159 using _64 = __llvm_libc::x86::Vector512; 160 using _128 = __llvm_libc::Repeated<_64, 2>; 161 #elif defined(__AVX2__) 162 using _16 = __llvm_libc::x86::Vector128; 163 using _32 = __llvm_libc::x86::Vector256; 164 using _64 = __llvm_libc::Repeated<_32, 2>; 165 using _128 = __llvm_libc::Repeated<_32, 4>; 166 #elif defined(__SSE2__) 167 using _16 = __llvm_libc::x86::Vector128; 168 using _32 = __llvm_libc::Repeated<_16, 2>; 169 using _64 = __llvm_libc::Repeated<_16, 4>; 170 using _128 = __llvm_libc::Repeated<_16, 8>; 171 #else 172 using _16 = __llvm_libc::Repeated<_8, 2>; 173 using _32 = __llvm_libc::Repeated<_8, 4>; 174 using _64 = __llvm_libc::Repeated<_8, 8>; 175 using _128 = __llvm_libc::Repeated<_8, 16>; 176 #endif 177 178 struct Accelerator { copyAccelerator179 static void copy(char *dst, const char *src, size_t count) { 180 asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); 181 } 182 }; 183 184 } // namespace x86 185 } // namespace __llvm_libc 186 187 #endif // defined(LLVM_LIBC_ARCH_X86) 188 189 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 190