1 //===-- Elementary operations for x86 -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 11 12 #include "src/__support/CPP/Bit.h" 13 #include "src/__support/architectures.h" 14 15 #if defined(LLVM_LIBC_ARCH_X86) 16 17 #include <stddef.h> // size_t 18 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 19 20 #ifdef __SSE2__ 21 #include <immintrin.h> 22 #endif // __SSE2__ 23 24 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar 25 26 // Fixed-size Vector Operations 27 // ---------------------------- 28 29 namespace __llvm_libc { 30 namespace x86 { 31 32 #ifdef __SSE2__ 33 template <typename Base> struct Vector : public Base { 34 static void copy(char *__restrict dst, const char *__restrict src) { 35 Base::store(dst, Base::load(src)); 36 } 37 38 static void move(char *dst, const char *src) { 39 Base::store(dst, Base::load(src)); 40 } 41 42 static bool equals(const char *a, const char *b) { 43 return Base::not_equal_mask(Base::load(a), Base::load(b)) == 0; 44 } 45 46 static int three_way_compare(const char *a, const char *b) { 47 const auto mask = Base::not_equal_mask(Base::load(a), Base::load(b)); 48 if (!mask) 49 return 0; 50 return char_diff(a, b, mask); 51 } 52 53 static void splat_set(char *dst, const unsigned char value) { 54 Base::store(dst, Base::get_splatted_value(value)); 55 } 56 57 static int char_diff(const char *a, const char *b, uint64_t mask) { 58 const size_t diff_index = __builtin_ctzll(mask); 59 const int ca = (unsigned char)a[diff_index]; 60 const int cb = (unsigned char)b[diff_index]; 61 return ca - cb; 62 } 63 }; 64 65 struct M128 { 66 static constexpr size_t SIZE = 16; 67 using T = char __attribute__((__vector_size__(SIZE))); 68 static uint16_t mask(T value) { 69 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 70 return static_cast<uint16_t>( 71 _mm_movemask_epi8(__llvm_libc::bit_cast<__m128i>(value))); 72 } 73 static uint16_t not_equal_mask(T a, T b) { return mask(a != b); } 74 static T load(const char *ptr) { 75 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 76 return __llvm_libc::bit_cast<T>( 77 _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr))); 78 } 79 static void store(char *ptr, T value) { 80 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 81 return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), 82 __llvm_libc::bit_cast<__m128i>(value)); 83 } 84 static T get_splatted_value(const char v) { 85 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 86 return splatted; 87 } 88 }; 89 90 using Vector128 = Vector<M128>; // 16 Bytes 91 92 #ifdef __AVX2__ 93 struct M256 { 94 static constexpr size_t SIZE = 32; 95 using T = char __attribute__((__vector_size__(SIZE))); 96 static uint32_t mask(T value) { 97 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 98 return _mm256_movemask_epi8(__llvm_libc::bit_cast<__m256i>(value)); 99 } 100 static uint32_t not_equal_mask(T a, T b) { return mask(a != b); } 101 static T load(const char *ptr) { 102 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 103 return __llvm_libc::bit_cast<T>( 104 _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr))); 105 } 106 static void store(char *ptr, T value) { 107 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 108 return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), 109 __llvm_libc::bit_cast<__m256i>(value)); 110 } 111 static T get_splatted_value(const char v) { 112 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 113 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 114 return splatted; 115 } 116 }; 117 118 using Vector256 = Vector<M256>; // 32 Bytes 119 120 #if defined(__AVX512F__) and defined(__AVX512BW__) 121 struct M512 { 122 static constexpr size_t SIZE = 64; 123 using T = char __attribute__((__vector_size__(SIZE))); 124 static uint64_t not_equal_mask(T a, T b) { 125 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 126 return _mm512_cmpneq_epi8_mask(__llvm_libc::bit_cast<__m512i>(a), 127 __llvm_libc::bit_cast<__m512i>(b)); 128 } 129 static T load(const char *ptr) { 130 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 131 return __llvm_libc::bit_cast<T>(_mm512_loadu_epi8(ptr)); 132 } 133 static void store(char *ptr, T value) { 134 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 135 return _mm512_storeu_epi8(ptr, __llvm_libc::bit_cast<__m512i>(value)); 136 } 137 static T get_splatted_value(const char v) { 138 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 139 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 140 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 141 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 142 return splatted; 143 } 144 }; 145 using Vector512 = Vector<M512>; 146 147 #endif // defined(__AVX512F__) and defined(__AVX512BW__) 148 #endif // __AVX2__ 149 #endif // __SSE2__ 150 151 using _1 = __llvm_libc::scalar::_1; 152 using _2 = __llvm_libc::scalar::_2; 153 using _3 = __llvm_libc::scalar::_3; 154 using _4 = __llvm_libc::scalar::_4; 155 using _8 = __llvm_libc::scalar::_8; 156 #if defined(__AVX512F__) && defined(__AVX512BW__) 157 using _16 = __llvm_libc::x86::Vector128; 158 using _32 = __llvm_libc::x86::Vector256; 159 using _64 = __llvm_libc::x86::Vector512; 160 using _128 = __llvm_libc::Repeated<_64, 2>; 161 #elif defined(__AVX2__) 162 using _16 = __llvm_libc::x86::Vector128; 163 using _32 = __llvm_libc::x86::Vector256; 164 using _64 = __llvm_libc::Repeated<_32, 2>; 165 using _128 = __llvm_libc::Repeated<_32, 4>; 166 #elif defined(__SSE2__) 167 using _16 = __llvm_libc::x86::Vector128; 168 using _32 = __llvm_libc::Repeated<_16, 2>; 169 using _64 = __llvm_libc::Repeated<_16, 4>; 170 using _128 = __llvm_libc::Repeated<_16, 8>; 171 #else 172 using _16 = __llvm_libc::Repeated<_8, 2>; 173 using _32 = __llvm_libc::Repeated<_8, 4>; 174 using _64 = __llvm_libc::Repeated<_8, 8>; 175 using _128 = __llvm_libc::Repeated<_8, 16>; 176 #endif 177 178 struct Accelerator { 179 static void copy(char *dst, const char *src, size_t count) { 180 asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); 181 } 182 }; 183 184 } // namespace x86 185 } // namespace __llvm_libc 186 187 #endif // defined(LLVM_LIBC_ARCH_X86) 188 189 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 190