1 //===-- Elementary operations for x86 -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 11 12 #include "src/__support/CPP/Bit.h" 13 #include "src/__support/architectures.h" 14 15 #if defined(LLVM_LIBC_ARCH_X86) 16 17 #include <stddef.h> // size_t 18 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 19 20 #ifdef __SSE2__ 21 #include <immintrin.h> 22 #endif // __SSE2__ 23 24 #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar 25 26 // Fixed-size Vector Operations 27 // ---------------------------- 28 29 namespace __llvm_libc { 30 namespace x86 { 31 32 #ifdef __SSE2__ 33 template <typename Base> struct Vector : public Base { 34 static void copy(char *__restrict dst, const char *__restrict src) { 35 Base::store(dst, Base::load(src)); 36 } 37 38 static void move(char *dst, const char *src) { 39 Base::store(dst, Base::load(src)); 40 } 41 42 static bool equals(const char *a, const char *b) { 43 return Base::not_equal_mask(Base::load(a), Base::load(b)) == 0; 44 } 45 46 static int three_way_compare(const char *a, const char *b) { 47 const auto mask = Base::not_equal_mask(Base::load(a), Base::load(b)); 48 if (!mask) 49 return 0; 50 return char_diff(a, b, mask); 51 } 52 53 static void splat_set(char *dst, const unsigned char value) { 54 Base::store(dst, Base::get_splatted_value(value)); 55 } 56 57 static int char_diff(const char *a, const char *b, uint64_t mask) { 58 const size_t diff_index = __builtin_ctzll(mask); 59 const int ca = (unsigned char)a[diff_index]; 60 const int cb = (unsigned char)b[diff_index]; 61 return ca - cb; 62 } 63 }; 64 65 struct M128 { 66 static constexpr size_t SIZE = 16; 67 using T = char __attribute__((__vector_size__(SIZE))); 68 static uint16_t mask(T value) { 69 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 70 return _mm_movemask_epi8(__llvm_libc::bit_cast<__m128i>(value)); 71 } 72 static uint16_t not_equal_mask(T a, T b) { return mask(a != b); } 73 static T load(const char *ptr) { 74 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 75 return __llvm_libc::bit_cast<T>( 76 _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr))); 77 } 78 static void store(char *ptr, T value) { 79 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 80 return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), 81 __llvm_libc::bit_cast<__m128i>(value)); 82 } 83 static T get_splatted_value(const char v) { 84 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 85 return splatted; 86 } 87 }; 88 89 using Vector128 = Vector<M128>; // 16 Bytes 90 91 #ifdef __AVX2__ 92 struct M256 { 93 static constexpr size_t SIZE = 32; 94 using T = char __attribute__((__vector_size__(SIZE))); 95 static uint32_t mask(T value) { 96 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 97 return _mm256_movemask_epi8(__llvm_libc::bit_cast<__m256i>(value)); 98 } 99 static uint32_t not_equal_mask(T a, T b) { return mask(a != b); } 100 static T load(const char *ptr) { 101 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 102 return __llvm_libc::bit_cast<T>( 103 _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr))); 104 } 105 static void store(char *ptr, T value) { 106 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 107 return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), 108 __llvm_libc::bit_cast<__m256i>(value)); 109 } 110 static T get_splatted_value(const char v) { 111 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 112 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 113 return splatted; 114 } 115 }; 116 117 using Vector256 = Vector<M256>; // 32 Bytes 118 119 #if defined(__AVX512F__) and defined(__AVX512BW__) 120 struct M512 { 121 static constexpr size_t SIZE = 64; 122 using T = char __attribute__((__vector_size__(SIZE))); 123 static uint64_t not_equal_mask(T a, T b) { 124 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 125 return _mm512_cmpneq_epi8_mask(__llvm_libc::bit_cast<__m512i>(a), 126 __llvm_libc::bit_cast<__m512i>(b)); 127 } 128 static T load(const char *ptr) { 129 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 130 return __llvm_libc::bit_cast<T>(_mm512_loadu_epi8(ptr)); 131 } 132 static void store(char *ptr, T value) { 133 // NOLINTNEXTLINE(llvmlibc-callee-namespace) 134 return _mm512_storeu_epi8(ptr, __llvm_libc::bit_cast<__m512i>(value)); 135 } 136 static T get_splatted_value(const char v) { 137 const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 138 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 139 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, 140 v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; 141 return splatted; 142 } 143 }; 144 using Vector512 = Vector<M512>; 145 146 #endif // defined(__AVX512F__) and defined(__AVX512BW__) 147 #endif // __AVX2__ 148 #endif // __SSE2__ 149 150 using _1 = __llvm_libc::scalar::_1; 151 using _2 = __llvm_libc::scalar::_2; 152 using _3 = __llvm_libc::scalar::_3; 153 using _4 = __llvm_libc::scalar::_4; 154 using _8 = __llvm_libc::scalar::_8; 155 #if defined(__AVX512F__) && defined(__AVX512BW__) 156 using _16 = __llvm_libc::x86::Vector128; 157 using _32 = __llvm_libc::x86::Vector256; 158 using _64 = __llvm_libc::x86::Vector512; 159 using _128 = __llvm_libc::Repeated<_64, 2>; 160 #elif defined(__AVX2__) 161 using _16 = __llvm_libc::x86::Vector128; 162 using _32 = __llvm_libc::x86::Vector256; 163 using _64 = __llvm_libc::Repeated<_32, 2>; 164 using _128 = __llvm_libc::Repeated<_32, 4>; 165 #elif defined(__SSE2__) 166 using _16 = __llvm_libc::x86::Vector128; 167 using _32 = __llvm_libc::Repeated<_16, 2>; 168 using _64 = __llvm_libc::Repeated<_16, 4>; 169 using _128 = __llvm_libc::Repeated<_16, 8>; 170 #else 171 using _16 = __llvm_libc::Repeated<_8, 2>; 172 using _32 = __llvm_libc::Repeated<_8, 4>; 173 using _64 = __llvm_libc::Repeated<_8, 8>; 174 using _128 = __llvm_libc::Repeated<_8, 16>; 175 #endif 176 177 struct Accelerator { 178 static void copy(char *dst, const char *src, size_t count) { 179 asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); 180 } 181 }; 182 183 } // namespace x86 184 } // namespace __llvm_libc 185 186 #endif // defined(LLVM_LIBC_ARCH_X86) 187 188 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H 189