//===-- Elementary operations for x86 -------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H #include "src/__support/architectures.h" #if defined(LLVM_LIBC_ARCH_X86) #include // size_t #include // uint8_t, uint16_t, uint32_t, uint64_t #ifdef __SSE2__ #include #endif // __SSE2__ #include "src/string/memory_utils/elements.h" // __llvm_libc::scalar // Fixed-size Vector Operations // ---------------------------- namespace __llvm_libc { namespace x86 { #ifdef __SSE2__ template struct Vector : public Base { static void Copy(char *__restrict dst, const char *__restrict src) { Base::Store(dst, Base::Load(src)); } static void Move(char *dst, const char *src) { Base::Store(dst, Base::Load(src)); } static bool Equals(const char *a, const char *b) { return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0; } static int ThreeWayCompare(const char *a, const char *b) { const auto mask = Base::NotEqualMask(Base::Load(a), Base::Load(b)); if (!mask) return 0; return CharDiff(a, b, mask); } static void SplatSet(char *dst, const unsigned char value) { Base::Store(dst, Base::GetSplattedValue(value)); } static int CharDiff(const char *a, const char *b, uint64_t mask) { const size_t diff_index = __builtin_ctzll(mask); const int ca = (unsigned char)a[diff_index]; const int cb = (unsigned char)b[diff_index]; return ca - cb; } }; struct M128 { static constexpr size_t kSize = 16; using T = char __attribute__((__vector_size__(kSize))); static uint16_t mask(T value) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm_movemask_epi8(value); } static uint16_t NotEqualMask(T a, T b) { return mask(a != b); } static T Load(const char *ptr) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)); } static void Store(char *ptr, T value) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), value); } static T GetSplattedValue(const char v) { const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; return splatted; } }; using Vector128 = Vector; // 16 Bytes #ifdef __AVX2__ struct M256 { static constexpr size_t kSize = 32; using T = char __attribute__((__vector_size__(kSize))); static uint32_t mask(T value) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm256_movemask_epi8(value); } static uint32_t NotEqualMask(T a, T b) { return mask(a != b); } static T Load(const char *ptr) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)); } static void Store(char *ptr, T value) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value); } static T GetSplattedValue(const char v) { const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; return splatted; } }; using Vector256 = Vector; // 32 Bytes #if defined(__AVX512F__) and defined(__AVX512BW__) struct M512 { static constexpr size_t kSize = 64; using T = char __attribute__((__vector_size__(kSize))); static uint64_t NotEqualMask(T a, T b) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm512_cmpneq_epi8_mask(a, b); } static T Load(const char *ptr) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm512_loadu_epi8(ptr); } static void Store(char *ptr, T value) { // NOLINTNEXTLINE(llvmlibc-callee-namespace) return _mm512_storeu_epi8(ptr, value); } static T GetSplattedValue(const char v) { const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; return splatted; } }; using Vector512 = Vector; #endif // defined(__AVX512F__) and defined(__AVX512BW__) #endif // __AVX2__ #endif // __SSE2__ using _1 = __llvm_libc::scalar::_1; using _2 = __llvm_libc::scalar::_2; using _3 = __llvm_libc::scalar::_3; using _4 = __llvm_libc::scalar::_4; using _8 = __llvm_libc::scalar::_8; #if defined(__AVX512F__) && defined(__AVX512BW__) using _16 = __llvm_libc::x86::Vector128; using _32 = __llvm_libc::x86::Vector256; using _64 = __llvm_libc::x86::Vector512; using _128 = __llvm_libc::Repeated<_64, 2>; #elif defined(__AVX2__) using _16 = __llvm_libc::x86::Vector128; using _32 = __llvm_libc::x86::Vector256; using _64 = __llvm_libc::Repeated<_32, 2>; using _128 = __llvm_libc::Repeated<_32, 4>; #elif defined(__SSE2__) using _16 = __llvm_libc::x86::Vector128; using _32 = __llvm_libc::Repeated<_16, 2>; using _64 = __llvm_libc::Repeated<_16, 4>; using _128 = __llvm_libc::Repeated<_16, 8>; #else using _16 = __llvm_libc::Repeated<_8, 2>; using _32 = __llvm_libc::Repeated<_8, 4>; using _64 = __llvm_libc::Repeated<_8, 8>; using _128 = __llvm_libc::Repeated<_8, 16>; #endif struct Accelerator { static void Copy(char *dst, const char *src, size_t count) { asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); } }; } // namespace x86 } // namespace __llvm_libc #endif // defined(LLVM_LIBC_ARCH_X86) #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H