//===-- Sized Operations --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SizedOp struct that serves as the middle end of the
// framework. It implements sized memory operations by breaking them down into
// simpler types whose availability is described in the Backend. It also
// provides a way to load and store sized chunks of memory (necessary for the
// move operation). SizedOp are the building blocks of higher order algorithms
// like HeadTail, Align or Loop.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H

#include <stddef.h> // size_t

// Whether to delegate fixed-size copies to the compiler's
// __builtin_memcpy_inline. Defined as a macro (rather than detected inline)
// so it can be overridden for testing or on toolchains lacking the builtin.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
  __has_builtin(__builtin_memcpy_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE

// Same idea as above for __builtin_memset_inline.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE                                    \
  __has_builtin(__builtin_memset_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE

namespace __llvm_libc {

// Implements memory operations (copy/move/set/compare) over exactly `Size`
// bytes by peeling off the largest chunk the `Backend` can handle natively
// and recursing on the remainder through `NextBlock`. The recursion is fully
// resolved at compile time (`if constexpr` on NEXT_SIZE), so each
// instantiation expands to a straight-line sequence of native operations.
template <typename Backend, size_t Size> struct SizedOp {
  // Total number of bytes this instantiation operates on.
  static constexpr size_t SIZE = Size;

private:
  static_assert(Backend::IS_BACKEND_TYPE);
  static_assert(SIZE > 0);
  // The native type the Backend selects for a `Size`-byte operation;
  // presumably the widest type it supports that fits — TODO confirm against
  // the Backend definition. The static_assert below only guarantees it does
  // not exceed `Size`.
  using type = typename Backend::template getNextType<Size>;
  static constexpr size_t TYPE_SIZE = sizeof(type);
  static_assert(SIZE >= TYPE_SIZE);
  // Bytes left over after one native operation; handled by a recursive
  // instantiation. NEXT_SIZE == 0 terminates the recursion.
  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
  using NextBlock = SizedOp<Backend, NEXT_SIZE>;

  // Returns whether we can use an aligned operation.
  // This is possible because the address type carries known compile-time
  // alignment information.
  template <typename T, typename AddrT> static constexpr Aligned isAligned() {
    static_assert(IsAddressType<AddrT>::Value);
    // Note: the check is against sizeof(T), i.e. the pointer must be aligned
    // to the full width of the native type, not merely alignof(T).
    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
                                                                 : Aligned::NO;
  }

  // Loads a value of the current `type` from `src`.
  // This function is responsible for extracting Temporality and Alignment from
  // the Address type.
  template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
    constexpr auto AS = isAligned<type, SrcAddrT>();
    constexpr auto TS = SrcAddrT::TEMPORALITY;
    return Backend::template load<type, TS, AS>(as<const type>(src));
  }

  // Stores a value of the current `type` to `dst`.
  // This function is responsible for extracting Temporality and Alignment from
  // the Address type.
  template <typename DstAddrT>
  static inline void nativeStore(type value, DstAddrT dst) {
    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
    constexpr auto AS = isAligned<type, DstAddrT>();
    constexpr auto TS = DstAddrT::TEMPORALITY;
    return Backend::template store<type, TS, AS>(as<type>(dst), value);
  }

  // A well aligned POD structure to store Size bytes.
  // This is used to implement the move operations.
  struct Value {
    alignas(alignof(type)) ubyte payload[Size];
  };

public:
  // Copies `Size` bytes from `src` to `dst`. Buffers must not overlap
  // (use `move` for that). When both addresses are temporal and the builtin
  // is available, the whole copy is delegated to the compiler in one call;
  // otherwise it is decomposed chunk by chunk, front to back.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src) {
    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
      // delegate optimized copy to compiler.
      __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
      return;
    }
    nativeStore(nativeLoad(src), dst);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
  }

  // Copies `Size` bytes from `src` to `dst`, correct even when the buffers
  // overlap. The ordering is deliberate: load the current chunk, recurse into
  // the tail, and only then store. Because of the recursion shape, every load
  // from `src` completes before any store to `dst` is issued, so `Size` bytes
  // are effectively held in registers/locals across the whole operation.
  // Do not reorder these statements.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src) {
    const auto payload = nativeLoad(src);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
    nativeStore(payload, dst);
  }

  // Sets `Size` bytes at `dst` to `value`, delegating to the compiler's
  // builtin when available and the destination is temporal, otherwise
  // splatting `value` across each native chunk.
  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value) {
    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
      // delegate optimized set to compiler.
      __builtin_memset_inline(dst.ptr(), value, Size);
      return;
    }
    nativeStore(Backend::template splat<type>(value), dst);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
  }

  // Returns a non-zero value iff the `Size` bytes at `src1` and `src2`
  // differ. The exact non-zero value is Backend-defined (whatever
  // `notEquals` yields, OR-ed across chunks).
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
    const uint64_t current =
        Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
    if constexpr (NEXT_SIZE > 0) {
      // In the case where we cannot handle Size with single operation (e.g.
      // Size == 3) we can either return early if current is non zero or
      // aggregate all the operations through the bitwise or operator.
      // We chose the latter to reduce branching.
      return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
                                               offsetAddr<TYPE_SIZE>(src2)));
    } else {
      return current;
    }
  }

  // Lexicographic comparison of the `Size` bytes at `src1` and `src2`:
  // returns a negative, zero, or positive value (memcmp-style), with the
  // precise non-zero magnitude left to the Backend's `threeWayCmp`.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
    const auto a = nativeLoad(src1);
    const auto b = nativeLoad(src2);
    // If we cannot handle Size as a single operation we have two choices:
    // - Either use Backend's threeWayCmp directly and return it if non
    //   zero.
    //
    //   if (int32_t res = Backend::template threeWayCmp<type>(a, b))
    //     return res;
    //
    // - Or use Backend's notEquals first and use threeWayCmp only if
    //   different, the assumption here is that notEquals is faster than
    //   threeWayCmp and that we can save cycles when the Size needs to be
    //   decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
    //
    //   if (Backend::template notEquals<type>(a, b))
    //     return Backend::template threeWayCmp<type>(a, b);
    //
    // We chose the former to reduce code bloat and branching.
    if (int32_t res = Backend::template threeWayCmp<type>(a, b))
      return res;
    if constexpr (NEXT_SIZE > 0)
      return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
                                    offsetAddr<TYPE_SIZE>(src2));
    return 0;
  }

  // Reads `Size` bytes from `src` into an aligned on-stack `Value`.
  // Together with `store` below, this lets callers materialize a chunk of
  // memory (used to implement the overlapping-safe move at a higher level).
  template <typename SrcAddrT> static Value load(SrcAddrT src) {
    Value output;
    copy(DstAddr<alignof(type)>(output.payload), src);
    return output;
  }

  // Writes the `Size` bytes held in `value` back out to `dst`.
  template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
    copy(dst, SrcAddr<alignof(type)>(value.payload));
  }
};

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H