1 //===-- Sized Operations --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the SizedOp struct that serves as the middle end of the 10 // framework. It implements sized memory operations by breaking them down into 11 // simpler types whose availability is described in the Backend. It also 12 // provides a way to load and store sized chunks of memory (necessary for the 13 // move operation). SizedOp are the building blocks of higher order algorithms 14 // like HeadTail, Align or Loop. 15 //===----------------------------------------------------------------------===// 16 17 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H 18 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H 19 20 #include <stddef.h> // size_t 21 22 #ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 23 #define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE \ 24 __has_builtin(__builtin_memcpy_inline) 25 #endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 26 27 #ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE 28 #define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE \ 29 __has_builtin(__builtin_memset_inline) 30 #endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE 31 32 namespace __llvm_libc { 33 34 template <typename Backend, size_t Size> struct SizedOp { 35 static constexpr size_t SIZE = Size; 36 // Define instantiations of SizedOp as a fixed size operation. 37 // i.e. an operation that is composable by types in algorithm.h 38 static constexpr bool IS_FIXED_SIZE = true; 39 40 private: 41 static_assert(Backend::IS_BACKEND_TYPE); 42 static_assert(SIZE > 0); 43 using type = typename Backend::template getNextType<Size>; 44 static constexpr size_t TYPE_SIZE = sizeof(type); 45 static_assert(SIZE >= TYPE_SIZE); 46 static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE; 47 using NextBlock = SizedOp<Backend, NEXT_SIZE>; 48 49 // Returns whether we can use an aligned operations. 50 // This is possible because the address type carries known compile-time 51 // alignment informations. isAlignedSizedOp52 template <typename T, typename AddrT> static constexpr Aligned isAligned() { 53 static_assert(IsAddressType<AddrT>::Value); 54 return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES 55 : Aligned::NO; 56 } 57 58 // Loads a value of the current `type` from `src`. 59 // This function is responsible for extracting Temporality and Alignment from 60 // the Address type. nativeLoadSizedOp61 template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) { 62 static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ); 63 constexpr auto AS = isAligned<type, SrcAddrT>(); 64 constexpr auto TS = SrcAddrT::TEMPORALITY; 65 return Backend::template load<type, TS, AS>(as<const type>(src)); 66 } 67 68 // Stores a value of the current `type` to `dst`. 69 // This function is responsible for extracting Temporality and Alignment from 70 // the Address type. 71 template <typename DstAddrT> nativeStoreSizedOp72 static inline void nativeStore(type value, DstAddrT dst) { 73 static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE); 74 constexpr auto AS = isAligned<type, DstAddrT>(); 75 constexpr auto TS = DstAddrT::TEMPORALITY; 76 return Backend::template store<type, TS, AS>(as<type>(dst), value); 77 } 78 79 // A well aligned POD structure to store Size bytes. 80 // This is used to implement the move operations. 81 struct Value { 82 alignas(alignof(type)) ubyte payload[Size]; 83 }; 84 85 public: 86 template <typename DstAddrT, typename SrcAddrT> copySizedOp87 static inline void copy(DstAddrT dst, SrcAddrT src) { 88 static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE); 89 static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ); 90 if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE && 91 DstAddrT::TEMPORALITY == Temporality::TEMPORAL && 92 SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) { 93 // delegate optimized copy to compiler. 94 __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size); 95 return; 96 } 97 nativeStore(nativeLoad(src), dst); 98 if constexpr (NEXT_SIZE > 0) 99 NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src)); 100 } 101 102 template <typename DstAddrT, typename SrcAddrT> moveSizedOp103 static inline void move(DstAddrT dst, SrcAddrT src) { 104 const auto payload = nativeLoad(src); 105 if constexpr (NEXT_SIZE > 0) 106 NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src)); 107 nativeStore(payload, dst); 108 } 109 110 template <typename DstAddrT> setSizedOp111 static inline void set(DstAddrT dst, ubyte value) { 112 if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE && 113 DstAddrT::TEMPORALITY == Temporality::TEMPORAL) { 114 // delegate optimized set to compiler. 115 __builtin_memset_inline(dst.ptr(), static_cast<int>(value), Size); 116 return; 117 } 118 nativeStore(Backend::template splat<type>(value), dst); 119 if constexpr (NEXT_SIZE > 0) 120 NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value); 121 } 122 123 template <typename SrcAddrT1, typename SrcAddrT2> isDifferentSizedOp124 static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) { 125 const uint64_t current = 126 Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2)); 127 if constexpr (NEXT_SIZE > 0) { 128 // In the case where we cannot handle Size with single operation (e.g. 129 // Size == 3) we can either return early if current is non zero or 130 // aggregate all the operations through the bitwise or operator. 131 // We chose the later to reduce branching. 132 return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1), 133 offsetAddr<TYPE_SIZE>(src2))); 134 } else { 135 return current; 136 } 137 } 138 139 template <typename SrcAddrT1, typename SrcAddrT2> threeWayCmpSizedOp140 static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) { 141 const auto a = nativeLoad(src1); 142 const auto b = nativeLoad(src2); 143 // If we cannot handle Size as a single operation we have two choices: 144 // - Either use Backend's threeWayCmp directly and return it is non 145 // zero. 146 // 147 // if (int32_t res = Backend::template threeWayCmp<type>(a, b)) 148 // return res; 149 // 150 // - Or use Backend's notEquals first and use threeWayCmp only if 151 // different, the assumption here is that notEquals is faster than 152 // threeWayCmp and that we can save cycles when the Size needs to be 153 // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1) 154 // 155 // if (Backend::template notEquals<type>(a, b)) 156 // return Backend::template threeWayCmp<type>(a, b); 157 // 158 // We chose the former to reduce code bloat and branching. 159 if (int32_t res = Backend::template threeWayCmp<type>(a, b)) 160 return res; 161 if constexpr (NEXT_SIZE > 0) 162 return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1), 163 offsetAddr<TYPE_SIZE>(src2)); 164 return 0; 165 } 166 loadSizedOp167 template <typename SrcAddrT> static Value load(SrcAddrT src) { 168 Value output; 169 copy(DstAddr<alignof(type)>(output.payload), src); 170 return output; 171 } 172 storeSizedOp173 template <typename DstAddrT> static void store(DstAddrT dst, Value value) { 174 copy(dst, SrcAddr<alignof(type)>(value.payload)); 175 } 176 }; 177 178 } // namespace __llvm_libc 179 180 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H 181