1 //===-- Sized Operations --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the SizedOp struct that serves as the middle end of the
10 // framework. It implements sized memory operations by breaking them down into
11 // simpler types whose availability is described in the Backend. It also
12 // provides a way to load and store sized chunks of memory (necessary for the
13 // move operation). SizedOp are the building blocks of higher order algorithms
14 // like HeadTail, Align or Loop.
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
18 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
19 
20 #include <stddef.h> // size_t
21 
// Whether the fixed size copy may be delegated to the compiler through
// `__builtin_memcpy_inline`. The value can be forced by defining the macro
// before this point. It defaults to 0 when the compiler does not provide
// `__has_builtin`, so that the feature test itself is never a hard error.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
#if defined(__has_builtin)
#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
  __has_builtin(__builtin_memcpy_inline)
#else
#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0
#endif // defined(__has_builtin)
#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE

// Whether the fixed size set may be delegated to the compiler through
// `__builtin_memset_inline`. Same override and fallback rules as above.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
#if defined(__has_builtin)
#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE                                    \
  __has_builtin(__builtin_memset_inline)
#else
#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE 0
#endif // defined(__has_builtin)
#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
31 
32 namespace __llvm_libc {
33 
34 template <typename Backend, size_t Size> struct SizedOp {
35   static constexpr size_t SIZE = Size;
36   // Define instantiations of SizedOp as a fixed size operation.
37   // i.e. an operation that is composable by types in algorithm.h
38   static constexpr bool IS_FIXED_SIZE = true;
39 
40 private:
41   static_assert(Backend::IS_BACKEND_TYPE);
42   static_assert(SIZE > 0);
43   using type = typename Backend::template getNextType<Size>;
44   static constexpr size_t TYPE_SIZE = sizeof(type);
45   static_assert(SIZE >= TYPE_SIZE);
46   static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
47   using NextBlock = SizedOp<Backend, NEXT_SIZE>;
48 
49   // Returns whether we can use an aligned operations.
50   // This is possible because the address type carries known compile-time
51   // alignment informations.
isAlignedSizedOp52   template <typename T, typename AddrT> static constexpr Aligned isAligned() {
53     static_assert(IsAddressType<AddrT>::Value);
54     return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
55                                                                  : Aligned::NO;
56   }
57 
58   // Loads a value of the current `type` from `src`.
59   // This function is responsible for extracting Temporality and Alignment from
60   // the Address type.
nativeLoadSizedOp61   template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
62     static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
63     constexpr auto AS = isAligned<type, SrcAddrT>();
64     constexpr auto TS = SrcAddrT::TEMPORALITY;
65     return Backend::template load<type, TS, AS>(as<const type>(src));
66   }
67 
68   // Stores a value of the current `type` to `dst`.
69   // This function is responsible for extracting Temporality and Alignment from
70   // the Address type.
71   template <typename DstAddrT>
nativeStoreSizedOp72   static inline void nativeStore(type value, DstAddrT dst) {
73     static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
74     constexpr auto AS = isAligned<type, DstAddrT>();
75     constexpr auto TS = DstAddrT::TEMPORALITY;
76     return Backend::template store<type, TS, AS>(as<type>(dst), value);
77   }
78 
79   // A well aligned POD structure to store Size bytes.
80   // This is used to implement the move operations.
81   struct Value {
82     alignas(alignof(type)) ubyte payload[Size];
83   };
84 
85 public:
86   template <typename DstAddrT, typename SrcAddrT>
copySizedOp87   static inline void copy(DstAddrT dst, SrcAddrT src) {
88     static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
89     static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
90     if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
91                   DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
92                   SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
93       // delegate optimized copy to compiler.
94       __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
95       return;
96     }
97     nativeStore(nativeLoad(src), dst);
98     if constexpr (NEXT_SIZE > 0)
99       NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
100   }
101 
102   template <typename DstAddrT, typename SrcAddrT>
moveSizedOp103   static inline void move(DstAddrT dst, SrcAddrT src) {
104     const auto payload = nativeLoad(src);
105     if constexpr (NEXT_SIZE > 0)
106       NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
107     nativeStore(payload, dst);
108   }
109 
110   template <typename DstAddrT>
setSizedOp111   static inline void set(DstAddrT dst, ubyte value) {
112     if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
113                   DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
114       // delegate optimized set to compiler.
115       __builtin_memset_inline(dst.ptr(), static_cast<int>(value), Size);
116       return;
117     }
118     nativeStore(Backend::template splat<type>(value), dst);
119     if constexpr (NEXT_SIZE > 0)
120       NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
121   }
122 
123   template <typename SrcAddrT1, typename SrcAddrT2>
isDifferentSizedOp124   static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
125     const uint64_t current =
126         Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
127     if constexpr (NEXT_SIZE > 0) {
128       // In the case where we cannot handle Size with single operation (e.g.
129       // Size == 3) we can either return early if current is non zero or
130       // aggregate all the operations through the bitwise or operator.
131       // We chose the later to reduce branching.
132       return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
133                                                offsetAddr<TYPE_SIZE>(src2)));
134     } else {
135       return current;
136     }
137   }
138 
139   template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpSizedOp140   static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
141     const auto a = nativeLoad(src1);
142     const auto b = nativeLoad(src2);
143     // If we cannot handle Size as a single operation we have two choices:
144     // - Either use Backend's threeWayCmp directly and return it is non
145     // zero.
146     //
147     //   if (int32_t res = Backend::template threeWayCmp<type>(a, b))
148     //     return res;
149     //
150     // - Or use Backend's notEquals first and use threeWayCmp only if
151     // different, the assumption here is that notEquals is faster than
152     // threeWayCmp and that we can save cycles when the Size needs to be
153     // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
154     //
155     //   if (Backend::template notEquals<type>(a, b))
156     //     return Backend::template threeWayCmp<type>(a, b);
157     //
158     // We chose the former to reduce code bloat and branching.
159     if (int32_t res = Backend::template threeWayCmp<type>(a, b))
160       return res;
161     if constexpr (NEXT_SIZE > 0)
162       return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
163                                     offsetAddr<TYPE_SIZE>(src2));
164     return 0;
165   }
166 
loadSizedOp167   template <typename SrcAddrT> static Value load(SrcAddrT src) {
168     Value output;
169     copy(DstAddr<alignof(type)>(output.payload), src);
170     return output;
171   }
172 
storeSizedOp173   template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
174     copy(dst, SrcAddr<alignof(type)>(value.payload));
175   }
176 };
177 
178 } // namespace __llvm_libc
179 
180 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
181