1 //===-- Sized Operations --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the SizedOp struct that serves as the middle end of the
10 // framework. It implements sized memory operations by breaking them down into
11 // simpler types whose availability is described in the Backend. It also
12 // provides a way to load and store sized chunks of memory (necessary for the
// move operation). SizedOp structs are the building blocks of higher-order
14 // like HeadTail, Align or Loop.
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
18 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
19 
20 #include <stddef.h> // size_t
21 
// Whether to delegate temporal copies to the compiler via
// __builtin_memcpy_inline. Defaults to compiler support detection; may be
// pre-defined (e.g. on the command line) to force the behavior either way.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
  __has_builtin(__builtin_memcpy_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE

// Whether to delegate temporal memsets to the compiler via
// __builtin_memset_inline. Same override mechanism as above.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE                                    \
  __has_builtin(__builtin_memset_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
31 
32 namespace __llvm_libc {
33 
// Implements memory operations over exactly `Size` bytes by peeling off the
// largest chunk the Backend can handle natively and recursing on the
// remainder (e.g. Size == 7 decomposes as 4 + 2 + 1). Each public operation
// is fully unrolled at compile time through `if constexpr` recursion.
template <typename Backend, size_t Size> struct SizedOp {
  static constexpr size_t SIZE = Size;

private:
  static_assert(Backend::IS_BACKEND_TYPE);
  static_assert(SIZE > 0);
  // Largest native type provided by the Backend that fits in `Size` bytes.
  using type = typename Backend::template getNextType<Size>;
  static constexpr size_t TYPE_SIZE = sizeof(type);
  static_assert(SIZE >= TYPE_SIZE);
  // Bytes left over after handling one `type`-sized chunk. When non-zero,
  // every operation below tail-recurses into `NextBlock` at offset TYPE_SIZE.
  // Note: naming the alias does not instantiate SizedOp<Backend, 0>; the
  // discarded `if constexpr` branches are never instantiated, so the
  // `SIZE > 0` assertion above cannot fire for the terminal step.
  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
  using NextBlock = SizedOp<Backend, NEXT_SIZE>;

  // Returns whether we can use aligned operations.
  // This is possible because the address type carries known compile-time
  // alignment information.
  template <typename T, typename AddrT> static constexpr Aligned isAligned() {
    static_assert(IsAddressType<AddrT>::Value);
    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
                                                                 : Aligned::NO;
  }

  // Loads a value of the current `type` from `src`.
  // This function is responsible for extracting Temporality and Alignment from
  // the Address type.
  template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
    constexpr auto AS = isAligned<type, SrcAddrT>();
    constexpr auto TS = SrcAddrT::TEMPORALITY;
    return Backend::template load<type, TS, AS>(as<const type>(src));
  }

  // Stores a value of the current `type` to `dst`.
  // This function is responsible for extracting Temporality and Alignment from
  // the Address type.
  template <typename DstAddrT>
  static inline void nativeStore(type value, DstAddrT dst) {
    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
    constexpr auto AS = isAligned<type, DstAddrT>();
    constexpr auto TS = DstAddrT::TEMPORALITY;
    return Backend::template store<type, TS, AS>(as<type>(dst), value);
  }

  // A well aligned POD structure to store Size bytes.
  // This is used to implement the move operations.
  struct Value {
    alignas(alignof(type)) ubyte payload[Size];
  };

public:
  // Copies `Size` bytes from `src` to `dst`. When both addresses are
  // temporal and the builtin is available, the whole copy is delegated to
  // the compiler; otherwise it is done chunk by chunk, front to back.
  // Precondition (inherited from memcpy semantics): regions must not
  // overlap — for overlapping regions use `move` below.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src) {
    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
      // delegate optimized copy to compiler.
      __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
      return;
    }
    nativeStore(nativeLoad(src), dst);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
  }

  // Copies `Size` bytes from `src` to `dst`, correct even when the regions
  // overlap: the head chunk is loaded BEFORE recursing and stored only AFTER
  // the recursion returns, so by induction every load of `src` completes
  // before any store to `dst`. Do not reorder these three statements.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src) {
    const auto payload = nativeLoad(src);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
    nativeStore(payload, dst);
  }

  // Sets `Size` bytes starting at `dst` to the byte `value`, either via the
  // compiler builtin (temporal destination) or by storing a splatted chunk
  // and recursing on the remainder.
  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value) {
    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
      // delegate optimized set to compiler.
      __builtin_memset_inline(dst.ptr(), value, Size);
      return;
    }
    nativeStore(Backend::template splat<type>(value), dst);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
  }

  // Returns a non-zero value iff the `Size` bytes at `src1` and `src2`
  // differ (the OR-aggregate of the Backend's per-chunk notEquals results);
  // returns zero when the regions are byte-wise equal.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
    const uint64_t current =
        Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
    if constexpr (NEXT_SIZE > 0) {
      // In the case where we cannot handle Size with a single operation (e.g.
      // Size == 3) we can either return early if current is non zero or
      // aggregate all the operations through the bitwise or operator.
      // We chose the latter to reduce branching.
      return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
                                               offsetAddr<TYPE_SIZE>(src2)));
    } else {
      return current;
    }
  }

  // Returns the first non-zero Backend::threeWayCmp result over the
  // successive chunks of `src1` and `src2`, or 0 if all `Size` bytes compare
  // equal.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
    const auto a = nativeLoad(src1);
    const auto b = nativeLoad(src2);
    // If we cannot handle Size as a single operation we have two choices:
    // - Either use Backend's threeWayCmp directly and return it if non
    // zero.
    //
    //   if (int32_t res = Backend::template threeWayCmp<type>(a, b))
    //     return res;
    //
    // - Or use Backend's notEquals first and use threeWayCmp only if
    // different, the assumption here is that notEquals is faster than
    // threeWayCmp and that we can save cycles when the Size needs to be
    // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
    //
    //   if (Backend::template notEquals<type>(a, b))
    //     return Backend::template threeWayCmp<type>(a, b);
    //
    // We chose the former to reduce code bloat and branching.
    if (int32_t res = Backend::template threeWayCmp<type>(a, b))
      return res;
    if constexpr (NEXT_SIZE > 0)
      return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
                                    offsetAddr<TYPE_SIZE>(src2));
    return 0;
  }

  // Reads `Size` bytes from `src` into a suitably aligned `Value` buffer.
  // Together with `store` below this provides the load/store pair used by
  // higher-order move algorithms.
  template <typename SrcAddrT> static Value load(SrcAddrT src) {
    Value output;
    copy(DstAddr<alignof(type)>(output.payload), src);
    return output;
  }

  // Writes the `Size` bytes held in `value` out to `dst`.
  template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
    copy(dst, SrcAddr<alignof(type)>(value.payload));
  }
};
174 
175 } // namespace __llvm_libc
176 
177 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
178