1*7f5d7bc8SGuillaume Chatelet //===-- Algorithms to compose sized memory operations ---------------------===//
2*7f5d7bc8SGuillaume Chatelet //
3*7f5d7bc8SGuillaume Chatelet // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*7f5d7bc8SGuillaume Chatelet // See https://llvm.org/LICENSE.txt for license information.
5*7f5d7bc8SGuillaume Chatelet // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*7f5d7bc8SGuillaume Chatelet //
7*7f5d7bc8SGuillaume Chatelet //===----------------------------------------------------------------------===//
8*7f5d7bc8SGuillaume Chatelet //
9*7f5d7bc8SGuillaume Chatelet // Higher order primitives that build upon the SizedOpT facility.
10*7f5d7bc8SGuillaume Chatelet // They constitute the basic blocks for composing memory functions.
11*7f5d7bc8SGuillaume Chatelet // This file defines the following operations:
12*7f5d7bc8SGuillaume Chatelet // - Skip
13*7f5d7bc8SGuillaume Chatelet // - Tail
14*7f5d7bc8SGuillaume Chatelet // - HeadTail
15*7f5d7bc8SGuillaume Chatelet // - Loop
16*7f5d7bc8SGuillaume Chatelet // - Align
17*7f5d7bc8SGuillaume Chatelet //
18*7f5d7bc8SGuillaume Chatelet // See each class for documentation.
19*7f5d7bc8SGuillaume Chatelet //===----------------------------------------------------------------------===//
20*7f5d7bc8SGuillaume Chatelet 
21*7f5d7bc8SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
22*7f5d7bc8SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
23*7f5d7bc8SGuillaume Chatelet 
24*7f5d7bc8SGuillaume Chatelet #include "src/string/memory_utils/address.h" // Address
25*7f5d7bc8SGuillaume Chatelet #include "src/string/memory_utils/utils.h"   // offset_to_next_aligned
26*7f5d7bc8SGuillaume Chatelet 
27*7f5d7bc8SGuillaume Chatelet #include <stddef.h> // ptrdiff_t
28*7f5d7bc8SGuillaume Chatelet 
29*7f5d7bc8SGuillaume Chatelet namespace __llvm_libc {
30*7f5d7bc8SGuillaume Chatelet 
31*7f5d7bc8SGuillaume Chatelet // We are not yet allowed to use asserts in low level memory operations as
32*7f5d7bc8SGuillaume Chatelet // assert itself could depend on them.
33*7f5d7bc8SGuillaume Chatelet // We define this empty macro so we can enable them as soon as possible and keep
34*7f5d7bc8SGuillaume Chatelet // track of invariants.
35*7f5d7bc8SGuillaume Chatelet #define LIBC_ASSERT(COND)
36*7f5d7bc8SGuillaume Chatelet 
37*7f5d7bc8SGuillaume Chatelet // An operation that allows to skip the specified amount of bytes.
38*7f5d7bc8SGuillaume Chatelet template <ptrdiff_t Bytes> struct Skip {
39*7f5d7bc8SGuillaume Chatelet   template <typename NextT> struct Then {
40*7f5d7bc8SGuillaume Chatelet     template <typename DstAddrT>
setSkip::Then41*7f5d7bc8SGuillaume Chatelet     static inline void set(DstAddrT dst, ubyte value) {
42*7f5d7bc8SGuillaume Chatelet       static_assert(NextT::IS_FIXED_SIZE);
43*7f5d7bc8SGuillaume Chatelet       NextT::set(offsetAddr<Bytes>(dst), value);
44*7f5d7bc8SGuillaume Chatelet     }
45*7f5d7bc8SGuillaume Chatelet 
46*7f5d7bc8SGuillaume Chatelet     template <typename SrcAddrT1, typename SrcAddrT2>
isDifferentSkip::Then47*7f5d7bc8SGuillaume Chatelet     static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
48*7f5d7bc8SGuillaume Chatelet       static_assert(NextT::IS_FIXED_SIZE);
49*7f5d7bc8SGuillaume Chatelet       return NextT::isDifferent(offsetAddr<Bytes>(src1),
50*7f5d7bc8SGuillaume Chatelet                                 offsetAddr<Bytes>(src2));
51*7f5d7bc8SGuillaume Chatelet     }
52*7f5d7bc8SGuillaume Chatelet 
53*7f5d7bc8SGuillaume Chatelet     template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpSkip::Then54*7f5d7bc8SGuillaume Chatelet     static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
55*7f5d7bc8SGuillaume Chatelet       static_assert(NextT::IS_FIXED_SIZE);
56*7f5d7bc8SGuillaume Chatelet       return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
57*7f5d7bc8SGuillaume Chatelet                                 offsetAddr<Bytes>(src2));
58*7f5d7bc8SGuillaume Chatelet     }
59*7f5d7bc8SGuillaume Chatelet 
60*7f5d7bc8SGuillaume Chatelet     template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpSkip::Then61*7f5d7bc8SGuillaume Chatelet     static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
62*7f5d7bc8SGuillaume Chatelet                                       size_t runtime_size) {
63*7f5d7bc8SGuillaume Chatelet       static_assert(NextT::IS_RUNTIME_SIZE);
64*7f5d7bc8SGuillaume Chatelet       return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
65*7f5d7bc8SGuillaume Chatelet                                 offsetAddr<Bytes>(src2), runtime_size - Bytes);
66*7f5d7bc8SGuillaume Chatelet     }
67*7f5d7bc8SGuillaume Chatelet   };
68*7f5d7bc8SGuillaume Chatelet };
69*7f5d7bc8SGuillaume Chatelet 
70*7f5d7bc8SGuillaume Chatelet // Compute the address of a tail operation.
71*7f5d7bc8SGuillaume Chatelet // Because of the runtime size, we loose the alignment information.
72*7f5d7bc8SGuillaume Chatelet template <size_t Size, typename AddrT>
tailAddr(AddrT addr,size_t runtime_size)73*7f5d7bc8SGuillaume Chatelet static auto tailAddr(AddrT addr, size_t runtime_size) {
74*7f5d7bc8SGuillaume Chatelet   static_assert(IsAddressType<AddrT>::Value);
75*7f5d7bc8SGuillaume Chatelet   return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
76*7f5d7bc8SGuillaume Chatelet }
77*7f5d7bc8SGuillaume Chatelet 
78*7f5d7bc8SGuillaume Chatelet // Perform the operation on the last 'Size' bytes of the buffer.
79*7f5d7bc8SGuillaume Chatelet //
80*7f5d7bc8SGuillaume Chatelet // e.g. with
81*7f5d7bc8SGuillaume Chatelet // [1234567812345678123]
82*7f5d7bc8SGuillaume Chatelet // [__XXXXXXXXXXXXXX___]
83*7f5d7bc8SGuillaume Chatelet // [________XXXXXXXX___]
84*7f5d7bc8SGuillaume Chatelet //
85*7f5d7bc8SGuillaume Chatelet // Precondition: `runtime_size >= Size`.
86*7f5d7bc8SGuillaume Chatelet template <typename SizedOpT> struct Tail {
87*7f5d7bc8SGuillaume Chatelet   static_assert(SizedOpT::IS_FIXED_SIZE);
88*7f5d7bc8SGuillaume Chatelet   static constexpr bool IS_RUNTIME_SIZE = true;
89*7f5d7bc8SGuillaume Chatelet   static constexpr size_t SIZE = SizedOpT::SIZE;
90*7f5d7bc8SGuillaume Chatelet 
91*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
copyTail92*7f5d7bc8SGuillaume Chatelet   static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
93*7f5d7bc8SGuillaume Chatelet     SizedOpT::copy(tailAddr<SIZE>(dst, runtime_size),
94*7f5d7bc8SGuillaume Chatelet                    tailAddr<SIZE>(src, runtime_size));
95*7f5d7bc8SGuillaume Chatelet   }
96*7f5d7bc8SGuillaume Chatelet 
97*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
moveTail98*7f5d7bc8SGuillaume Chatelet   static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
99*7f5d7bc8SGuillaume Chatelet     SizedOpT::move(tailAddr<SIZE>(dst, runtime_size),
100*7f5d7bc8SGuillaume Chatelet                    tailAddr<SIZE>(src, runtime_size));
101*7f5d7bc8SGuillaume Chatelet   }
102*7f5d7bc8SGuillaume Chatelet 
103*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT>
setTail104*7f5d7bc8SGuillaume Chatelet   static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
105*7f5d7bc8SGuillaume Chatelet     SizedOpT::set(tailAddr<SIZE>(dst, runtime_size), value);
106*7f5d7bc8SGuillaume Chatelet   }
107*7f5d7bc8SGuillaume Chatelet 
108*7f5d7bc8SGuillaume Chatelet   template <typename SrcAddrT1, typename SrcAddrT2>
isDifferentTail109*7f5d7bc8SGuillaume Chatelet   static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
110*7f5d7bc8SGuillaume Chatelet                                      size_t runtime_size) {
111*7f5d7bc8SGuillaume Chatelet     return SizedOpT::isDifferent(tailAddr<SIZE>(src1, runtime_size),
112*7f5d7bc8SGuillaume Chatelet                                  tailAddr<SIZE>(src2, runtime_size));
113*7f5d7bc8SGuillaume Chatelet   }
114*7f5d7bc8SGuillaume Chatelet 
115*7f5d7bc8SGuillaume Chatelet   template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpTail116*7f5d7bc8SGuillaume Chatelet   static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
117*7f5d7bc8SGuillaume Chatelet                                     size_t runtime_size) {
118*7f5d7bc8SGuillaume Chatelet     return SizedOpT::threeWayCmp(tailAddr<SIZE>(src1, runtime_size),
119*7f5d7bc8SGuillaume Chatelet                                  tailAddr<SIZE>(src2, runtime_size));
120*7f5d7bc8SGuillaume Chatelet   }
121*7f5d7bc8SGuillaume Chatelet };
122*7f5d7bc8SGuillaume Chatelet 
123*7f5d7bc8SGuillaume Chatelet // Perform the operation on the first and the last `SizedOpT::Size` bytes of the
124*7f5d7bc8SGuillaume Chatelet // buffer. This is useful for overlapping operations.
125*7f5d7bc8SGuillaume Chatelet //
126*7f5d7bc8SGuillaume Chatelet // e.g. with
127*7f5d7bc8SGuillaume Chatelet // [1234567812345678123]
128*7f5d7bc8SGuillaume Chatelet // [__XXXXXXXXXXXXXX___]
129*7f5d7bc8SGuillaume Chatelet // [__XXXXXXXX_________]
130*7f5d7bc8SGuillaume Chatelet // [________XXXXXXXX___]
131*7f5d7bc8SGuillaume Chatelet //
132*7f5d7bc8SGuillaume Chatelet // Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
133*7f5d7bc8SGuillaume Chatelet template <typename SizedOpT> struct HeadTail {
134*7f5d7bc8SGuillaume Chatelet   static_assert(SizedOpT::IS_FIXED_SIZE);
135*7f5d7bc8SGuillaume Chatelet   static constexpr bool IS_RUNTIME_SIZE = true;
136*7f5d7bc8SGuillaume Chatelet 
137*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
copyHeadTail138*7f5d7bc8SGuillaume Chatelet   static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
139*7f5d7bc8SGuillaume Chatelet     LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
140*7f5d7bc8SGuillaume Chatelet     SizedOpT::copy(dst, src);
141*7f5d7bc8SGuillaume Chatelet     Tail<SizedOpT>::copy(dst, src, runtime_size);
142*7f5d7bc8SGuillaume Chatelet   }
143*7f5d7bc8SGuillaume Chatelet 
144*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
moveHeadTail145*7f5d7bc8SGuillaume Chatelet   static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
146*7f5d7bc8SGuillaume Chatelet     LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
147*7f5d7bc8SGuillaume Chatelet     static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
148*7f5d7bc8SGuillaume Chatelet     // The load and store operations can be performed in any order as long as
149*7f5d7bc8SGuillaume Chatelet     // they are not interleaved. More investigations are needed to determine the
150*7f5d7bc8SGuillaume Chatelet     // best order.
151*7f5d7bc8SGuillaume Chatelet     auto head = SizedOpT::load(src);
152*7f5d7bc8SGuillaume Chatelet     auto tail = SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
153*7f5d7bc8SGuillaume Chatelet     SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail);
154*7f5d7bc8SGuillaume Chatelet     SizedOpT::store(dst, head);
155*7f5d7bc8SGuillaume Chatelet   }
156*7f5d7bc8SGuillaume Chatelet 
157*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT>
setHeadTail158*7f5d7bc8SGuillaume Chatelet   static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
159*7f5d7bc8SGuillaume Chatelet     LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
160*7f5d7bc8SGuillaume Chatelet     SizedOpT::set(dst, value);
161*7f5d7bc8SGuillaume Chatelet     Tail<SizedOpT>::set(dst, value, runtime_size);
162*7f5d7bc8SGuillaume Chatelet   }
163*7f5d7bc8SGuillaume Chatelet 
164*7f5d7bc8SGuillaume Chatelet   template <typename SrcAddrT1, typename SrcAddrT2>
isDifferentHeadTail165*7f5d7bc8SGuillaume Chatelet   static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
166*7f5d7bc8SGuillaume Chatelet                                      size_t runtime_size) {
167*7f5d7bc8SGuillaume Chatelet     LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
168*7f5d7bc8SGuillaume Chatelet     // Two strategies can be applied here:
169*7f5d7bc8SGuillaume Chatelet     // 1. Compute head and tail and compose them with a bitwise or operation.
170*7f5d7bc8SGuillaume Chatelet     // 2. Stop early if head is different.
171*7f5d7bc8SGuillaume Chatelet     // We chose the later because HeadTail operations are typically performed
172*7f5d7bc8SGuillaume Chatelet     // with sizes ranging from 4 to 256 bytes. The cost of the loads is then
173*7f5d7bc8SGuillaume Chatelet     // significantly larger than the cost of the branch.
174*7f5d7bc8SGuillaume Chatelet     if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
175*7f5d7bc8SGuillaume Chatelet       return res;
176*7f5d7bc8SGuillaume Chatelet     return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
177*7f5d7bc8SGuillaume Chatelet   }
178*7f5d7bc8SGuillaume Chatelet 
179*7f5d7bc8SGuillaume Chatelet   template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpHeadTail180*7f5d7bc8SGuillaume Chatelet   static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
181*7f5d7bc8SGuillaume Chatelet                                     size_t runtime_size) {
182*7f5d7bc8SGuillaume Chatelet     LIBC_ASSERT(runtime_size >= SizedOpT::SIZE &&
183*7f5d7bc8SGuillaume Chatelet                 runtime_size <= 2 * SizedOpT::SIZE);
184*7f5d7bc8SGuillaume Chatelet     if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
185*7f5d7bc8SGuillaume Chatelet       return res;
186*7f5d7bc8SGuillaume Chatelet     return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
187*7f5d7bc8SGuillaume Chatelet   }
188*7f5d7bc8SGuillaume Chatelet };
189*7f5d7bc8SGuillaume Chatelet 
190*7f5d7bc8SGuillaume Chatelet // Simple loop ending with a Tail operation.
191*7f5d7bc8SGuillaume Chatelet //
192*7f5d7bc8SGuillaume Chatelet // e.g. with
193*7f5d7bc8SGuillaume Chatelet // [12345678123456781234567812345678]
194*7f5d7bc8SGuillaume Chatelet // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
195*7f5d7bc8SGuillaume Chatelet // [__XXXXXXXX_______________________]
196*7f5d7bc8SGuillaume Chatelet // [__________XXXXXXXX_______________]
197*7f5d7bc8SGuillaume Chatelet // [__________________XXXXXXXX_______]
198*7f5d7bc8SGuillaume Chatelet // [______________________XXXXXXXX___]
199*7f5d7bc8SGuillaume Chatelet //
200*7f5d7bc8SGuillaume Chatelet // Precondition:
201*7f5d7bc8SGuillaume Chatelet // - runtime_size >= Size
202*7f5d7bc8SGuillaume Chatelet template <typename SizedOpT> struct Loop {
203*7f5d7bc8SGuillaume Chatelet   static_assert(SizedOpT::IS_FIXED_SIZE);
204*7f5d7bc8SGuillaume Chatelet   static constexpr bool IS_RUNTIME_SIZE = true;
205*7f5d7bc8SGuillaume Chatelet   static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
206*7f5d7bc8SGuillaume Chatelet 
207*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
copyLoop208*7f5d7bc8SGuillaume Chatelet   static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
209*7f5d7bc8SGuillaume Chatelet     size_t offset = 0;
210*7f5d7bc8SGuillaume Chatelet     do {
211*7f5d7bc8SGuillaume Chatelet       SizedOpT::copy(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
212*7f5d7bc8SGuillaume Chatelet                      offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
213*7f5d7bc8SGuillaume Chatelet       offset += BLOCK_SIZE;
214*7f5d7bc8SGuillaume Chatelet     } while (offset < runtime_size - BLOCK_SIZE);
215*7f5d7bc8SGuillaume Chatelet     Tail<SizedOpT>::copy(dst, src, runtime_size);
216*7f5d7bc8SGuillaume Chatelet   }
217*7f5d7bc8SGuillaume Chatelet 
218*7f5d7bc8SGuillaume Chatelet   // Move forward suitable when dst < src. We load the tail bytes before
219*7f5d7bc8SGuillaume Chatelet   // handling the loop.
220*7f5d7bc8SGuillaume Chatelet   //
221*7f5d7bc8SGuillaume Chatelet   // e.g. Moving two bytes
222*7f5d7bc8SGuillaume Chatelet   // [   |       |       |       |       |]
223*7f5d7bc8SGuillaume Chatelet   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
224*7f5d7bc8SGuillaume Chatelet   // [_________________________LLLLLLLL___]
225*7f5d7bc8SGuillaume Chatelet   // [___LLLLLLLL_________________________]
226*7f5d7bc8SGuillaume Chatelet   // [_SSSSSSSS___________________________]
227*7f5d7bc8SGuillaume Chatelet   // [___________LLLLLLLL_________________]
228*7f5d7bc8SGuillaume Chatelet   // [_________SSSSSSSS___________________]
229*7f5d7bc8SGuillaume Chatelet   // [___________________LLLLLLLL_________]
230*7f5d7bc8SGuillaume Chatelet   // [_________________SSSSSSSS___________]
231*7f5d7bc8SGuillaume Chatelet   // [_______________________SSSSSSSS_____]
232*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
moveLoop233*7f5d7bc8SGuillaume Chatelet   static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
234*7f5d7bc8SGuillaume Chatelet     const auto tail_value =
235*7f5d7bc8SGuillaume Chatelet         SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
236*7f5d7bc8SGuillaume Chatelet     size_t offset = 0;
237*7f5d7bc8SGuillaume Chatelet     do {
238*7f5d7bc8SGuillaume Chatelet       SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
239*7f5d7bc8SGuillaume Chatelet                      offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
240*7f5d7bc8SGuillaume Chatelet       offset += BLOCK_SIZE;
241*7f5d7bc8SGuillaume Chatelet     } while (offset < runtime_size - BLOCK_SIZE);
242*7f5d7bc8SGuillaume Chatelet     SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail_value);
243*7f5d7bc8SGuillaume Chatelet   }
244*7f5d7bc8SGuillaume Chatelet 
245*7f5d7bc8SGuillaume Chatelet   // Move backward suitable when dst > src. We load the head bytes before
246*7f5d7bc8SGuillaume Chatelet   // handling the loop.
247*7f5d7bc8SGuillaume Chatelet   //
248*7f5d7bc8SGuillaume Chatelet   // e.g. Moving two bytes
249*7f5d7bc8SGuillaume Chatelet   // [   |       |       |       |       |]
250*7f5d7bc8SGuillaume Chatelet   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
251*7f5d7bc8SGuillaume Chatelet   // [___LLLLLLLL_________________________]
252*7f5d7bc8SGuillaume Chatelet   // [_________________________LLLLLLLL___]
253*7f5d7bc8SGuillaume Chatelet   // [___________________________SSSSSSSS_]
254*7f5d7bc8SGuillaume Chatelet   // [_________________LLLLLLLL___________]
255*7f5d7bc8SGuillaume Chatelet   // [___________________SSSSSSSS_________]
256*7f5d7bc8SGuillaume Chatelet   // [_________LLLLLLLL___________________]
257*7f5d7bc8SGuillaume Chatelet   // [___________SSSSSSSS_________________]
258*7f5d7bc8SGuillaume Chatelet   // [_____SSSSSSSS_______________________]
259*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT, typename SrcAddrT>
move_backwardLoop260*7f5d7bc8SGuillaume Chatelet   static inline void move_backward(DstAddrT dst, SrcAddrT src,
261*7f5d7bc8SGuillaume Chatelet                                    size_t runtime_size) {
262*7f5d7bc8SGuillaume Chatelet     const auto head_value = SizedOpT::load(src);
263*7f5d7bc8SGuillaume Chatelet     ptrdiff_t offset = runtime_size - BLOCK_SIZE;
264*7f5d7bc8SGuillaume Chatelet     do {
265*7f5d7bc8SGuillaume Chatelet       SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
266*7f5d7bc8SGuillaume Chatelet                      offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
267*7f5d7bc8SGuillaume Chatelet       offset -= BLOCK_SIZE;
268*7f5d7bc8SGuillaume Chatelet     } while (offset >= 0);
269*7f5d7bc8SGuillaume Chatelet     SizedOpT::store(dst, head_value);
270*7f5d7bc8SGuillaume Chatelet   }
271*7f5d7bc8SGuillaume Chatelet 
272*7f5d7bc8SGuillaume Chatelet   template <typename DstAddrT>
setLoop273*7f5d7bc8SGuillaume Chatelet   static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
274*7f5d7bc8SGuillaume Chatelet     size_t offset = 0;
275*7f5d7bc8SGuillaume Chatelet     do {
276*7f5d7bc8SGuillaume Chatelet       SizedOpT::set(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), value);
277*7f5d7bc8SGuillaume Chatelet       offset += BLOCK_SIZE;
278*7f5d7bc8SGuillaume Chatelet     } while (offset < runtime_size - BLOCK_SIZE);
279*7f5d7bc8SGuillaume Chatelet     Tail<SizedOpT>::set(dst, value, runtime_size);
280*7f5d7bc8SGuillaume Chatelet   }
281*7f5d7bc8SGuillaume Chatelet 
282*7f5d7bc8SGuillaume Chatelet   template <typename SrcAddrT1, typename SrcAddrT2>
isDifferentLoop283*7f5d7bc8SGuillaume Chatelet   static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
284*7f5d7bc8SGuillaume Chatelet                                      size_t runtime_size) {
285*7f5d7bc8SGuillaume Chatelet     size_t offset = 0;
286*7f5d7bc8SGuillaume Chatelet     do {
287*7f5d7bc8SGuillaume Chatelet       if (uint64_t res = SizedOpT::isDifferent(
288*7f5d7bc8SGuillaume Chatelet               offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
289*7f5d7bc8SGuillaume Chatelet               offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
290*7f5d7bc8SGuillaume Chatelet         return res;
291*7f5d7bc8SGuillaume Chatelet       offset += BLOCK_SIZE;
292*7f5d7bc8SGuillaume Chatelet     } while (offset < runtime_size - BLOCK_SIZE);
293*7f5d7bc8SGuillaume Chatelet     return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
294*7f5d7bc8SGuillaume Chatelet   }
295*7f5d7bc8SGuillaume Chatelet 
296*7f5d7bc8SGuillaume Chatelet   template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpLoop297*7f5d7bc8SGuillaume Chatelet   static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
298*7f5d7bc8SGuillaume Chatelet                                     size_t runtime_size) {
299*7f5d7bc8SGuillaume Chatelet     size_t offset = 0;
300*7f5d7bc8SGuillaume Chatelet     do {
301*7f5d7bc8SGuillaume Chatelet       if (int32_t res = SizedOpT::threeWayCmp(
302*7f5d7bc8SGuillaume Chatelet               offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
303*7f5d7bc8SGuillaume Chatelet               offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
304*7f5d7bc8SGuillaume Chatelet         return res;
305*7f5d7bc8SGuillaume Chatelet       offset += BLOCK_SIZE;
306*7f5d7bc8SGuillaume Chatelet     } while (offset < runtime_size - BLOCK_SIZE);
307*7f5d7bc8SGuillaume Chatelet     return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
308*7f5d7bc8SGuillaume Chatelet   }
309*7f5d7bc8SGuillaume Chatelet };
310*7f5d7bc8SGuillaume Chatelet 
311*7f5d7bc8SGuillaume Chatelet // Aligns using a statically-sized operation, then calls the subsequent NextT
312*7f5d7bc8SGuillaume Chatelet // operation.
313*7f5d7bc8SGuillaume Chatelet //
314*7f5d7bc8SGuillaume Chatelet // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
315*7f5d7bc8SGuillaume Chatelet // Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, runtime_size);
316*7f5d7bc8SGuillaume Chatelet enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
317*7f5d7bc8SGuillaume Chatelet template <typename SizedOpT, Arg AlignOn = Arg::_1> struct Align {
318*7f5d7bc8SGuillaume Chatelet   static_assert(SizedOpT::IS_FIXED_SIZE);
319*7f5d7bc8SGuillaume Chatelet 
320*7f5d7bc8SGuillaume Chatelet   template <typename NextT> struct Then {
321*7f5d7bc8SGuillaume Chatelet     static_assert(NextT::IS_RUNTIME_SIZE);
322*7f5d7bc8SGuillaume Chatelet 
323*7f5d7bc8SGuillaume Chatelet     template <typename DstAddrT, typename SrcAddrT>
copyAlign::Then324*7f5d7bc8SGuillaume Chatelet     static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
325*7f5d7bc8SGuillaume Chatelet       SizedOpT::copy(dst, src);
326*7f5d7bc8SGuillaume Chatelet       auto aligned = align(dst, src, runtime_size);
327*7f5d7bc8SGuillaume Chatelet       NextT::copy(aligned.arg1, aligned.arg2, aligned.size);
328*7f5d7bc8SGuillaume Chatelet     }
329*7f5d7bc8SGuillaume Chatelet 
330*7f5d7bc8SGuillaume Chatelet     // Move forward suitable when dst < src. The alignment is performed with
331*7f5d7bc8SGuillaume Chatelet     // an HeadTail operation of size ∈ [Alignment, 2 x Alignment].
332*7f5d7bc8SGuillaume Chatelet     //
333*7f5d7bc8SGuillaume Chatelet     // e.g. Moving two bytes and making sure src is then aligned.
334*7f5d7bc8SGuillaume Chatelet     // [  |       |       |       |      ]
335*7f5d7bc8SGuillaume Chatelet     // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
336*7f5d7bc8SGuillaume Chatelet     // [____LLLLLLLL_____________________]
337*7f5d7bc8SGuillaume Chatelet     // [___________LLLLLLLL______________]
338*7f5d7bc8SGuillaume Chatelet     // [_SSSSSSSS________________________]
339*7f5d7bc8SGuillaume Chatelet     // [________SSSSSSSS_________________]
340*7f5d7bc8SGuillaume Chatelet     //
341*7f5d7bc8SGuillaume Chatelet     // e.g. Moving two bytes and making sure dst is then aligned.
342*7f5d7bc8SGuillaume Chatelet     // [  |       |       |       |      ]
343*7f5d7bc8SGuillaume Chatelet     // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
344*7f5d7bc8SGuillaume Chatelet     // [____LLLLLLLL_____________________]
345*7f5d7bc8SGuillaume Chatelet     // [______LLLLLLLL___________________]
346*7f5d7bc8SGuillaume Chatelet     // [_SSSSSSSS________________________]
347*7f5d7bc8SGuillaume Chatelet     // [___SSSSSSSS______________________]
348*7f5d7bc8SGuillaume Chatelet     template <typename DstAddrT, typename SrcAddrT>
moveAlign::Then349*7f5d7bc8SGuillaume Chatelet     static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
350*7f5d7bc8SGuillaume Chatelet       auto aligned_after_begin = align(dst, src, runtime_size);
351*7f5d7bc8SGuillaume Chatelet       // We move pointers forward by Size so we can perform HeadTail.
352*7f5d7bc8SGuillaume Chatelet       auto aligned = aligned_after_begin.stepForward();
353*7f5d7bc8SGuillaume Chatelet       HeadTail<SizedOpT>::move(dst, src, runtime_size - aligned.size);
354*7f5d7bc8SGuillaume Chatelet       NextT::move(aligned.arg1, aligned.arg2, aligned.size);
355*7f5d7bc8SGuillaume Chatelet     }
356*7f5d7bc8SGuillaume Chatelet 
357*7f5d7bc8SGuillaume Chatelet     // Move backward suitable when dst > src. The alignment is performed with
358*7f5d7bc8SGuillaume Chatelet     // an HeadTail operation of size ∈ [Alignment, 2 x Alignment].
359*7f5d7bc8SGuillaume Chatelet     //
360*7f5d7bc8SGuillaume Chatelet     // e.g. Moving two bytes backward and making sure src is then aligned.
361*7f5d7bc8SGuillaume Chatelet     // [  |       |       |       |      ]
362*7f5d7bc8SGuillaume Chatelet     // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
363*7f5d7bc8SGuillaume Chatelet     // [ _________________LLLLLLLL_______]
364*7f5d7bc8SGuillaume Chatelet     // [ ___________________LLLLLLLL_____]
365*7f5d7bc8SGuillaume Chatelet     // [____________________SSSSSSSS_____]
366*7f5d7bc8SGuillaume Chatelet     // [______________________SSSSSSSS___]
367*7f5d7bc8SGuillaume Chatelet     //
368*7f5d7bc8SGuillaume Chatelet     // e.g. Moving two bytes and making sure dst is then aligned.
369*7f5d7bc8SGuillaume Chatelet     // [  |       |       |       |      ]
370*7f5d7bc8SGuillaume Chatelet     // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
371*7f5d7bc8SGuillaume Chatelet     // [ _______________LLLLLLLL_________]
372*7f5d7bc8SGuillaume Chatelet     // [ ___________________LLLLLLLL_____]
373*7f5d7bc8SGuillaume Chatelet     // [__________________SSSSSSSS_______]
374*7f5d7bc8SGuillaume Chatelet     // [______________________SSSSSSSS___]
375*7f5d7bc8SGuillaume Chatelet     template <typename DstAddrT, typename SrcAddrT>
move_backwardAlign::Then376*7f5d7bc8SGuillaume Chatelet     static inline void move_backward(DstAddrT dst, SrcAddrT src,
377*7f5d7bc8SGuillaume Chatelet                                      size_t runtime_size) {
378*7f5d7bc8SGuillaume Chatelet       const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size);
379*7f5d7bc8SGuillaume Chatelet       const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size);
380*7f5d7bc8SGuillaume Chatelet       auto aligned_after_end = align(dst_end, src_end, 0);
381*7f5d7bc8SGuillaume Chatelet       // We move pointers back by 2 x Size so we can perform HeadTail.
382*7f5d7bc8SGuillaume Chatelet       auto aligned = aligned_after_end.stepBack().stepBack();
383*7f5d7bc8SGuillaume Chatelet       HeadTail<SizedOpT>::move(aligned.arg1, aligned.arg2, aligned.size);
384*7f5d7bc8SGuillaume Chatelet       NextT::move_backward(dst, src, runtime_size - aligned.size);
385*7f5d7bc8SGuillaume Chatelet     }
386*7f5d7bc8SGuillaume Chatelet 
387*7f5d7bc8SGuillaume Chatelet     template <typename DstAddrT>
setAlign::Then388*7f5d7bc8SGuillaume Chatelet     static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
389*7f5d7bc8SGuillaume Chatelet       SizedOpT::set(dst, value);
390*7f5d7bc8SGuillaume Chatelet       DstAddrT _(nullptr);
391*7f5d7bc8SGuillaume Chatelet       auto aligned = align(dst, _, runtime_size);
392*7f5d7bc8SGuillaume Chatelet       NextT::set(aligned.arg1, value, aligned.size);
393*7f5d7bc8SGuillaume Chatelet     }
394*7f5d7bc8SGuillaume Chatelet 
395*7f5d7bc8SGuillaume Chatelet     template <typename SrcAddrT1, typename SrcAddrT2>
isDifferentAlign::Then396*7f5d7bc8SGuillaume Chatelet     static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
397*7f5d7bc8SGuillaume Chatelet                                        size_t runtime_size) {
398*7f5d7bc8SGuillaume Chatelet       if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
399*7f5d7bc8SGuillaume Chatelet         return res;
400*7f5d7bc8SGuillaume Chatelet       auto aligned = align(src1, src2, runtime_size);
401*7f5d7bc8SGuillaume Chatelet       return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size);
402*7f5d7bc8SGuillaume Chatelet     }
403*7f5d7bc8SGuillaume Chatelet 
404*7f5d7bc8SGuillaume Chatelet     template <typename SrcAddrT1, typename SrcAddrT2>
threeWayCmpAlign::Then405*7f5d7bc8SGuillaume Chatelet     static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
406*7f5d7bc8SGuillaume Chatelet                                       size_t runtime_size) {
407*7f5d7bc8SGuillaume Chatelet       if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
408*7f5d7bc8SGuillaume Chatelet         return res;
409*7f5d7bc8SGuillaume Chatelet       auto aligned = align(src1, src2, runtime_size);
410*7f5d7bc8SGuillaume Chatelet       return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size);
411*7f5d7bc8SGuillaume Chatelet     }
412*7f5d7bc8SGuillaume Chatelet   };
413*7f5d7bc8SGuillaume Chatelet 
414*7f5d7bc8SGuillaume Chatelet private:
415*7f5d7bc8SGuillaume Chatelet   static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE;
416*7f5d7bc8SGuillaume Chatelet   static_assert(ALIGN_OP_SIZE > 1);
417*7f5d7bc8SGuillaume Chatelet 
418*7f5d7bc8SGuillaume Chatelet   template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned {
419*7f5d7bc8SGuillaume Chatelet     Arg1AddrT arg1;
420*7f5d7bc8SGuillaume Chatelet     Arg2AddrT arg2;
421*7f5d7bc8SGuillaume Chatelet     size_t size;
422*7f5d7bc8SGuillaume Chatelet 
stepForwardAlign::Aligned423*7f5d7bc8SGuillaume Chatelet     Aligned stepForward() const {
424*7f5d7bc8SGuillaume Chatelet       return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, ALIGN_OP_SIZE),
425*7f5d7bc8SGuillaume Chatelet                      offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, ALIGN_OP_SIZE),
426*7f5d7bc8SGuillaume Chatelet                      size - ALIGN_OP_SIZE};
427*7f5d7bc8SGuillaume Chatelet     }
428*7f5d7bc8SGuillaume Chatelet 
stepBackAlign::Aligned429*7f5d7bc8SGuillaume Chatelet     Aligned stepBack() const {
430*7f5d7bc8SGuillaume Chatelet       return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, -ALIGN_OP_SIZE),
431*7f5d7bc8SGuillaume Chatelet                      offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, -ALIGN_OP_SIZE),
432*7f5d7bc8SGuillaume Chatelet                      size + ALIGN_OP_SIZE};
433*7f5d7bc8SGuillaume Chatelet     }
434*7f5d7bc8SGuillaume Chatelet   };
435*7f5d7bc8SGuillaume Chatelet 
436*7f5d7bc8SGuillaume Chatelet   template <typename Arg1AddrT, typename Arg2AddrT>
makeAlignedAlign437*7f5d7bc8SGuillaume Chatelet   static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) {
438*7f5d7bc8SGuillaume Chatelet     return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size};
439*7f5d7bc8SGuillaume Chatelet   }
440*7f5d7bc8SGuillaume Chatelet 
441*7f5d7bc8SGuillaume Chatelet   template <typename Arg1AddrT, typename Arg2AddrT>
alignAlign442*7f5d7bc8SGuillaume Chatelet   static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) {
443*7f5d7bc8SGuillaume Chatelet     static_assert(IsAddressType<Arg1AddrT>::Value);
444*7f5d7bc8SGuillaume Chatelet     static_assert(IsAddressType<Arg2AddrT>::Value);
445*7f5d7bc8SGuillaume Chatelet     if constexpr (AlignOn == Arg::_1) {
446*7f5d7bc8SGuillaume Chatelet       auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg1.ptr_);
447*7f5d7bc8SGuillaume Chatelet       return makeAligned(offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg1, offset),
448*7f5d7bc8SGuillaume Chatelet                          offsetAddrAssumeAligned<1>(arg2, offset),
449*7f5d7bc8SGuillaume Chatelet                          runtime_size - offset);
450*7f5d7bc8SGuillaume Chatelet     } else if constexpr (AlignOn == Arg::_2) {
451*7f5d7bc8SGuillaume Chatelet       auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg2.ptr_);
452*7f5d7bc8SGuillaume Chatelet       return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset),
453*7f5d7bc8SGuillaume Chatelet                          offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg2, offset),
454*7f5d7bc8SGuillaume Chatelet                          runtime_size - offset);
455*7f5d7bc8SGuillaume Chatelet     } else {
456*7f5d7bc8SGuillaume Chatelet       DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2");
457*7f5d7bc8SGuillaume Chatelet     }
458*7f5d7bc8SGuillaume Chatelet   }
459*7f5d7bc8SGuillaume Chatelet };
460*7f5d7bc8SGuillaume Chatelet 
461*7f5d7bc8SGuillaume Chatelet } // namespace __llvm_libc
462*7f5d7bc8SGuillaume Chatelet 
463*7f5d7bc8SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
464