//===-- Algorithms to compose sized memory operations ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Higher order primitives that build upon the SizedOpT facility.
// They constitute the basic blocks for composing memory functions.
// This file defines the following operations:
// - Skip
// - Tail
// - HeadTail
// - Loop
// - Align
//
// See each class for documentation.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H

#include "src/string/memory_utils/address.h" // Address
#include "src/string/memory_utils/utils.h"   // offset_to_next_aligned

#include <stddef.h> // ptrdiff_t

namespace __llvm_libc {

// We are not yet allowed to use asserts in low level memory operations as
// assert itself could depend on them.
// We define this empty macro so we can enable them as soon as possible and
// keep track of invariants.
#define LIBC_ASSERT(COND)

// An operation that skips the specified number of bytes before applying the
// next operation.
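//
// e.g. (illustrative sketch; `_1` stands for a hypothetical 1-byte fixed-size
// op):
//   Skip<3>::Then<_1>::set(dst, value); // sets the byte at `dst + 3`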
template <ptrdiff_t Bytes> struct Skip {
  template <typename NextT> struct Then {
    template <typename DstAddrT>
    static inline void set(DstAddrT dst, ubyte value) {
      static_assert(NextT::IS_FIXED_SIZE);
      NextT::set(offsetAddr<Bytes>(dst), value);
    }

    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
      static_assert(NextT::IS_FIXED_SIZE);
      return NextT::isDifferent(offsetAddr<Bytes>(src1),
                                offsetAddr<Bytes>(src2));
    }

    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
      static_assert(NextT::IS_FIXED_SIZE);
      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
                                offsetAddr<Bytes>(src2));
    }

    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                      size_t runtime_size) {
      static_assert(NextT::IS_RUNTIME_SIZE);
      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
                                offsetAddr<Bytes>(src2), runtime_size - Bytes);
    }
  };
};

// Compute the address of a tail operation.
// Because of the runtime size, we lose the alignment information.
template <size_t Size, typename AddrT>
static auto tailAddr(AddrT addr, size_t runtime_size) {
  static_assert(IsAddressType<AddrT>::Value);
  return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
}

// Perform the operation on the last 'Size' bytes of the buffer.
//
// e.g. with
// [1234567812345678123]
// [__XXXXXXXXXXXXXX___]
// [________XXXXXXXX___]
//
// Precondition: `runtime_size >= Size`.
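//
// e.g. (illustrative sketch; `_8` stands for a hypothetical 8-byte fixed-size
// op, with `runtime_size >= 8`):
//   Tail<_8>::copy(dst, src, runtime_size); // copies the last 8 bytes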
template <typename SizedOpT> struct Tail {
  static_assert(SizedOpT::IS_FIXED_SIZE);
  static constexpr bool IS_RUNTIME_SIZE = true;
  static constexpr size_t SIZE = SizedOpT::SIZE;

  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    SizedOpT::copy(tailAddr<SIZE>(dst, runtime_size),
                   tailAddr<SIZE>(src, runtime_size));
  }

  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    SizedOpT::move(tailAddr<SIZE>(dst, runtime_size),
                   tailAddr<SIZE>(src, runtime_size));
  }

  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
    SizedOpT::set(tailAddr<SIZE>(dst, runtime_size), value);
  }

  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
                                     size_t runtime_size) {
    return SizedOpT::isDifferent(tailAddr<SIZE>(src1, runtime_size),
                                 tailAddr<SIZE>(src2, runtime_size));
  }

  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                    size_t runtime_size) {
    return SizedOpT::threeWayCmp(tailAddr<SIZE>(src1, runtime_size),
                                 tailAddr<SIZE>(src2, runtime_size));
  }
};

// Perform the operation on the first and the last `SizedOpT::Size` bytes of
// the buffer. This is useful for overlapping operations.
//
// e.g. with
// [1234567812345678123]
// [__XXXXXXXXXXXXXX___]
// [__XXXXXXXX_________]
// [________XXXXXXXX___]
//
// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
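//
// e.g. (illustrative sketch; `_16` stands for a hypothetical 16-byte
// fixed-size op, with `runtime_size` in [16, 32]):
//   HeadTail<_16>::copy(dst, src, runtime_size);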
template <typename SizedOpT> struct HeadTail {
  static_assert(SizedOpT::IS_FIXED_SIZE);
  static constexpr bool IS_RUNTIME_SIZE = true;

  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
    SizedOpT::copy(dst, src);
    Tail<SizedOpT>::copy(dst, src, runtime_size);
  }

  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
    static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigations are needed to determine
    // the best order.
    auto head = SizedOpT::load(src);
    auto tail = SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
    SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail);
    SizedOpT::store(dst, head);
  }

  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
    SizedOpT::set(dst, value);
    Tail<SizedOpT>::set(dst, value, runtime_size);
  }

  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
                                     size_t runtime_size) {
    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE);
    // Two strategies can be applied here:
    // 1. Compute head and tail and compose them with a bitwise or operation.
    // 2. Stop early if head is different.
    // We chose the latter because HeadTail operations are typically performed
    // with sizes ranging from 4 to 256 bytes. The cost of the loads is then
    // significantly larger than the cost of the branch.
    if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
      return res;
    return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
  }

  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                    size_t runtime_size) {
    LIBC_ASSERT(runtime_size >= SizedOpT::SIZE &&
                runtime_size <= 2 * SizedOpT::SIZE);
    if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
      return res;
    return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
  }
};

// Simple loop ending with a Tail operation.
//
// e.g. with
// [12345678123456781234567812345678]
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
// [__XXXXXXXX_______________________]
// [__________XXXXXXXX_______________]
// [__________________XXXXXXXX_______]
// [______________________XXXXXXXX___]
//
// Precondition:
// - runtime_size >= Size
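//
// e.g. (illustrative sketch; `_32` stands for a hypothetical 32-byte
// fixed-size op, with `runtime_size >= 32`):
//   Loop<_32>::copy(dst, src, runtime_size); // 32-byte blocks, then a Tail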
template <typename SizedOpT> struct Loop {
  static_assert(SizedOpT::IS_FIXED_SIZE);
  static constexpr bool IS_RUNTIME_SIZE = true;
  static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE;

  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    size_t offset = 0;
    do {
      SizedOpT::copy(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
                     offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
      offset += BLOCK_SIZE;
    } while (offset < runtime_size - BLOCK_SIZE);
    Tail<SizedOpT>::copy(dst, src, runtime_size);
  }

  // Move forward, suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    const auto tail_value =
        SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size));
    size_t offset = 0;
    do {
      SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
                     offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
      offset += BLOCK_SIZE;
    } while (offset < runtime_size - BLOCK_SIZE);
    SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail_value);
  }

  // Move backward, suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  template <typename DstAddrT, typename SrcAddrT>
  static inline void move_backward(DstAddrT dst, SrcAddrT src,
                                   size_t runtime_size) {
    const auto head_value = SizedOpT::load(src);
    ptrdiff_t offset = runtime_size - BLOCK_SIZE;
    do {
      SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset),
                     offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset));
      offset -= BLOCK_SIZE;
    } while (offset >= 0);
    SizedOpT::store(dst, head_value);
  }

  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
    size_t offset = 0;
    do {
      SizedOpT::set(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), value);
      offset += BLOCK_SIZE;
    } while (offset < runtime_size - BLOCK_SIZE);
    Tail<SizedOpT>::set(dst, value, runtime_size);
  }

  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
                                     size_t runtime_size) {
    size_t offset = 0;
    do {
      if (uint64_t res = SizedOpT::isDifferent(
              offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
              offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
        return res;
      offset += BLOCK_SIZE;
    } while (offset < runtime_size - BLOCK_SIZE);
    return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size);
  }

  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                    size_t runtime_size) {
    size_t offset = 0;
    do {
      if (int32_t res = SizedOpT::threeWayCmp(
              offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset),
              offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset)))
        return res;
      offset += BLOCK_SIZE;
    } while (offset < runtime_size - BLOCK_SIZE);
    return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size);
  }
};

// Aligns using a statically-sized operation, then calls the subsequent NextT
// operation.
//
// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
// Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, runtime_size);
enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
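// Another composition (illustrative sketch; `_32` stands for a hypothetical
// 32-byte fixed-size op): a destination-aligned 32-byte loop set:
//   Align<_32, Arg::Dst>::Then<Loop<_32>>::set(dst, value, runtime_size);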
template <typename SizedOpT, Arg AlignOn = Arg::_1> struct Align {
  static_assert(SizedOpT::IS_FIXED_SIZE);

  template <typename NextT> struct Then {
    static_assert(NextT::IS_RUNTIME_SIZE);

    template <typename DstAddrT, typename SrcAddrT>
    static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
      SizedOpT::copy(dst, src);
      auto aligned = align(dst, src, runtime_size);
      NextT::copy(aligned.arg1, aligned.arg2, aligned.size);
    }

    // Move forward, suitable when dst < src. The alignment is performed with
    // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
    //
    // e.g. Moving two bytes and making sure src is then aligned.
    // [   |       |       |       |     ]
    // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
    // [____LLLLLLLL_____________________]
    // [___________LLLLLLLL______________]
    // [_SSSSSSSS________________________]
    // [________SSSSSSSS_________________]
    //
    // e.g. Moving two bytes and making sure dst is then aligned.
    // [   |       |       |       |     ]
    // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
    // [____LLLLLLLL_____________________]
    // [______LLLLLLLL___________________]
    // [_SSSSSSSS________________________]
    // [___SSSSSSSS______________________]
    template <typename DstAddrT, typename SrcAddrT>
    static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
      auto aligned_after_begin = align(dst, src, runtime_size);
      // We move pointers forward by Size so we can perform HeadTail.
      auto aligned = aligned_after_begin.stepForward();
      HeadTail<SizedOpT>::move(dst, src, runtime_size - aligned.size);
      NextT::move(aligned.arg1, aligned.arg2, aligned.size);
    }

    // Move backward, suitable when dst > src. The alignment is performed with
    // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
    //
    // e.g. Moving two bytes backward and making sure src is then aligned.
    // [   |       |       |       |     ]
    // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
    // [ _________________LLLLLLLL_______]
    // [ ___________________LLLLLLLL_____]
    // [____________________SSSSSSSS_____]
    // [______________________SSSSSSSS___]
    //
    // e.g. Moving two bytes and making sure dst is then aligned.
    // [   |       |       |       |     ]
    // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
    // [ _______________LLLLLLLL_________]
    // [ ___________________LLLLLLLL_____]
    // [__________________SSSSSSSS_______]
    // [______________________SSSSSSSS___]
    template <typename DstAddrT, typename SrcAddrT>
    static inline void move_backward(DstAddrT dst, SrcAddrT src,
                                     size_t runtime_size) {
      const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size);
      const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size);
      auto aligned_after_end = align(dst_end, src_end, 0);
      // We move pointers back by 2 x Size so we can perform HeadTail.
      auto aligned = aligned_after_end.stepBack().stepBack();
      HeadTail<SizedOpT>::move(aligned.arg1, aligned.arg2, aligned.size);
      NextT::move_backward(dst, src, runtime_size - aligned.size);
    }

    template <typename DstAddrT>
    static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
      SizedOpT::set(dst, value);
      DstAddrT _(nullptr);
      auto aligned = align(dst, _, runtime_size);
      NextT::set(aligned.arg1, value, aligned.size);
    }

    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
                                       size_t runtime_size) {
      if (const uint64_t res = SizedOpT::isDifferent(src1, src2))
        return res;
      auto aligned = align(src1, src2, runtime_size);
      return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size);
    }

    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                      size_t runtime_size) {
      if (const int32_t res = SizedOpT::threeWayCmp(src1, src2))
        return res;
      auto aligned = align(src1, src2, runtime_size);
      return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size);
    }
  };

private:
  static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE;
  static_assert(ALIGN_OP_SIZE > 1);

  template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned {
    Arg1AddrT arg1;
    Arg2AddrT arg2;
    size_t size;

    Aligned stepForward() const {
      return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, ALIGN_OP_SIZE),
                     offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, ALIGN_OP_SIZE),
                     size - ALIGN_OP_SIZE};
    }

    Aligned stepBack() const {
      return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, -ALIGN_OP_SIZE),
                     offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, -ALIGN_OP_SIZE),
                     size + ALIGN_OP_SIZE};
    }
  };

  template <typename Arg1AddrT, typename Arg2AddrT>
  static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) {
    return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size};
  }

  template <typename Arg1AddrT, typename Arg2AddrT>
  static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) {
    static_assert(IsAddressType<Arg1AddrT>::Value);
    static_assert(IsAddressType<Arg2AddrT>::Value);
    if constexpr (AlignOn == Arg::_1) {
      auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg1.ptr_);
      return makeAligned(offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg1, offset),
                         offsetAddrAssumeAligned<1>(arg2, offset),
                         runtime_size - offset);
    } else if constexpr (AlignOn == Arg::_2) {
      auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg2.ptr_);
      return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset),
                         offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg2, offset),
                         runtime_size - offset);
    } else {
      DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2");
    }
  }
};

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H