1 //===-- Elementary operations to compose memory primitives ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
11 
12 #include <stddef.h> // size_t
13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
14 
15 #include "src/__support/endian.h"
16 #include "src/string/memory_utils/utils.h"
17 
18 namespace __llvm_libc {
19 
20 // Elementary Operations
21 // --------------------------------
22 // We define abstract elementary operations acting on fixed chunks of memory.
23 // These are low level building blocks that are meant to be assembled to compose
24 // higher order abstractions. Each function is defined twice: once with
25 // fixed-size operations, and once with runtime-size operations.
26 
27 // Fixed-size copy from 'src' to 'dst'.
28 template <typename Element>
copy(char * __restrict dst,const char * __restrict src)29 void copy(char *__restrict dst, const char *__restrict src) {
30   Element::copy(dst, src);
31 }
32 // Runtime-size copy from 'src' to 'dst'.
33 template <typename Element>
copy(char * __restrict dst,const char * __restrict src,size_t size)34 void copy(char *__restrict dst, const char *__restrict src, size_t size) {
35   Element::copy(dst, src, size);
36 }
37 
38 // Fixed-size move from 'src' to 'dst'.
move(char * dst,const char * src)39 template <typename Element> void move(char *dst, const char *src) {
40   Element::move(dst, src);
41 }
42 // Runtime-size move from 'src' to 'dst'.
move(char * dst,const char * src,size_t size)43 template <typename Element> void move(char *dst, const char *src, size_t size) {
44   Element::move(dst, src, size);
45 }
46 // Runtime-size move from 'src' to 'dst'.
47 template <typename Element>
move_backward(char * dst,const char * src,size_t size)48 void move_backward(char *dst, const char *src, size_t size) {
49   Element::move_backward(dst, src, size);
50 }
51 
52 // Fixed-size equality between 'lhs' and 'rhs'.
equals(const char * lhs,const char * rhs)53 template <typename Element> bool equals(const char *lhs, const char *rhs) {
54   return Element::equals(lhs, rhs);
55 }
56 // Runtime-size equality between 'lhs' and 'rhs'.
57 template <typename Element>
equals(const char * lhs,const char * rhs,size_t size)58 bool equals(const char *lhs, const char *rhs, size_t size) {
59   return Element::equals(lhs, rhs, size);
60 }
61 
62 // Fixed-size three-way comparison between 'lhs' and 'rhs'.
63 template <typename Element>
three_way_compare(const char * lhs,const char * rhs)64 int three_way_compare(const char *lhs, const char *rhs) {
65   return Element::three_way_compare(lhs, rhs);
66 }
67 // Runtime-size three-way comparison between 'lhs' and 'rhs'.
68 template <typename Element>
three_way_compare(const char * lhs,const char * rhs,size_t size)69 int three_way_compare(const char *lhs, const char *rhs, size_t size) {
70   return Element::three_way_compare(lhs, rhs, size);
71 }
72 
73 // Fixed-size initialization.
74 template <typename Element>
splat_set(char * dst,const unsigned char value)75 void splat_set(char *dst, const unsigned char value) {
76   Element::splat_set(dst, value);
77 }
78 // Runtime-size initialization.
79 template <typename Element>
splat_set(char * dst,const unsigned char value,size_t size)80 void splat_set(char *dst, const unsigned char value, size_t size) {
81   Element::splat_set(dst, value, size);
82 }
83 
84 // Stack placeholder for Move operations.
85 template <typename Element> struct Storage { char bytes[Element::SIZE]; };
86 
87 // Fixed-size Higher-Order Operations
88 // ----------------------------------
89 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
90 // - Chained<Types...>: Chain the operation of several types.
91 
92 // Repeat the operation several times in a row.
93 template <typename Element, size_t ElementCount> struct Repeated {
94   static constexpr size_t SIZE = ElementCount * Element::SIZE;
95 
copyRepeated96   static void copy(char *__restrict dst, const char *__restrict src) {
97     for (size_t i = 0; i < ElementCount; ++i) {
98       const size_t offset = i * Element::SIZE;
99       Element::copy(dst + offset, src + offset);
100     }
101   }
102 
moveRepeated103   static void move(char *dst, const char *src) {
104     const auto value = load(src);
105     store(dst, value);
106   }
107 
equalsRepeated108   static bool equals(const char *lhs, const char *rhs) {
109     for (size_t i = 0; i < ElementCount; ++i) {
110       const size_t offset = i * Element::SIZE;
111       if (!Element::equals(lhs + offset, rhs + offset))
112         return false;
113     }
114     return true;
115   }
116 
three_way_compareRepeated117   static int three_way_compare(const char *lhs, const char *rhs) {
118     for (size_t i = 0; i < ElementCount; ++i) {
119       const size_t offset = i * Element::SIZE;
120       // We make the assumption that 'equals' is cheaper than
121       // 'three_way_compare'.
122       if (Element::equals(lhs + offset, rhs + offset))
123         continue;
124       return Element::three_way_compare(lhs + offset, rhs + offset);
125     }
126     return 0;
127   }
128 
splat_setRepeated129   static void splat_set(char *dst, const unsigned char value) {
130     for (size_t i = 0; i < ElementCount; ++i) {
131       const size_t offset = i * Element::SIZE;
132       Element::splat_set(dst + offset, value);
133     }
134   }
135 
loadRepeated136   static Storage<Repeated> load(const char *ptr) {
137     Storage<Repeated> value;
138     copy(reinterpret_cast<char *>(&value), ptr);
139     return value;
140   }
141 
storeRepeated142   static void store(char *ptr, Storage<Repeated> value) {
143     copy(ptr, reinterpret_cast<const char *>(&value));
144   }
145 };
146 
147 template <typename Element> struct Repeated<Element, 0> {
148   static void move(char *dst, const char *src) {}
149 };
150 
151 // Chain the operation of several types.
152 // For instance, to handle a 3 bytes operation, one can use:
153 // Chained<UINT16, UINT8>::Operation();
154 template <typename... Types> struct Chained;
155 
156 template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
157   static constexpr size_t SIZE = Head::SIZE + Chained<Tail...>::SIZE;
158 
159   static void copy(char *__restrict dst, const char *__restrict src) {
160     Chained<Tail...>::copy(dst + Head::SIZE, src + Head::SIZE);
161     __llvm_libc::copy<Head>(dst, src);
162   }
163 
164   static void move(char *dst, const char *src) {
165     const auto value = Head::load(src);
166     Chained<Tail...>::move(dst + Head::SIZE, src + Head::SIZE);
167     Head::store(dst, value);
168   }
169 
170   static bool equals(const char *lhs, const char *rhs) {
171     if (!__llvm_libc::equals<Head>(lhs, rhs))
172       return false;
173     return Chained<Tail...>::equals(lhs + Head::SIZE, rhs + Head::SIZE);
174   }
175 
176   static int three_way_compare(const char *lhs, const char *rhs) {
177     if (__llvm_libc::equals<Head>(lhs, rhs))
178       return Chained<Tail...>::three_way_compare(lhs + Head::SIZE,
179                                                  rhs + Head::SIZE);
180     return __llvm_libc::three_way_compare<Head>(lhs, rhs);
181   }
182 
183   static void splat_set(char *dst, const unsigned char value) {
184     Chained<Tail...>::splat_set(dst + Head::SIZE, value);
185     __llvm_libc::splat_set<Head>(dst, value);
186   }
187 };
188 
189 template <> struct Chained<> {
190   static constexpr size_t SIZE = 0;
191   static void copy(char *__restrict dst, const char *__restrict src) {}
192   static void move(char *dst, const char *src) {}
193   static bool equals(const char *lhs, const char *rhs) { return true; }
194   static int three_way_compare(const char *lhs, const char *rhs) { return 0; }
195   static void splat_set(char *dst, const unsigned char value) {}
196 };
197 
198 // Overlap ElementA and ElementB so they span Size bytes.
199 template <size_t Size, typename ElementA, typename ElementB = ElementA>
200 struct Overlap {
201   static constexpr size_t SIZE = Size;
202   static_assert(ElementB::SIZE <= ElementA::SIZE, "ElementB too big");
203   static_assert(ElementA::SIZE <= Size, "ElementA too big");
204   static_assert((ElementA::SIZE + ElementB::SIZE) >= Size,
205                 "Elements too small to overlap");
206   static constexpr size_t OFFSET = SIZE - ElementB::SIZE;
207 
208   static void copy(char *__restrict dst, const char *__restrict src) {
209     ElementA::copy(dst, src);
210     ElementB::copy(dst + OFFSET, src + OFFSET);
211   }
212 
213   static void move(char *dst, const char *src) {
214     const auto value_a = ElementA::load(src);
215     const auto value_b = ElementB::load(src + OFFSET);
216     ElementB::store(dst + OFFSET, value_b);
217     ElementA::store(dst, value_a);
218   }
219 
220   static bool equals(const char *lhs, const char *rhs) {
221     if (!ElementA::equals(lhs, rhs))
222       return false;
223     if (!ElementB::equals(lhs + OFFSET, rhs + OFFSET))
224       return false;
225     return true;
226   }
227 
228   static int three_way_compare(const char *lhs, const char *rhs) {
229     if (!ElementA::equals(lhs, rhs))
230       return ElementA::three_way_compare(lhs, rhs);
231     if (!ElementB::equals(lhs + OFFSET, rhs + OFFSET))
232       return ElementB::three_way_compare(lhs + OFFSET, rhs + OFFSET);
233     return 0;
234   }
235 
236   static void splat_set(char *dst, const unsigned char value) {
237     ElementA::splat_set(dst, value);
238     ElementB::splat_set(dst + OFFSET, value);
239   }
240 };
241 
242 // Runtime-size Higher-Order Operations
243 // ------------------------------------
244 // - Tail<T>: Perform the operation on the last 'T::SIZE' bytes of the buffer.
245 // - HeadTail<T>: Perform the operation on the first and last 'T::SIZE' bytes
246 //   of the buffer.
247 // - Loop<T>: Perform a loop of fixed-sized operations.
248 
249 // Perform the operation on the last 'T::SIZE' bytes of the buffer.
250 //
251 // e.g. with
252 // [1234567812345678123]
253 // [__XXXXXXXXXXXXXX___]
254 // [________XXXXXXXX___]
255 //
256 // Precondition: `size >= T::SIZE`.
257 template <typename T> struct Tail {
258   static void copy(char *__restrict dst, const char *__restrict src,
259                    size_t size) {
260     return T::copy(dst + offset(size), src + offset(size));
261   }
262 
263   static bool equals(const char *lhs, const char *rhs, size_t size) {
264     return T::equals(lhs + offset(size), rhs + offset(size));
265   }
266 
267   static int three_way_compare(const char *lhs, const char *rhs, size_t size) {
268     return T::three_way_compare(lhs + offset(size), rhs + offset(size));
269   }
270 
271   static void splat_set(char *dst, const unsigned char value, size_t size) {
272     return T::splat_set(dst + offset(size), value);
273   }
274 
275   static size_t offset(size_t size) { return size - T::SIZE; }
276 };
277 
278 // Perform the operation on the first and last 'T::SIZE' bytes of the buffer.
279 // This is useful for overlapping operations.
280 //
281 // e.g. with
282 // [1234567812345678123]
283 // [__XXXXXXXXXXXXXX___]
284 // [__XXXXXXXX_________]
285 // [________XXXXXXXX___]
286 //
287 // Precondition: `size >= T::SIZE && size <= 2 x T::SIZE`.
288 template <typename T> struct HeadTail {
289   static void copy(char *__restrict dst, const char *__restrict src,
290                    size_t size) {
291     T::copy(dst, src);
292     Tail<T>::copy(dst, src, size);
293   }
294 
295   static void move(char *dst, const char *src, size_t size) {
296     const size_t offset = Tail<T>::offset(size);
297     const auto head_value = T::load(src);
298     const auto tail_value = T::load(src + offset);
299     T::store(dst + offset, tail_value);
300     T::store(dst, head_value);
301   }
302 
303   static bool equals(const char *lhs, const char *rhs, size_t size) {
304     if (!T::equals(lhs, rhs))
305       return false;
306     return Tail<T>::equals(lhs, rhs, size);
307   }
308 
309   static int three_way_compare(const char *lhs, const char *rhs, size_t size) {
310     if (!T::equals(lhs, rhs))
311       return T::three_way_compare(lhs, rhs);
312     return Tail<T>::three_way_compare(lhs, rhs, size);
313   }
314 
315   static void splat_set(char *dst, const unsigned char value, size_t size) {
316     T::splat_set(dst, value);
317     Tail<T>::splat_set(dst, value, size);
318   }
319 };
320 
321 // Simple loop ending with a Tail operation.
322 //
323 // e.g. with
324 // [12345678123456781234567812345678]
325 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
326 // [__XXXXXXXX_______________________]
327 // [__________XXXXXXXX_______________]
328 // [__________________XXXXXXXX_______]
329 // [______________________XXXXXXXX___]
330 //
331 // Precondition:
332 // - size >= T::SIZE
333 template <typename T, typename TailT = T> struct Loop {
334   static_assert(T::SIZE == TailT::SIZE,
335                 "Tail type must have the same size as T");
336 
337   static void copy(char *__restrict dst, const char *__restrict src,
338                    size_t size) {
339     size_t offset = 0;
340     do {
341       T::copy(dst + offset, src + offset);
342       offset += T::SIZE;
343     } while (offset < size - T::SIZE);
344     Tail<TailT>::copy(dst, src, size);
345   }
346 
347   // Move forward suitable when dst < src. We load the tail bytes before
348   // handling the loop.
349   //
350   // e.g. Moving two bytes
351   // [   |       |       |       |       |]
352   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
353   // [_________________________LLLLLLLL___]
354   // [___LLLLLLLL_________________________]
355   // [_SSSSSSSS___________________________]
356   // [___________LLLLLLLL_________________]
357   // [_________SSSSSSSS___________________]
358   // [___________________LLLLLLLL_________]
359   // [_________________SSSSSSSS___________]
360   // [_______________________SSSSSSSS_____]
361   static void move(char *dst, const char *src, size_t size) {
362     const size_t tail_offset = Tail<T>::offset(size);
363     const auto tail_value = TailT::load(src + tail_offset);
364     size_t offset = 0;
365     do {
366       T::move(dst + offset, src + offset);
367       offset += T::SIZE;
368     } while (offset < size - T::SIZE);
369     TailT::store(dst + tail_offset, tail_value);
370   }
371 
372   // Move forward suitable when dst > src. We load the head bytes before
373   // handling the loop.
374   //
375   // e.g. Moving two bytes
376   // [   |       |       |       |       |]
377   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
378   // [___LLLLLLLL_________________________]
379   // [_________________________LLLLLLLL___]
380   // [___________________________SSSSSSSS_]
381   // [_________________LLLLLLLL___________]
382   // [___________________SSSSSSSS_________]
383   // [_________LLLLLLLL___________________]
384   // [___________SSSSSSSS_________________]
385   // [_____SSSSSSSS_______________________]
386   static void move_backward(char *dst, const char *src, size_t size) {
387     const auto head_value = TailT::load(src);
388     ptrdiff_t offset = size - T::SIZE;
389     do {
390       T::move(dst + offset, src + offset);
391       offset -= T::SIZE;
392     } while (offset >= 0);
393     TailT::store(dst, head_value);
394   }
395 
396   static bool equals(const char *lhs, const char *rhs, size_t size) {
397     size_t offset = 0;
398     do {
399       if (!T::equals(lhs + offset, rhs + offset))
400         return false;
401       offset += T::SIZE;
402     } while (offset < size - T::SIZE);
403     return Tail<TailT>::equals(lhs, rhs, size);
404   }
405 
406   static int three_way_compare(const char *lhs, const char *rhs, size_t size) {
407     size_t offset = 0;
408     do {
409       if (!T::equals(lhs + offset, rhs + offset))
410         return T::three_way_compare(lhs + offset, rhs + offset);
411       offset += T::SIZE;
412     } while (offset < size - T::SIZE);
413     return Tail<TailT>::three_way_compare(lhs, rhs, size);
414   }
415 
416   static void splat_set(char *dst, const unsigned char value, size_t size) {
417     size_t offset = 0;
418     do {
419       T::splat_set(dst + offset, value);
420       offset += T::SIZE;
421     } while (offset < size - T::SIZE);
422     Tail<TailT>::splat_set(dst, value, size);
423   }
424 };
425 
426 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
427 
428 namespace internal {
429 
430 template <Arg arg> struct ArgSelector {};
431 
432 template <> struct ArgSelector<Arg::_1> {
433   template <typename T1, typename T2>
434   static T1 *__restrict &Select(T1 *__restrict &p1ref, T2 *__restrict &p2ref) {
435     return p1ref;
436   }
437 };
438 
439 template <> struct ArgSelector<Arg::_2> {
440   template <typename T1, typename T2>
441   static T2 *__restrict &Select(T1 *__restrict &p1ref, T2 *__restrict &p2ref) {
442     return p2ref;
443   }
444 };
445 
446 // Provides a specialized bump function that adjusts pointers and size so first
447 // argument (resp. second argument) gets aligned to Alignment.
448 // We make sure the compiler knows about the adjusted pointer alignment.
449 // The 'additional_bumps' parameter allows to reach previous / next aligned
450 // pointers.
451 template <Arg arg, size_t Alignment> struct Align {
452   template <typename T1, typename T2>
453   static void bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size,
454                    int additional_bumps = 0) {
455     auto &aligned_ptr = ArgSelector<arg>::Select(p1ref, p2ref);
456     auto offset = offset_to_next_aligned<Alignment>(aligned_ptr);
457     offset += additional_bumps * Alignment;
458     p1ref += offset;
459     p2ref += offset;
460     size -= offset;
461     aligned_ptr = assume_aligned<Alignment>(aligned_ptr);
462   }
463 };
464 
465 } // namespace internal
466 
467 // An alignment operation that:
468 // - executes the 'AlignmentT' operation
469 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
470 //   pointer gets aligned, size is decreased accordingly.
471 // - calls the 'NextT' operation.
472 //
473 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
474 // copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
475 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
476 private:
477   static constexpr size_t ALIGNMENT = AlignmentT::SIZE;
478   static_assert(ALIGNMENT > 1, "Alignment must be more than 1");
479   static_assert(is_power2(ALIGNMENT), "Alignment must be a power of 2");
480 
481 public:
482   template <typename NextT> struct Then {
483     static void copy(char *__restrict dst, const char *__restrict src,
484                      size_t size) {
485       AlignmentT::copy(dst, src);
486       internal::Align<AlignOn, ALIGNMENT>::bump(dst, src, size);
487       NextT::copy(dst, src, size);
488     }
489 
490     // Move forward suitable when dst < src. The alignment is performed with an
491     // HeadTail operation of size ∈ [Alignment, 2 x Alignment].
492     //
493     // e.g. Moving two bytes and making sure src is then aligned.
494     // [  |       |       |       |      ]
495     // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
496     // [____LLLLLLLL_____________________]
497     // [___________LLLLLLLL______________]
498     // [_SSSSSSSS________________________]
499     // [________SSSSSSSS_________________]
500     //
501     // e.g. Moving two bytes and making sure dst is then aligned.
502     // [  |       |       |       |      ]
503     // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
504     // [____LLLLLLLL_____________________]
505     // [______LLLLLLLL___________________]
506     // [_SSSSSSSS________________________]
507     // [___SSSSSSSS______________________]
508     static void move(char *dst, const char *src, size_t size) {
509       char *next_dst = dst;
510       const char *next_src = src;
511       size_t next_size = size;
512       internal::Align<AlignOn, ALIGNMENT>::bump(next_dst, next_src, next_size,
513                                                 1);
514       HeadTail<AlignmentT>::move(dst, src, size - next_size);
515       NextT::move(next_dst, next_src, next_size);
516     }
517 
518     // Move backward suitable when dst > src. The alignment is performed with an
519     // HeadTail operation of size ∈ [Alignment, 2 x Alignment].
520     //
521     // e.g. Moving two bytes backward and making sure src is then aligned.
522     // [  |       |       |       |      ]
523     // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
524     // [ _________________LLLLLLLL_______]
525     // [ ___________________LLLLLLLL_____]
526     // [____________________SSSSSSSS_____]
527     // [______________________SSSSSSSS___]
528     //
529     // e.g. Moving two bytes and making sure dst is then aligned.
530     // [  |       |       |       |      ]
531     // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
532     // [ _______________LLLLLLLL_________]
533     // [ ___________________LLLLLLLL_____]
534     // [__________________SSSSSSSS_______]
535     // [______________________SSSSSSSS___]
536     static void move_backward(char *dst, const char *src, size_t size) {
537       char *headtail_dst = dst + size;
538       const char *headtail_src = src + size;
539       size_t headtail_size = 0;
540       internal::Align<AlignOn, ALIGNMENT>::bump(headtail_dst, headtail_src,
541                                                 headtail_size, -2);
542       HeadTail<AlignmentT>::move(headtail_dst, headtail_src, headtail_size);
543       NextT::move_backward(dst, src, size - headtail_size);
544     }
545 
546     static bool equals(const char *lhs, const char *rhs, size_t size) {
547       if (!AlignmentT::equals(lhs, rhs))
548         return false;
549       internal::Align<AlignOn, ALIGNMENT>::bump(lhs, rhs, size);
550       return NextT::equals(lhs, rhs, size);
551     }
552 
553     static int three_way_compare(const char *lhs, const char *rhs,
554                                  size_t size) {
555       if (!AlignmentT::equals(lhs, rhs))
556         return AlignmentT::three_way_compare(lhs, rhs);
557       internal::Align<AlignOn, ALIGNMENT>::bump(lhs, rhs, size);
558       return NextT::three_way_compare(lhs, rhs, size);
559     }
560 
561     static void splat_set(char *dst, const unsigned char value, size_t size) {
562       AlignmentT::splat_set(dst, value);
563       char *dummy = nullptr;
564       internal::Align<Arg::_1, ALIGNMENT>::bump(dst, dummy, size);
565       NextT::splat_set(dst, value, size);
566     }
567   };
568 };
569 
570 // An operation that allows to skip the specified amount of bytes.
571 template <ptrdiff_t Bytes> struct Skip {
572   template <typename NextT> struct Then {
573     static void copy(char *__restrict dst, const char *__restrict src,
574                      size_t size) {
575       NextT::copy(dst + Bytes, src + Bytes, size - Bytes);
576     }
577 
578     static void copy(char *__restrict dst, const char *__restrict src) {
579       NextT::copy(dst + Bytes, src + Bytes);
580     }
581 
582     static bool equals(const char *lhs, const char *rhs, size_t size) {
583       return NextT::equals(lhs + Bytes, rhs + Bytes, size - Bytes);
584     }
585 
586     static bool equals(const char *lhs, const char *rhs) {
587       return NextT::equals(lhs + Bytes, rhs + Bytes);
588     }
589 
590     static int three_way_compare(const char *lhs, const char *rhs,
591                                  size_t size) {
592       return NextT::three_way_compare(lhs + Bytes, rhs + Bytes, size - Bytes);
593     }
594 
595     static int three_way_compare(const char *lhs, const char *rhs) {
596       return NextT::three_way_compare(lhs + Bytes, rhs + Bytes);
597     }
598 
599     static void splat_set(char *dst, const unsigned char value, size_t size) {
600       NextT::splat_set(dst + Bytes, value, size - Bytes);
601     }
602 
603     static void splat_set(char *dst, const unsigned char value) {
604       NextT::splat_set(dst + Bytes, value);
605     }
606   };
607 };
608 
609 // Fixed-size Builtin Operations
610 // -----------------------------
611 // Note: Do not use 'builtin' right now as it requires the implementation of the
612 // `_inline` versions of all the builtins. Theoretically, Clang can still turn
613 // them into calls to the C library leading to reentrancy problems.
614 namespace builtin {
615 
616 #ifndef __has_builtin
617 #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
618 #endif
619 
620 template <size_t Size> struct Builtin {
621   static constexpr size_t SIZE = Size;
622 
623   static void copy(char *__restrict dst, const char *__restrict src) {
624 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
625     for_loop_copy(dst, src);
626 #elif __has_builtin(__builtin_memcpy_inline)
627     // __builtin_memcpy_inline guarantees to never call external functions.
628     // Unfortunately it is not widely available.
629     __builtin_memcpy_inline(dst, src, SIZE);
630 #else
631     for_loop_copy(dst, src);
632 #endif
633   }
634 
635   static void move(char *dst, const char *src) {
636 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
637     for_loop_move(dst, src);
638 #elif __has_builtin(__builtin_memmove)
639     __builtin_memmove(dst, src, SIZE);
640 #else
641     for_loop_move(dst, src);
642 #endif
643   }
644 
645 #if __has_builtin(__builtin_memcmp_inline)
646 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline
647 #else
648 #define LLVM_LIBC_MEMCMP __builtin_memcmp
649 #endif
650 
651   static bool equals(const char *lhs, const char *rhs) {
652     return LLVM_LIBC_MEMCMP(lhs, rhs, SIZE) == 0;
653   }
654 
655   static int three_way_compare(const char *lhs, const char *rhs) {
656     return LLVM_LIBC_MEMCMP(lhs, rhs, SIZE);
657   }
658 
659   static void splat_set(char *dst, const unsigned char value) {
660     __builtin_memset(dst, value, SIZE);
661   }
662 
663 private:
664   // Copies `SIZE` bytes from `src` to `dst` using a for loop.
665   // This code requires the use of `-fno-builtin-memcpy` to prevent the compiler
666   // from turning the for-loop back into `__builtin_memcpy`.
667   static void for_loop_copy(char *__restrict dst, const char *__restrict src) {
668     for (size_t i = 0; i < SIZE; ++i)
669       dst[i] = src[i];
670   }
671 
672   static void for_loop_move(char *dst, const char *src) {
673     for (size_t i = 0; i < SIZE; ++i)
674       dst[i] = src[i];
675   }
676 };
677 
678 using _1 = Builtin<1>;
679 using _2 = Builtin<2>;
680 using _3 = Builtin<3>;
681 using _4 = Builtin<4>;
682 using _8 = Builtin<8>;
683 using _16 = Builtin<16>;
684 using _32 = Builtin<32>;
685 using _64 = Builtin<64>;
686 using _128 = Builtin<128>;
687 
688 } // namespace builtin
689 
690 // Fixed-size Scalar Operations
691 // ----------------------------
692 namespace scalar {
693 
694 // The Scalar type makes use of simple sized integers.
695 template <typename T> struct Scalar {
696   static constexpr size_t SIZE = sizeof(T);
697 
698   static void copy(char *__restrict dst, const char *__restrict src) {
699     store(dst, load(src));
700   }
701 
702   static void move(char *dst, const char *src) { store(dst, load(src)); }
703 
704   static bool equals(const char *lhs, const char *rhs) {
705     return load(lhs) == load(rhs);
706   }
707 
708   static int three_way_compare(const char *lhs, const char *rhs) {
709     return scalar_three_way_compare(load(lhs), load(rhs));
710   }
711 
712   static void splat_set(char *dst, const unsigned char value) {
713     store(dst, get_splatted_value(value));
714   }
715 
716   static int scalar_three_way_compare(T a, T b);
717 
718   static T load(const char *ptr) {
719     T value;
720     builtin::Builtin<SIZE>::copy(reinterpret_cast<char *>(&value), ptr);
721     return value;
722   }
723   static void store(char *ptr, T value) {
724     builtin::Builtin<SIZE>::copy(ptr, reinterpret_cast<const char *>(&value));
725   }
726 
727 private:
728   static T get_splatted_value(const unsigned char value) {
729     return T(~0) / T(0xFF) * T(value);
730   }
731 };
732 
733 template <>
734 inline int Scalar<uint8_t>::scalar_three_way_compare(uint8_t a, uint8_t b) {
735   const int16_t la = Endian::to_big_endian(a);
736   const int16_t lb = Endian::to_big_endian(b);
737   return la - lb;
738 }
739 template <>
740 inline int Scalar<uint16_t>::scalar_three_way_compare(uint16_t a, uint16_t b) {
741   const int32_t la = Endian::to_big_endian(a);
742   const int32_t lb = Endian::to_big_endian(b);
743   return la - lb;
744 }
745 template <>
746 inline int Scalar<uint32_t>::scalar_three_way_compare(uint32_t a, uint32_t b) {
747   const uint32_t la = Endian::to_big_endian(a);
748   const uint32_t lb = Endian::to_big_endian(b);
749   return la > lb ? 1 : la < lb ? -1 : 0;
750 }
751 template <>
752 inline int Scalar<uint64_t>::scalar_three_way_compare(uint64_t a, uint64_t b) {
753   const uint64_t la = Endian::to_big_endian(a);
754   const uint64_t lb = Endian::to_big_endian(b);
755   return la > lb ? 1 : la < lb ? -1 : 0;
756 }
757 
758 using UINT8 = Scalar<uint8_t>;   // 1 Byte
759 using UINT16 = Scalar<uint16_t>; // 2 Bytes
760 using UINT32 = Scalar<uint32_t>; // 4 Bytes
761 using UINT64 = Scalar<uint64_t>; // 8 Bytes
762 
763 using _1 = UINT8;
764 using _2 = UINT16;
765 using _3 = Chained<UINT16, UINT8>;
766 using _4 = UINT32;
767 using _8 = UINT64;
768 using _16 = Repeated<_8, 2>;
769 using _32 = Repeated<_8, 4>;
770 using _64 = Repeated<_8, 8>;
771 using _128 = Repeated<_8, 16>;
772 
773 } // namespace scalar
774 } // namespace __llvm_libc
775 
776 #include <src/string/memory_utils/elements_aarch64.h>
777 #include <src/string/memory_utils/elements_x86.h>
778 
779 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
780