1 //===-- Elementary operations to compose memory primitives ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
11 
12 #include <stddef.h> // size_t
13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
14 
15 #include "src/__support/endian.h"
16 #include "src/string/memory_utils/utils.h"
17 
18 namespace __llvm_libc {
19 
20 // Elementary Operations
21 // --------------------------------
22 // We define abstract elementary operations acting on fixed chunks of memory.
23 // These are low level building blocks that are meant to be assembled to compose
24 // higher order abstractions. Each function is defined twice: once with
25 // fixed-size operations, and once with runtime-size operations.
26 
27 // Fixed-size copies from 'src' to 'dst'.
28 template <typename Element>
29 void Copy(char *__restrict dst, const char *__restrict src) {
30   Element::Copy(dst, src);
31 }
32 // Runtime-size copies from 'src' to 'dst'.
33 template <typename Element>
34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
35   Element::Copy(dst, src, size);
36 }
37 
38 // Fixed-size equality between 'lhs' and 'rhs'.
39 template <typename Element> bool Equals(const char *lhs, const char *rhs) {
40   return Element::Equals(lhs, rhs);
41 }
42 // Runtime-size equality between 'lhs' and 'rhs'.
43 template <typename Element>
44 bool Equals(const char *lhs, const char *rhs, size_t size) {
45   return Element::Equals(lhs, rhs, size);
46 }
47 
48 // Fixed-size three-way comparison between 'lhs' and 'rhs'.
49 template <typename Element>
50 int ThreeWayCompare(const char *lhs, const char *rhs) {
51   return Element::ThreeWayCompare(lhs, rhs);
52 }
53 // Runtime-size three-way comparison between 'lhs' and 'rhs'.
54 template <typename Element>
55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
56   return Element::ThreeWayCompare(lhs, rhs, size);
57 }
58 
59 // Fixed-size initialization.
60 template <typename Element>
61 void SplatSet(char *dst, const unsigned char value) {
62   Element::SplatSet(dst, value);
63 }
64 // Runtime-size initialization.
65 template <typename Element>
66 void SplatSet(char *dst, const unsigned char value, size_t size) {
67   Element::SplatSet(dst, value, size);
68 }
69 
70 // Fixed-size Higher-Order Operations
71 // ----------------------------------
72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
73 // - Chained<Types...>: Chain the operation of several types.
74 
75 // Repeat the operation several times in a row.
76 template <typename Element, size_t ElementCount> struct Repeated {
77   static constexpr size_t kSize = ElementCount * Element::kSize;
78 
79   static void Copy(char *__restrict dst, const char *__restrict src) {
80     for (size_t i = 0; i < ElementCount; ++i) {
81       const size_t offset = i * Element::kSize;
82       Element::Copy(dst + offset, src + offset);
83     }
84   }
85 
86   static bool Equals(const char *lhs, const char *rhs) {
87     for (size_t i = 0; i < ElementCount; ++i) {
88       const size_t offset = i * Element::kSize;
89       if (!Element::Equals(lhs + offset, rhs + offset))
90         return false;
91     }
92     return true;
93   }
94 
95   static int ThreeWayCompare(const char *lhs, const char *rhs) {
96     for (size_t i = 0; i < ElementCount; ++i) {
97       const size_t offset = i * Element::kSize;
98       // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'.
99       if (Element::Equals(lhs + offset, rhs + offset))
100         continue;
101       return Element::ThreeWayCompare(lhs + offset, rhs + offset);
102     }
103     return 0;
104   }
105 
106   static void SplatSet(char *dst, const unsigned char value) {
107     for (size_t i = 0; i < ElementCount; ++i) {
108       const size_t offset = i * Element::kSize;
109       Element::SplatSet(dst + offset, value);
110     }
111   }
112 };
113 
114 // Chain the operation of several types.
115 // For instance, to handle a 3 bytes operation, one can use:
116 // Chained<UINT16, UINT8>::Operation();
117 template <typename... Types> struct Chained;
118 
119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
120   static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
121 
122   static void Copy(char *__restrict dst, const char *__restrict src) {
123     Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
124     __llvm_libc::Copy<Head>(dst, src);
125   }
126 
127   static bool Equals(const char *lhs, const char *rhs) {
128     if (!__llvm_libc::Equals<Head>(lhs, rhs))
129       return false;
130     return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
131   }
132 
133   static int ThreeWayCompare(const char *lhs, const char *rhs) {
134     if (__llvm_libc::Equals<Head>(lhs, rhs))
135       return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
136                                                rhs + Head::kSize);
137     return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
138   }
139 
140   static void SplatSet(char *dst, const unsigned char value) {
141     Chained<Tail...>::SplatSet(dst + Head::kSize, value);
142     __llvm_libc::SplatSet<Head>(dst, value);
143   }
144 };
145 
146 template <> struct Chained<> {
147   static constexpr size_t kSize = 0;
148   static void Copy(char *__restrict dst, const char *__restrict src) {}
149   static bool Equals(const char *lhs, const char *rhs) { return true; }
150   static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
151   static void SplatSet(char *dst, const unsigned char value) {}
152 };
153 
154 // Overlap ElementA and ElementB so they span Size bytes.
155 template <size_t Size, typename ElementA, typename ElementB = ElementA>
156 struct Overlap {
157   static constexpr size_t kSize = Size;
158   static_assert(ElementB::kSize <= ElementA::kSize, "ElementB too big");
159   static_assert(ElementA::kSize <= Size, "ElementA too big");
160   static_assert((ElementA::kSize + ElementB::kSize) >= Size,
161                 "Elements too small to overlap");
162   static constexpr size_t kOffset = kSize - ElementB::kSize;
163 
164   static void Copy(char *__restrict dst, const char *__restrict src) {
165     ElementA::Copy(dst, src);
166     ElementB::Copy(dst + kOffset, src + kOffset);
167   }
168 
169   static bool Equals(const char *lhs, const char *rhs) {
170     if (!ElementA::Equals(lhs, rhs))
171       return false;
172     if (!ElementB::Equals(lhs + kOffset, rhs + kOffset))
173       return false;
174     return true;
175   }
176 
177   static int ThreeWayCompare(const char *lhs, const char *rhs) {
178     if (!ElementA::Equals(lhs, rhs))
179       return ElementA::ThreeWayCompare(lhs, rhs);
180     if (!ElementB::Equals(lhs + kOffset, rhs + kOffset))
181       return ElementB::ThreeWayCompare(lhs + kOffset, rhs + kOffset);
182     return 0;
183   }
184 
185   static void SplatSet(char *dst, const unsigned char value) {
186     ElementA::SplatSet(dst, value);
187     ElementB::SplatSet(dst + kOffset, value);
188   }
189 };
190 
191 // Runtime-size Higher-Order Operations
192 // ------------------------------------
193 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
194 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes
195 //   of the buffer.
196 // - Loop<T>: Perform a loop of fixed-sized operations.
197 
198 // Perform the operation on the last 'T::kSize' bytes of the buffer.
199 //
200 // e.g. with
201 // [1234567812345678123]
202 // [__XXXXXXXXXXXXXX___]
203 // [________XXXXXXXX___]
204 //
205 // Precondition: `size >= T::kSize`.
206 template <typename T> struct Tail {
207   static void Copy(char *__restrict dst, const char *__restrict src,
208                    size_t size) {
209     return T::Copy(dst + offset(size), src + offset(size));
210   }
211 
212   static bool Equals(const char *lhs, const char *rhs, size_t size) {
213     return T::Equals(lhs + offset(size), rhs + offset(size));
214   }
215 
216   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
217     return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
218   }
219 
220   static void SplatSet(char *dst, const unsigned char value, size_t size) {
221     return T::SplatSet(dst + offset(size), value);
222   }
223 
224   static size_t offset(size_t size) { return size - T::kSize; }
225 };
226 
227 // Perform the operation on the first and last 'T::kSize' bytes of the buffer.
228 // This is useful for overlapping operations.
229 //
230 // e.g. with
231 // [1234567812345678123]
232 // [__XXXXXXXXXXXXXX___]
233 // [__XXXXXXXX_________]
234 // [________XXXXXXXX___]
235 //
236 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
237 template <typename T> struct HeadTail {
238   static void Copy(char *__restrict dst, const char *__restrict src,
239                    size_t size) {
240     T::Copy(dst, src);
241     Tail<T>::Copy(dst, src, size);
242   }
243 
244   static bool Equals(const char *lhs, const char *rhs, size_t size) {
245     if (!T::Equals(lhs, rhs))
246       return false;
247     return Tail<T>::Equals(lhs, rhs, size);
248   }
249 
250   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
251     if (!T::Equals(lhs, rhs))
252       return T::ThreeWayCompare(lhs, rhs);
253     return Tail<T>::ThreeWayCompare(lhs, rhs, size);
254   }
255 
256   static void SplatSet(char *dst, const unsigned char value, size_t size) {
257     T::SplatSet(dst, value);
258     Tail<T>::SplatSet(dst, value, size);
259   }
260 };
261 
262 // Simple loop ending with a Tail operation.
263 //
264 // e.g. with
265 // [12345678123456781234567812345678]
266 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
267 // [__XXXXXXXX_______________________]
268 // [__________XXXXXXXX_______________]
269 // [__________________XXXXXXXX_______]
270 // [______________________XXXXXXXX___]
271 //
272 // Precondition:
273 // - size >= T::kSize
274 template <typename T, typename TailT = T> struct Loop {
275   static_assert(T::kSize == TailT::kSize,
276                 "Tail type must have the same size as T");
277 
278   static void Copy(char *__restrict dst, const char *__restrict src,
279                    size_t size) {
280     size_t offset = 0;
281     do {
282       T::Copy(dst + offset, src + offset);
283       offset += T::kSize;
284     } while (offset < size - T::kSize);
285     Tail<TailT>::Copy(dst, src, size);
286   }
287 
288   static bool Equals(const char *lhs, const char *rhs, size_t size) {
289     size_t offset = 0;
290     do {
291       if (!T::Equals(lhs + offset, rhs + offset))
292         return false;
293       offset += T::kSize;
294     } while (offset < size - T::kSize);
295     return Tail<TailT>::Equals(lhs, rhs, size);
296   }
297 
298   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
299     size_t offset = 0;
300     do {
301       if (!T::Equals(lhs + offset, rhs + offset))
302         return T::ThreeWayCompare(lhs + offset, rhs + offset);
303       offset += T::kSize;
304     } while (offset < size - T::kSize);
305     return Tail<TailT>::ThreeWayCompare(lhs, rhs, size);
306   }
307 
308   static void SplatSet(char *dst, const unsigned char value, size_t size) {
309     size_t offset = 0;
310     do {
311       T::SplatSet(dst + offset, value);
312       offset += T::kSize;
313     } while (offset < size - T::kSize);
314     Tail<TailT>::SplatSet(dst, value, size);
315   }
316 };
317 
318 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
319 
320 namespace internal {
321 
322 // Provides a specialized Bump function that adjusts pointers and size so first
323 // argument (resp. second argument) gets aligned to Alignment.
324 // We make sure the compiler knows about the adjusted pointer alignment.
325 template <Arg arg, size_t Alignment> struct AlignHelper {};
326 
327 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
328   template <typename T1, typename T2>
329   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
330     const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
331     p1ref += offset;
332     p2ref += offset;
333     size -= offset;
334     p1ref = assume_aligned<Alignment>(p1ref);
335   }
336 };
337 
338 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
339   template <typename T1, typename T2>
340   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
341     const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
342     p1ref += offset;
343     p2ref += offset;
344     size -= offset;
345     p2ref = assume_aligned<Alignment>(p2ref);
346   }
347 };
348 
349 } // namespace internal
350 
351 // An alignment operation that:
352 // - executes the 'AlignmentT' operation
353 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
354 //   pointer gets aligned, size is decreased accordingly.
355 // - calls the 'NextT' operation.
356 //
357 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
358 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
359 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
360 private:
361   static constexpr size_t Alignment = AlignmentT::kSize;
362   static_assert(Alignment > 1, "Alignment must be more than 1");
363   static_assert(is_power2(Alignment), "Alignment must be a power of 2");
364 
365 public:
366   template <typename NextT> struct Then {
367     static void Copy(char *__restrict dst, const char *__restrict src,
368                      size_t size) {
369       AlignmentT::Copy(dst, src);
370       internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
371       NextT::Copy(dst, src, size);
372     }
373 
374     static bool Equals(const char *lhs, const char *rhs, size_t size) {
375       if (!AlignmentT::Equals(lhs, rhs))
376         return false;
377       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
378       return NextT::Equals(lhs, rhs, size);
379     }
380 
381     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
382       if (!AlignmentT::Equals(lhs, rhs))
383         return AlignmentT::ThreeWayCompare(lhs, rhs);
384       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
385       return NextT::ThreeWayCompare(lhs, rhs, size);
386     }
387 
388     static void SplatSet(char *dst, const unsigned char value, size_t size) {
389       AlignmentT::SplatSet(dst, value);
390       char *dummy = nullptr;
391       internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
392       NextT::SplatSet(dst, value, size);
393     }
394   };
395 };
396 
397 // An operation that allows to skip the specified amount of bytes.
398 template <ptrdiff_t Bytes> struct Skip {
399   template <typename NextT> struct Then {
400     static void Copy(char *__restrict dst, const char *__restrict src,
401                      size_t size) {
402       NextT::Copy(dst + Bytes, src + Bytes, size - Bytes);
403     }
404 
405     static void Copy(char *__restrict dst, const char *__restrict src) {
406       NextT::Copy(dst + Bytes, src + Bytes);
407     }
408 
409     static bool Equals(const char *lhs, const char *rhs, size_t size) {
410       return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes);
411     }
412 
413     static bool Equals(const char *lhs, const char *rhs) {
414       return NextT::Equals(lhs + Bytes, rhs + Bytes);
415     }
416 
417     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
418       return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes);
419     }
420 
421     static int ThreeWayCompare(const char *lhs, const char *rhs) {
422       return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes);
423     }
424 
425     static void SplatSet(char *dst, const unsigned char value, size_t size) {
426       NextT::SplatSet(dst + Bytes, value, size - Bytes);
427     }
428 
429     static void SplatSet(char *dst, const unsigned char value) {
430       NextT::SplatSet(dst + Bytes, value);
431     }
432   };
433 };
434 
435 // Fixed-size Builtin Operations
436 // -----------------------------
437 // Note: Do not use 'builtin' right now as it requires the implementation of the
438 // `_inline` versions of all the builtins. Theoretically, Clang can still turn
439 // them into calls to the C library leading to reentrancy problems.
440 namespace builtin {
441 
442 #ifndef __has_builtin
443 #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
444 #endif
445 
446 template <size_t Size> struct Builtin {
447   static constexpr size_t kSize = Size;
448 
449   static void Copy(char *__restrict dst, const char *__restrict src) {
450 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
451     ForLoopCopy(dst, src);
452 #elif __has_builtin(__builtin_memcpy_inline)
453     // __builtin_memcpy_inline guarantees to never call external functions.
454     // Unfortunately it is not widely available.
455     __builtin_memcpy_inline(dst, src, kSize);
456 #elif __has_builtin(__builtin_memcpy)
457     __builtin_memcpy(dst, src, kSize);
458 #else
459     ForLoopCopy(dst, src);
460 #endif
461   }
462 
463 #if __has_builtin(__builtin_memcmp_inline)
464 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline
465 #else
466 #define LLVM_LIBC_MEMCMP __builtin_memcmp
467 #endif
468 
469   static bool Equals(const char *lhs, const char *rhs) {
470     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0;
471   }
472 
473   static int ThreeWayCompare(const char *lhs, const char *rhs) {
474     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize);
475   }
476 
477   static void SplatSet(char *dst, const unsigned char value) {
478     __builtin_memset(dst, value, kSize);
479   }
480 
481 private:
482   // Copies `kSize` bytes from `src` to `dst` using a for loop.
483   // This code requires the use of `-fno-builtin-memcpy` to prevent the compiler
484   // from turning the for-loop back into `__builtin_memcpy`.
485   static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
486     for (size_t i = 0; i < kSize; ++i)
487       dst[i] = src[i];
488   }
489 };
490 
491 using _1 = Builtin<1>;
492 using _2 = Builtin<2>;
493 using _3 = Builtin<3>;
494 using _4 = Builtin<4>;
495 using _8 = Builtin<8>;
496 using _16 = Builtin<16>;
497 using _32 = Builtin<32>;
498 using _64 = Builtin<64>;
499 using _128 = Builtin<128>;
500 
501 } // namespace builtin
502 
503 // Fixed-size Scalar Operations
504 // ----------------------------
505 namespace scalar {
506 
507 // The Scalar type makes use of simple sized integers.
508 template <typename T> struct Scalar {
509   static constexpr size_t kSize = sizeof(T);
510 
511   static void Copy(char *__restrict dst, const char *__restrict src) {
512     Store(dst, Load(src));
513   }
514 
515   static bool Equals(const char *lhs, const char *rhs) {
516     return Load(lhs) == Load(rhs);
517   }
518 
519   static int ThreeWayCompare(const char *lhs, const char *rhs) {
520     return ScalarThreeWayCompare(Load(lhs), Load(rhs));
521   }
522 
523   static void SplatSet(char *dst, const unsigned char value) {
524     Store(dst, GetSplattedValue(value));
525   }
526 
527   static int ScalarThreeWayCompare(T a, T b);
528 
529 private:
530   static T Load(const char *ptr) {
531     T value;
532     builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr);
533     return value;
534   }
535   static void Store(char *ptr, T value) {
536     builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value));
537   }
538   static T GetSplattedValue(const unsigned char value) {
539     return T(~0) / T(0xFF) * T(value);
540   }
541 };
542 
543 template <>
544 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
545   const int16_t la = Endian::ToBigEndian(a);
546   const int16_t lb = Endian::ToBigEndian(b);
547   return la - lb;
548 }
549 template <>
550 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
551   const int32_t la = Endian::ToBigEndian(a);
552   const int32_t lb = Endian::ToBigEndian(b);
553   return la - lb;
554 }
555 template <>
556 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
557   const uint32_t la = Endian::ToBigEndian(a);
558   const uint32_t lb = Endian::ToBigEndian(b);
559   return la > lb ? 1 : la < lb ? -1 : 0;
560 }
561 template <>
562 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
563   const uint64_t la = Endian::ToBigEndian(a);
564   const uint64_t lb = Endian::ToBigEndian(b);
565   return la > lb ? 1 : la < lb ? -1 : 0;
566 }
567 
568 using UINT8 = Scalar<uint8_t>;   // 1 Byte
569 using UINT16 = Scalar<uint16_t>; // 2 Bytes
570 using UINT32 = Scalar<uint32_t>; // 4 Bytes
571 using UINT64 = Scalar<uint64_t>; // 8 Bytes
572 
573 using _1 = UINT8;
574 using _2 = UINT16;
575 using _3 = Chained<UINT16, UINT8>;
576 using _4 = UINT32;
577 using _8 = UINT64;
578 using _16 = Repeated<_8, 2>;
579 using _32 = Repeated<_8, 4>;
580 using _64 = Repeated<_8, 8>;
581 using _128 = Repeated<_8, 16>;
582 
583 } // namespace scalar
584 } // namespace __llvm_libc
585 
586 #include <src/string/memory_utils/elements_aarch64.h>
587 #include <src/string/memory_utils/elements_x86.h>
588 
589 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
590