1 //===-- Elementary operations to compose memory primitives ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
11 
12 #include <stddef.h> // size_t
13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
14 
15 #include "src/__support/endian.h"
16 #include "src/string/memory_utils/utils.h"
17 
18 namespace __llvm_libc {
19 
20 // Elementary Operations
21 // --------------------------------
22 // We define abstract elementary operations acting on fixed chunks of memory.
23 // These are low level building blocks that are meant to be assembled to compose
24 // higher order abstractions. Each function is defined twice: once with
25 // fixed-size operations, and once with runtime-size operations.
26 
27 // Fixed-size copies from 'src' to 'dst'.
28 template <typename Element>
29 void Copy(char *__restrict dst, const char *__restrict src) {
30   Element::Copy(dst, src);
31 }
32 // Runtime-size copies from 'src' to 'dst'.
33 template <typename Element>
34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
35   Element::Copy(dst, src, size);
36 }
37 
38 // Fixed-size equality between 'lhs' and 'rhs'.
39 template <typename Element> bool Equals(const char *lhs, const char *rhs) {
40   return Element::Equals(lhs, rhs);
41 }
42 // Runtime-size equality between 'lhs' and 'rhs'.
43 template <typename Element>
44 bool Equals(const char *lhs, const char *rhs, size_t size) {
45   return Element::Equals(lhs, rhs, size);
46 }
47 
48 // Fixed-size three-way comparison between 'lhs' and 'rhs'.
49 template <typename Element>
50 int ThreeWayCompare(const char *lhs, const char *rhs) {
51   return Element::ThreeWayCompare(lhs, rhs);
52 }
53 // Runtime-size three-way comparison between 'lhs' and 'rhs'.
54 template <typename Element>
55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
56   return Element::ThreeWayCompare(lhs, rhs, size);
57 }
58 
59 // Fixed-size initialization.
60 template <typename Element>
61 void SplatSet(char *dst, const unsigned char value) {
62   Element::SplatSet(dst, value);
63 }
64 // Runtime-size initialization.
65 template <typename Element>
66 void SplatSet(char *dst, const unsigned char value, size_t size) {
67   Element::SplatSet(dst, value, size);
68 }
69 
70 // Fixed-size Higher-Order Operations
71 // ----------------------------------
72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
73 // - Chained<Types...>: Chain the operation of several types.
74 
75 // Repeat the operation several times in a row.
76 template <typename Element, size_t ElementCount> struct Repeated {
77   static constexpr size_t kSize = ElementCount * Element::kSize;
78 
79   static void Copy(char *__restrict dst, const char *__restrict src) {
80     for (size_t i = 0; i < ElementCount; ++i) {
81       const size_t offset = i * Element::kSize;
82       Element::Copy(dst + offset, src + offset);
83     }
84   }
85 
86   static bool Equals(const char *lhs, const char *rhs) {
87     for (size_t i = 0; i < ElementCount; ++i) {
88       const size_t offset = i * Element::kSize;
89       if (!Element::Equals(lhs + offset, rhs + offset))
90         return false;
91     }
92     return true;
93   }
94 
95   static int ThreeWayCompare(const char *lhs, const char *rhs) {
96     for (size_t i = 0; i < ElementCount; ++i) {
97       const size_t offset = i * Element::kSize;
98       // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'.
99       if (Element::Equals(lhs + offset, rhs + offset))
100         continue;
101       return Element::ThreeWayCompare(lhs + offset, rhs + offset);
102     }
103     return 0;
104   }
105 
106   static void SplatSet(char *dst, const unsigned char value) {
107     for (size_t i = 0; i < ElementCount; ++i) {
108       const size_t offset = i * Element::kSize;
109       Element::SplatSet(dst + offset, value);
110     }
111   }
112 };
113 
114 // Chain the operation of several types.
115 // For instance, to handle a 3 bytes operation, one can use:
116 // Chained<UINT16, UINT8>::Operation();
117 template <typename... Types> struct Chained;
118 
119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
120   static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
121 
122   static void Copy(char *__restrict dst, const char *__restrict src) {
123     Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
124     __llvm_libc::Copy<Head>(dst, src);
125   }
126 
127   static bool Equals(const char *lhs, const char *rhs) {
128     if (!__llvm_libc::Equals<Head>(lhs, rhs))
129       return false;
130     return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
131   }
132 
133   static int ThreeWayCompare(const char *lhs, const char *rhs) {
134     if (__llvm_libc::Equals<Head>(lhs, rhs))
135       return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
136                                                rhs + Head::kSize);
137     return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
138   }
139 
140   static void SplatSet(char *dst, const unsigned char value) {
141     Chained<Tail...>::SplatSet(dst + Head::kSize, value);
142     __llvm_libc::SplatSet<Head>(dst, value);
143   }
144 };
145 
146 template <> struct Chained<> {
147   static constexpr size_t kSize = 0;
148   static void Copy(char *__restrict dst, const char *__restrict src) {}
149   static bool Equals(const char *lhs, const char *rhs) { return true; }
150   static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
151   static void SplatSet(char *dst, const unsigned char value) {}
152 };
153 
154 // Runtime-size Higher-Order Operations
155 // ------------------------------------
156 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
157 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes
158 //   of the buffer.
159 // - Loop<T>: Perform a loop of fixed-sized operations.
160 
161 // Perform the operation on the last 'T::kSize' bytes of the buffer.
162 //
163 // e.g. with
164 // [1234567812345678123]
165 // [__XXXXXXXXXXXXXX___]
166 // [________XXXXXXXX___]
167 //
168 // Precondition: `size >= T::kSize`.
169 template <typename T> struct Tail {
170   static void Copy(char *__restrict dst, const char *__restrict src,
171                    size_t size) {
172     return T::Copy(dst + offset(size), src + offset(size));
173   }
174 
175   static bool Equals(const char *lhs, const char *rhs, size_t size) {
176     return T::Equals(lhs + offset(size), rhs + offset(size));
177   }
178 
179   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
180     return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
181   }
182 
183   static void SplatSet(char *dst, const unsigned char value, size_t size) {
184     return T::SplatSet(dst + offset(size), value);
185   }
186 
187   static size_t offset(size_t size) { return size - T::kSize; }
188 };
189 
190 // Perform the operation on the first and last 'T::kSize' bytes of the buffer.
191 // This is useful for overlapping operations.
192 //
193 // e.g. with
194 // [1234567812345678123]
195 // [__XXXXXXXXXXXXXX___]
196 // [__XXXXXXXX_________]
197 // [________XXXXXXXX___]
198 //
199 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
200 template <typename T> struct HeadTail {
201   static void Copy(char *__restrict dst, const char *__restrict src,
202                    size_t size) {
203     T::Copy(dst, src);
204     Tail<T>::Copy(dst, src, size);
205   }
206 
207   static bool Equals(const char *lhs, const char *rhs, size_t size) {
208     if (!T::Equals(lhs, rhs))
209       return false;
210     return Tail<T>::Equals(lhs, rhs, size);
211   }
212 
213   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
214     if (!T::Equals(lhs, rhs))
215       return T::ThreeWayCompare(lhs, rhs);
216     return Tail<T>::ThreeWayCompare(lhs, rhs, size);
217   }
218 
219   static void SplatSet(char *dst, const unsigned char value, size_t size) {
220     T::SplatSet(dst, value);
221     Tail<T>::SplatSet(dst, value, size);
222   }
223 };
224 
225 // Simple loop ending with a Tail operation.
226 //
227 // e.g. with
228 // [12345678123456781234567812345678]
229 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
230 // [__XXXXXXXX_______________________]
231 // [__________XXXXXXXX_______________]
232 // [__________________XXXXXXXX_______]
233 // [______________________XXXXXXXX___]
234 //
235 // Precondition:
236 // - size >= T::kSize
237 template <typename T, typename TailT = T> struct Loop {
238   static_assert(T::kSize == TailT::kSize,
239                 "Tail type must have the same size as T");
240 
241   static void Copy(char *__restrict dst, const char *__restrict src,
242                    size_t size) {
243     size_t offset = 0;
244     do {
245       T::Copy(dst + offset, src + offset);
246       offset += T::kSize;
247     } while (offset < size - T::kSize);
248     Tail<TailT>::Copy(dst, src, size);
249   }
250 
251   static bool Equals(const char *lhs, const char *rhs, size_t size) {
252     size_t offset = 0;
253     do {
254       if (!T::Equals(lhs + offset, rhs + offset))
255         return false;
256       offset += T::kSize;
257     } while (offset < size - T::kSize);
258     return Tail<TailT>::Equals(lhs, rhs, size);
259   }
260 
261   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
262     size_t offset = 0;
263     do {
264       if (!T::Equals(lhs + offset, rhs + offset))
265         return T::ThreeWayCompare(lhs + offset, rhs + offset);
266       offset += T::kSize;
267     } while (offset < size - T::kSize);
268     return Tail<TailT>::ThreeWayCompare(lhs, rhs, size);
269   }
270 
271   static void SplatSet(char *dst, const unsigned char value, size_t size) {
272     size_t offset = 0;
273     do {
274       T::SplatSet(dst + offset, value);
275       offset += T::kSize;
276     } while (offset < size - T::kSize);
277     Tail<TailT>::SplatSet(dst, value, size);
278   }
279 };
280 
281 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
282 
283 namespace internal {
284 
285 // Provides a specialized Bump function that adjusts pointers and size so first
286 // argument (resp. second argument) gets aligned to Alignment.
287 // We make sure the compiler knows about the adjusted pointer alignment.
288 template <Arg arg, size_t Alignment> struct AlignHelper {};
289 
290 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
291   template <typename T1, typename T2>
292   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
293     const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
294     p1ref += offset;
295     p2ref += offset;
296     size -= offset;
297     p1ref = assume_aligned<Alignment>(p1ref);
298   }
299 };
300 
301 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
302   template <typename T1, typename T2>
303   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
304     const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
305     p1ref += offset;
306     p2ref += offset;
307     size -= offset;
308     p2ref = assume_aligned<Alignment>(p2ref);
309   }
310 };
311 
312 } // namespace internal
313 
314 // An alignment operation that:
315 // - executes the 'AlignmentT' operation
316 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
317 //   pointer gets aligned, size is decreased accordingly.
318 // - calls the 'NextT' operation.
319 //
320 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
321 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
322 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
323 private:
324   static constexpr size_t Alignment = AlignmentT::kSize;
325   static_assert(Alignment > 1, "Alignment must be more than 1");
326   static_assert(is_power2(Alignment), "Alignment must be a power of 2");
327 
328 public:
329   template <typename NextT> struct Then {
330     static void Copy(char *__restrict dst, const char *__restrict src,
331                      size_t size) {
332       AlignmentT::Copy(dst, src);
333       internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
334       NextT::Copy(dst, src, size);
335     }
336 
337     static bool Equals(const char *lhs, const char *rhs, size_t size) {
338       if (!AlignmentT::Equals(lhs, rhs))
339         return false;
340       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
341       return NextT::Equals(lhs, rhs, size);
342     }
343 
344     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
345       if (!AlignmentT::Equals(lhs, rhs))
346         return AlignmentT::ThreeWayCompare(lhs, rhs);
347       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
348       return NextT::ThreeWayCompare(lhs, rhs, size);
349     }
350 
351     static void SplatSet(char *dst, const unsigned char value, size_t size) {
352       AlignmentT::SplatSet(dst, value);
353       char *dummy = nullptr;
354       internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
355       NextT::SplatSet(dst, value, size);
356     }
357   };
358 };
359 
360 // An operation that allows to skip the specified amount of bytes.
361 template <ptrdiff_t Bytes> struct Skip {
362   template <typename NextT> struct Then {
363     static void Copy(char *__restrict dst, const char *__restrict src,
364                      size_t size) {
365       NextT::Copy(dst + Bytes, src + Bytes, size - Bytes);
366     }
367 
368     static void Copy(char *__restrict dst, const char *__restrict src) {
369       NextT::Copy(dst + Bytes, src + Bytes);
370     }
371 
372     static bool Equals(const char *lhs, const char *rhs, size_t size) {
373       return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes);
374     }
375 
376     static bool Equals(const char *lhs, const char *rhs) {
377       return NextT::Equals(lhs + Bytes, rhs + Bytes);
378     }
379 
380     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
381       return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes);
382     }
383 
384     static int ThreeWayCompare(const char *lhs, const char *rhs) {
385       return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes);
386     }
387 
388     static void SplatSet(char *dst, const unsigned char value, size_t size) {
389       NextT::SplatSet(dst + Bytes, value, size - Bytes);
390     }
391 
392     static void SplatSet(char *dst, const unsigned char value) {
393       NextT::SplatSet(dst + Bytes, value);
394     }
395   };
396 };
397 
398 // Fixed-size Builtin Operations
399 // -----------------------------
400 // Note: Do not use 'builtin' right now as it requires the implementation of the
401 // `_inline` versions of all the builtins. Theoretically, Clang can still turn
402 // them into calls to the C library leading to reentrancy problems.
403 namespace builtin {
404 
405 #ifndef __has_builtin
406 #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
407 #endif
408 
409 template <size_t Size> struct Builtin {
410   static constexpr size_t kSize = Size;
411 
412   static void Copy(char *__restrict dst, const char *__restrict src) {
413 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
414     ForLoopCopy(dst, src);
415 #elif __has_builtin(__builtin_memcpy_inline)
416     // __builtin_memcpy_inline guarantees to never call external functions.
417     // Unfortunately it is not widely available.
418     __builtin_memcpy_inline(dst, src, kSize);
419 #elif __has_builtin(__builtin_memcpy)
420     __builtin_memcpy(dst, src, kSize);
421 #else
422     ForLoopCopy(dst, src);
423 #endif
424   }
425 
426 #if __has_builtin(__builtin_memcmp_inline)
427 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline
428 #else
429 #define LLVM_LIBC_MEMCMP __builtin_memcmp
430 #endif
431 
432   static bool Equals(const char *lhs, const char *rhs) {
433     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0;
434   }
435 
436   static int ThreeWayCompare(const char *lhs, const char *rhs) {
437     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize);
438   }
439 
440   static void SplatSet(char *dst, const unsigned char value) {
441     __builtin_memset(dst, value, kSize);
442   }
443 
444 private:
445   // Copies `kSize` bytes from `src` to `dst` using a for loop.
446   // This code requires the use of `-fno-buitin-memcpy` to prevent the compiler
447   // from turning the for-loop back into `__builtin_memcpy`.
448   static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
449     for (size_t i = 0; i < kSize; ++i)
450       dst[i] = src[i];
451   }
452 };
453 
454 using _1 = Builtin<1>;
455 using _2 = Builtin<2>;
456 using _3 = Builtin<3>;
457 using _4 = Builtin<4>;
458 using _8 = Builtin<8>;
459 using _16 = Builtin<16>;
460 using _32 = Builtin<32>;
461 using _64 = Builtin<64>;
462 using _128 = Builtin<128>;
463 
464 } // namespace builtin
465 
466 // Fixed-size Scalar Operations
467 // ----------------------------
468 namespace scalar {
469 
470 // The Scalar type makes use of simple sized integers.
471 template <typename T> struct Scalar {
472   static constexpr size_t kSize = sizeof(T);
473 
474   static void Copy(char *__restrict dst, const char *__restrict src) {
475     Store(dst, Load(src));
476   }
477 
478   static bool Equals(const char *lhs, const char *rhs) {
479     return Load(lhs) == Load(rhs);
480   }
481 
482   static int ThreeWayCompare(const char *lhs, const char *rhs) {
483     return ScalarThreeWayCompare(Load(lhs), Load(rhs));
484   }
485 
486   static void SplatSet(char *dst, const unsigned char value) {
487     Store(dst, GetSplattedValue(value));
488   }
489 
490   static int ScalarThreeWayCompare(T a, T b);
491 
492 private:
493   static T Load(const char *ptr) {
494     T value;
495     builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr);
496     return value;
497   }
498   static void Store(char *ptr, T value) {
499     builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value));
500   }
501   static T GetSplattedValue(const unsigned char value) {
502     return T(~0) / T(0xFF) * T(value);
503   }
504 };
505 
506 template <>
507 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
508   const int16_t la = Endian::ToBigEndian(a);
509   const int16_t lb = Endian::ToBigEndian(b);
510   return la - lb;
511 }
512 template <>
513 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
514   const int32_t la = Endian::ToBigEndian(a);
515   const int32_t lb = Endian::ToBigEndian(b);
516   return la - lb;
517 }
518 template <>
519 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
520   const uint32_t la = Endian::ToBigEndian(a);
521   const uint32_t lb = Endian::ToBigEndian(b);
522   return la > lb ? 1 : la < lb ? -1 : 0;
523 }
524 template <>
525 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
526   const uint64_t la = Endian::ToBigEndian(a);
527   const uint64_t lb = Endian::ToBigEndian(b);
528   return la > lb ? 1 : la < lb ? -1 : 0;
529 }
530 
531 using UINT8 = Scalar<uint8_t>;   // 1 Byte
532 using UINT16 = Scalar<uint16_t>; // 2 Bytes
533 using UINT32 = Scalar<uint32_t>; // 4 Bytes
534 using UINT64 = Scalar<uint64_t>; // 8 Bytes
535 
536 using _1 = UINT8;
537 using _2 = UINT16;
538 using _3 = Chained<UINT16, UINT8>;
539 using _4 = UINT32;
540 using _8 = UINT64;
541 using _16 = Repeated<_8, 2>;
542 using _32 = Repeated<_8, 4>;
543 using _64 = Repeated<_8, 8>;
544 using _128 = Repeated<_8, 16>;
545 
546 } // namespace scalar
547 } // namespace __llvm_libc
548 
549 #include <src/string/memory_utils/elements_aarch64.h>
550 #include <src/string/memory_utils/elements_x86.h>
551 
552 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
553