1 //===-- Elementary operations to compose memory primitives ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
11 
12 #include <stddef.h> // size_t
13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
14 
15 #include "src/__support/endian.h"
16 #include "src/string/memory_utils/utils.h"
17 
18 namespace __llvm_libc {
19 
20 // Elementary Operations
21 // --------------------------------
22 // We define abstract elementary operations acting on fixed chunks of memory.
23 // These are low level building blocks that are meant to be assembled to compose
24 // higher order abstractions. Each function is defined twice: once with
25 // fixed-size operations, and once with runtime-size operations.
26 
27 // Fixed-size copies from 'src' to 'dst'.
28 template <typename Element>
29 void Copy(char *__restrict dst, const char *__restrict src) {
30   Element::Copy(dst, src);
31 }
32 // Runtime-size copies from 'src' to 'dst'.
33 template <typename Element>
34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
35   Element::Copy(dst, src, size);
36 }
37 
38 // Fixed-size equality between 'lhs' and 'rhs'.
39 template <typename Element> bool Equals(const char *lhs, const char *rhs) {
40   return Element::Equals(lhs, rhs);
41 }
42 // Runtime-size equality between 'lhs' and 'rhs'.
43 template <typename Element>
44 bool Equals(const char *lhs, const char *rhs, size_t size) {
45   return Element::Equals(lhs, rhs, size);
46 }
47 
48 // Fixed-size three-way comparison between 'lhs' and 'rhs'.
49 template <typename Element>
50 int ThreeWayCompare(const char *lhs, const char *rhs) {
51   return Element::ThreeWayCompare(lhs, rhs);
52 }
53 // Runtime-size three-way comparison between 'lhs' and 'rhs'.
54 template <typename Element>
55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
56   return Element::ThreeWayCompare(lhs, rhs, size);
57 }
58 
59 // Fixed-size initialization.
60 template <typename Element>
61 void SplatSet(char *dst, const unsigned char value) {
62   Element::SplatSet(dst, value);
63 }
64 // Runtime-size initialization.
65 template <typename Element>
66 void SplatSet(char *dst, const unsigned char value, size_t size) {
67   Element::SplatSet(dst, value, size);
68 }
69 
70 // Fixed-size Higher-Order Operations
71 // ----------------------------------
72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
73 // - Chained<Types...>: Chain the operation of several types.
74 
75 // Repeat the operation several times in a row.
76 template <typename Element, size_t ElementCount> struct Repeated {
77   static constexpr size_t kSize = ElementCount * Element::kSize;
78 
79   static void Copy(char *__restrict dst, const char *__restrict src) {
80     for (size_t i = 0; i < ElementCount; ++i) {
81       const size_t offset = i * Element::kSize;
82       Element::Copy(dst + offset, src + offset);
83     }
84   }
85 
86   static bool Equals(const char *lhs, const char *rhs) {
87     for (size_t i = 0; i < ElementCount; ++i) {
88       const size_t offset = i * Element::kSize;
89       if (!Element::Equals(lhs + offset, rhs + offset))
90         return false;
91     }
92     return true;
93   }
94 
95   static int ThreeWayCompare(const char *lhs, const char *rhs) {
96     for (size_t i = 0; i < ElementCount; ++i) {
97       const size_t offset = i * Element::kSize;
98       // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'.
99       if (Element::Equals(lhs + offset, rhs + offset))
100         continue;
101       return Element::ThreeWayCompare(lhs + offset, rhs + offset);
102     }
103     return 0;
104   }
105 
106   static void SplatSet(char *dst, const unsigned char value) {
107     for (size_t i = 0; i < ElementCount; ++i) {
108       const size_t offset = i * Element::kSize;
109       Element::SplatSet(dst + offset, value);
110     }
111   }
112 };
113 
114 // Chain the operation of several types.
115 // For instance, to handle a 3 bytes operation, one can use:
116 // Chained<UINT16, UINT8>::Operation();
117 template <typename... Types> struct Chained;
118 
119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
120   static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
121 
122   static void Copy(char *__restrict dst, const char *__restrict src) {
123     Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
124     __llvm_libc::Copy<Head>(dst, src);
125   }
126 
127   static bool Equals(const char *lhs, const char *rhs) {
128     if (!__llvm_libc::Equals<Head>(lhs, rhs))
129       return false;
130     return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
131   }
132 
133   static int ThreeWayCompare(const char *lhs, const char *rhs) {
134     if (__llvm_libc::Equals<Head>(lhs, rhs))
135       return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
136                                                rhs + Head::kSize);
137     return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
138   }
139 
140   static void SplatSet(char *dst, const unsigned char value) {
141     Chained<Tail...>::SplatSet(dst + Head::kSize, value);
142     __llvm_libc::SplatSet<Head>(dst, value);
143   }
144 };
145 
146 template <> struct Chained<> {
147   static constexpr size_t kSize = 0;
148   static void Copy(char *__restrict dst, const char *__restrict src) {}
149   static bool Equals(const char *lhs, const char *rhs) { return true; }
150   static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
151   static void SplatSet(char *dst, const unsigned char value) {}
152 };
153 
154 // Runtime-size Higher-Order Operations
155 // ------------------------------------
156 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
157 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes
158 //   of the buffer.
159 // - Loop<T>: Perform a loop of fixed-sized operations.
160 
161 // Perform the operation on the last 'T::kSize' bytes of the buffer.
162 //
163 // e.g. with
164 // [1234567812345678123]
165 // [__XXXXXXXXXXXXXX___]
166 // [________XXXXXXXX___]
167 //
168 // Precondition: `size >= T::kSize`.
169 template <typename T> struct Tail {
170   static void Copy(char *__restrict dst, const char *__restrict src,
171                    size_t size) {
172     return T::Copy(dst + offset(size), src + offset(size));
173   }
174 
175   static bool Equals(const char *lhs, const char *rhs, size_t size) {
176     return T::Equals(lhs + offset(size), rhs + offset(size));
177   }
178 
179   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
180     return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
181   }
182 
183   static void SplatSet(char *dst, const unsigned char value, size_t size) {
184     return T::SplatSet(dst + offset(size), value);
185   }
186 
187   static size_t offset(size_t size) { return size - T::kSize; }
188 };
189 
190 // Perform the operation on the first and last 'T::kSize' bytes of the buffer.
191 // This is useful for overlapping operations.
192 //
193 // e.g. with
194 // [1234567812345678123]
195 // [__XXXXXXXXXXXXXX___]
196 // [__XXXXXXXX_________]
197 // [________XXXXXXXX___]
198 //
199 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
200 template <typename T> struct HeadTail {
201   static void Copy(char *__restrict dst, const char *__restrict src,
202                    size_t size) {
203     T::Copy(dst, src);
204     Tail<T>::Copy(dst, src, size);
205   }
206 
207   static bool Equals(const char *lhs, const char *rhs, size_t size) {
208     if (!T::Equals(lhs, rhs))
209       return false;
210     return Tail<T>::Equals(lhs, rhs, size);
211   }
212 
213   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
214     if (!T::Equals(lhs, rhs))
215       return T::ThreeWayCompare(lhs, rhs);
216     return Tail<T>::ThreeWayCompare(lhs, rhs, size);
217   }
218 
219   static void SplatSet(char *dst, const unsigned char value, size_t size) {
220     T::SplatSet(dst, value);
221     Tail<T>::SplatSet(dst, value, size);
222   }
223 };
224 
225 // Simple loop ending with a Tail operation.
226 //
227 // e.g. with
228 // [12345678123456781234567812345678]
229 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
230 // [__XXXXXXXX_______________________]
231 // [__________XXXXXXXX_______________]
232 // [__________________XXXXXXXX_______]
233 // [______________________XXXXXXXX___]
234 //
235 // Precondition:
236 // - size >= T::kSize
237 template <typename T> struct Loop {
238   static void Copy(char *__restrict dst, const char *__restrict src,
239                    size_t size) {
240     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
241       T::Copy(dst + offset, src + offset);
242     Tail<T>::Copy(dst, src, size);
243   }
244 
245   static bool Equals(const char *lhs, const char *rhs, size_t size) {
246     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
247       if (!T::Equals(lhs + offset, rhs + offset))
248         return false;
249     return Tail<T>::Equals(lhs, rhs, size);
250   }
251 
252   static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
253     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
254       if (!T::Equals(lhs + offset, rhs + offset))
255         return T::ThreeWayCompare(lhs + offset, rhs + offset);
256     return Tail<T>::ThreeWayCompare(lhs, rhs, size);
257   }
258 
259   static void SplatSet(char *dst, const unsigned char value, size_t size) {
260     for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
261       T::SplatSet(dst + offset, value);
262     Tail<T>::SplatSet(dst, value, size);
263   }
264 };
265 
266 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
267 
268 namespace internal {
269 
270 // Provides a specialized Bump function that adjusts pointers and size so first
271 // argument (resp. second argument) gets aligned to Alignment.
272 // We make sure the compiler knows about the adjusted pointer alignment.
273 template <Arg arg, size_t Alignment> struct AlignHelper {};
274 
275 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
276   template <typename T1, typename T2>
277   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
278     const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
279     p1ref += offset;
280     p2ref += offset;
281     size -= offset;
282     p1ref = assume_aligned<Alignment>(p1ref);
283   }
284 };
285 
286 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
287   template <typename T1, typename T2>
288   static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
289     const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
290     p1ref += offset;
291     p2ref += offset;
292     size -= offset;
293     p2ref = assume_aligned<Alignment>(p2ref);
294   }
295 };
296 
297 } // namespace internal
298 
299 // An alignment operation that:
300 // - executes the 'AlignmentT' operation
301 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
302 //   pointer gets aligned, size is decreased accordingly.
303 // - calls the 'NextT' operation.
304 //
305 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
306 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
307 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
308 private:
309   static constexpr size_t Alignment = AlignmentT::kSize;
310   static_assert(Alignment > 1, "Alignment must be more than 1");
311   static_assert(is_power2(Alignment), "Alignment must be a power of 2");
312 
313 public:
314   template <typename NextT> struct Then {
315     static void Copy(char *__restrict dst, const char *__restrict src,
316                      size_t size) {
317       AlignmentT::Copy(dst, src);
318       internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
319       NextT::Copy(dst, src, size);
320     }
321 
322     static bool Equals(const char *lhs, const char *rhs, size_t size) {
323       if (!AlignmentT::Equals(lhs, rhs))
324         return false;
325       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
326       return NextT::Equals(lhs, rhs, size);
327     }
328 
329     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
330       if (!AlignmentT::Equals(lhs, rhs))
331         return AlignmentT::ThreeWayCompare(lhs, rhs);
332       internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
333       return NextT::ThreeWayCompare(lhs, rhs, size);
334     }
335 
336     static void SplatSet(char *dst, const unsigned char value, size_t size) {
337       AlignmentT::SplatSet(dst, value);
338       char *dummy = nullptr;
339       internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
340       NextT::SplatSet(dst, value, size);
341     }
342   };
343 };
344 
345 // An operation that allows to skip the specified amount of bytes.
346 template <ptrdiff_t Bytes> struct Skip {
347   template <typename NextT> struct Then {
348     static void Copy(char *__restrict dst, const char *__restrict src,
349                      size_t size) {
350       NextT::Copy(dst + Bytes, src + Bytes, size - Bytes);
351     }
352 
353     static void Copy(char *__restrict dst, const char *__restrict src) {
354       NextT::Copy(dst + Bytes, src + Bytes);
355     }
356 
357     static bool Equals(const char *lhs, const char *rhs, size_t size) {
358       return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes);
359     }
360 
361     static bool Equals(const char *lhs, const char *rhs) {
362       return NextT::Equals(lhs + Bytes, rhs + Bytes);
363     }
364 
365     static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
366       return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes);
367     }
368 
369     static int ThreeWayCompare(const char *lhs, const char *rhs) {
370       return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes);
371     }
372 
373     static void SplatSet(char *dst, const unsigned char value, size_t size) {
374       NextT::SplatSet(dst + Bytes, value, size - Bytes);
375     }
376 
377     static void SplatSet(char *dst, const unsigned char value) {
378       NextT::SplatSet(dst + Bytes, value);
379     }
380   };
381 };
382 
383 // Fixed-size Builtin Operations
384 // -----------------------------
385 // Note: Do not use 'builtin' right now as it requires the implementation of the
386 // `_inline` versions of all the builtins. Theoretically, Clang can still turn
387 // them into calls to the C library leading to reentrancy problems.
388 namespace builtin {
389 
390 #ifndef __has_builtin
391 #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
392 #endif
393 
394 template <size_t Size> struct Builtin {
395   static constexpr size_t kSize = Size;
396 
397   static void Copy(char *__restrict dst, const char *__restrict src) {
398 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
399     ForLoopCopy(dst, src);
400 #elif __has_builtin(__builtin_memcpy_inline)
401     // __builtin_memcpy_inline guarantees to never call external functions.
402     // Unfortunately it is not widely available.
403     __builtin_memcpy_inline(dst, src, kSize);
404 #elif __has_builtin(__builtin_memcpy)
405     __builtin_memcpy(dst, src, kSize);
406 #else
407     ForLoopCopy(dst, src);
408 #endif
409   }
410 
411 #if __has_builtin(__builtin_memcmp_inline)
412 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline
413 #else
414 #define LLVM_LIBC_MEMCMP __builtin_memcmp
415 #endif
416 
417   static bool Equals(const char *lhs, const char *rhs) {
418     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0;
419   }
420 
421   static int ThreeWayCompare(const char *lhs, const char *rhs) {
422     return LLVM_LIBC_MEMCMP(lhs, rhs, kSize);
423   }
424 
425   static void SplatSet(char *dst, const unsigned char value) {
426     __builtin_memset(dst, value, kSize);
427   }
428 
429 private:
430   // Copies `kSize` bytes from `src` to `dst` using a for loop.
431   // This code requires the use of `-fno-buitin-memcpy` to prevent the compiler
432   // from turning the for-loop back into `__builtin_memcpy`.
433   static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
434     for (size_t i = 0; i < kSize; ++i)
435       dst[i] = src[i];
436   }
437 };
438 
439 using _1 = Builtin<1>;
440 using _2 = Builtin<2>;
441 using _3 = Builtin<3>;
442 using _4 = Builtin<4>;
443 using _8 = Builtin<8>;
444 using _16 = Builtin<16>;
445 using _32 = Builtin<32>;
446 using _64 = Builtin<64>;
447 using _128 = Builtin<128>;
448 
449 } // namespace builtin
450 
451 // Fixed-size Scalar Operations
452 // ----------------------------
453 namespace scalar {
454 
455 // The Scalar type makes use of simple sized integers.
456 template <typename T> struct Scalar {
457   static constexpr size_t kSize = sizeof(T);
458 
459   static void Copy(char *__restrict dst, const char *__restrict src) {
460     Store(dst, Load(src));
461   }
462 
463   static bool Equals(const char *lhs, const char *rhs) {
464     return Load(lhs) == Load(rhs);
465   }
466 
467   static int ThreeWayCompare(const char *lhs, const char *rhs) {
468     return ScalarThreeWayCompare(Load(lhs), Load(rhs));
469   }
470 
471   static void SplatSet(char *dst, const unsigned char value) {
472     Store(dst, GetSplattedValue(value));
473   }
474 
475   static int ScalarThreeWayCompare(T a, T b);
476 
477 private:
478   static T Load(const char *ptr) {
479     T value;
480     builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr);
481     return value;
482   }
483   static void Store(char *ptr, T value) {
484     builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value));
485   }
486   static T GetSplattedValue(const unsigned char value) {
487     return T(~0) / T(0xFF) * T(value);
488   }
489 };
490 
491 template <>
492 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
493   const int16_t la = Endian::ToBigEndian(a);
494   const int16_t lb = Endian::ToBigEndian(b);
495   return la - lb;
496 }
497 template <>
498 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
499   const int32_t la = Endian::ToBigEndian(a);
500   const int32_t lb = Endian::ToBigEndian(b);
501   return la - lb;
502 }
503 template <>
504 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
505   const uint32_t la = Endian::ToBigEndian(a);
506   const uint32_t lb = Endian::ToBigEndian(b);
507   return la > lb ? 1 : la < lb ? -1 : 0;
508 }
509 template <>
510 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
511   const uint64_t la = Endian::ToBigEndian(a);
512   const uint64_t lb = Endian::ToBigEndian(b);
513   return la > lb ? 1 : la < lb ? -1 : 0;
514 }
515 
516 using UINT8 = Scalar<uint8_t>;   // 1 Byte
517 using UINT16 = Scalar<uint16_t>; // 2 Bytes
518 using UINT32 = Scalar<uint32_t>; // 4 Bytes
519 using UINT64 = Scalar<uint64_t>; // 8 Bytes
520 
521 using _1 = UINT8;
522 using _2 = UINT16;
523 using _3 = Chained<UINT16, UINT8>;
524 using _4 = UINT32;
525 using _8 = UINT64;
526 using _16 = Repeated<_8, 2>;
527 using _32 = Repeated<_8, 4>;
528 using _64 = Repeated<_8, 8>;
529 using _128 = Repeated<_8, 16>;
530 
531 } // namespace scalar
532 } // namespace __llvm_libc
533 
534 #include <src/string/memory_utils/elements_aarch64.h>
535 #include <src/string/memory_utils/elements_x86.h>
536 
537 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
538