1 //===-- Elementary operations to compose memory primitives ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 11 12 #include <stddef.h> // size_t 13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 14 15 #include "src/__support/endian.h" 16 #include "src/string/memory_utils/utils.h" 17 18 namespace __llvm_libc { 19 20 // Elementary Operations 21 // -------------------------------- 22 // We define abstract elementary operations acting on fixed chunks of memory. 23 // These are low level building blocks that are meant to be assembled to compose 24 // higher order abstractions. Each function is defined twice: once with 25 // fixed-size operations, and once with runtime-size operations. 26 27 // Fixed-size copies from 'src' to 'dst'. 28 template <typename Element> 29 void Copy(char *__restrict dst, const char *__restrict src) { 30 Element::Copy(dst, src); 31 } 32 // Runtime-size copies from 'src' to 'dst'. 33 template <typename Element> 34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) { 35 Element::Copy(dst, src, size); 36 } 37 38 // Fixed-size equality between 'lhs' and 'rhs'. 39 template <typename Element> bool Equals(const char *lhs, const char *rhs) { 40 return Element::Equals(lhs, rhs); 41 } 42 // Runtime-size equality between 'lhs' and 'rhs'. 43 template <typename Element> 44 bool Equals(const char *lhs, const char *rhs, size_t size) { 45 return Element::Equals(lhs, rhs, size); 46 } 47 48 // Fixed-size three-way comparison between 'lhs' and 'rhs'. 49 template <typename Element> 50 int ThreeWayCompare(const char *lhs, const char *rhs) { 51 return Element::ThreeWayCompare(lhs, rhs); 52 } 53 // Runtime-size three-way comparison between 'lhs' and 'rhs'. 54 template <typename Element> 55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 56 return Element::ThreeWayCompare(lhs, rhs, size); 57 } 58 59 // Fixed-size initialization. 60 template <typename Element> 61 void SplatSet(char *dst, const unsigned char value) { 62 Element::SplatSet(dst, value); 63 } 64 // Runtime-size initialization. 65 template <typename Element> 66 void SplatSet(char *dst, const unsigned char value, size_t size) { 67 Element::SplatSet(dst, value, size); 68 } 69 70 // Fixed-size Higher-Order Operations 71 // ---------------------------------- 72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row. 73 // - Chained<Types...>: Chain the operation of several types. 74 75 // Repeat the operation several times in a row. 76 template <typename Element, size_t ElementCount> struct Repeated { 77 static constexpr size_t kSize = ElementCount * Element::kSize; 78 79 static void Copy(char *__restrict dst, const char *__restrict src) { 80 for (size_t i = 0; i < ElementCount; ++i) { 81 const size_t offset = i * Element::kSize; 82 Element::Copy(dst + offset, src + offset); 83 } 84 } 85 86 static bool Equals(const char *lhs, const char *rhs) { 87 for (size_t i = 0; i < ElementCount; ++i) { 88 const size_t offset = i * Element::kSize; 89 if (!Element::Equals(lhs + offset, rhs + offset)) 90 return false; 91 } 92 return true; 93 } 94 95 static int ThreeWayCompare(const char *lhs, const char *rhs) { 96 for (size_t i = 0; i < ElementCount; ++i) { 97 const size_t offset = i * Element::kSize; 98 // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'. 99 if (Element::Equals(lhs + offset, rhs + offset)) 100 continue; 101 return Element::ThreeWayCompare(lhs + offset, rhs + offset); 102 } 103 return 0; 104 } 105 106 static void SplatSet(char *dst, const unsigned char value) { 107 for (size_t i = 0; i < ElementCount; ++i) { 108 const size_t offset = i * Element::kSize; 109 Element::SplatSet(dst + offset, value); 110 } 111 } 112 }; 113 114 // Chain the operation of several types. 115 // For instance, to handle a 3 bytes operation, one can use: 116 // Chained<UINT16, UINT8>::Operation(); 117 template <typename... Types> struct Chained; 118 119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> { 120 static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize; 121 122 static void Copy(char *__restrict dst, const char *__restrict src) { 123 Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize); 124 __llvm_libc::Copy<Head>(dst, src); 125 } 126 127 static bool Equals(const char *lhs, const char *rhs) { 128 if (!__llvm_libc::Equals<Head>(lhs, rhs)) 129 return false; 130 return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize); 131 } 132 133 static int ThreeWayCompare(const char *lhs, const char *rhs) { 134 if (__llvm_libc::Equals<Head>(lhs, rhs)) 135 return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize, 136 rhs + Head::kSize); 137 return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs); 138 } 139 140 static void SplatSet(char *dst, const unsigned char value) { 141 Chained<Tail...>::SplatSet(dst + Head::kSize, value); 142 __llvm_libc::SplatSet<Head>(dst, value); 143 } 144 }; 145 146 template <> struct Chained<> { 147 static constexpr size_t kSize = 0; 148 static void Copy(char *__restrict dst, const char *__restrict src) {} 149 static bool Equals(const char *lhs, const char *rhs) { return true; } 150 static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; } 151 static void SplatSet(char *dst, const unsigned char value) {} 152 }; 153 154 // Overlap ElementA and ElementB so they span Size bytes. 155 template <size_t Size, typename ElementA, typename ElementB = ElementA> 156 struct Overlap { 157 static constexpr size_t kSize = Size; 158 static_assert(ElementB::kSize <= ElementA::kSize, "ElementB too big"); 159 static_assert(ElementA::kSize <= Size, "ElementA too big"); 160 static_assert((ElementA::kSize + ElementB::kSize) >= Size, 161 "Elements too small to overlap"); 162 static constexpr size_t kOffset = kSize - ElementB::kSize; 163 164 static void Copy(char *__restrict dst, const char *__restrict src) { 165 ElementA::Copy(dst, src); 166 ElementB::Copy(dst + kOffset, src + kOffset); 167 } 168 169 static bool Equals(const char *lhs, const char *rhs) { 170 if (!ElementA::Equals(lhs, rhs)) 171 return false; 172 if (!ElementB::Equals(lhs + kOffset, rhs + kOffset)) 173 return false; 174 return true; 175 } 176 177 static int ThreeWayCompare(const char *lhs, const char *rhs) { 178 if (!ElementA::Equals(lhs, rhs)) 179 return ElementA::ThreeWayCompare(lhs, rhs); 180 if (!ElementB::Equals(lhs + kOffset, rhs + kOffset)) 181 return ElementB::ThreeWayCompare(lhs + kOffset, rhs + kOffset); 182 return 0; 183 } 184 185 static void SplatSet(char *dst, const unsigned char value) { 186 ElementA::SplatSet(dst, value); 187 ElementB::SplatSet(dst + kOffset, value); 188 } 189 }; 190 191 // Runtime-size Higher-Order Operations 192 // ------------------------------------ 193 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer. 194 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes 195 // of the buffer. 196 // - Loop<T>: Perform a loop of fixed-sized operations. 197 198 // Perform the operation on the last 'T::kSize' bytes of the buffer. 199 // 200 // e.g. with 201 // [1234567812345678123] 202 // [__XXXXXXXXXXXXXX___] 203 // [________XXXXXXXX___] 204 // 205 // Precondition: `size >= T::kSize`. 206 template <typename T> struct Tail { 207 static void Copy(char *__restrict dst, const char *__restrict src, 208 size_t size) { 209 return T::Copy(dst + offset(size), src + offset(size)); 210 } 211 212 static bool Equals(const char *lhs, const char *rhs, size_t size) { 213 return T::Equals(lhs + offset(size), rhs + offset(size)); 214 } 215 216 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 217 return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size)); 218 } 219 220 static void SplatSet(char *dst, const unsigned char value, size_t size) { 221 return T::SplatSet(dst + offset(size), value); 222 } 223 224 static size_t offset(size_t size) { return size - T::kSize; } 225 }; 226 227 // Perform the operation on the first and last 'T::kSize' bytes of the buffer. 228 // This is useful for overlapping operations. 229 // 230 // e.g. with 231 // [1234567812345678123] 232 // [__XXXXXXXXXXXXXX___] 233 // [__XXXXXXXX_________] 234 // [________XXXXXXXX___] 235 // 236 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`. 237 template <typename T> struct HeadTail { 238 static void Copy(char *__restrict dst, const char *__restrict src, 239 size_t size) { 240 T::Copy(dst, src); 241 Tail<T>::Copy(dst, src, size); 242 } 243 244 static bool Equals(const char *lhs, const char *rhs, size_t size) { 245 if (!T::Equals(lhs, rhs)) 246 return false; 247 return Tail<T>::Equals(lhs, rhs, size); 248 } 249 250 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 251 if (!T::Equals(lhs, rhs)) 252 return T::ThreeWayCompare(lhs, rhs); 253 return Tail<T>::ThreeWayCompare(lhs, rhs, size); 254 } 255 256 static void SplatSet(char *dst, const unsigned char value, size_t size) { 257 T::SplatSet(dst, value); 258 Tail<T>::SplatSet(dst, value, size); 259 } 260 }; 261 262 // Simple loop ending with a Tail operation. 263 // 264 // e.g. with 265 // [12345678123456781234567812345678] 266 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 267 // [__XXXXXXXX_______________________] 268 // [__________XXXXXXXX_______________] 269 // [__________________XXXXXXXX_______] 270 // [______________________XXXXXXXX___] 271 // 272 // Precondition: 273 // - size >= T::kSize 274 template <typename T, typename TailT = T> struct Loop { 275 static_assert(T::kSize == TailT::kSize, 276 "Tail type must have the same size as T"); 277 278 static void Copy(char *__restrict dst, const char *__restrict src, 279 size_t size) { 280 size_t offset = 0; 281 do { 282 T::Copy(dst + offset, src + offset); 283 offset += T::kSize; 284 } while (offset < size - T::kSize); 285 Tail<TailT>::Copy(dst, src, size); 286 } 287 288 static bool Equals(const char *lhs, const char *rhs, size_t size) { 289 size_t offset = 0; 290 do { 291 if (!T::Equals(lhs + offset, rhs + offset)) 292 return false; 293 offset += T::kSize; 294 } while (offset < size - T::kSize); 295 return Tail<TailT>::Equals(lhs, rhs, size); 296 } 297 298 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 299 size_t offset = 0; 300 do { 301 if (!T::Equals(lhs + offset, rhs + offset)) 302 return T::ThreeWayCompare(lhs + offset, rhs + offset); 303 offset += T::kSize; 304 } while (offset < size - T::kSize); 305 return Tail<TailT>::ThreeWayCompare(lhs, rhs, size); 306 } 307 308 static void SplatSet(char *dst, const unsigned char value, size_t size) { 309 size_t offset = 0; 310 do { 311 T::SplatSet(dst + offset, value); 312 offset += T::kSize; 313 } while (offset < size - T::kSize); 314 Tail<TailT>::SplatSet(dst, value, size); 315 } 316 }; 317 318 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; 319 320 namespace internal { 321 322 // Provides a specialized Bump function that adjusts pointers and size so first 323 // argument (resp. second argument) gets aligned to Alignment. 324 // We make sure the compiler knows about the adjusted pointer alignment. 325 template <Arg arg, size_t Alignment> struct AlignHelper {}; 326 327 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> { 328 template <typename T1, typename T2> 329 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { 330 const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref); 331 p1ref += offset; 332 p2ref += offset; 333 size -= offset; 334 p1ref = assume_aligned<Alignment>(p1ref); 335 } 336 }; 337 338 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> { 339 template <typename T1, typename T2> 340 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { 341 const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref); 342 p1ref += offset; 343 p2ref += offset; 344 size -= offset; 345 p2ref = assume_aligned<Alignment>(p2ref); 346 } 347 }; 348 349 } // namespace internal 350 351 // An alignment operation that: 352 // - executes the 'AlignmentT' operation 353 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected 354 // pointer gets aligned, size is decreased accordingly. 355 // - calls the 'NextT' operation. 356 // 357 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: 358 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count); 359 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align { 360 private: 361 static constexpr size_t Alignment = AlignmentT::kSize; 362 static_assert(Alignment > 1, "Alignment must be more than 1"); 363 static_assert(is_power2(Alignment), "Alignment must be a power of 2"); 364 365 public: 366 template <typename NextT> struct Then { 367 static void Copy(char *__restrict dst, const char *__restrict src, 368 size_t size) { 369 AlignmentT::Copy(dst, src); 370 internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size); 371 NextT::Copy(dst, src, size); 372 } 373 374 static bool Equals(const char *lhs, const char *rhs, size_t size) { 375 if (!AlignmentT::Equals(lhs, rhs)) 376 return false; 377 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size); 378 return NextT::Equals(lhs, rhs, size); 379 } 380 381 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 382 if (!AlignmentT::Equals(lhs, rhs)) 383 return AlignmentT::ThreeWayCompare(lhs, rhs); 384 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size); 385 return NextT::ThreeWayCompare(lhs, rhs, size); 386 } 387 388 static void SplatSet(char *dst, const unsigned char value, size_t size) { 389 AlignmentT::SplatSet(dst, value); 390 char *dummy = nullptr; 391 internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size); 392 NextT::SplatSet(dst, value, size); 393 } 394 }; 395 }; 396 397 // An operation that allows to skip the specified amount of bytes. 398 template <ptrdiff_t Bytes> struct Skip { 399 template <typename NextT> struct Then { 400 static void Copy(char *__restrict dst, const char *__restrict src, 401 size_t size) { 402 NextT::Copy(dst + Bytes, src + Bytes, size - Bytes); 403 } 404 405 static void Copy(char *__restrict dst, const char *__restrict src) { 406 NextT::Copy(dst + Bytes, src + Bytes); 407 } 408 409 static bool Equals(const char *lhs, const char *rhs, size_t size) { 410 return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes); 411 } 412 413 static bool Equals(const char *lhs, const char *rhs) { 414 return NextT::Equals(lhs + Bytes, rhs + Bytes); 415 } 416 417 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 418 return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes); 419 } 420 421 static int ThreeWayCompare(const char *lhs, const char *rhs) { 422 return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes); 423 } 424 425 static void SplatSet(char *dst, const unsigned char value, size_t size) { 426 NextT::SplatSet(dst + Bytes, value, size - Bytes); 427 } 428 429 static void SplatSet(char *dst, const unsigned char value) { 430 NextT::SplatSet(dst + Bytes, value); 431 } 432 }; 433 }; 434 435 // Fixed-size Builtin Operations 436 // ----------------------------- 437 // Note: Do not use 'builtin' right now as it requires the implementation of the 438 // `_inline` versions of all the builtins. Theoretically, Clang can still turn 439 // them into calls to the C library leading to reentrancy problems. 440 namespace builtin { 441 442 #ifndef __has_builtin 443 #define __has_builtin(x) 0 // Compatibility with non-clang compilers. 444 #endif 445 446 template <size_t Size> struct Builtin { 447 static constexpr size_t kSize = Size; 448 449 static void Copy(char *__restrict dst, const char *__restrict src) { 450 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER 451 ForLoopCopy(dst, src); 452 #elif __has_builtin(__builtin_memcpy_inline) 453 // __builtin_memcpy_inline guarantees to never call external functions. 454 // Unfortunately it is not widely available. 455 __builtin_memcpy_inline(dst, src, kSize); 456 #elif __has_builtin(__builtin_memcpy) 457 __builtin_memcpy(dst, src, kSize); 458 #else 459 ForLoopCopy(dst, src); 460 #endif 461 } 462 463 #if __has_builtin(__builtin_memcmp_inline) 464 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline 465 #else 466 #define LLVM_LIBC_MEMCMP __builtin_memcmp 467 #endif 468 469 static bool Equals(const char *lhs, const char *rhs) { 470 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0; 471 } 472 473 static int ThreeWayCompare(const char *lhs, const char *rhs) { 474 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize); 475 } 476 477 static void SplatSet(char *dst, const unsigned char value) { 478 __builtin_memset(dst, value, kSize); 479 } 480 481 private: 482 // Copies `kSize` bytes from `src` to `dst` using a for loop. 483 // This code requires the use of `-fno-builtin-memcpy` to prevent the compiler 484 // from turning the for-loop back into `__builtin_memcpy`. 485 static void ForLoopCopy(char *__restrict dst, const char *__restrict src) { 486 for (size_t i = 0; i < kSize; ++i) 487 dst[i] = src[i]; 488 } 489 }; 490 491 using _1 = Builtin<1>; 492 using _2 = Builtin<2>; 493 using _3 = Builtin<3>; 494 using _4 = Builtin<4>; 495 using _8 = Builtin<8>; 496 using _16 = Builtin<16>; 497 using _32 = Builtin<32>; 498 using _64 = Builtin<64>; 499 using _128 = Builtin<128>; 500 501 } // namespace builtin 502 503 // Fixed-size Scalar Operations 504 // ---------------------------- 505 namespace scalar { 506 507 // The Scalar type makes use of simple sized integers. 508 template <typename T> struct Scalar { 509 static constexpr size_t kSize = sizeof(T); 510 511 static void Copy(char *__restrict dst, const char *__restrict src) { 512 Store(dst, Load(src)); 513 } 514 515 static bool Equals(const char *lhs, const char *rhs) { 516 return Load(lhs) == Load(rhs); 517 } 518 519 static int ThreeWayCompare(const char *lhs, const char *rhs) { 520 return ScalarThreeWayCompare(Load(lhs), Load(rhs)); 521 } 522 523 static void SplatSet(char *dst, const unsigned char value) { 524 Store(dst, GetSplattedValue(value)); 525 } 526 527 static int ScalarThreeWayCompare(T a, T b); 528 529 private: 530 static T Load(const char *ptr) { 531 T value; 532 builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr); 533 return value; 534 } 535 static void Store(char *ptr, T value) { 536 builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value)); 537 } 538 static T GetSplattedValue(const unsigned char value) { 539 return T(~0) / T(0xFF) * T(value); 540 } 541 }; 542 543 template <> 544 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) { 545 const int16_t la = Endian::ToBigEndian(a); 546 const int16_t lb = Endian::ToBigEndian(b); 547 return la - lb; 548 } 549 template <> 550 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) { 551 const int32_t la = Endian::ToBigEndian(a); 552 const int32_t lb = Endian::ToBigEndian(b); 553 return la - lb; 554 } 555 template <> 556 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) { 557 const uint32_t la = Endian::ToBigEndian(a); 558 const uint32_t lb = Endian::ToBigEndian(b); 559 return la > lb ? 1 : la < lb ? -1 : 0; 560 } 561 template <> 562 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) { 563 const uint64_t la = Endian::ToBigEndian(a); 564 const uint64_t lb = Endian::ToBigEndian(b); 565 return la > lb ? 1 : la < lb ? -1 : 0; 566 } 567 568 using UINT8 = Scalar<uint8_t>; // 1 Byte 569 using UINT16 = Scalar<uint16_t>; // 2 Bytes 570 using UINT32 = Scalar<uint32_t>; // 4 Bytes 571 using UINT64 = Scalar<uint64_t>; // 8 Bytes 572 573 using _1 = UINT8; 574 using _2 = UINT16; 575 using _3 = Chained<UINT16, UINT8>; 576 using _4 = UINT32; 577 using _8 = UINT64; 578 using _16 = Repeated<_8, 2>; 579 using _32 = Repeated<_8, 4>; 580 using _64 = Repeated<_8, 8>; 581 using _128 = Repeated<_8, 16>; 582 583 } // namespace scalar 584 } // namespace __llvm_libc 585 586 #include <src/string/memory_utils/elements_aarch64.h> 587 #include <src/string/memory_utils/elements_x86.h> 588 589 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 590