1 //===-- Elementary operations to compose memory primitives ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 11 12 #include <stddef.h> // size_t 13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 14 15 #include "src/__support/endian.h" 16 #include "src/string/memory_utils/utils.h" 17 18 namespace __llvm_libc { 19 20 // Elementary Operations 21 // -------------------------------- 22 // We define abstract elementary operations acting on fixed chunks of memory. 23 // These are low level building blocks that are meant to be assembled to compose 24 // higher order abstractions. Each function is defined twice: once with 25 // fixed-size operations, and once with runtime-size operations. 26 27 // Fixed-size copies from 'src' to 'dst'. 28 template <typename Element> 29 void Copy(char *__restrict dst, const char *__restrict src) { 30 Element::Copy(dst, src); 31 } 32 // Runtime-size copies from 'src' to 'dst'. 33 template <typename Element> 34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) { 35 Element::Copy(dst, src, size); 36 } 37 38 // Fixed-size equality between 'lhs' and 'rhs'. 39 template <typename Element> bool Equals(const char *lhs, const char *rhs) { 40 return Element::Equals(lhs, rhs); 41 } 42 // Runtime-size equality between 'lhs' and 'rhs'. 43 template <typename Element> 44 bool Equals(const char *lhs, const char *rhs, size_t size) { 45 return Element::Equals(lhs, rhs, size); 46 } 47 48 // Fixed-size three-way comparison between 'lhs' and 'rhs'. 49 template <typename Element> 50 int ThreeWayCompare(const char *lhs, const char *rhs) { 51 return Element::ThreeWayCompare(lhs, rhs); 52 } 53 // Runtime-size three-way comparison between 'lhs' and 'rhs'. 54 template <typename Element> 55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 56 return Element::ThreeWayCompare(lhs, rhs, size); 57 } 58 59 // Fixed-size initialization. 60 template <typename Element> 61 void SplatSet(char *dst, const unsigned char value) { 62 Element::SplatSet(dst, value); 63 } 64 // Runtime-size initialization. 65 template <typename Element> 66 void SplatSet(char *dst, const unsigned char value, size_t size) { 67 Element::SplatSet(dst, value, size); 68 } 69 70 // Fixed-size Higher-Order Operations 71 // ---------------------------------- 72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row. 73 // - Chained<Types...>: Chain the operation of several types. 74 75 // Repeat the operation several times in a row. 76 template <typename Element, size_t ElementCount> struct Repeated { 77 static constexpr size_t kSize = ElementCount * Element::kSize; 78 79 static void Copy(char *__restrict dst, const char *__restrict src) { 80 for (size_t i = 0; i < ElementCount; ++i) { 81 const size_t offset = i * Element::kSize; 82 Element::Copy(dst + offset, src + offset); 83 } 84 } 85 86 static bool Equals(const char *lhs, const char *rhs) { 87 for (size_t i = 0; i < ElementCount; ++i) { 88 const size_t offset = i * Element::kSize; 89 if (!Element::Equals(lhs + offset, rhs + offset)) 90 return false; 91 } 92 return true; 93 } 94 95 static int ThreeWayCompare(const char *lhs, const char *rhs) { 96 for (size_t i = 0; i < ElementCount; ++i) { 97 const size_t offset = i * Element::kSize; 98 // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'. 99 if (Element::Equals(lhs + offset, rhs + offset)) 100 continue; 101 return Element::ThreeWayCompare(lhs + offset, rhs + offset); 102 } 103 return 0; 104 } 105 106 static void SplatSet(char *dst, const unsigned char value) { 107 for (size_t i = 0; i < ElementCount; ++i) { 108 const size_t offset = i * Element::kSize; 109 Element::SplatSet(dst + offset, value); 110 } 111 } 112 }; 113 114 // Chain the operation of several types. 115 // For instance, to handle a 3 bytes operation, one can use: 116 // Chained<UINT16, UINT8>::Operation(); 117 template <typename... Types> struct Chained; 118 119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> { 120 static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize; 121 122 static void Copy(char *__restrict dst, const char *__restrict src) { 123 Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize); 124 __llvm_libc::Copy<Head>(dst, src); 125 } 126 127 static bool Equals(const char *lhs, const char *rhs) { 128 if (!__llvm_libc::Equals<Head>(lhs, rhs)) 129 return false; 130 return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize); 131 } 132 133 static int ThreeWayCompare(const char *lhs, const char *rhs) { 134 if (__llvm_libc::Equals<Head>(lhs, rhs)) 135 return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize, 136 rhs + Head::kSize); 137 return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs); 138 } 139 140 static void SplatSet(char *dst, const unsigned char value) { 141 Chained<Tail...>::SplatSet(dst + Head::kSize, value); 142 __llvm_libc::SplatSet<Head>(dst, value); 143 } 144 }; 145 146 template <> struct Chained<> { 147 static constexpr size_t kSize = 0; 148 static void Copy(char *__restrict dst, const char *__restrict src) {} 149 static bool Equals(const char *lhs, const char *rhs) { return true; } 150 static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; } 151 static void SplatSet(char *dst, const unsigned char value) {} 152 }; 153 154 // Runtime-size Higher-Order Operations 155 // ------------------------------------ 156 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer. 157 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes 158 // of the buffer. 159 // - Loop<T>: Perform a loop of fixed-sized operations. 160 161 // Perform the operation on the last 'T::kSize' bytes of the buffer. 162 // 163 // e.g. with 164 // [1234567812345678123] 165 // [__XXXXXXXXXXXXXX___] 166 // [________XXXXXXXX___] 167 // 168 // Precondition: `size >= T::kSize`. 169 template <typename T> struct Tail { 170 static void Copy(char *__restrict dst, const char *__restrict src, 171 size_t size) { 172 return T::Copy(dst + offset(size), src + offset(size)); 173 } 174 175 static bool Equals(const char *lhs, const char *rhs, size_t size) { 176 return T::Equals(lhs + offset(size), rhs + offset(size)); 177 } 178 179 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 180 return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size)); 181 } 182 183 static void SplatSet(char *dst, const unsigned char value, size_t size) { 184 return T::SplatSet(dst + offset(size), value); 185 } 186 187 static size_t offset(size_t size) { return size - T::kSize; } 188 }; 189 190 // Perform the operation on the first and last 'T::kSize' bytes of the buffer. 191 // This is useful for overlapping operations. 192 // 193 // e.g. with 194 // [1234567812345678123] 195 // [__XXXXXXXXXXXXXX___] 196 // [__XXXXXXXX_________] 197 // [________XXXXXXXX___] 198 // 199 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`. 200 template <typename T> struct HeadTail { 201 static void Copy(char *__restrict dst, const char *__restrict src, 202 size_t size) { 203 T::Copy(dst, src); 204 Tail<T>::Copy(dst, src, size); 205 } 206 207 static bool Equals(const char *lhs, const char *rhs, size_t size) { 208 if (!T::Equals(lhs, rhs)) 209 return false; 210 return Tail<T>::Equals(lhs, rhs, size); 211 } 212 213 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 214 if (!T::Equals(lhs, rhs)) 215 return T::ThreeWayCompare(lhs, rhs); 216 return Tail<T>::ThreeWayCompare(lhs, rhs, size); 217 } 218 219 static void SplatSet(char *dst, const unsigned char value, size_t size) { 220 T::SplatSet(dst, value); 221 Tail<T>::SplatSet(dst, value, size); 222 } 223 }; 224 225 // Simple loop ending with a Tail operation. 226 // 227 // e.g. with 228 // [12345678123456781234567812345678] 229 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 230 // [__XXXXXXXX_______________________] 231 // [__________XXXXXXXX_______________] 232 // [__________________XXXXXXXX_______] 233 // [______________________XXXXXXXX___] 234 // 235 // Precondition: 236 // - size >= T::kSize 237 template <typename T, typename TailT = T> struct Loop { 238 static_assert(T::kSize == TailT::kSize, 239 "Tail type must have the same size as T"); 240 241 static void Copy(char *__restrict dst, const char *__restrict src, 242 size_t size) { 243 size_t offset = 0; 244 do { 245 T::Copy(dst + offset, src + offset); 246 offset += T::kSize; 247 } while (offset < size - T::kSize); 248 Tail<TailT>::Copy(dst, src, size); 249 } 250 251 static bool Equals(const char *lhs, const char *rhs, size_t size) { 252 size_t offset = 0; 253 do { 254 if (!T::Equals(lhs + offset, rhs + offset)) 255 return false; 256 offset += T::kSize; 257 } while (offset < size - T::kSize); 258 return Tail<TailT>::Equals(lhs, rhs, size); 259 } 260 261 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 262 size_t offset = 0; 263 do { 264 if (!T::Equals(lhs + offset, rhs + offset)) 265 return T::ThreeWayCompare(lhs + offset, rhs + offset); 266 offset += T::kSize; 267 } while (offset < size - T::kSize); 268 return Tail<TailT>::ThreeWayCompare(lhs, rhs, size); 269 } 270 271 static void SplatSet(char *dst, const unsigned char value, size_t size) { 272 size_t offset = 0; 273 do { 274 T::SplatSet(dst + offset, value); 275 offset += T::kSize; 276 } while (offset < size - T::kSize); 277 Tail<TailT>::SplatSet(dst, value, size); 278 } 279 }; 280 281 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; 282 283 namespace internal { 284 285 // Provides a specialized Bump function that adjusts pointers and size so first 286 // argument (resp. second argument) gets aligned to Alignment. 287 // We make sure the compiler knows about the adjusted pointer alignment. 288 template <Arg arg, size_t Alignment> struct AlignHelper {}; 289 290 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> { 291 template <typename T1, typename T2> 292 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { 293 const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref); 294 p1ref += offset; 295 p2ref += offset; 296 size -= offset; 297 p1ref = assume_aligned<Alignment>(p1ref); 298 } 299 }; 300 301 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> { 302 template <typename T1, typename T2> 303 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { 304 const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref); 305 p1ref += offset; 306 p2ref += offset; 307 size -= offset; 308 p2ref = assume_aligned<Alignment>(p2ref); 309 } 310 }; 311 312 } // namespace internal 313 314 // An alignment operation that: 315 // - executes the 'AlignmentT' operation 316 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected 317 // pointer gets aligned, size is decreased accordingly. 318 // - calls the 'NextT' operation. 319 // 320 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: 321 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count); 322 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align { 323 private: 324 static constexpr size_t Alignment = AlignmentT::kSize; 325 static_assert(Alignment > 1, "Alignment must be more than 1"); 326 static_assert(is_power2(Alignment), "Alignment must be a power of 2"); 327 328 public: 329 template <typename NextT> struct Then { 330 static void Copy(char *__restrict dst, const char *__restrict src, 331 size_t size) { 332 AlignmentT::Copy(dst, src); 333 internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size); 334 NextT::Copy(dst, src, size); 335 } 336 337 static bool Equals(const char *lhs, const char *rhs, size_t size) { 338 if (!AlignmentT::Equals(lhs, rhs)) 339 return false; 340 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size); 341 return NextT::Equals(lhs, rhs, size); 342 } 343 344 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 345 if (!AlignmentT::Equals(lhs, rhs)) 346 return AlignmentT::ThreeWayCompare(lhs, rhs); 347 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size); 348 return NextT::ThreeWayCompare(lhs, rhs, size); 349 } 350 351 static void SplatSet(char *dst, const unsigned char value, size_t size) { 352 AlignmentT::SplatSet(dst, value); 353 char *dummy = nullptr; 354 internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size); 355 NextT::SplatSet(dst, value, size); 356 } 357 }; 358 }; 359 360 // An operation that allows to skip the specified amount of bytes. 361 template <ptrdiff_t Bytes> struct Skip { 362 template <typename NextT> struct Then { 363 static void Copy(char *__restrict dst, const char *__restrict src, 364 size_t size) { 365 NextT::Copy(dst + Bytes, src + Bytes, size - Bytes); 366 } 367 368 static void Copy(char *__restrict dst, const char *__restrict src) { 369 NextT::Copy(dst + Bytes, src + Bytes); 370 } 371 372 static bool Equals(const char *lhs, const char *rhs, size_t size) { 373 return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes); 374 } 375 376 static bool Equals(const char *lhs, const char *rhs) { 377 return NextT::Equals(lhs + Bytes, rhs + Bytes); 378 } 379 380 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 381 return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes); 382 } 383 384 static int ThreeWayCompare(const char *lhs, const char *rhs) { 385 return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes); 386 } 387 388 static void SplatSet(char *dst, const unsigned char value, size_t size) { 389 NextT::SplatSet(dst + Bytes, value, size - Bytes); 390 } 391 392 static void SplatSet(char *dst, const unsigned char value) { 393 NextT::SplatSet(dst + Bytes, value); 394 } 395 }; 396 }; 397 398 // Fixed-size Builtin Operations 399 // ----------------------------- 400 // Note: Do not use 'builtin' right now as it requires the implementation of the 401 // `_inline` versions of all the builtins. Theoretically, Clang can still turn 402 // them into calls to the C library leading to reentrancy problems. 403 namespace builtin { 404 405 #ifndef __has_builtin 406 #define __has_builtin(x) 0 // Compatibility with non-clang compilers. 407 #endif 408 409 template <size_t Size> struct Builtin { 410 static constexpr size_t kSize = Size; 411 412 static void Copy(char *__restrict dst, const char *__restrict src) { 413 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER 414 ForLoopCopy(dst, src); 415 #elif __has_builtin(__builtin_memcpy_inline) 416 // __builtin_memcpy_inline guarantees to never call external functions. 417 // Unfortunately it is not widely available. 418 __builtin_memcpy_inline(dst, src, kSize); 419 #elif __has_builtin(__builtin_memcpy) 420 __builtin_memcpy(dst, src, kSize); 421 #else 422 ForLoopCopy(dst, src); 423 #endif 424 } 425 426 #if __has_builtin(__builtin_memcmp_inline) 427 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline 428 #else 429 #define LLVM_LIBC_MEMCMP __builtin_memcmp 430 #endif 431 432 static bool Equals(const char *lhs, const char *rhs) { 433 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0; 434 } 435 436 static int ThreeWayCompare(const char *lhs, const char *rhs) { 437 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize); 438 } 439 440 static void SplatSet(char *dst, const unsigned char value) { 441 __builtin_memset(dst, value, kSize); 442 } 443 444 private: 445 // Copies `kSize` bytes from `src` to `dst` using a for loop. 446 // This code requires the use of `-fno-buitin-memcpy` to prevent the compiler 447 // from turning the for-loop back into `__builtin_memcpy`. 448 static void ForLoopCopy(char *__restrict dst, const char *__restrict src) { 449 for (size_t i = 0; i < kSize; ++i) 450 dst[i] = src[i]; 451 } 452 }; 453 454 using _1 = Builtin<1>; 455 using _2 = Builtin<2>; 456 using _3 = Builtin<3>; 457 using _4 = Builtin<4>; 458 using _8 = Builtin<8>; 459 using _16 = Builtin<16>; 460 using _32 = Builtin<32>; 461 using _64 = Builtin<64>; 462 using _128 = Builtin<128>; 463 464 } // namespace builtin 465 466 // Fixed-size Scalar Operations 467 // ---------------------------- 468 namespace scalar { 469 470 // The Scalar type makes use of simple sized integers. 471 template <typename T> struct Scalar { 472 static constexpr size_t kSize = sizeof(T); 473 474 static void Copy(char *__restrict dst, const char *__restrict src) { 475 Store(dst, Load(src)); 476 } 477 478 static bool Equals(const char *lhs, const char *rhs) { 479 return Load(lhs) == Load(rhs); 480 } 481 482 static int ThreeWayCompare(const char *lhs, const char *rhs) { 483 return ScalarThreeWayCompare(Load(lhs), Load(rhs)); 484 } 485 486 static void SplatSet(char *dst, const unsigned char value) { 487 Store(dst, GetSplattedValue(value)); 488 } 489 490 static int ScalarThreeWayCompare(T a, T b); 491 492 private: 493 static T Load(const char *ptr) { 494 T value; 495 builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr); 496 return value; 497 } 498 static void Store(char *ptr, T value) { 499 builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value)); 500 } 501 static T GetSplattedValue(const unsigned char value) { 502 return T(~0) / T(0xFF) * T(value); 503 } 504 }; 505 506 template <> 507 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) { 508 const int16_t la = Endian::ToBigEndian(a); 509 const int16_t lb = Endian::ToBigEndian(b); 510 return la - lb; 511 } 512 template <> 513 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) { 514 const int32_t la = Endian::ToBigEndian(a); 515 const int32_t lb = Endian::ToBigEndian(b); 516 return la - lb; 517 } 518 template <> 519 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) { 520 const uint32_t la = Endian::ToBigEndian(a); 521 const uint32_t lb = Endian::ToBigEndian(b); 522 return la > lb ? 1 : la < lb ? -1 : 0; 523 } 524 template <> 525 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) { 526 const uint64_t la = Endian::ToBigEndian(a); 527 const uint64_t lb = Endian::ToBigEndian(b); 528 return la > lb ? 1 : la < lb ? -1 : 0; 529 } 530 531 using UINT8 = Scalar<uint8_t>; // 1 Byte 532 using UINT16 = Scalar<uint16_t>; // 2 Bytes 533 using UINT32 = Scalar<uint32_t>; // 4 Bytes 534 using UINT64 = Scalar<uint64_t>; // 8 Bytes 535 536 using _1 = UINT8; 537 using _2 = UINT16; 538 using _3 = Chained<UINT16, UINT8>; 539 using _4 = UINT32; 540 using _8 = UINT64; 541 using _16 = Repeated<_8, 2>; 542 using _32 = Repeated<_8, 4>; 543 using _64 = Repeated<_8, 8>; 544 using _128 = Repeated<_8, 16>; 545 546 } // namespace scalar 547 } // namespace __llvm_libc 548 549 #include <src/string/memory_utils/elements_aarch64.h> 550 #include <src/string/memory_utils/elements_x86.h> 551 552 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 553