1 //===-- Elementary operations to compose memory primitives ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 11 12 #include <stddef.h> // size_t 13 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 14 15 #include "src/__support/endian.h" 16 #include "src/string/memory_utils/utils.h" 17 18 namespace __llvm_libc { 19 20 // Elementary Operations 21 // -------------------------------- 22 // We define abstract elementary operations acting on fixed chunks of memory. 23 // These are low level building blocks that are meant to be assembled to compose 24 // higher order abstractions. Each function is defined twice: once with 25 // fixed-size operations, and once with runtime-size operations. 26 27 // Fixed-size copies from 'src' to 'dst'. 28 template <typename Element> 29 void Copy(char *__restrict dst, const char *__restrict src) { 30 Element::Copy(dst, src); 31 } 32 // Runtime-size copies from 'src' to 'dst'. 33 template <typename Element> 34 void Copy(char *__restrict dst, const char *__restrict src, size_t size) { 35 Element::Copy(dst, src, size); 36 } 37 38 // Fixed-size equality between 'lhs' and 'rhs'. 39 template <typename Element> bool Equals(const char *lhs, const char *rhs) { 40 return Element::Equals(lhs, rhs); 41 } 42 // Runtime-size equality between 'lhs' and 'rhs'. 43 template <typename Element> 44 bool Equals(const char *lhs, const char *rhs, size_t size) { 45 return Element::Equals(lhs, rhs, size); 46 } 47 48 // Fixed-size three-way comparison between 'lhs' and 'rhs'. 49 template <typename Element> 50 int ThreeWayCompare(const char *lhs, const char *rhs) { 51 return Element::ThreeWayCompare(lhs, rhs); 52 } 53 // Runtime-size three-way comparison between 'lhs' and 'rhs'. 54 template <typename Element> 55 int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 56 return Element::ThreeWayCompare(lhs, rhs, size); 57 } 58 59 // Fixed-size initialization. 60 template <typename Element> 61 void SplatSet(char *dst, const unsigned char value) { 62 Element::SplatSet(dst, value); 63 } 64 // Runtime-size initialization. 65 template <typename Element> 66 void SplatSet(char *dst, const unsigned char value, size_t size) { 67 Element::SplatSet(dst, value, size); 68 } 69 70 // Fixed-size Higher-Order Operations 71 // ---------------------------------- 72 // - Repeated<Type, ElementCount>: Repeat the operation several times in a row. 73 // - Chained<Types...>: Chain the operation of several types. 74 75 // Repeat the operation several times in a row. 76 template <typename Element, size_t ElementCount> struct Repeated { 77 static constexpr size_t kSize = ElementCount * Element::kSize; 78 79 static void Copy(char *__restrict dst, const char *__restrict src) { 80 for (size_t i = 0; i < ElementCount; ++i) { 81 const size_t offset = i * Element::kSize; 82 Element::Copy(dst + offset, src + offset); 83 } 84 } 85 86 static bool Equals(const char *lhs, const char *rhs) { 87 for (size_t i = 0; i < ElementCount; ++i) { 88 const size_t offset = i * Element::kSize; 89 if (!Element::Equals(lhs + offset, rhs + offset)) 90 return false; 91 } 92 return true; 93 } 94 95 static int ThreeWayCompare(const char *lhs, const char *rhs) { 96 for (size_t i = 0; i < ElementCount; ++i) { 97 const size_t offset = i * Element::kSize; 98 // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'. 99 if (Element::Equals(lhs + offset, rhs + offset)) 100 continue; 101 return Element::ThreeWayCompare(lhs + offset, rhs + offset); 102 } 103 return 0; 104 } 105 106 static void SplatSet(char *dst, const unsigned char value) { 107 for (size_t i = 0; i < ElementCount; ++i) { 108 const size_t offset = i * Element::kSize; 109 Element::SplatSet(dst + offset, value); 110 } 111 } 112 }; 113 114 // Chain the operation of several types. 115 // For instance, to handle a 3 bytes operation, one can use: 116 // Chained<UINT16, UINT8>::Operation(); 117 template <typename... Types> struct Chained; 118 119 template <typename Head, typename... Tail> struct Chained<Head, Tail...> { 120 static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize; 121 122 static void Copy(char *__restrict dst, const char *__restrict src) { 123 Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize); 124 __llvm_libc::Copy<Head>(dst, src); 125 } 126 127 static bool Equals(const char *lhs, const char *rhs) { 128 if (!__llvm_libc::Equals<Head>(lhs, rhs)) 129 return false; 130 return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize); 131 } 132 133 static int ThreeWayCompare(const char *lhs, const char *rhs) { 134 if (__llvm_libc::Equals<Head>(lhs, rhs)) 135 return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize, 136 rhs + Head::kSize); 137 return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs); 138 } 139 140 static void SplatSet(char *dst, const unsigned char value) { 141 Chained<Tail...>::SplatSet(dst + Head::kSize, value); 142 __llvm_libc::SplatSet<Head>(dst, value); 143 } 144 }; 145 146 template <> struct Chained<> { 147 static constexpr size_t kSize = 0; 148 static void Copy(char *__restrict dst, const char *__restrict src) {} 149 static bool Equals(const char *lhs, const char *rhs) { return true; } 150 static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; } 151 static void SplatSet(char *dst, const unsigned char value) {} 152 }; 153 154 // Runtime-size Higher-Order Operations 155 // ------------------------------------ 156 // - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer. 157 // - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes 158 // of the buffer. 159 // - Loop<T>: Perform a loop of fixed-sized operations. 160 161 // Perform the operation on the last 'T::kSize' bytes of the buffer. 162 // 163 // e.g. with 164 // [1234567812345678123] 165 // [__XXXXXXXXXXXXXX___] 166 // [________XXXXXXXX___] 167 // 168 // Precondition: `size >= T::kSize`. 169 template <typename T> struct Tail { 170 static void Copy(char *__restrict dst, const char *__restrict src, 171 size_t size) { 172 return T::Copy(dst + offset(size), src + offset(size)); 173 } 174 175 static bool Equals(const char *lhs, const char *rhs, size_t size) { 176 return T::Equals(lhs + offset(size), rhs + offset(size)); 177 } 178 179 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 180 return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size)); 181 } 182 183 static void SplatSet(char *dst, const unsigned char value, size_t size) { 184 return T::SplatSet(dst + offset(size), value); 185 } 186 187 static size_t offset(size_t size) { return size - T::kSize; } 188 }; 189 190 // Perform the operation on the first and last 'T::kSize' bytes of the buffer. 191 // This is useful for overlapping operations. 192 // 193 // e.g. with 194 // [1234567812345678123] 195 // [__XXXXXXXXXXXXXX___] 196 // [__XXXXXXXX_________] 197 // [________XXXXXXXX___] 198 // 199 // Precondition: `size >= T::kSize && size <= 2 x T::kSize`. 200 template <typename T> struct HeadTail { 201 static void Copy(char *__restrict dst, const char *__restrict src, 202 size_t size) { 203 T::Copy(dst, src); 204 Tail<T>::Copy(dst, src, size); 205 } 206 207 static bool Equals(const char *lhs, const char *rhs, size_t size) { 208 if (!T::Equals(lhs, rhs)) 209 return false; 210 return Tail<T>::Equals(lhs, rhs, size); 211 } 212 213 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 214 if (!T::Equals(lhs, rhs)) 215 return T::ThreeWayCompare(lhs, rhs); 216 return Tail<T>::ThreeWayCompare(lhs, rhs, size); 217 } 218 219 static void SplatSet(char *dst, const unsigned char value, size_t size) { 220 T::SplatSet(dst, value); 221 Tail<T>::SplatSet(dst, value, size); 222 } 223 }; 224 225 // Simple loop ending with a Tail operation. 226 // 227 // e.g. with 228 // [12345678123456781234567812345678] 229 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 230 // [__XXXXXXXX_______________________] 231 // [__________XXXXXXXX_______________] 232 // [__________________XXXXXXXX_______] 233 // [______________________XXXXXXXX___] 234 // 235 // Precondition: 236 // - size >= T::kSize 237 template <typename T> struct Loop { 238 static void Copy(char *__restrict dst, const char *__restrict src, 239 size_t size) { 240 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) 241 T::Copy(dst + offset, src + offset); 242 Tail<T>::Copy(dst, src, size); 243 } 244 245 static bool Equals(const char *lhs, const char *rhs, size_t size) { 246 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) 247 if (!T::Equals(lhs + offset, rhs + offset)) 248 return false; 249 return Tail<T>::Equals(lhs, rhs, size); 250 } 251 252 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 253 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) 254 if (!T::Equals(lhs + offset, rhs + offset)) 255 return T::ThreeWayCompare(lhs + offset, rhs + offset); 256 return Tail<T>::ThreeWayCompare(lhs, rhs, size); 257 } 258 259 static void SplatSet(char *dst, const unsigned char value, size_t size) { 260 for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize) 261 T::SplatSet(dst + offset, value); 262 Tail<T>::SplatSet(dst, value, size); 263 } 264 }; 265 266 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; 267 268 namespace internal { 269 270 // Provides a specialized Bump function that adjusts pointers and size so first 271 // argument (resp. second argument) gets aligned to Alignment. 272 // We make sure the compiler knows about the adjusted pointer alignment. 273 template <Arg arg, size_t Alignment> struct AlignHelper {}; 274 275 template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> { 276 template <typename T1, typename T2> 277 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { 278 const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref); 279 p1ref += offset; 280 p2ref += offset; 281 size -= offset; 282 p1ref = assume_aligned<Alignment>(p1ref); 283 } 284 }; 285 286 template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> { 287 template <typename T1, typename T2> 288 static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { 289 const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref); 290 p1ref += offset; 291 p2ref += offset; 292 size -= offset; 293 p2ref = assume_aligned<Alignment>(p2ref); 294 } 295 }; 296 297 } // namespace internal 298 299 // An alignment operation that: 300 // - executes the 'AlignmentT' operation 301 // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected 302 // pointer gets aligned, size is decreased accordingly. 303 // - calls the 'NextT' operation. 304 // 305 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: 306 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count); 307 template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align { 308 private: 309 static constexpr size_t Alignment = AlignmentT::kSize; 310 static_assert(Alignment > 1, "Alignment must be more than 1"); 311 static_assert(is_power2(Alignment), "Alignment must be a power of 2"); 312 313 public: 314 template <typename NextT> struct Then { 315 static void Copy(char *__restrict dst, const char *__restrict src, 316 size_t size) { 317 AlignmentT::Copy(dst, src); 318 internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size); 319 NextT::Copy(dst, src, size); 320 } 321 322 static bool Equals(const char *lhs, const char *rhs, size_t size) { 323 if (!AlignmentT::Equals(lhs, rhs)) 324 return false; 325 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size); 326 return NextT::Equals(lhs, rhs, size); 327 } 328 329 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 330 if (!AlignmentT::Equals(lhs, rhs)) 331 return AlignmentT::ThreeWayCompare(lhs, rhs); 332 internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size); 333 return NextT::ThreeWayCompare(lhs, rhs, size); 334 } 335 336 static void SplatSet(char *dst, const unsigned char value, size_t size) { 337 AlignmentT::SplatSet(dst, value); 338 char *dummy = nullptr; 339 internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size); 340 NextT::SplatSet(dst, value, size); 341 } 342 }; 343 }; 344 345 // An operation that allows to skip the specified amount of bytes. 346 template <ptrdiff_t Bytes> struct Skip { 347 template <typename NextT> struct Then { 348 static void Copy(char *__restrict dst, const char *__restrict src, 349 size_t size) { 350 NextT::Copy(dst + Bytes, src + Bytes, size - Bytes); 351 } 352 353 static void Copy(char *__restrict dst, const char *__restrict src) { 354 NextT::Copy(dst + Bytes, src + Bytes); 355 } 356 357 static bool Equals(const char *lhs, const char *rhs, size_t size) { 358 return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes); 359 } 360 361 static bool Equals(const char *lhs, const char *rhs) { 362 return NextT::Equals(lhs + Bytes, rhs + Bytes); 363 } 364 365 static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { 366 return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes); 367 } 368 369 static int ThreeWayCompare(const char *lhs, const char *rhs) { 370 return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes); 371 } 372 373 static void SplatSet(char *dst, const unsigned char value, size_t size) { 374 NextT::SplatSet(dst + Bytes, value, size - Bytes); 375 } 376 377 static void SplatSet(char *dst, const unsigned char value) { 378 NextT::SplatSet(dst + Bytes, value); 379 } 380 }; 381 }; 382 383 // Fixed-size Builtin Operations 384 // ----------------------------- 385 // Note: Do not use 'builtin' right now as it requires the implementation of the 386 // `_inline` versions of all the builtins. Theoretically, Clang can still turn 387 // them into calls to the C library leading to reentrancy problems. 388 namespace builtin { 389 390 #ifndef __has_builtin 391 #define __has_builtin(x) 0 // Compatibility with non-clang compilers. 392 #endif 393 394 template <size_t Size> struct Builtin { 395 static constexpr size_t kSize = Size; 396 397 static void Copy(char *__restrict dst, const char *__restrict src) { 398 #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER 399 ForLoopCopy(dst, src); 400 #elif __has_builtin(__builtin_memcpy_inline) 401 // __builtin_memcpy_inline guarantees to never call external functions. 402 // Unfortunately it is not widely available. 403 __builtin_memcpy_inline(dst, src, kSize); 404 #elif __has_builtin(__builtin_memcpy) 405 __builtin_memcpy(dst, src, kSize); 406 #else 407 ForLoopCopy(dst, src); 408 #endif 409 } 410 411 #if __has_builtin(__builtin_memcmp_inline) 412 #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline 413 #else 414 #define LLVM_LIBC_MEMCMP __builtin_memcmp 415 #endif 416 417 static bool Equals(const char *lhs, const char *rhs) { 418 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize) == 0; 419 } 420 421 static int ThreeWayCompare(const char *lhs, const char *rhs) { 422 return LLVM_LIBC_MEMCMP(lhs, rhs, kSize); 423 } 424 425 static void SplatSet(char *dst, const unsigned char value) { 426 __builtin_memset(dst, value, kSize); 427 } 428 429 private: 430 // Copies `kSize` bytes from `src` to `dst` using a for loop. 431 // This code requires the use of `-fno-buitin-memcpy` to prevent the compiler 432 // from turning the for-loop back into `__builtin_memcpy`. 433 static void ForLoopCopy(char *__restrict dst, const char *__restrict src) { 434 for (size_t i = 0; i < kSize; ++i) 435 dst[i] = src[i]; 436 } 437 }; 438 439 using _1 = Builtin<1>; 440 using _2 = Builtin<2>; 441 using _3 = Builtin<3>; 442 using _4 = Builtin<4>; 443 using _8 = Builtin<8>; 444 using _16 = Builtin<16>; 445 using _32 = Builtin<32>; 446 using _64 = Builtin<64>; 447 using _128 = Builtin<128>; 448 449 } // namespace builtin 450 451 // Fixed-size Scalar Operations 452 // ---------------------------- 453 namespace scalar { 454 455 // The Scalar type makes use of simple sized integers. 456 template <typename T> struct Scalar { 457 static constexpr size_t kSize = sizeof(T); 458 459 static void Copy(char *__restrict dst, const char *__restrict src) { 460 Store(dst, Load(src)); 461 } 462 463 static bool Equals(const char *lhs, const char *rhs) { 464 return Load(lhs) == Load(rhs); 465 } 466 467 static int ThreeWayCompare(const char *lhs, const char *rhs) { 468 return ScalarThreeWayCompare(Load(lhs), Load(rhs)); 469 } 470 471 static void SplatSet(char *dst, const unsigned char value) { 472 Store(dst, GetSplattedValue(value)); 473 } 474 475 static int ScalarThreeWayCompare(T a, T b); 476 477 private: 478 static T Load(const char *ptr) { 479 T value; 480 builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr); 481 return value; 482 } 483 static void Store(char *ptr, T value) { 484 builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value)); 485 } 486 static T GetSplattedValue(const unsigned char value) { 487 return T(~0) / T(0xFF) * T(value); 488 } 489 }; 490 491 template <> 492 inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) { 493 const int16_t la = Endian::ToBigEndian(a); 494 const int16_t lb = Endian::ToBigEndian(b); 495 return la - lb; 496 } 497 template <> 498 inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) { 499 const int32_t la = Endian::ToBigEndian(a); 500 const int32_t lb = Endian::ToBigEndian(b); 501 return la - lb; 502 } 503 template <> 504 inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) { 505 const uint32_t la = Endian::ToBigEndian(a); 506 const uint32_t lb = Endian::ToBigEndian(b); 507 return la > lb ? 1 : la < lb ? -1 : 0; 508 } 509 template <> 510 inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) { 511 const uint64_t la = Endian::ToBigEndian(a); 512 const uint64_t lb = Endian::ToBigEndian(b); 513 return la > lb ? 1 : la < lb ? -1 : 0; 514 } 515 516 using UINT8 = Scalar<uint8_t>; // 1 Byte 517 using UINT16 = Scalar<uint16_t>; // 2 Bytes 518 using UINT32 = Scalar<uint32_t>; // 4 Bytes 519 using UINT64 = Scalar<uint64_t>; // 8 Bytes 520 521 using _1 = UINT8; 522 using _2 = UINT16; 523 using _3 = Chained<UINT16, UINT8>; 524 using _4 = UINT32; 525 using _8 = UINT64; 526 using _16 = Repeated<_8, 2>; 527 using _32 = Repeated<_8, 4>; 528 using _64 = Repeated<_8, 8>; 529 using _128 = Repeated<_8, 16>; 530 531 } // namespace scalar 532 } // namespace __llvm_libc 533 534 #include <src/string/memory_utils/elements_aarch64.h> 535 #include <src/string/memory_utils/elements_x86.h> 536 537 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H 538