//===-- Elementary operations to compose memory primitives ----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H #include // size_t #include // uint8_t, uint16_t, uint32_t, uint64_t #include "src/__support/endian.h" #include "src/string/memory_utils/utils.h" namespace __llvm_libc { // Elementary Operations // -------------------------------- // We define abstract elementary operations acting on fixed chunks of memory. // These are low level building blocks that are meant to be assembled to compose // higher order abstractions. Each function is defined twice: once with // fixed-size operations, and once with runtime-size operations. // Fixed-size copy from 'src' to 'dst'. template void copy(char *__restrict dst, const char *__restrict src) { Element::copy(dst, src); } // Runtime-size copy from 'src' to 'dst'. template void copy(char *__restrict dst, const char *__restrict src, size_t size) { Element::copy(dst, src, size); } // Fixed-size move from 'src' to 'dst'. template void move(char *dst, const char *src) { Element::move(dst, src); } // Runtime-size move from 'src' to 'dst'. template void move(char *dst, const char *src, size_t size) { Element::move(dst, src, size); } // Runtime-size move from 'src' to 'dst'. template void move_backward(char *dst, const char *src, size_t size) { Element::move_backward(dst, src, size); } // Fixed-size equality between 'lhs' and 'rhs'. template bool equals(const char *lhs, const char *rhs) { return Element::equals(lhs, rhs); } // Runtime-size equality between 'lhs' and 'rhs'. template bool equals(const char *lhs, const char *rhs, size_t size) { return Element::equals(lhs, rhs, size); } // Fixed-size three-way comparison between 'lhs' and 'rhs'. template int three_way_compare(const char *lhs, const char *rhs) { return Element::three_way_compare(lhs, rhs); } // Runtime-size three-way comparison between 'lhs' and 'rhs'. template int three_way_compare(const char *lhs, const char *rhs, size_t size) { return Element::three_way_compare(lhs, rhs, size); } // Fixed-size initialization. template void splat_set(char *dst, const unsigned char value) { Element::splat_set(dst, value); } // Runtime-size initialization. template void splat_set(char *dst, const unsigned char value, size_t size) { Element::splat_set(dst, value, size); } // Stack placeholder for Move operations. template struct Storage { char bytes[Element::SIZE]; }; // Fixed-size Higher-Order Operations // ---------------------------------- // - Repeated: Repeat the operation several times in a row. // - Chained: Chain the operation of several types. // Repeat the operation several times in a row. template struct Repeated { static constexpr size_t SIZE = ElementCount * Element::SIZE; static void copy(char *__restrict dst, const char *__restrict src) { for (size_t i = 0; i < ElementCount; ++i) { const size_t offset = i * Element::SIZE; Element::copy(dst + offset, src + offset); } } static void move(char *dst, const char *src) { const auto value = load(src); store(dst, value); } static bool equals(const char *lhs, const char *rhs) { for (size_t i = 0; i < ElementCount; ++i) { const size_t offset = i * Element::SIZE; if (!Element::equals(lhs + offset, rhs + offset)) return false; } return true; } static int three_way_compare(const char *lhs, const char *rhs) { for (size_t i = 0; i < ElementCount; ++i) { const size_t offset = i * Element::SIZE; // We make the assumption that 'equals' is cheaper than // 'three_way_compare'. if (Element::equals(lhs + offset, rhs + offset)) continue; return Element::three_way_compare(lhs + offset, rhs + offset); } return 0; } static void splat_set(char *dst, const unsigned char value) { for (size_t i = 0; i < ElementCount; ++i) { const size_t offset = i * Element::SIZE; Element::splat_set(dst + offset, value); } } static Storage load(const char *ptr) { Storage value; copy(reinterpret_cast(&value), ptr); return value; } static void store(char *ptr, Storage value) { copy(ptr, reinterpret_cast(&value)); } }; template struct Repeated { static void move(char *dst, const char *src) {} }; // Chain the operation of several types. // For instance, to handle a 3 bytes operation, one can use: // Chained::Operation(); template struct Chained; template struct Chained { static constexpr size_t SIZE = Head::SIZE + Chained::SIZE; static void copy(char *__restrict dst, const char *__restrict src) { Chained::copy(dst + Head::SIZE, src + Head::SIZE); __llvm_libc::copy(dst, src); } static void move(char *dst, const char *src) { const auto value = Head::load(src); Chained::move(dst + Head::SIZE, src + Head::SIZE); Head::store(dst, value); } static bool equals(const char *lhs, const char *rhs) { if (!__llvm_libc::equals(lhs, rhs)) return false; return Chained::equals(lhs + Head::SIZE, rhs + Head::SIZE); } static int three_way_compare(const char *lhs, const char *rhs) { if (__llvm_libc::equals(lhs, rhs)) return Chained::three_way_compare(lhs + Head::SIZE, rhs + Head::SIZE); return __llvm_libc::three_way_compare(lhs, rhs); } static void splat_set(char *dst, const unsigned char value) { Chained::splat_set(dst + Head::SIZE, value); __llvm_libc::splat_set(dst, value); } }; template <> struct Chained<> { static constexpr size_t SIZE = 0; static void copy(char *__restrict dst, const char *__restrict src) {} static void move(char *dst, const char *src) {} static bool equals(const char *lhs, const char *rhs) { return true; } static int three_way_compare(const char *lhs, const char *rhs) { return 0; } static void splat_set(char *dst, const unsigned char value) {} }; // Overlap ElementA and ElementB so they span Size bytes. template struct Overlap { static constexpr size_t SIZE = Size; static_assert(ElementB::SIZE <= ElementA::SIZE, "ElementB too big"); static_assert(ElementA::SIZE <= Size, "ElementA too big"); static_assert((ElementA::SIZE + ElementB::SIZE) >= Size, "Elements too small to overlap"); static constexpr size_t OFFSET = SIZE - ElementB::SIZE; static void copy(char *__restrict dst, const char *__restrict src) { ElementA::copy(dst, src); ElementB::copy(dst + OFFSET, src + OFFSET); } static void move(char *dst, const char *src) { const auto value_a = ElementA::load(src); const auto value_b = ElementB::load(src + OFFSET); ElementB::store(dst + OFFSET, value_b); ElementA::store(dst, value_a); } static bool equals(const char *lhs, const char *rhs) { if (!ElementA::equals(lhs, rhs)) return false; if (!ElementB::equals(lhs + OFFSET, rhs + OFFSET)) return false; return true; } static int three_way_compare(const char *lhs, const char *rhs) { if (!ElementA::equals(lhs, rhs)) return ElementA::three_way_compare(lhs, rhs); if (!ElementB::equals(lhs + OFFSET, rhs + OFFSET)) return ElementB::three_way_compare(lhs + OFFSET, rhs + OFFSET); return 0; } static void splat_set(char *dst, const unsigned char value) { ElementA::splat_set(dst, value); ElementB::splat_set(dst + OFFSET, value); } }; // Runtime-size Higher-Order Operations // ------------------------------------ // - Tail: Perform the operation on the last 'T::SIZE' bytes of the buffer. // - HeadTail: Perform the operation on the first and last 'T::SIZE' bytes // of the buffer. // - Loop: Perform a loop of fixed-sized operations. // Perform the operation on the last 'T::SIZE' bytes of the buffer. // // e.g. with // [1234567812345678123] // [__XXXXXXXXXXXXXX___] // [________XXXXXXXX___] // // Precondition: `size >= T::SIZE`. template struct Tail { static void copy(char *__restrict dst, const char *__restrict src, size_t size) { return T::copy(dst + offset(size), src + offset(size)); } static bool equals(const char *lhs, const char *rhs, size_t size) { return T::equals(lhs + offset(size), rhs + offset(size)); } static int three_way_compare(const char *lhs, const char *rhs, size_t size) { return T::three_way_compare(lhs + offset(size), rhs + offset(size)); } static void splat_set(char *dst, const unsigned char value, size_t size) { return T::splat_set(dst + offset(size), value); } static size_t offset(size_t size) { return size - T::SIZE; } }; // Perform the operation on the first and last 'T::SIZE' bytes of the buffer. // This is useful for overlapping operations. // // e.g. with // [1234567812345678123] // [__XXXXXXXXXXXXXX___] // [__XXXXXXXX_________] // [________XXXXXXXX___] // // Precondition: `size >= T::SIZE && size <= 2 x T::SIZE`. template struct HeadTail { static void copy(char *__restrict dst, const char *__restrict src, size_t size) { T::copy(dst, src); Tail::copy(dst, src, size); } static void move(char *dst, const char *src, size_t size) { const size_t offset = Tail::offset(size); const auto head_value = T::load(src); const auto tail_value = T::load(src + offset); T::store(dst + offset, tail_value); T::store(dst, head_value); } static bool equals(const char *lhs, const char *rhs, size_t size) { if (!T::equals(lhs, rhs)) return false; return Tail::equals(lhs, rhs, size); } static int three_way_compare(const char *lhs, const char *rhs, size_t size) { if (!T::equals(lhs, rhs)) return T::three_way_compare(lhs, rhs); return Tail::three_way_compare(lhs, rhs, size); } static void splat_set(char *dst, const unsigned char value, size_t size) { T::splat_set(dst, value); Tail::splat_set(dst, value, size); } }; // Simple loop ending with a Tail operation. // // e.g. with // [12345678123456781234567812345678] // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] // [__XXXXXXXX_______________________] // [__________XXXXXXXX_______________] // [__________________XXXXXXXX_______] // [______________________XXXXXXXX___] // // Precondition: // - size >= T::SIZE template struct Loop { static_assert(T::SIZE == TailT::SIZE, "Tail type must have the same size as T"); static void copy(char *__restrict dst, const char *__restrict src, size_t size) { size_t offset = 0; do { T::copy(dst + offset, src + offset); offset += T::SIZE; } while (offset < size - T::SIZE); Tail::copy(dst, src, size); } // Move forward suitable when dst < src. We load the tail bytes before // handling the loop. // // e.g. Moving two bytes // [ | | | | |] // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] // [_________________________LLLLLLLL___] // [___LLLLLLLL_________________________] // [_SSSSSSSS___________________________] // [___________LLLLLLLL_________________] // [_________SSSSSSSS___________________] // [___________________LLLLLLLL_________] // [_________________SSSSSSSS___________] // [_______________________SSSSSSSS_____] static void move(char *dst, const char *src, size_t size) { const size_t tail_offset = Tail::offset(size); const auto tail_value = TailT::load(src + tail_offset); size_t offset = 0; do { T::move(dst + offset, src + offset); offset += T::SIZE; } while (offset < size - T::SIZE); TailT::store(dst + tail_offset, tail_value); } // Move forward suitable when dst > src. We load the head bytes before // handling the loop. // // e.g. Moving two bytes // [ | | | | |] // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] // [___LLLLLLLL_________________________] // [_________________________LLLLLLLL___] // [___________________________SSSSSSSS_] // [_________________LLLLLLLL___________] // [___________________SSSSSSSS_________] // [_________LLLLLLLL___________________] // [___________SSSSSSSS_________________] // [_____SSSSSSSS_______________________] static void move_backward(char *dst, const char *src, size_t size) { const auto head_value = TailT::load(src); ptrdiff_t offset = size - T::SIZE; do { T::move(dst + offset, src + offset); offset -= T::SIZE; } while (offset >= 0); TailT::store(dst, head_value); } static bool equals(const char *lhs, const char *rhs, size_t size) { size_t offset = 0; do { if (!T::equals(lhs + offset, rhs + offset)) return false; offset += T::SIZE; } while (offset < size - T::SIZE); return Tail::equals(lhs, rhs, size); } static int three_way_compare(const char *lhs, const char *rhs, size_t size) { size_t offset = 0; do { if (!T::equals(lhs + offset, rhs + offset)) return T::three_way_compare(lhs + offset, rhs + offset); offset += T::SIZE; } while (offset < size - T::SIZE); return Tail::three_way_compare(lhs, rhs, size); } static void splat_set(char *dst, const unsigned char value, size_t size) { size_t offset = 0; do { T::splat_set(dst + offset, value); offset += T::SIZE; } while (offset < size - T::SIZE); Tail::splat_set(dst, value, size); } }; enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; namespace internal { template struct ArgSelector {}; template <> struct ArgSelector { template static T1 *__restrict &Select(T1 *__restrict &p1ref, T2 *__restrict &p2ref) { return p1ref; } }; template <> struct ArgSelector { template static T2 *__restrict &Select(T1 *__restrict &p1ref, T2 *__restrict &p2ref) { return p2ref; } }; // Provides a specialized bump function that adjusts pointers and size so first // argument (resp. second argument) gets aligned to Alignment. // We make sure the compiler knows about the adjusted pointer alignment. // The 'additional_bumps' parameter allows to reach previous / next aligned // pointers. template struct Align { template static void bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size, int additional_bumps = 0) { auto &aligned_ptr = ArgSelector::Select(p1ref, p2ref); auto offset = offset_to_next_aligned(aligned_ptr); offset += additional_bumps * Alignment; p1ref += offset; p2ref += offset; size -= offset; aligned_ptr = assume_aligned(aligned_ptr); } }; } // namespace internal // An alignment operation that: // - executes the 'AlignmentT' operation // - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected // pointer gets aligned, size is decreased accordingly. // - calls the 'NextT' operation. // // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: // copy::Then>>(dst, src, count); template struct Align { private: static constexpr size_t ALIGNMENT = AlignmentT::SIZE; static_assert(ALIGNMENT > 1, "Alignment must be more than 1"); static_assert(is_power2(ALIGNMENT), "Alignment must be a power of 2"); public: template struct Then { static void copy(char *__restrict dst, const char *__restrict src, size_t size) { AlignmentT::copy(dst, src); internal::Align::bump(dst, src, size); NextT::copy(dst, src, size); } // Move forward suitable when dst < src. The alignment is performed with an // HeadTail operation of size ∈ [Alignment, 2 x Alignment]. // // e.g. Moving two bytes and making sure src is then aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] // [____LLLLLLLL_____________________] // [___________LLLLLLLL______________] // [_SSSSSSSS________________________] // [________SSSSSSSS_________________] // // e.g. Moving two bytes and making sure dst is then aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] // [____LLLLLLLL_____________________] // [______LLLLLLLL___________________] // [_SSSSSSSS________________________] // [___SSSSSSSS______________________] static void move(char *dst, const char *src, size_t size) { char *next_dst = dst; const char *next_src = src; size_t next_size = size; internal::Align::bump(next_dst, next_src, next_size, 1); HeadTail::move(dst, src, size - next_size); NextT::move(next_dst, next_src, next_size); } // Move backward suitable when dst > src. The alignment is performed with an // HeadTail operation of size ∈ [Alignment, 2 x Alignment]. // // e.g. Moving two bytes backward and making sure src is then aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] // [ _________________LLLLLLLL_______] // [ ___________________LLLLLLLL_____] // [____________________SSSSSSSS_____] // [______________________SSSSSSSS___] // // e.g. Moving two bytes and making sure dst is then aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] // [ _______________LLLLLLLL_________] // [ ___________________LLLLLLLL_____] // [__________________SSSSSSSS_______] // [______________________SSSSSSSS___] static void move_backward(char *dst, const char *src, size_t size) { char *headtail_dst = dst + size; const char *headtail_src = src + size; size_t headtail_size = 0; internal::Align::bump(headtail_dst, headtail_src, headtail_size, -2); HeadTail::move(headtail_dst, headtail_src, headtail_size); NextT::move_backward(dst, src, size - headtail_size); } static bool equals(const char *lhs, const char *rhs, size_t size) { if (!AlignmentT::equals(lhs, rhs)) return false; internal::Align::bump(lhs, rhs, size); return NextT::equals(lhs, rhs, size); } static int three_way_compare(const char *lhs, const char *rhs, size_t size) { if (!AlignmentT::equals(lhs, rhs)) return AlignmentT::three_way_compare(lhs, rhs); internal::Align::bump(lhs, rhs, size); return NextT::three_way_compare(lhs, rhs, size); } static void splat_set(char *dst, const unsigned char value, size_t size) { AlignmentT::splat_set(dst, value); char *dummy = nullptr; internal::Align::bump(dst, dummy, size); NextT::splat_set(dst, value, size); } }; }; // An operation that allows to skip the specified amount of bytes. template struct Skip { template struct Then { static void copy(char *__restrict dst, const char *__restrict src, size_t size) { NextT::copy(dst + Bytes, src + Bytes, size - Bytes); } static void copy(char *__restrict dst, const char *__restrict src) { NextT::copy(dst + Bytes, src + Bytes); } static bool equals(const char *lhs, const char *rhs, size_t size) { return NextT::equals(lhs + Bytes, rhs + Bytes, size - Bytes); } static bool equals(const char *lhs, const char *rhs) { return NextT::equals(lhs + Bytes, rhs + Bytes); } static int three_way_compare(const char *lhs, const char *rhs, size_t size) { return NextT::three_way_compare(lhs + Bytes, rhs + Bytes, size - Bytes); } static int three_way_compare(const char *lhs, const char *rhs) { return NextT::three_way_compare(lhs + Bytes, rhs + Bytes); } static void splat_set(char *dst, const unsigned char value, size_t size) { NextT::splat_set(dst + Bytes, value, size - Bytes); } static void splat_set(char *dst, const unsigned char value) { NextT::splat_set(dst + Bytes, value); } }; }; // Fixed-size Builtin Operations // ----------------------------- // Note: Do not use 'builtin' right now as it requires the implementation of the // `_inline` versions of all the builtins. Theoretically, Clang can still turn // them into calls to the C library leading to reentrancy problems. namespace builtin { #ifndef __has_builtin #define __has_builtin(x) 0 // Compatibility with non-clang compilers. #endif template struct Builtin { static constexpr size_t SIZE = Size; static void copy(char *__restrict dst, const char *__restrict src) { #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER for_loop_copy(dst, src); #elif __has_builtin(__builtin_memcpy_inline) // __builtin_memcpy_inline guarantees to never call external functions. // Unfortunately it is not widely available. __builtin_memcpy_inline(dst, src, SIZE); #else for_loop_copy(dst, src); #endif } static void move(char *dst, const char *src) { #if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER for_loop_move(dst, src); #elif __has_builtin(__builtin_memmove) __builtin_memmove(dst, src, SIZE); #else for_loop_move(dst, src); #endif } #if __has_builtin(__builtin_memcmp_inline) #define LLVM_LIBC_MEMCMP __builtin_memcmp_inline #else #define LLVM_LIBC_MEMCMP __builtin_memcmp #endif static bool equals(const char *lhs, const char *rhs) { return LLVM_LIBC_MEMCMP(lhs, rhs, SIZE) == 0; } static int three_way_compare(const char *lhs, const char *rhs) { return LLVM_LIBC_MEMCMP(lhs, rhs, SIZE); } static void splat_set(char *dst, const unsigned char value) { __builtin_memset(dst, value, SIZE); } private: // Copies `SIZE` bytes from `src` to `dst` using a for loop. // This code requires the use of `-fno-builtin-memcpy` to prevent the compiler // from turning the for-loop back into `__builtin_memcpy`. static void for_loop_copy(char *__restrict dst, const char *__restrict src) { for (size_t i = 0; i < SIZE; ++i) dst[i] = src[i]; } static void for_loop_move(char *dst, const char *src) { for (size_t i = 0; i < SIZE; ++i) dst[i] = src[i]; } }; using _1 = Builtin<1>; using _2 = Builtin<2>; using _3 = Builtin<3>; using _4 = Builtin<4>; using _8 = Builtin<8>; using _16 = Builtin<16>; using _32 = Builtin<32>; using _64 = Builtin<64>; using _128 = Builtin<128>; } // namespace builtin // Fixed-size Scalar Operations // ---------------------------- namespace scalar { // The Scalar type makes use of simple sized integers. template struct Scalar { static constexpr size_t SIZE = sizeof(T); static void copy(char *__restrict dst, const char *__restrict src) { store(dst, load(src)); } static void move(char *dst, const char *src) { store(dst, load(src)); } static bool equals(const char *lhs, const char *rhs) { return load(lhs) == load(rhs); } static int three_way_compare(const char *lhs, const char *rhs) { return scalar_three_way_compare(load(lhs), load(rhs)); } static void splat_set(char *dst, const unsigned char value) { store(dst, get_splatted_value(value)); } static int scalar_three_way_compare(T a, T b); static T load(const char *ptr) { T value; builtin::Builtin::copy(reinterpret_cast(&value), ptr); return value; } static void store(char *ptr, T value) { builtin::Builtin::copy(ptr, reinterpret_cast(&value)); } private: static T get_splatted_value(const unsigned char value) { return T(~0) / T(0xFF) * T(value); } }; template <> inline int Scalar::scalar_three_way_compare(uint8_t a, uint8_t b) { const int16_t la = Endian::to_big_endian(a); const int16_t lb = Endian::to_big_endian(b); return la - lb; } template <> inline int Scalar::scalar_three_way_compare(uint16_t a, uint16_t b) { const int32_t la = Endian::to_big_endian(a); const int32_t lb = Endian::to_big_endian(b); return la - lb; } template <> inline int Scalar::scalar_three_way_compare(uint32_t a, uint32_t b) { const uint32_t la = Endian::to_big_endian(a); const uint32_t lb = Endian::to_big_endian(b); return la > lb ? 1 : la < lb ? -1 : 0; } template <> inline int Scalar::scalar_three_way_compare(uint64_t a, uint64_t b) { const uint64_t la = Endian::to_big_endian(a); const uint64_t lb = Endian::to_big_endian(b); return la > lb ? 1 : la < lb ? -1 : 0; } using UINT8 = Scalar; // 1 Byte using UINT16 = Scalar; // 2 Bytes using UINT32 = Scalar; // 4 Bytes using UINT64 = Scalar; // 8 Bytes using _1 = UINT8; using _2 = UINT16; using _3 = Chained; using _4 = UINT32; using _8 = UINT64; using _16 = Repeated<_8, 2>; using _32 = Repeated<_8, 4>; using _64 = Repeated<_8, 8>; using _128 = Repeated<_8, 16>; } // namespace scalar } // namespace __llvm_libc #include #include #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H