1 //===-- Memcpy implementation -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H 11 12 #include "src/__support/architectures.h" 13 #include "src/__support/common.h" 14 #include "src/string/memory_utils/elements.h" 15 #include "src/string/memory_utils/utils.h" 16 17 #include <stddef.h> // size_t 18 19 // Design rationale 20 // ================ 21 // 22 // Using a profiler to observe size distributions for calls into libc 23 // functions, it was found most operations act on a small number of bytes. 24 // This makes it important to favor small sizes. 25 // 26 // The tests for `count` are in ascending order so the cost of branching is 27 // proportional to the cost of copying. 28 // 29 // The function is written in C++ for several reasons: 30 // - The compiler can __see__ the code, this is useful when performing Profile 31 // Guided Optimization as the optimized code can take advantage of branching 32 // probabilities. 33 // - It also allows for easier customization and favors testing multiple 34 // implementation parameters. 35 // - As compilers and processors get better, the generated code is improved 36 // with little change on the code side. 37 38 namespace __llvm_libc { 39 40 static inline void inline_memcpy(char *__restrict dst, 41 const char *__restrict src, size_t count) { 42 #if defined(LLVM_LIBC_ARCH_X86) 43 ///////////////////////////////////////////////////////////////////////////// 44 // LLVM_LIBC_ARCH_X86 45 ///////////////////////////////////////////////////////////////////////////// 46 using namespace __llvm_libc::x86; 47 48 // Whether to use only rep;movsb. 49 constexpr bool kUseOnlyRepMovsb = 50 LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB); 51 52 // kRepMovsBSize == -1 : Only CopyAligned is used. 53 // kRepMovsBSize == 0 : Only RepMovsb is used. 54 // else CopyAligned is used up to kRepMovsBSize and then RepMovsb. 55 constexpr size_t kRepMovsBSize = 56 #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE) 57 LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE; 58 #else 59 -1; 60 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE 61 62 // Whether target supports AVX instructions. 63 constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__); 64 65 #if defined(__AVX__) 66 using LoopBlockSize = _64; 67 #else 68 using LoopBlockSize = _32; 69 #endif 70 71 if (kUseOnlyRepMovsb) 72 return Copy<Accelerator>(dst, src, count); 73 74 if (count == 0) 75 return; 76 if (count == 1) 77 return Copy<_1>(dst, src); 78 if (count == 2) 79 return Copy<_2>(dst, src); 80 if (count == 3) 81 return Copy<_3>(dst, src); 82 if (count == 4) 83 return Copy<_4>(dst, src); 84 if (count < 8) 85 return Copy<HeadTail<_4>>(dst, src, count); 86 if (count < 16) 87 return Copy<HeadTail<_8>>(dst, src, count); 88 if (count < 32) 89 return Copy<HeadTail<_16>>(dst, src, count); 90 if (count < 64) 91 return Copy<HeadTail<_32>>(dst, src, count); 92 if (count < 128) 93 return Copy<HeadTail<_64>>(dst, src, count); 94 if (kHasAvx && count < 256) 95 return Copy<HeadTail<_128>>(dst, src, count); 96 if (count <= kRepMovsBSize) 97 return Copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src, 98 count); 99 return Copy<Accelerator>(dst, src, count); 100 #elif defined(LLVM_LIBC_ARCH_AARCH64) 101 ///////////////////////////////////////////////////////////////////////////// 102 // LLVM_LIBC_ARCH_AARCH64 103 ///////////////////////////////////////////////////////////////////////////// 104 using namespace __llvm_libc::scalar; 105 if (count == 0) 106 return; 107 if (count == 1) 108 return Copy<_1>(dst, src); 109 if (count == 2) 110 return Copy<_2>(dst, src); 111 if (count == 3) 112 return Copy<_3>(dst, src); 113 if (count == 4) 114 return Copy<_4>(dst, src); 115 if (count < 8) 116 return Copy<HeadTail<_4>>(dst, src, count); 117 if (count < 16) 118 return Copy<HeadTail<_8>>(dst, src, count); 119 if (count < 32) 120 return Copy<HeadTail<_16>>(dst, src, count); 121 if (count < 64) 122 return Copy<HeadTail<_32>>(dst, src, count); 123 if (count < 128) 124 return Copy<HeadTail<_64>>(dst, src, count); 125 return Copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count); 126 #else 127 ///////////////////////////////////////////////////////////////////////////// 128 // Default 129 ///////////////////////////////////////////////////////////////////////////// 130 using namespace __llvm_libc::scalar; 131 if (count == 0) 132 return; 133 if (count == 1) 134 return Copy<_1>(dst, src); 135 if (count == 2) 136 return Copy<_2>(dst, src); 137 if (count == 3) 138 return Copy<_3>(dst, src); 139 if (count == 4) 140 return Copy<_4>(dst, src); 141 if (count < 8) 142 return Copy<HeadTail<_4>>(dst, src, count); 143 if (count < 16) 144 return Copy<HeadTail<_8>>(dst, src, count); 145 if (count < 32) 146 return Copy<HeadTail<_16>>(dst, src, count); 147 if (count < 64) 148 return Copy<HeadTail<_32>>(dst, src, count); 149 if (count < 128) 150 return Copy<HeadTail<_64>>(dst, src, count); 151 return Copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count); 152 #endif 153 } 154 155 } // namespace __llvm_libc 156 157 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H 158