1 //===-- Memcpy implementation -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H 11 12 #include "src/__support/architectures.h" 13 #include "src/__support/common.h" 14 #include "src/string/memory_utils/elements.h" 15 #include "src/string/memory_utils/utils.h" 16 17 #include <stddef.h> // size_t 18 19 // Design rationale 20 // ================ 21 // 22 // Using a profiler to observe size distributions for calls into libc 23 // functions, it was found most operations act on a small number of bytes. 24 // This makes it important to favor small sizes. 25 // 26 // The tests for `count` are in ascending order so the cost of branching is 27 // proportional to the cost of copying. 28 // 29 // The function is written in C++ for several reasons: 30 // - The compiler can __see__ the code, this is useful when performing Profile 31 // Guided Optimization as the optimized code can take advantage of branching 32 // probabilities. 33 // - It also allows for easier customization and favors testing multiple 34 // implementation parameters. 35 // - As compilers and processors get better, the generated code is improved 36 // with little change on the code side. 37 38 namespace __llvm_libc { 39 40 static inline void inline_memcpy(char *__restrict dst, 41 const char *__restrict src, size_t count) { 42 using namespace __llvm_libc::builtin; 43 #if defined(LLVM_LIBC_ARCH_X86) 44 ///////////////////////////////////////////////////////////////////////////// 45 // LLVM_LIBC_ARCH_X86 46 ///////////////////////////////////////////////////////////////////////////// 47 48 // Whether to use only rep;movsb. 49 constexpr bool USE_ONLY_REP_MOVSB = 50 LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB); 51 52 // kRepMovsBSize == -1 : Only CopyAligned is used. 53 // kRepMovsBSize == 0 : Only RepMovsb is used. 54 // else CopyAligned is used up to kRepMovsBSize and then RepMovsb. 55 constexpr size_t REP_MOVS_B_SIZE = 56 #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE) 57 LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE; 58 #else 59 -1; 60 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE 61 62 // Whether target supports AVX instructions. 63 constexpr bool HAS_AVX = LLVM_LIBC_IS_DEFINED(__AVX__); 64 65 #if defined(__AVX__) 66 using LoopBlockSize = _64; 67 #else 68 using LoopBlockSize = _32; 69 #endif 70 71 if (USE_ONLY_REP_MOVSB) 72 return copy<x86::Accelerator>(dst, src, count); 73 74 if (count == 0) 75 return; 76 if (count == 1) 77 return copy<_1>(dst, src); 78 if (count == 2) 79 return copy<_2>(dst, src); 80 if (count == 3) 81 return copy<_3>(dst, src); 82 if (count == 4) 83 return copy<_4>(dst, src); 84 if (count < 8) 85 return copy<HeadTail<_4>>(dst, src, count); 86 if (count < 16) 87 return copy<HeadTail<_8>>(dst, src, count); 88 if (count < 32) 89 return copy<HeadTail<_16>>(dst, src, count); 90 if (count < 64) 91 return copy<HeadTail<_32>>(dst, src, count); 92 if (count < 128) 93 return copy<HeadTail<_64>>(dst, src, count); 94 if (HAS_AVX && count < 256) 95 return copy<HeadTail<_128>>(dst, src, count); 96 if (count <= REP_MOVS_B_SIZE) 97 return copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src, 98 count); 99 return copy<x86::Accelerator>(dst, src, count); 100 #elif defined(LLVM_LIBC_ARCH_AARCH64) 101 ///////////////////////////////////////////////////////////////////////////// 102 // LLVM_LIBC_ARCH_AARCH64 103 ///////////////////////////////////////////////////////////////////////////// 104 if (count == 0) 105 return; 106 if (count == 1) 107 return copy<_1>(dst, src); 108 if (count == 2) 109 return copy<_2>(dst, src); 110 if (count == 3) 111 return copy<_3>(dst, src); 112 if (count == 4) 113 return copy<_4>(dst, src); 114 if (count < 8) 115 return copy<HeadTail<_4>>(dst, src, count); 116 if (count < 16) 117 return copy<HeadTail<_8>>(dst, src, count); 118 if (count < 32) 119 return copy<HeadTail<_16>>(dst, src, count); 120 if (count < 64) 121 return copy<HeadTail<_32>>(dst, src, count); 122 if (count < 128) 123 return copy<HeadTail<_64>>(dst, src, count); 124 return copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count); 125 #else 126 ///////////////////////////////////////////////////////////////////////////// 127 // Default 128 ///////////////////////////////////////////////////////////////////////////// 129 if (count == 0) 130 return; 131 if (count == 1) 132 return copy<_1>(dst, src); 133 if (count == 2) 134 return copy<_2>(dst, src); 135 if (count == 3) 136 return copy<_3>(dst, src); 137 if (count == 4) 138 return copy<_4>(dst, src); 139 if (count < 8) 140 return copy<HeadTail<_4>>(dst, src, count); 141 if (count < 16) 142 return copy<HeadTail<_8>>(dst, src, count); 143 if (count < 32) 144 return copy<HeadTail<_16>>(dst, src, count); 145 if (count < 64) 146 return copy<HeadTail<_32>>(dst, src, count); 147 if (count < 128) 148 return copy<HeadTail<_64>>(dst, src, count); 149 return copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count); 150 #endif 151 } 152 153 } // namespace __llvm_libc 154 155 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H 156