1 //===-- Memcpy implementation -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
11
12 #include "src/__support/architectures.h"
13 #include "src/__support/common.h"
14 #include "src/string/memory_utils/elements.h"
15 #include "src/string/memory_utils/utils.h"
16
17 #include <stddef.h> // size_t
18
19 // Design rationale
20 // ================
21 //
22 // Using a profiler to observe size distributions for calls into libc
23 // functions, it was found most operations act on a small number of bytes.
24 // This makes it important to favor small sizes.
25 //
26 // The tests for `count` are in ascending order so the cost of branching is
27 // proportional to the cost of copying.
28 //
29 // The function is written in C++ for several reasons:
// - The compiler can __see__ the code, which is useful when performing
//   Profile Guided Optimization because the optimized code can take advantage
//   of branching probabilities.
33 // - It also allows for easier customization and favors testing multiple
34 // implementation parameters.
35 // - As compilers and processors get better, the generated code is improved
36 // with little change on the code side.
37
38 namespace __llvm_libc {
39
inline_memcpy(char * __restrict dst,const char * __restrict src,size_t count)40 static inline void inline_memcpy(char *__restrict dst,
41 const char *__restrict src, size_t count) {
42 using namespace __llvm_libc::builtin;
43 #if defined(LLVM_LIBC_ARCH_X86)
44 /////////////////////////////////////////////////////////////////////////////
45 // LLVM_LIBC_ARCH_X86
46 /////////////////////////////////////////////////////////////////////////////
47
48 // Whether to use only rep;movsb.
49 constexpr bool USE_ONLY_REP_MOVSB =
50 LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
51
52 // kRepMovsBSize == -1 : Only CopyAligned is used.
53 // kRepMovsBSize == 0 : Only RepMovsb is used.
54 // else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
55 constexpr size_t REP_MOVS_B_SIZE =
56 #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE)
57 LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
58 #else
59 -1;
60 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
61
62 // Whether target supports AVX instructions.
63 constexpr bool HAS_AVX = LLVM_LIBC_IS_DEFINED(__AVX__);
64
65 #if defined(__AVX__)
66 using LoopBlockSize = _64;
67 #else
68 using LoopBlockSize = _32;
69 #endif
70
71 if (USE_ONLY_REP_MOVSB)
72 return copy<x86::Accelerator>(dst, src, count);
73
74 if (count == 0)
75 return;
76 if (count == 1)
77 return copy<_1>(dst, src);
78 if (count == 2)
79 return copy<_2>(dst, src);
80 if (count == 3)
81 return copy<_3>(dst, src);
82 if (count == 4)
83 return copy<_4>(dst, src);
84 if (count < 8)
85 return copy<HeadTail<_4>>(dst, src, count);
86 if (count < 16)
87 return copy<HeadTail<_8>>(dst, src, count);
88 if (count < 32)
89 return copy<HeadTail<_16>>(dst, src, count);
90 if (count < 64)
91 return copy<HeadTail<_32>>(dst, src, count);
92 if (count < 128)
93 return copy<HeadTail<_64>>(dst, src, count);
94 if (HAS_AVX && count < 256)
95 return copy<HeadTail<_128>>(dst, src, count);
96 if (count <= REP_MOVS_B_SIZE)
97 return copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
98 count);
99 return copy<x86::Accelerator>(dst, src, count);
100 #elif defined(LLVM_LIBC_ARCH_AARCH64)
101 /////////////////////////////////////////////////////////////////////////////
102 // LLVM_LIBC_ARCH_AARCH64
103 /////////////////////////////////////////////////////////////////////////////
104 if (count == 0)
105 return;
106 if (count == 1)
107 return copy<_1>(dst, src);
108 if (count == 2)
109 return copy<_2>(dst, src);
110 if (count == 3)
111 return copy<_3>(dst, src);
112 if (count == 4)
113 return copy<_4>(dst, src);
114 if (count < 8)
115 return copy<HeadTail<_4>>(dst, src, count);
116 if (count < 16)
117 return copy<HeadTail<_8>>(dst, src, count);
118 if (count < 32)
119 return copy<HeadTail<_16>>(dst, src, count);
120 if (count < 64)
121 return copy<HeadTail<_32>>(dst, src, count);
122 if (count < 128)
123 return copy<HeadTail<_64>>(dst, src, count);
124 return copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
125 #else
126 /////////////////////////////////////////////////////////////////////////////
127 // Default
128 /////////////////////////////////////////////////////////////////////////////
129 if (count == 0)
130 return;
131 if (count == 1)
132 return copy<_1>(dst, src);
133 if (count == 2)
134 return copy<_2>(dst, src);
135 if (count == 3)
136 return copy<_3>(dst, src);
137 if (count == 4)
138 return copy<_4>(dst, src);
139 if (count < 8)
140 return copy<HeadTail<_4>>(dst, src, count);
141 if (count < 16)
142 return copy<HeadTail<_8>>(dst, src, count);
143 if (count < 32)
144 return copy<HeadTail<_16>>(dst, src, count);
145 if (count < 64)
146 return copy<HeadTail<_32>>(dst, src, count);
147 if (count < 128)
148 return copy<HeadTail<_64>>(dst, src, count);
149 return copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
150 #endif
151 }
152
153 } // namespace __llvm_libc
154
155 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
156