//===-- Memcpy implementation -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8c2ce8f02SGuillaume Chatelet 
9c2ce8f02SGuillaume Chatelet #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
10c2ce8f02SGuillaume Chatelet #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
11c2ce8f02SGuillaume Chatelet 
12c2ce8f02SGuillaume Chatelet #include "src/__support/architectures.h"
13c2ce8f02SGuillaume Chatelet #include "src/__support/common.h"
14c2ce8f02SGuillaume Chatelet #include "src/string/memory_utils/elements.h"
15c2ce8f02SGuillaume Chatelet #include "src/string/memory_utils/utils.h"
16c2ce8f02SGuillaume Chatelet 
17c2ce8f02SGuillaume Chatelet #include <stddef.h> // size_t
18c2ce8f02SGuillaume Chatelet 
// Design rationale
// ================
//
// Using a profiler to observe size distributions for calls into libc
// functions, it was found that most operations act on a small number of bytes.
// This makes it important to favor small sizes.
//
// The tests for `count` are in ascending order so the cost of branching is
// proportional to the cost of copying.
//
// The function is written in C++ for several reasons:
// - The compiler can __see__ the code, this is useful when performing Profile
//   Guided Optimization as the optimized code can take advantage of branching
//   probabilities.
// - It also allows for easier customization and favors testing multiple
//   implementation parameters.
// - As compilers and processors get better, the generated code is improved
//   with little change on the code side.
37c2ce8f02SGuillaume Chatelet 
38c2ce8f02SGuillaume Chatelet namespace __llvm_libc {
39c2ce8f02SGuillaume Chatelet 
inline_memcpy(char * __restrict dst,const char * __restrict src,size_t count)40c2ce8f02SGuillaume Chatelet static inline void inline_memcpy(char *__restrict dst,
41c2ce8f02SGuillaume Chatelet                                  const char *__restrict src, size_t count) {
42*7b73f537SGuillaume Chatelet   using namespace __llvm_libc::builtin;
43c2ce8f02SGuillaume Chatelet #if defined(LLVM_LIBC_ARCH_X86)
44c2ce8f02SGuillaume Chatelet   /////////////////////////////////////////////////////////////////////////////
45c2ce8f02SGuillaume Chatelet   // LLVM_LIBC_ARCH_X86
46c2ce8f02SGuillaume Chatelet   /////////////////////////////////////////////////////////////////////////////
47c2ce8f02SGuillaume Chatelet 
48c2ce8f02SGuillaume Chatelet   // Whether to use only rep;movsb.
491c92911eSMichael Jones   constexpr bool USE_ONLY_REP_MOVSB =
50c2ce8f02SGuillaume Chatelet       LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
51c2ce8f02SGuillaume Chatelet 
52c2ce8f02SGuillaume Chatelet   // kRepMovsBSize == -1 : Only CopyAligned is used.
53c2ce8f02SGuillaume Chatelet   // kRepMovsBSize ==  0 : Only RepMovsb is used.
54c2ce8f02SGuillaume Chatelet   // else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
551c92911eSMichael Jones   constexpr size_t REP_MOVS_B_SIZE =
56c2ce8f02SGuillaume Chatelet #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE)
57c2ce8f02SGuillaume Chatelet       LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
58c2ce8f02SGuillaume Chatelet #else
59c2ce8f02SGuillaume Chatelet       -1;
60c2ce8f02SGuillaume Chatelet #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
61c2ce8f02SGuillaume Chatelet 
62c2ce8f02SGuillaume Chatelet   // Whether target supports AVX instructions.
631c92911eSMichael Jones   constexpr bool HAS_AVX = LLVM_LIBC_IS_DEFINED(__AVX__);
64c2ce8f02SGuillaume Chatelet 
65c2ce8f02SGuillaume Chatelet #if defined(__AVX__)
66c2ce8f02SGuillaume Chatelet   using LoopBlockSize = _64;
67c2ce8f02SGuillaume Chatelet #else
68c2ce8f02SGuillaume Chatelet   using LoopBlockSize = _32;
69c2ce8f02SGuillaume Chatelet #endif
70c2ce8f02SGuillaume Chatelet 
711c92911eSMichael Jones   if (USE_ONLY_REP_MOVSB)
72*7b73f537SGuillaume Chatelet     return copy<x86::Accelerator>(dst, src, count);
73c2ce8f02SGuillaume Chatelet 
74c2ce8f02SGuillaume Chatelet   if (count == 0)
75c2ce8f02SGuillaume Chatelet     return;
76c2ce8f02SGuillaume Chatelet   if (count == 1)
771c92911eSMichael Jones     return copy<_1>(dst, src);
78c2ce8f02SGuillaume Chatelet   if (count == 2)
791c92911eSMichael Jones     return copy<_2>(dst, src);
80c2ce8f02SGuillaume Chatelet   if (count == 3)
811c92911eSMichael Jones     return copy<_3>(dst, src);
82c2ce8f02SGuillaume Chatelet   if (count == 4)
831c92911eSMichael Jones     return copy<_4>(dst, src);
84c2ce8f02SGuillaume Chatelet   if (count < 8)
851c92911eSMichael Jones     return copy<HeadTail<_4>>(dst, src, count);
86c2ce8f02SGuillaume Chatelet   if (count < 16)
871c92911eSMichael Jones     return copy<HeadTail<_8>>(dst, src, count);
88c2ce8f02SGuillaume Chatelet   if (count < 32)
891c92911eSMichael Jones     return copy<HeadTail<_16>>(dst, src, count);
90c2ce8f02SGuillaume Chatelet   if (count < 64)
911c92911eSMichael Jones     return copy<HeadTail<_32>>(dst, src, count);
92c2ce8f02SGuillaume Chatelet   if (count < 128)
931c92911eSMichael Jones     return copy<HeadTail<_64>>(dst, src, count);
941c92911eSMichael Jones   if (HAS_AVX && count < 256)
951c92911eSMichael Jones     return copy<HeadTail<_128>>(dst, src, count);
961c92911eSMichael Jones   if (count <= REP_MOVS_B_SIZE)
971c92911eSMichael Jones     return copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
98c2ce8f02SGuillaume Chatelet                                                                  count);
99*7b73f537SGuillaume Chatelet   return copy<x86::Accelerator>(dst, src, count);
100c2ce8f02SGuillaume Chatelet #elif defined(LLVM_LIBC_ARCH_AARCH64)
101c2ce8f02SGuillaume Chatelet   /////////////////////////////////////////////////////////////////////////////
102c2ce8f02SGuillaume Chatelet   // LLVM_LIBC_ARCH_AARCH64
103c2ce8f02SGuillaume Chatelet   /////////////////////////////////////////////////////////////////////////////
104c2ce8f02SGuillaume Chatelet   if (count == 0)
105c2ce8f02SGuillaume Chatelet     return;
106c2ce8f02SGuillaume Chatelet   if (count == 1)
1071c92911eSMichael Jones     return copy<_1>(dst, src);
108c2ce8f02SGuillaume Chatelet   if (count == 2)
1091c92911eSMichael Jones     return copy<_2>(dst, src);
110c2ce8f02SGuillaume Chatelet   if (count == 3)
1111c92911eSMichael Jones     return copy<_3>(dst, src);
112c2ce8f02SGuillaume Chatelet   if (count == 4)
1131c92911eSMichael Jones     return copy<_4>(dst, src);
114c2ce8f02SGuillaume Chatelet   if (count < 8)
1151c92911eSMichael Jones     return copy<HeadTail<_4>>(dst, src, count);
116c2ce8f02SGuillaume Chatelet   if (count < 16)
1171c92911eSMichael Jones     return copy<HeadTail<_8>>(dst, src, count);
118c2ce8f02SGuillaume Chatelet   if (count < 32)
1191c92911eSMichael Jones     return copy<HeadTail<_16>>(dst, src, count);
120c2ce8f02SGuillaume Chatelet   if (count < 64)
1211c92911eSMichael Jones     return copy<HeadTail<_32>>(dst, src, count);
122c2ce8f02SGuillaume Chatelet   if (count < 128)
1231c92911eSMichael Jones     return copy<HeadTail<_64>>(dst, src, count);
1241c92911eSMichael Jones   return copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
125c2ce8f02SGuillaume Chatelet #else
126c2ce8f02SGuillaume Chatelet   /////////////////////////////////////////////////////////////////////////////
127c2ce8f02SGuillaume Chatelet   // Default
128c2ce8f02SGuillaume Chatelet   /////////////////////////////////////////////////////////////////////////////
129c2ce8f02SGuillaume Chatelet   if (count == 0)
130c2ce8f02SGuillaume Chatelet     return;
131c2ce8f02SGuillaume Chatelet   if (count == 1)
1321c92911eSMichael Jones     return copy<_1>(dst, src);
133c2ce8f02SGuillaume Chatelet   if (count == 2)
1341c92911eSMichael Jones     return copy<_2>(dst, src);
135c2ce8f02SGuillaume Chatelet   if (count == 3)
1361c92911eSMichael Jones     return copy<_3>(dst, src);
137c2ce8f02SGuillaume Chatelet   if (count == 4)
1381c92911eSMichael Jones     return copy<_4>(dst, src);
139c2ce8f02SGuillaume Chatelet   if (count < 8)
1401c92911eSMichael Jones     return copy<HeadTail<_4>>(dst, src, count);
141c2ce8f02SGuillaume Chatelet   if (count < 16)
1421c92911eSMichael Jones     return copy<HeadTail<_8>>(dst, src, count);
143c2ce8f02SGuillaume Chatelet   if (count < 32)
1441c92911eSMichael Jones     return copy<HeadTail<_16>>(dst, src, count);
145c2ce8f02SGuillaume Chatelet   if (count < 64)
1461c92911eSMichael Jones     return copy<HeadTail<_32>>(dst, src, count);
147c2ce8f02SGuillaume Chatelet   if (count < 128)
1481c92911eSMichael Jones     return copy<HeadTail<_64>>(dst, src, count);
1491c92911eSMichael Jones   return copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
150c2ce8f02SGuillaume Chatelet #endif
151c2ce8f02SGuillaume Chatelet }
152c2ce8f02SGuillaume Chatelet 
153c2ce8f02SGuillaume Chatelet } // namespace __llvm_libc
154c2ce8f02SGuillaume Chatelet 
155c2ce8f02SGuillaume Chatelet #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
156