1 //===-- Memcpy implementation -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
11 
12 #include "src/__support/architectures.h"
13 #include "src/__support/common.h"
14 #include "src/string/memory_utils/elements.h"
15 #include "src/string/memory_utils/utils.h"
16 
17 #include <stddef.h> // size_t
18 
19 // Design rationale
20 // ================
21 //
22 // Using a profiler to observe size distributions for calls into libc
23 // functions, it was found most operations act on a small number of bytes.
24 // This makes it important to favor small sizes.
25 //
26 // The tests for `count` are in ascending order so the cost of branching is
27 // proportional to the cost of copying.
28 //
29 // The function is written in C++ for several reasons:
30 // - The compiler can __see__ the code, this is useful when performing Profile
31 //   Guided Optimization as the optimized code can take advantage of branching
32 //   probabilities.
33 // - It also allows for easier customization and favors testing multiple
34 //   implementation parameters.
35 // - As compilers and processors get better, the generated code is improved
36 //   with little change on the code side.
37 
38 namespace __llvm_libc {
39 
40 static inline void inline_memcpy(char *__restrict dst,
41                                  const char *__restrict src, size_t count) {
42 #if defined(LLVM_LIBC_ARCH_X86)
43   /////////////////////////////////////////////////////////////////////////////
44   // LLVM_LIBC_ARCH_X86
45   /////////////////////////////////////////////////////////////////////////////
46   using namespace __llvm_libc::x86;
47 
48   // Whether to use only rep;movsb.
49   constexpr bool USE_ONLY_REP_MOVSB =
50       LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
51 
52   // kRepMovsBSize == -1 : Only CopyAligned is used.
53   // kRepMovsBSize ==  0 : Only RepMovsb is used.
54   // else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
55   constexpr size_t REP_MOVS_B_SIZE =
56 #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE)
57       LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
58 #else
59       -1;
60 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
61 
62   // Whether target supports AVX instructions.
63   constexpr bool HAS_AVX = LLVM_LIBC_IS_DEFINED(__AVX__);
64 
65 #if defined(__AVX__)
66   using LoopBlockSize = _64;
67 #else
68   using LoopBlockSize = _32;
69 #endif
70 
71   if (USE_ONLY_REP_MOVSB)
72     return copy<Accelerator>(dst, src, count);
73 
74   if (count == 0)
75     return;
76   if (count == 1)
77     return copy<_1>(dst, src);
78   if (count == 2)
79     return copy<_2>(dst, src);
80   if (count == 3)
81     return copy<_3>(dst, src);
82   if (count == 4)
83     return copy<_4>(dst, src);
84   if (count < 8)
85     return copy<HeadTail<_4>>(dst, src, count);
86   if (count < 16)
87     return copy<HeadTail<_8>>(dst, src, count);
88   if (count < 32)
89     return copy<HeadTail<_16>>(dst, src, count);
90   if (count < 64)
91     return copy<HeadTail<_32>>(dst, src, count);
92   if (count < 128)
93     return copy<HeadTail<_64>>(dst, src, count);
94   if (HAS_AVX && count < 256)
95     return copy<HeadTail<_128>>(dst, src, count);
96   if (count <= REP_MOVS_B_SIZE)
97     return copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
98                                                                  count);
99   return copy<Accelerator>(dst, src, count);
100 #elif defined(LLVM_LIBC_ARCH_AARCH64)
101   /////////////////////////////////////////////////////////////////////////////
102   // LLVM_LIBC_ARCH_AARCH64
103   /////////////////////////////////////////////////////////////////////////////
104   using namespace __llvm_libc::scalar;
105   if (count == 0)
106     return;
107   if (count == 1)
108     return copy<_1>(dst, src);
109   if (count == 2)
110     return copy<_2>(dst, src);
111   if (count == 3)
112     return copy<_3>(dst, src);
113   if (count == 4)
114     return copy<_4>(dst, src);
115   if (count < 8)
116     return copy<HeadTail<_4>>(dst, src, count);
117   if (count < 16)
118     return copy<HeadTail<_8>>(dst, src, count);
119   if (count < 32)
120     return copy<HeadTail<_16>>(dst, src, count);
121   if (count < 64)
122     return copy<HeadTail<_32>>(dst, src, count);
123   if (count < 128)
124     return copy<HeadTail<_64>>(dst, src, count);
125   return copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
126 #else
127   /////////////////////////////////////////////////////////////////////////////
128   // Default
129   /////////////////////////////////////////////////////////////////////////////
130   using namespace __llvm_libc::scalar;
131   if (count == 0)
132     return;
133   if (count == 1)
134     return copy<_1>(dst, src);
135   if (count == 2)
136     return copy<_2>(dst, src);
137   if (count == 3)
138     return copy<_3>(dst, src);
139   if (count == 4)
140     return copy<_4>(dst, src);
141   if (count < 8)
142     return copy<HeadTail<_4>>(dst, src, count);
143   if (count < 16)
144     return copy<HeadTail<_8>>(dst, src, count);
145   if (count < 32)
146     return copy<HeadTail<_16>>(dst, src, count);
147   if (count < 64)
148     return copy<HeadTail<_32>>(dst, src, count);
149   if (count < 128)
150     return copy<HeadTail<_64>>(dst, src, count);
151   return copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
152 #endif
153 }
154 
155 } // namespace __llvm_libc
156 
157 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
158