1 //===-- Memcpy implementation -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
11 
12 #include "src/__support/architectures.h"
13 #include "src/__support/common.h"
14 #include "src/string/memory_utils/elements.h"
15 #include "src/string/memory_utils/utils.h"
16 
17 #include <stddef.h> // size_t
18 
19 // Design rationale
20 // ================
21 //
22 // Using a profiler to observe size distributions for calls into libc
23 // functions, it was found most operations act on a small number of bytes.
24 // This makes it important to favor small sizes.
25 //
26 // The tests for `count` are in ascending order so the cost of branching is
27 // proportional to the cost of copying.
28 //
29 // The function is written in C++ for several reasons:
// - The compiler can __see__ the code, which is useful when performing
//   Profile Guided Optimization, as the optimized code can take advantage of
//   branching probabilities.
33 // - It also allows for easier customization and favors testing multiple
34 //   implementation parameters.
35 // - As compilers and processors get better, the generated code is improved
36 //   with little change on the code side.
37 
38 namespace __llvm_libc {
39 
40 static inline void inline_memcpy(char *__restrict dst,
41                                  const char *__restrict src, size_t count) {
42 #if defined(LLVM_LIBC_ARCH_X86)
43   /////////////////////////////////////////////////////////////////////////////
44   // LLVM_LIBC_ARCH_X86
45   /////////////////////////////////////////////////////////////////////////////
46   using namespace __llvm_libc::x86;
47 
48   // Whether to use only rep;movsb.
49   constexpr bool kUseOnlyRepMovsb =
50       LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
51 
52   // kRepMovsBSize == -1 : Only CopyAligned is used.
53   // kRepMovsBSize ==  0 : Only RepMovsb is used.
54   // else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
55   constexpr size_t kRepMovsBSize =
56 #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE)
57       LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
58 #else
59       -1;
60 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
61 
62   // Whether target supports AVX instructions.
63   constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
64 
65 #if defined(__AVX__)
66   using LoopBlockSize = _64;
67 #else
68   using LoopBlockSize = _32;
69 #endif
70 
71   if (kUseOnlyRepMovsb)
72     return Copy<Accelerator>(dst, src, count);
73 
74   if (count == 0)
75     return;
76   if (count == 1)
77     return Copy<_1>(dst, src);
78   if (count == 2)
79     return Copy<_2>(dst, src);
80   if (count == 3)
81     return Copy<_3>(dst, src);
82   if (count == 4)
83     return Copy<_4>(dst, src);
84   if (count < 8)
85     return Copy<HeadTail<_4>>(dst, src, count);
86   if (count < 16)
87     return Copy<HeadTail<_8>>(dst, src, count);
88   if (count < 32)
89     return Copy<HeadTail<_16>>(dst, src, count);
90   if (count < 64)
91     return Copy<HeadTail<_32>>(dst, src, count);
92   if (count < 128)
93     return Copy<HeadTail<_64>>(dst, src, count);
94   if (kHasAvx && count < 256)
95     return Copy<HeadTail<_128>>(dst, src, count);
96   if (count <= kRepMovsBSize)
97     return Copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
98                                                                  count);
99   return Copy<Accelerator>(dst, src, count);
100 #elif defined(LLVM_LIBC_ARCH_AARCH64)
101   /////////////////////////////////////////////////////////////////////////////
102   // LLVM_LIBC_ARCH_AARCH64
103   /////////////////////////////////////////////////////////////////////////////
104   using namespace __llvm_libc::scalar;
105   if (count == 0)
106     return;
107   if (count == 1)
108     return Copy<_1>(dst, src);
109   if (count == 2)
110     return Copy<_2>(dst, src);
111   if (count == 3)
112     return Copy<_3>(dst, src);
113   if (count == 4)
114     return Copy<_4>(dst, src);
115   if (count < 8)
116     return Copy<HeadTail<_4>>(dst, src, count);
117   if (count < 16)
118     return Copy<HeadTail<_8>>(dst, src, count);
119   if (count < 32)
120     return Copy<HeadTail<_16>>(dst, src, count);
121   if (count < 64)
122     return Copy<HeadTail<_32>>(dst, src, count);
123   if (count < 128)
124     return Copy<HeadTail<_64>>(dst, src, count);
125   return Copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
126 #else
127   /////////////////////////////////////////////////////////////////////////////
128   // Default
129   /////////////////////////////////////////////////////////////////////////////
130   using namespace __llvm_libc::scalar;
131   if (count == 0)
132     return;
133   if (count == 1)
134     return Copy<_1>(dst, src);
135   if (count == 2)
136     return Copy<_2>(dst, src);
137   if (count == 3)
138     return Copy<_3>(dst, src);
139   if (count == 4)
140     return Copy<_4>(dst, src);
141   if (count < 8)
142     return Copy<HeadTail<_4>>(dst, src, count);
143   if (count < 16)
144     return Copy<HeadTail<_8>>(dst, src, count);
145   if (count < 32)
146     return Copy<HeadTail<_16>>(dst, src, count);
147   if (count < 64)
148     return Copy<HeadTail<_32>>(dst, src, count);
149   if (count < 128)
150     return Copy<HeadTail<_64>>(dst, src, count);
151   return Copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
152 #endif
153 }
154 
155 } // namespace __llvm_libc
156 
157 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
158