1 //===-- Memcpy implementation -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
11 
12 #include "src/__support/architectures.h"
13 #include "src/__support/common.h"
14 #include "src/string/memory_utils/elements.h"
15 #include "src/string/memory_utils/utils.h"
16 
17 #include <stddef.h> // size_t
18 
19 // Design rationale
20 // ================
21 //
// Using a profiler to observe size distributions for calls into libc
// functions, it was found that most operations act on a small number of
// bytes. This makes it important to favor small sizes.
25 //
26 // The tests for `count` are in ascending order so the cost of branching is
27 // proportional to the cost of copying.
28 //
29 // The function is written in C++ for several reasons:
// - The compiler can __see__ the code; this is useful when performing Profile
//   Guided Optimization, as the optimized code can take advantage of branching
//   probabilities.
33 // - It also allows for easier customization and favors testing multiple
34 //   implementation parameters.
35 // - As compilers and processors get better, the generated code is improved
36 //   with little change on the code side.
37 
38 namespace __llvm_libc {
39 
40 static inline void inline_memcpy(char *__restrict dst,
41                                  const char *__restrict src, size_t count) {
42   using namespace __llvm_libc::builtin;
43 #if defined(LLVM_LIBC_ARCH_X86)
44   /////////////////////////////////////////////////////////////////////////////
45   // LLVM_LIBC_ARCH_X86
46   /////////////////////////////////////////////////////////////////////////////
47 
48   // Whether to use only rep;movsb.
49   constexpr bool USE_ONLY_REP_MOVSB =
50       LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
51 
52   // kRepMovsBSize == -1 : Only CopyAligned is used.
53   // kRepMovsBSize ==  0 : Only RepMovsb is used.
54   // else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
55   constexpr size_t REP_MOVS_B_SIZE =
56 #if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE)
57       LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
58 #else
59       -1;
60 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
61 
62   // Whether target supports AVX instructions.
63   constexpr bool HAS_AVX = LLVM_LIBC_IS_DEFINED(__AVX__);
64 
65 #if defined(__AVX__)
66   using LoopBlockSize = _64;
67 #else
68   using LoopBlockSize = _32;
69 #endif
70 
71   if (USE_ONLY_REP_MOVSB)
72     return copy<x86::Accelerator>(dst, src, count);
73 
74   if (count == 0)
75     return;
76   if (count == 1)
77     return copy<_1>(dst, src);
78   if (count == 2)
79     return copy<_2>(dst, src);
80   if (count == 3)
81     return copy<_3>(dst, src);
82   if (count == 4)
83     return copy<_4>(dst, src);
84   if (count < 8)
85     return copy<HeadTail<_4>>(dst, src, count);
86   if (count < 16)
87     return copy<HeadTail<_8>>(dst, src, count);
88   if (count < 32)
89     return copy<HeadTail<_16>>(dst, src, count);
90   if (count < 64)
91     return copy<HeadTail<_32>>(dst, src, count);
92   if (count < 128)
93     return copy<HeadTail<_64>>(dst, src, count);
94   if (HAS_AVX && count < 256)
95     return copy<HeadTail<_128>>(dst, src, count);
96   if (count <= REP_MOVS_B_SIZE)
97     return copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
98                                                                  count);
99   return copy<x86::Accelerator>(dst, src, count);
100 #elif defined(LLVM_LIBC_ARCH_AARCH64)
101   /////////////////////////////////////////////////////////////////////////////
102   // LLVM_LIBC_ARCH_AARCH64
103   /////////////////////////////////////////////////////////////////////////////
104   if (count == 0)
105     return;
106   if (count == 1)
107     return copy<_1>(dst, src);
108   if (count == 2)
109     return copy<_2>(dst, src);
110   if (count == 3)
111     return copy<_3>(dst, src);
112   if (count == 4)
113     return copy<_4>(dst, src);
114   if (count < 8)
115     return copy<HeadTail<_4>>(dst, src, count);
116   if (count < 16)
117     return copy<HeadTail<_8>>(dst, src, count);
118   if (count < 32)
119     return copy<HeadTail<_16>>(dst, src, count);
120   if (count < 64)
121     return copy<HeadTail<_32>>(dst, src, count);
122   if (count < 128)
123     return copy<HeadTail<_64>>(dst, src, count);
124   return copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
125 #else
126   /////////////////////////////////////////////////////////////////////////////
127   // Default
128   /////////////////////////////////////////////////////////////////////////////
129   if (count == 0)
130     return;
131   if (count == 1)
132     return copy<_1>(dst, src);
133   if (count == 2)
134     return copy<_2>(dst, src);
135   if (count == 3)
136     return copy<_3>(dst, src);
137   if (count == 4)
138     return copy<_4>(dst, src);
139   if (count < 8)
140     return copy<HeadTail<_4>>(dst, src, count);
141   if (count < 16)
142     return copy<HeadTail<_8>>(dst, src, count);
143   if (count < 32)
144     return copy<HeadTail<_16>>(dst, src, count);
145   if (count < 64)
146     return copy<HeadTail<_32>>(dst, src, count);
147   if (count < 128)
148     return copy<HeadTail<_64>>(dst, src, count);
149   return copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
150 #endif
151 }
152 
153 } // namespace __llvm_libc
154 
155 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
156