1 #include "LibcBenchmark.h"
2 #include "LibcMemoryBenchmark.h"
3 #include "MemorySizeDistributions.h"
4 #include "benchmark/benchmark.h"
5 #include "llvm/ADT/ArrayRef.h"
6 #include "llvm/ADT/Twine.h"
7 #include <chrono>
8 #include <cstdint>
9 #include <random>
10 #include <vector>
11 
12 using llvm::Align;
13 using llvm::ArrayRef;
14 using llvm::Twine;
15 using llvm::libc_benchmarks::BzeroConfiguration;
16 using llvm::libc_benchmarks::ComparisonSetup;
17 using llvm::libc_benchmarks::CopySetup;
18 using llvm::libc_benchmarks::MemcmpConfiguration;
19 using llvm::libc_benchmarks::MemcpyConfiguration;
20 using llvm::libc_benchmarks::MemorySizeDistribution;
21 using llvm::libc_benchmarks::MemsetConfiguration;
22 using llvm::libc_benchmarks::OffsetDistribution;
23 using llvm::libc_benchmarks::SetSetup;
24 
// Forward declarations of the LLVM libc entry points being benchmarked. Their
// definitions are expected to be supplied at link time.
namespace __llvm_libc {

// Copy and fill primitives.
void *memcpy(void *__restrict, const void *__restrict, size_t);
void *memset(void *, int, size_t);
void bzero(void *, size_t);

// Comparison primitives.
int memcmp(const void *, const void *, size_t);
int bcmp(const void *, const void *, size_t);

} // namespace __llvm_libc
34 
35 // List of implementations to test.
36 static constexpr MemcpyConfiguration kMemcpyConfigurations[] = {
37     {__llvm_libc::memcpy, "__llvm_libc::memcpy"}};
38 
39 static constexpr MemcmpConfiguration kMemcmpConfigurations[] = {
40     {__llvm_libc::memcmp, "__llvm_libc::memcmp"}};
41 
42 static constexpr MemcmpConfiguration kBcmpConfigurations[] = {
43     {__llvm_libc::bcmp, "__llvm_libc::bcmp"}};
44 
45 static constexpr MemsetConfiguration kMemsetConfigurations[] = {
46     {__llvm_libc::memset, "__llvm_libc::memset"}};
47 
48 static constexpr BzeroConfiguration kBzeroConfigurations[] = {
49     {__llvm_libc::bzero, "__llvm_libc::bzero"}};
50 
51 // Alignment to use for when accessing the buffers.
52 static constexpr Align kBenchmarkAlignment = Align::Constant<1>();
53 
// Returns the shared random engine. It is created on first use and seeded
// from the system clock, so each process run gets a different sequence.
static std::mt19937_64 &getGenerator() {
  using Clock = std::chrono::system_clock;
  using Engine = std::mt19937_64;
  // The seed expression is part of the static initializer, so the clock is
  // read only once, on the first call.
  static Engine SharedEngine(static_cast<Engine::result_type>(
      Clock::now().time_since_epoch().count()));
  return SharedEngine;
}
59 
60 template <typename SetupType, typename ConfigurationType> struct Runner {
61   Runner(benchmark::State &S, llvm::ArrayRef<ConfigurationType> Configurations)
62       : State(S), Distribution(SetupType::getDistributions()[State.range(0)]),
63         Probabilities(Distribution.Probabilities),
64         SizeSampler(Probabilities.begin(), Probabilities.end()),
65         OffsetSampler(Setup.BufferSize, Probabilities.size() - 1,
66                       kBenchmarkAlignment),
67         Configuration(Configurations[State.range(1)]) {
68     for (auto &P : Setup.Parameters) {
69       P.OffsetBytes = OffsetSampler(getGenerator());
70       P.SizeBytes = SizeSampler(getGenerator());
71       Setup.checkValid(P);
72     }
73   }
74 
75   ~Runner() {
76     const size_t AvgBytesPerIteration = Setup.getBatchBytes() / Setup.BatchSize;
77     const size_t TotalBytes = State.iterations() * AvgBytesPerIteration;
78     State.SetBytesProcessed(TotalBytes);
79     State.SetItemsProcessed(State.iterations());
80     State.SetLabel((Twine(Configuration.Name) + "," + Distribution.Name).str());
81     State.counters["bytes_per_cycle"] = benchmark::Counter(
82         TotalBytes / benchmark::CPUInfo::Get().cycles_per_second,
83         benchmark::Counter::kIsRate);
84   }
85 
86   inline void runBatch() {
87     for (const auto &P : Setup.Parameters)
88       benchmark::DoNotOptimize(Setup.Call(P, Configuration.Function));
89   }
90 
91   size_t getBatchSize() const { return Setup.BatchSize; }
92 
93 private:
94   SetupType Setup;
95   benchmark::State &State;
96   MemorySizeDistribution Distribution;
97   ArrayRef<double> Probabilities;
98   std::discrete_distribution<unsigned> SizeSampler;
99   OffsetDistribution OffsetSampler;
100   ConfigurationType Configuration;
101 };
102 
// Defines the benchmark function BM_NAME and registers it with one argument
// pair (distribution index, configuration index) for every combination of
// SETUP::getDistributions() and CONFIGURATION_ARRAY_REF. The function builds a
// Runner from the benchmark state and runs whole batches until the framework
// stops asking for more.
#define BENCHMARK_MEMORY_FUNCTION(BM_NAME, SETUP, CONFIGURATION_TYPE,          \
                                  CONFIGURATION_ARRAY_REF)                     \
  void BM_NAME(benchmark::State &State) {                                      \
    Runner<SETUP, CONFIGURATION_TYPE> BenchRunner(State,                       \
                                                  CONFIGURATION_ARRAY_REF);    \
    const size_t IterationsPerBatch = BenchRunner.getBatchSize();              \
    while (State.KeepRunningBatch(IterationsPerBatch))                         \
      BenchRunner.runBatch();                                                  \
  }                                                                            \
  BENCHMARK(BM_NAME)->Apply([](benchmark::internal::Benchmark *Bench) {        \
    const int64_t NumDistributions = SETUP::getDistributions().size();         \
    const int64_t NumConfigurations = CONFIGURATION_ARRAY_REF.size();          \
    for (int64_t DistIdx = 0; DistIdx < NumDistributions; ++DistIdx)           \
      for (int64_t ConfIdx = 0; ConfIdx < NumConfigurations; ++ConfIdx)        \
        Bench->Args({DistIdx, ConfIdx});                                       \
  })
118 
119 BENCHMARK_MEMORY_FUNCTION(BM_Memcpy, CopySetup, MemcpyConfiguration,
120                           llvm::makeArrayRef(kMemcpyConfigurations));
121 BENCHMARK_MEMORY_FUNCTION(BM_Memcmp, ComparisonSetup, MemcmpConfiguration,
122                           llvm::makeArrayRef(kMemcmpConfigurations));
123 BENCHMARK_MEMORY_FUNCTION(BM_Bcmp, ComparisonSetup, MemcmpConfiguration,
124                           llvm::makeArrayRef(kBcmpConfigurations));
125 BENCHMARK_MEMORY_FUNCTION(BM_Memset, SetSetup, MemsetConfiguration,
126                           llvm::makeArrayRef(kMemsetConfigurations));
127 BENCHMARK_MEMORY_FUNCTION(BM_Bzero, SetSetup, BzeroConfiguration,
128                           llvm::makeArrayRef(kBzeroConfigurations));
129