159198d06SGuillaume Chatelet #include "LibcBenchmark.h"
259198d06SGuillaume Chatelet #include "LibcMemoryBenchmark.h"
359198d06SGuillaume Chatelet #include "MemorySizeDistributions.h"
459198d06SGuillaume Chatelet #include "benchmark/benchmark.h"
5e4dee762SGuillaume Chatelet #include "llvm/ADT/ArrayRef.h"
6e4dee762SGuillaume Chatelet #include "llvm/ADT/Twine.h"
7e4dee762SGuillaume Chatelet #include <chrono>
859198d06SGuillaume Chatelet #include <cstdint>
959198d06SGuillaume Chatelet #include <random>
1059198d06SGuillaume Chatelet #include <vector>
1159198d06SGuillaume Chatelet 
12e4dee762SGuillaume Chatelet using llvm::Align;
13e4dee762SGuillaume Chatelet using llvm::ArrayRef;
14e4dee762SGuillaume Chatelet using llvm::Twine;
15e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::BzeroConfiguration;
16e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::ComparisonSetup;
17e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::CopySetup;
18*4a9bcb60SGuillaume Chatelet using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;
19e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::MemcpyConfiguration;
20e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::MemorySizeDistribution;
21e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::MemsetConfiguration;
22e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::OffsetDistribution;
23e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::SetSetup;
24e4dee762SGuillaume Chatelet 
// Alignment handed to the offset sampler that decides where the buffers are
// accessed. Align::Constant<1>() is a one byte alignment, which presumably
// leaves the sampled offsets unconstrained (confirm against
// OffsetDistribution).
static constexpr Align kBenchmarkAlignment = Align::Constant<1>();
2759198d06SGuillaume Chatelet 
// Returns a reference to a single function-local random engine. The engine is
// built the first time this function is called and is seeded with the current
// system clock tick count, so the generated sequence changes between runs.
static std::mt19937_64 &getGenerator() {
  static std::mt19937_64 Generator = [] {
    const auto Seed =
        std::chrono::system_clock::now().time_since_epoch().count();
    return std::mt19937_64(Seed);
  }();
  return Generator;
}
33e4dee762SGuillaume Chatelet 
34e4dee762SGuillaume Chatelet template <typename SetupType, typename ConfigurationType> struct Runner {
35e4dee762SGuillaume Chatelet   Runner(benchmark::State &S, llvm::ArrayRef<ConfigurationType> Configurations)
36e4dee762SGuillaume Chatelet       : State(S), Distribution(SetupType::getDistributions()[State.range(0)]),
3759198d06SGuillaume Chatelet         Probabilities(Distribution.Probabilities),
3859198d06SGuillaume Chatelet         SizeSampler(Probabilities.begin(), Probabilities.end()),
39e4dee762SGuillaume Chatelet         OffsetSampler(Setup.BufferSize, Probabilities.size() - 1,
40e4dee762SGuillaume Chatelet                       kBenchmarkAlignment),
41e4dee762SGuillaume Chatelet         Configuration(Configurations[State.range(1)]) {
42e4dee762SGuillaume Chatelet     for (auto &P : Setup.Parameters) {
43e4dee762SGuillaume Chatelet       P.OffsetBytes = OffsetSampler(getGenerator());
44e4dee762SGuillaume Chatelet       P.SizeBytes = SizeSampler(getGenerator());
45e4dee762SGuillaume Chatelet       Setup.checkValid(P);
4659198d06SGuillaume Chatelet     }
4759198d06SGuillaume Chatelet   }
4859198d06SGuillaume Chatelet 
49e4dee762SGuillaume Chatelet   ~Runner() {
50e4dee762SGuillaume Chatelet     const size_t AvgBytesPerIteration = Setup.getBatchBytes() / Setup.BatchSize;
5159198d06SGuillaume Chatelet     const size_t TotalBytes = State.iterations() * AvgBytesPerIteration;
5259198d06SGuillaume Chatelet     State.SetBytesProcessed(TotalBytes);
53e4dee762SGuillaume Chatelet     State.SetItemsProcessed(State.iterations());
54e4dee762SGuillaume Chatelet     State.SetLabel((Twine(Configuration.Name) + "," + Distribution.Name).str());
5559198d06SGuillaume Chatelet     State.counters["bytes_per_cycle"] = benchmark::Counter(
5659198d06SGuillaume Chatelet         TotalBytes / benchmark::CPUInfo::Get().cycles_per_second,
5759198d06SGuillaume Chatelet         benchmark::Counter::kIsRate);
5859198d06SGuillaume Chatelet   }
5959198d06SGuillaume Chatelet 
60e4dee762SGuillaume Chatelet   inline void runBatch() {
61e4dee762SGuillaume Chatelet     for (const auto &P : Setup.Parameters)
62e4dee762SGuillaume Chatelet       benchmark::DoNotOptimize(Setup.Call(P, Configuration.Function));
6359198d06SGuillaume Chatelet   }
6459198d06SGuillaume Chatelet 
65e4dee762SGuillaume Chatelet   size_t getBatchSize() const { return Setup.BatchSize; }
66e4dee762SGuillaume Chatelet 
6759198d06SGuillaume Chatelet private:
68e4dee762SGuillaume Chatelet   SetupType Setup;
6959198d06SGuillaume Chatelet   benchmark::State &State;
7059198d06SGuillaume Chatelet   MemorySizeDistribution Distribution;
7159198d06SGuillaume Chatelet   ArrayRef<double> Probabilities;
7259198d06SGuillaume Chatelet   std::discrete_distribution<unsigned> SizeSampler;
7359198d06SGuillaume Chatelet   OffsetDistribution OffsetSampler;
74e4dee762SGuillaume Chatelet   ConfigurationType Configuration;
7559198d06SGuillaume Chatelet };
7659198d06SGuillaume Chatelet 
// Defines the benchmark function BM_NAME and registers it with Google
// Benchmark.
//
// The generated function builds a Runner<SETUP, CONFIGURATION_TYPE> from
// CONFIGURATION_ARRAY_REF and runs it batch by batch for as long as the
// framework asks for more iterations. Registration adds one argument pair
// {distribution index, configuration index} for every combination of an entry
// of SETUP::getDistributions() and an entry of CONFIGURATION_ARRAY_REF.
//
// The expansion does not end with a semicolon; the invocation supplies it.
#define BENCHMARK_MEMORY_FUNCTION(BM_NAME, SETUP, CONFIGURATION_TYPE,          \
                                  CONFIGURATION_ARRAY_REF)                     \
  void BM_NAME(benchmark::State &State) {                                      \
    Runner<SETUP, CONFIGURATION_TYPE> BatchRunner(State,                       \
                                                  CONFIGURATION_ARRAY_REF);    \
    const size_t CallsPerBatch = BatchRunner.getBatchSize();                   \
    while (State.KeepRunningBatch(CallsPerBatch))                              \
      BatchRunner.runBatch();                                                  \
  }                                                                            \
  BENCHMARK(BM_NAME)->Apply([](benchmark::internal::Benchmark *Registration) { \
    const int64_t NumDistributions = SETUP::getDistributions().size();         \
    const int64_t NumConfigurations = CONFIGURATION_ARRAY_REF.size();          \
    for (int64_t Dist = 0; Dist < NumDistributions; ++Dist)                    \
      for (int64_t Conf = 0; Conf < NumConfigurations; ++Conf)                 \
        Registration->Args({Dist, Conf});                                      \
  })
9259198d06SGuillaume Chatelet 
93adc18ad6SGuillaume Chatelet extern llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations();
94e4dee762SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Memcpy, CopySetup, MemcpyConfiguration,
95adc18ad6SGuillaume Chatelet                           getMemcpyConfigurations());
96adc18ad6SGuillaume Chatelet 
97*4a9bcb60SGuillaume Chatelet extern llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations();
98*4a9bcb60SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Memcmp, ComparisonSetup, MemcmpOrBcmpConfiguration,
99adc18ad6SGuillaume Chatelet                           getMemcmpConfigurations());
100adc18ad6SGuillaume Chatelet 
101*4a9bcb60SGuillaume Chatelet extern llvm::ArrayRef<MemcmpOrBcmpConfiguration> getBcmpConfigurations();
102*4a9bcb60SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Bcmp, ComparisonSetup, MemcmpOrBcmpConfiguration,
103adc18ad6SGuillaume Chatelet                           getBcmpConfigurations());
104adc18ad6SGuillaume Chatelet 
105adc18ad6SGuillaume Chatelet extern llvm::ArrayRef<MemsetConfiguration> getMemsetConfigurations();
106e4dee762SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Memset, SetSetup, MemsetConfiguration,
107adc18ad6SGuillaume Chatelet                           getMemsetConfigurations());
108adc18ad6SGuillaume Chatelet 
109adc18ad6SGuillaume Chatelet extern llvm::ArrayRef<BzeroConfiguration> getBzeroConfigurations();
110e4dee762SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Bzero, SetSetup, BzeroConfiguration,
111adc18ad6SGuillaume Chatelet                           getBzeroConfigurations());
112