159198d06SGuillaume Chatelet #include "LibcBenchmark.h" 259198d06SGuillaume Chatelet #include "LibcMemoryBenchmark.h" 359198d06SGuillaume Chatelet #include "MemorySizeDistributions.h" 459198d06SGuillaume Chatelet #include "benchmark/benchmark.h" 5e4dee762SGuillaume Chatelet #include "llvm/ADT/ArrayRef.h" 6e4dee762SGuillaume Chatelet #include "llvm/ADT/Twine.h" 7e4dee762SGuillaume Chatelet #include <chrono> 859198d06SGuillaume Chatelet #include <cstdint> 959198d06SGuillaume Chatelet #include <random> 1059198d06SGuillaume Chatelet #include <vector> 1159198d06SGuillaume Chatelet 12e4dee762SGuillaume Chatelet using llvm::Align; 13e4dee762SGuillaume Chatelet using llvm::ArrayRef; 14e4dee762SGuillaume Chatelet using llvm::Twine; 15e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::BzeroConfiguration; 16e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::ComparisonSetup; 17e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::CopySetup; 18*4a9bcb60SGuillaume Chatelet using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration; 19e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::MemcpyConfiguration; 20e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::MemorySizeDistribution; 21e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::MemsetConfiguration; 22e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::OffsetDistribution; 23e4dee762SGuillaume Chatelet using llvm::libc_benchmarks::SetSetup; 24e4dee762SGuillaume Chatelet 25e4dee762SGuillaume Chatelet // Alignment to use for when accessing the buffers. 2659198d06SGuillaume Chatelet static constexpr Align kBenchmarkAlignment = Align::Constant<1>(); 2759198d06SGuillaume Chatelet 28e4dee762SGuillaume Chatelet static std::mt19937_64 &getGenerator() { 29e4dee762SGuillaume Chatelet static std::mt19937_64 Generator( 30e4dee762SGuillaume Chatelet std::chrono::system_clock::now().time_since_epoch().count()); 31e4dee762SGuillaume Chatelet return Generator; 32e4dee762SGuillaume Chatelet } 33e4dee762SGuillaume Chatelet 34e4dee762SGuillaume Chatelet template <typename SetupType, typename ConfigurationType> struct Runner { 35e4dee762SGuillaume Chatelet Runner(benchmark::State &S, llvm::ArrayRef<ConfigurationType> Configurations) 36e4dee762SGuillaume Chatelet : State(S), Distribution(SetupType::getDistributions()[State.range(0)]), 3759198d06SGuillaume Chatelet Probabilities(Distribution.Probabilities), 3859198d06SGuillaume Chatelet SizeSampler(Probabilities.begin(), Probabilities.end()), 39e4dee762SGuillaume Chatelet OffsetSampler(Setup.BufferSize, Probabilities.size() - 1, 40e4dee762SGuillaume Chatelet kBenchmarkAlignment), 41e4dee762SGuillaume Chatelet Configuration(Configurations[State.range(1)]) { 42e4dee762SGuillaume Chatelet for (auto &P : Setup.Parameters) { 43e4dee762SGuillaume Chatelet P.OffsetBytes = OffsetSampler(getGenerator()); 44e4dee762SGuillaume Chatelet P.SizeBytes = SizeSampler(getGenerator()); 45e4dee762SGuillaume Chatelet Setup.checkValid(P); 4659198d06SGuillaume Chatelet } 4759198d06SGuillaume Chatelet } 4859198d06SGuillaume Chatelet 49e4dee762SGuillaume Chatelet ~Runner() { 50e4dee762SGuillaume Chatelet const size_t AvgBytesPerIteration = Setup.getBatchBytes() / Setup.BatchSize; 5159198d06SGuillaume Chatelet const size_t TotalBytes = State.iterations() * AvgBytesPerIteration; 5259198d06SGuillaume Chatelet State.SetBytesProcessed(TotalBytes); 53e4dee762SGuillaume Chatelet State.SetItemsProcessed(State.iterations()); 54e4dee762SGuillaume Chatelet State.SetLabel((Twine(Configuration.Name) + "," + Distribution.Name).str()); 5559198d06SGuillaume Chatelet State.counters["bytes_per_cycle"] = benchmark::Counter( 5659198d06SGuillaume Chatelet TotalBytes / benchmark::CPUInfo::Get().cycles_per_second, 5759198d06SGuillaume Chatelet benchmark::Counter::kIsRate); 5859198d06SGuillaume Chatelet } 5959198d06SGuillaume Chatelet 60e4dee762SGuillaume Chatelet inline void runBatch() { 61e4dee762SGuillaume Chatelet for (const auto &P : Setup.Parameters) 62e4dee762SGuillaume Chatelet benchmark::DoNotOptimize(Setup.Call(P, Configuration.Function)); 6359198d06SGuillaume Chatelet } 6459198d06SGuillaume Chatelet 65e4dee762SGuillaume Chatelet size_t getBatchSize() const { return Setup.BatchSize; } 66e4dee762SGuillaume Chatelet 6759198d06SGuillaume Chatelet private: 68e4dee762SGuillaume Chatelet SetupType Setup; 6959198d06SGuillaume Chatelet benchmark::State &State; 7059198d06SGuillaume Chatelet MemorySizeDistribution Distribution; 7159198d06SGuillaume Chatelet ArrayRef<double> Probabilities; 7259198d06SGuillaume Chatelet std::discrete_distribution<unsigned> SizeSampler; 7359198d06SGuillaume Chatelet OffsetDistribution OffsetSampler; 74e4dee762SGuillaume Chatelet ConfigurationType Configuration; 7559198d06SGuillaume Chatelet }; 7659198d06SGuillaume Chatelet 77e4dee762SGuillaume Chatelet #define BENCHMARK_MEMORY_FUNCTION(BM_NAME, SETUP, CONFIGURATION_TYPE, \ 78e4dee762SGuillaume Chatelet CONFIGURATION_ARRAY_REF) \ 79e4dee762SGuillaume Chatelet void BM_NAME(benchmark::State &State) { \ 80e4dee762SGuillaume Chatelet Runner<SETUP, CONFIGURATION_TYPE> Setup(State, CONFIGURATION_ARRAY_REF); \ 81e4dee762SGuillaume Chatelet const size_t BatchSize = Setup.getBatchSize(); \ 82e4dee762SGuillaume Chatelet while (State.KeepRunningBatch(BatchSize)) \ 83e4dee762SGuillaume Chatelet Setup.runBatch(); \ 84e4dee762SGuillaume Chatelet } \ 85e4dee762SGuillaume Chatelet BENCHMARK(BM_NAME)->Apply([](benchmark::internal::Benchmark *benchmark) { \ 86e4dee762SGuillaume Chatelet const int64_t DistributionSize = SETUP::getDistributions().size(); \ 87e4dee762SGuillaume Chatelet const int64_t ConfigurationSize = CONFIGURATION_ARRAY_REF.size(); \ 88e4dee762SGuillaume Chatelet for (int64_t DistIndex = 0; DistIndex < DistributionSize; ++DistIndex) \ 89e4dee762SGuillaume Chatelet for (int64_t ConfIndex = 0; ConfIndex < ConfigurationSize; ++ConfIndex) \ 90e4dee762SGuillaume Chatelet benchmark->Args({DistIndex, ConfIndex}); \ 91e4dee762SGuillaume Chatelet }) 9259198d06SGuillaume Chatelet 93adc18ad6SGuillaume Chatelet extern llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations(); 94e4dee762SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Memcpy, CopySetup, MemcpyConfiguration, 95adc18ad6SGuillaume Chatelet getMemcpyConfigurations()); 96adc18ad6SGuillaume Chatelet 97*4a9bcb60SGuillaume Chatelet extern llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations(); 98*4a9bcb60SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Memcmp, ComparisonSetup, MemcmpOrBcmpConfiguration, 99adc18ad6SGuillaume Chatelet getMemcmpConfigurations()); 100adc18ad6SGuillaume Chatelet 101*4a9bcb60SGuillaume Chatelet extern llvm::ArrayRef<MemcmpOrBcmpConfiguration> getBcmpConfigurations(); 102*4a9bcb60SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Bcmp, ComparisonSetup, MemcmpOrBcmpConfiguration, 103adc18ad6SGuillaume Chatelet getBcmpConfigurations()); 104adc18ad6SGuillaume Chatelet 105adc18ad6SGuillaume Chatelet extern llvm::ArrayRef<MemsetConfiguration> getMemsetConfigurations(); 106e4dee762SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Memset, SetSetup, MemsetConfiguration, 107adc18ad6SGuillaume Chatelet getMemsetConfigurations()); 108adc18ad6SGuillaume Chatelet 109adc18ad6SGuillaume Chatelet extern llvm::ArrayRef<BzeroConfiguration> getBzeroConfigurations(); 110e4dee762SGuillaume Chatelet BENCHMARK_MEMORY_FUNCTION(BM_Bzero, SetSetup, BzeroConfiguration, 111adc18ad6SGuillaume Chatelet getBzeroConfigurations()); 112