1 //===-- Benchmark ---------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "JSON.h" 10 #include "LibcBenchmark.h" 11 #include "LibcMemoryBenchmark.h" 12 #include "MemorySizeDistributions.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include "llvm/Support/FileSystem.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #include <cstring> 21 22 namespace __llvm_libc { 23 24 extern void *memcpy(void *__restrict, const void *__restrict, size_t); 25 extern void *memset(void *, int, size_t); 26 extern void bzero(void *, size_t); 27 extern int memcmp(const void *, const void *, size_t); 28 29 } // namespace __llvm_libc 30 31 namespace llvm { 32 namespace libc_benchmarks { 33 34 static cl::opt<std::string> 35 StudyName("study-name", cl::desc("The name for this study"), cl::Required); 36 37 static cl::opt<std::string> 38 SizeDistributionName("size-distribution-name", 39 cl::desc("The name of the distribution to use")); 40 41 static cl::opt<bool> 42 SweepMode("sweep-mode", 43 cl::desc("If set, benchmark all sizes from 0 to sweep-max-size")); 44 45 static cl::opt<uint32_t> 46 SweepMaxSize("sweep-max-size", 47 cl::desc("The maximum size to use in sweep-mode"), 48 cl::init(256)); 49 50 static cl::opt<uint32_t> 51 AlignedAccess("aligned-access", 52 cl::desc("The alignment to use when accessing the buffers\n" 53 "Default is unaligned\n" 54 "Use 0 to disable address randomization"), 55 cl::init(1)); 56 57 static cl::opt<std::string> Output("output", 58 cl::desc("Specify output filename"), 59 cl::value_desc("filename"), cl::init("-")); 60 61 static cl::opt<uint32_t> 62 NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"), 63 cl::init(1)); 64 65 static constexpr int64_t KiB = 1024; 66 static constexpr int64_t ParameterStorageBytes = 4 * KiB; 67 static constexpr int64_t L1LeftAsideBytes = 1 * KiB; 68 69 struct ParameterType { 70 unsigned OffsetBytes : 16; // max : 16 KiB - 1 71 unsigned SizeBytes : 16; // max : 16 KiB - 1 72 }; 73 74 #if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY) 75 struct Benchmark { 76 static constexpr auto GetDistributions = &getMemcpySizeDistributions; 77 static constexpr size_t BufferCount = 2; 78 79 Benchmark(const size_t BufferSize) 80 : SrcBuffer(BufferSize), DstBuffer(BufferSize) {} 81 82 inline auto functor() { 83 return [this](ParameterType P) { 84 __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes, 85 P.SizeBytes); 86 return DstBuffer[P.OffsetBytes]; 87 }; 88 } 89 90 AlignedBuffer SrcBuffer; 91 AlignedBuffer DstBuffer; 92 }; 93 #elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET) 94 struct Benchmark { 95 static constexpr auto GetDistributions = &getMemsetSizeDistributions; 96 static constexpr size_t BufferCount = 1; 97 98 Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} 99 100 inline auto functor() { 101 return [this](ParameterType P) { 102 __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF, 103 P.SizeBytes); 104 return DstBuffer[P.OffsetBytes]; 105 }; 106 } 107 108 AlignedBuffer DstBuffer; 109 }; 110 #elif defined(LIBC_BENCHMARK_FUNCTION_BZERO) 111 struct Benchmark { 112 static constexpr auto GetDistributions = &getMemsetSizeDistributions; 113 static constexpr size_t BufferCount = 1; 114 115 Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} 116 117 inline auto functor() { 118 return [this](ParameterType P) { 119 __llvm_libc::bzero(DstBuffer + P.OffsetBytes, P.SizeBytes); 120 return DstBuffer[P.OffsetBytes]; 121 }; 122 } 123 124 AlignedBuffer DstBuffer; 125 }; 126 #elif defined(LIBC_BENCHMARK_FUNCTION_MEMCMP) 127 struct Benchmark { 128 static constexpr auto GetDistributions = &getMemcmpSizeDistributions; 129 static constexpr size_t BufferCount = 2; 130 131 Benchmark(const size_t BufferSize) 132 : BufferA(BufferSize), BufferB(BufferSize) { 133 // The memcmp buffers always compare equal. 134 memset(BufferA.begin(), 0xF, BufferSize); 135 memset(BufferB.begin(), 0xF, BufferSize); 136 } 137 138 inline auto functor() { 139 return [this](ParameterType P) { 140 return __llvm_libc::memcmp(BufferA + P.OffsetBytes, 141 BufferB + P.OffsetBytes, P.SizeBytes); 142 }; 143 } 144 145 AlignedBuffer BufferA; 146 AlignedBuffer BufferB; 147 }; 148 #else 149 #error "Missing LIBC_BENCHMARK_FUNCTION_XXX definition" 150 #endif 151 152 struct Harness : Benchmark { 153 Harness(const size_t BufferSize, size_t BatchParameterCount, 154 std::function<unsigned()> SizeSampler, 155 std::function<unsigned()> OffsetSampler) 156 : Benchmark(BufferSize), BufferSize(BufferSize), 157 Parameters(BatchParameterCount), SizeSampler(SizeSampler), 158 OffsetSampler(OffsetSampler) {} 159 160 CircularArrayRef<ParameterType> generateBatch(size_t Iterations) { 161 for (auto &P : Parameters) { 162 P.OffsetBytes = OffsetSampler(); 163 P.SizeBytes = SizeSampler(); 164 if (P.OffsetBytes + P.SizeBytes >= BufferSize) 165 report_fatal_error("Call would result in buffer overflow"); 166 } 167 return cycle(makeArrayRef(Parameters), Iterations); 168 } 169 170 private: 171 const size_t BufferSize; 172 std::vector<ParameterType> Parameters; 173 std::function<unsigned()> SizeSampler; 174 std::function<unsigned()> OffsetSampler; 175 }; 176 177 size_t getL1DataCacheSize() { 178 const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches; 179 const auto IsL1DataCache = [](const CacheInfo &CI) { 180 return CI.Type == "Data" && CI.Level == 1; 181 }; 182 const auto CacheIt = find_if(CacheInfos, IsL1DataCache); 183 if (CacheIt != CacheInfos.end()) 184 return CacheIt->Size; 185 report_fatal_error("Unable to read L1 Cache Data Size"); 186 } 187 188 struct MemfunctionBenchmark { 189 MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize()) 190 : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes), 191 BufferSize(AvailableSize / Benchmark::BufferCount), 192 BatchParameterCount(BufferSize / sizeof(ParameterType)) { 193 // Handling command line flags 194 if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100) 195 report_fatal_error("Not enough L1 cache"); 196 197 if (!isPowerOfTwoOrZero(AlignedAccess)) 198 report_fatal_error(AlignedAccess.ArgStr + 199 Twine(" must be a power of two or zero")); 200 201 const bool HasDistributionName = !SizeDistributionName.empty(); 202 if (SweepMode && HasDistributionName) 203 report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) + 204 "` or `--" + Twine(SizeDistributionName.ArgStr) + "`"); 205 206 if (SweepMode) { 207 MaxSizeValue = SweepMaxSize; 208 } else { 209 std::map<StringRef, MemorySizeDistribution> Map; 210 for (MemorySizeDistribution Distribution : Benchmark::GetDistributions()) 211 Map[Distribution.Name] = Distribution; 212 if (Map.count(SizeDistributionName) == 0) { 213 std::string Message; 214 raw_string_ostream Stream(Message); 215 Stream << "Unknown --" << SizeDistributionName.ArgStr << "='" 216 << SizeDistributionName << "', available distributions:\n"; 217 for (const auto &Pair : Map) 218 Stream << "'" << Pair.first << "'\n"; 219 report_fatal_error(Stream.str()); 220 } 221 SizeDistribution = Map[SizeDistributionName]; 222 MaxSizeValue = SizeDistribution.Probabilities.size() - 1; 223 } 224 225 // Setup study. 226 Study.StudyName = StudyName; 227 Runtime &RI = Study.Runtime; 228 RI.Host = HostState::get(); 229 RI.BufferSize = BufferSize; 230 RI.BatchParameterCount = BatchParameterCount; 231 232 BenchmarkOptions &BO = RI.BenchmarkOptions; 233 BO.MinDuration = std::chrono::milliseconds(1); 234 BO.MaxDuration = std::chrono::seconds(1); 235 BO.MaxIterations = 10'000'000U; 236 BO.MinSamples = 4; 237 BO.MaxSamples = 1000; 238 BO.Epsilon = 0.01; // 1% 239 BO.ScalingFactor = 1.4; 240 241 StudyConfiguration &SC = Study.Configuration; 242 SC.NumTrials = NumTrials; 243 SC.IsSweepMode = SweepMode; 244 if (SweepMode) 245 SC.SweepModeMaxSize = SweepMaxSize; 246 else 247 SC.SizeDistributionName = SizeDistributionName; 248 SC.AccessAlignment = MaybeAlign(AlignedAccess); 249 SC.Function = LIBC_BENCHMARK_FUNCTION_NAME; 250 } 251 252 Study run() { 253 if (SweepMode) 254 runSweepMode(); 255 else 256 runDistributionMode(); 257 return Study; 258 } 259 260 private: 261 const int64_t AvailableSize; 262 const int64_t BufferSize; 263 const size_t BatchParameterCount; 264 size_t MaxSizeValue = 0; 265 MemorySizeDistribution SizeDistribution; 266 Study Study; 267 std::mt19937_64 Gen; 268 269 static constexpr bool isPowerOfTwoOrZero(size_t Value) { 270 return (Value & (Value - 1U)) == 0; 271 } 272 273 std::function<unsigned()> geOffsetSampler() { 274 return [this]() { 275 static OffsetDistribution OD(BufferSize, MaxSizeValue, 276 Study.Configuration.AccessAlignment); 277 return OD(Gen); 278 }; 279 } 280 281 std::function<unsigned()> getSizeSampler() { 282 return [this]() { 283 static std::discrete_distribution<unsigned> Distribution( 284 SizeDistribution.Probabilities.begin(), 285 SizeDistribution.Probabilities.end()); 286 return Distribution(Gen); 287 }; 288 } 289 290 void reportProgress() { 291 static size_t LastPercent = -1; 292 const size_t TotalSteps = Study.Measurements.capacity(); 293 const size_t Steps = Study.Measurements.size(); 294 const size_t Percent = 100 * Steps / TotalSteps; 295 if (Percent == LastPercent) 296 return; 297 LastPercent = Percent; 298 size_t I = 0; 299 errs() << '['; 300 for (; I <= Percent; ++I) 301 errs() << '#'; 302 for (; I <= 100; ++I) 303 errs() << '_'; 304 errs() << "] " << Percent << '%' << '\r'; 305 } 306 307 void runTrials(const BenchmarkOptions &Options, 308 std::function<unsigned()> SizeSampler, 309 std::function<unsigned()> OffsetSampler) { 310 Harness B(BufferSize, BatchParameterCount, SizeSampler, OffsetSampler); 311 for (size_t i = 0; i < NumTrials; ++i) { 312 const BenchmarkResult Result = benchmark(Options, B, B.functor()); 313 Study.Measurements.push_back(Result.BestGuess); 314 reportProgress(); 315 } 316 } 317 318 void runSweepMode() { 319 Study.Measurements.reserve(NumTrials * SweepMaxSize); 320 321 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 322 BO.MinDuration = std::chrono::milliseconds(1); 323 BO.InitialIterations = 100; 324 325 for (size_t Size = 0; Size <= SweepMaxSize; ++Size) { 326 const auto SizeSampler = [Size]() { return Size; }; 327 runTrials(BO, SizeSampler, geOffsetSampler()); 328 } 329 } 330 331 void runDistributionMode() { 332 Study.Measurements.reserve(NumTrials); 333 334 BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions; 335 BO.MinDuration = std::chrono::milliseconds(10); 336 BO.InitialIterations = BatchParameterCount * 10; 337 338 runTrials(BO, getSizeSampler(), geOffsetSampler()); 339 } 340 }; 341 342 void writeStudy(const Study &S) { 343 std::error_code EC; 344 raw_fd_ostream FOS(Output, EC); 345 if (EC) 346 report_fatal_error(Twine("Could not open file: ") 347 .concat(EC.message()) 348 .concat(", ") 349 .concat(Output)); 350 json::OStream JOS(FOS); 351 serializeToJson(S, JOS); 352 FOS << "\n"; 353 } 354 355 void main() { 356 checkRequirements(); 357 MemfunctionBenchmark MB; 358 writeStudy(MB.run()); 359 } 360 361 } // namespace libc_benchmarks 362 } // namespace llvm 363 364 int main(int argc, char **argv) { 365 llvm::cl::ParseCommandLineOptions(argc, argv); 366 #ifndef NDEBUG 367 static_assert( 368 false, 369 "For reproducibility benchmarks should not be compiled in DEBUG mode."); 370 #endif 371 llvm::libc_benchmarks::main(); 372 return EXIT_SUCCESS; 373 } 374