1 //===-- Benchmark ---------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "JSON.h"
10 #include "LibcBenchmark.h"
11 #include "LibcMemoryBenchmark.h"
12 #include "MemorySizeDistributions.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include "llvm/Support/FileSystem.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <cstring>
21 
// Declarations of the memory functions under benchmark. Only the prototypes
// are needed in this file; the Benchmark functors call them through the
// __llvm_libc namespace.
namespace __llvm_libc {

void *memcpy(void *__restrict Dst, const void *__restrict Src, size_t Count);
void *memset(void *Dst, int Value, size_t Count);
void bzero(void *Dst, size_t Count);
int memcmp(const void *Lhs, const void *Rhs, size_t Count);

} // namespace __llvm_libc
30 
31 namespace llvm {
32 namespace libc_benchmarks {
33 
34 static cl::opt<std::string>
35     StudyName("study-name", cl::desc("The name for this study"), cl::Required);
36 
37 static cl::opt<std::string>
38     SizeDistributionName("size-distribution-name",
39                          cl::desc("The name of the distribution to use"));
40 
41 static cl::opt<bool>
42     SweepMode("sweep-mode",
43               cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));
44 
45 static cl::opt<uint32_t>
46     SweepMaxSize("sweep-max-size",
47                  cl::desc("The maximum size to use in sweep-mode"),
48                  cl::init(256));
49 
50 static cl::opt<uint32_t>
51     AlignedAccess("aligned-access",
52                   cl::desc("The alignment to use when accessing the buffers\n"
53                            "Default is unaligned\n"
54                            "Use 0 to disable address randomization"),
55                   cl::init(1));
56 
57 static cl::opt<std::string> Output("output",
58                                    cl::desc("Specify output filename"),
59                                    cl::value_desc("filename"), cl::init("-"));
60 
61 static cl::opt<uint32_t>
62     NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
63               cl::init(1));
64 
65 static constexpr int64_t KiB = 1024;
66 static constexpr int64_t ParameterStorageBytes = 4 * KiB;
67 static constexpr int64_t L1LeftAsideBytes = 1 * KiB;
68 
// Parameters of a single call to the benchmarked function: the offset applied
// to the buffer(s) and the number of bytes to process. Both values are stored
// in 16-bit bit-fields, so anything above 65535 is truncated on assignment.
struct ParameterType {
  unsigned OffsetBytes : 16; // max : 64 KiB - 1 (65535)
  unsigned SizeBytes : 16;   // max : 64 KiB - 1 (65535)
};
73 
74 #if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY)
75 struct Benchmark {
76   static constexpr auto GetDistributions = &getMemcpySizeDistributions;
77   static constexpr size_t BufferCount = 2;
78 
79   Benchmark(const size_t BufferSize)
80       : SrcBuffer(BufferSize), DstBuffer(BufferSize) {}
81 
82   inline auto functor() {
83     return [this](ParameterType P) {
84       __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
85                           P.SizeBytes);
86       return DstBuffer[P.OffsetBytes];
87     };
88   }
89 
90   AlignedBuffer SrcBuffer;
91   AlignedBuffer DstBuffer;
92 };
93 #elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET)
94 struct Benchmark {
95   static constexpr auto GetDistributions = &getMemsetSizeDistributions;
96   static constexpr size_t BufferCount = 1;
97 
98   Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
99 
100   inline auto functor() {
101     return [this](ParameterType P) {
102       __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
103                           P.SizeBytes);
104       return DstBuffer[P.OffsetBytes];
105     };
106   }
107 
108   AlignedBuffer DstBuffer;
109 };
110 #elif defined(LIBC_BENCHMARK_FUNCTION_BZERO)
111 struct Benchmark {
112   static constexpr auto GetDistributions = &getMemsetSizeDistributions;
113   static constexpr size_t BufferCount = 1;
114 
115   Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
116 
117   inline auto functor() {
118     return [this](ParameterType P) {
119       __llvm_libc::bzero(DstBuffer + P.OffsetBytes, P.SizeBytes);
120       return DstBuffer[P.OffsetBytes];
121     };
122   }
123 
124   AlignedBuffer DstBuffer;
125 };
126 #elif defined(LIBC_BENCHMARK_FUNCTION_MEMCMP)
127 struct Benchmark {
128   static constexpr auto GetDistributions = &getMemcmpSizeDistributions;
129   static constexpr size_t BufferCount = 2;
130 
131   Benchmark(const size_t BufferSize)
132       : BufferA(BufferSize), BufferB(BufferSize) {
133     // The memcmp buffers always compare equal.
134     memset(BufferA.begin(), 0xF, BufferSize);
135     memset(BufferB.begin(), 0xF, BufferSize);
136   }
137 
138   inline auto functor() {
139     return [this](ParameterType P) {
140       return __llvm_libc::memcmp(BufferA + P.OffsetBytes,
141                                  BufferB + P.OffsetBytes, P.SizeBytes);
142     };
143   }
144 
145   AlignedBuffer BufferA;
146   AlignedBuffer BufferB;
147 };
148 #else
149 #error "Missing LIBC_BENCHMARK_FUNCTION_XXX definition"
150 #endif
151 
152 struct Harness : Benchmark {
153   Harness(const size_t BufferSize, size_t BatchParameterCount,
154           std::function<unsigned()> SizeSampler,
155           std::function<unsigned()> OffsetSampler)
156       : Benchmark(BufferSize), BufferSize(BufferSize),
157         Parameters(BatchParameterCount), SizeSampler(SizeSampler),
158         OffsetSampler(OffsetSampler) {}
159 
160   CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
161     for (auto &P : Parameters) {
162       P.OffsetBytes = OffsetSampler();
163       P.SizeBytes = SizeSampler();
164       if (P.OffsetBytes + P.SizeBytes >= BufferSize)
165         report_fatal_error("Call would result in buffer overflow");
166     }
167     return cycle(makeArrayRef(Parameters), Iterations);
168   }
169 
170 private:
171   const size_t BufferSize;
172   std::vector<ParameterType> Parameters;
173   std::function<unsigned()> SizeSampler;
174   std::function<unsigned()> OffsetSampler;
175 };
176 
177 size_t getL1DataCacheSize() {
178   const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
179   const auto IsL1DataCache = [](const CacheInfo &CI) {
180     return CI.Type == "Data" && CI.Level == 1;
181   };
182   const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
183   if (CacheIt != CacheInfos.end())
184     return CacheIt->Size;
185   report_fatal_error("Unable to read L1 Cache Data Size");
186 }
187 
188 struct MemfunctionBenchmark {
189   MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
190       : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
191         BufferSize(AvailableSize / Benchmark::BufferCount),
192         BatchParameterCount(BufferSize / sizeof(ParameterType)) {
193     // Handling command line flags
194     if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
195       report_fatal_error("Not enough L1 cache");
196 
197     if (!isPowerOfTwoOrZero(AlignedAccess))
198       report_fatal_error(AlignedAccess.ArgStr +
199                          Twine(" must be a power of two or zero"));
200 
201     const bool HasDistributionName = !SizeDistributionName.empty();
202     if (SweepMode && HasDistributionName)
203       report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
204                          "` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
205 
206     if (SweepMode) {
207       MaxSizeValue = SweepMaxSize;
208     } else {
209       std::map<StringRef, MemorySizeDistribution> Map;
210       for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
211         Map[Distribution.Name] = Distribution;
212       if (Map.count(SizeDistributionName) == 0) {
213         std::string Message;
214         raw_string_ostream Stream(Message);
215         Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
216                << SizeDistributionName << "', available distributions:\n";
217         for (const auto &Pair : Map)
218           Stream << "'" << Pair.first << "'\n";
219         report_fatal_error(Stream.str());
220       }
221       SizeDistribution = Map[SizeDistributionName];
222       MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
223     }
224 
225     // Setup study.
226     Study.StudyName = StudyName;
227     Runtime &RI = Study.Runtime;
228     RI.Host = HostState::get();
229     RI.BufferSize = BufferSize;
230     RI.BatchParameterCount = BatchParameterCount;
231 
232     BenchmarkOptions &BO = RI.BenchmarkOptions;
233     BO.MinDuration = std::chrono::milliseconds(1);
234     BO.MaxDuration = std::chrono::seconds(1);
235     BO.MaxIterations = 10'000'000U;
236     BO.MinSamples = 4;
237     BO.MaxSamples = 1000;
238     BO.Epsilon = 0.01; // 1%
239     BO.ScalingFactor = 1.4;
240 
241     StudyConfiguration &SC = Study.Configuration;
242     SC.NumTrials = NumTrials;
243     SC.IsSweepMode = SweepMode;
244     if (SweepMode)
245       SC.SweepModeMaxSize = SweepMaxSize;
246     else
247       SC.SizeDistributionName = SizeDistributionName;
248     SC.AccessAlignment = MaybeAlign(AlignedAccess);
249     SC.Function = LIBC_BENCHMARK_FUNCTION_NAME;
250   }
251 
252   Study run() {
253     if (SweepMode)
254       runSweepMode();
255     else
256       runDistributionMode();
257     return Study;
258   }
259 
260 private:
261   const int64_t AvailableSize;
262   const int64_t BufferSize;
263   const size_t BatchParameterCount;
264   size_t MaxSizeValue = 0;
265   MemorySizeDistribution SizeDistribution;
266   Study Study;
267   std::mt19937_64 Gen;
268 
269   static constexpr bool isPowerOfTwoOrZero(size_t Value) {
270     return (Value & (Value - 1U)) == 0;
271   }
272 
273   std::function<unsigned()> geOffsetSampler() {
274     return [this]() {
275       static OffsetDistribution OD(BufferSize, MaxSizeValue,
276                                    Study.Configuration.AccessAlignment);
277       return OD(Gen);
278     };
279   }
280 
281   std::function<unsigned()> getSizeSampler() {
282     return [this]() {
283       static std::discrete_distribution<unsigned> Distribution(
284           SizeDistribution.Probabilities.begin(),
285           SizeDistribution.Probabilities.end());
286       return Distribution(Gen);
287     };
288   }
289 
290   void reportProgress() {
291     static size_t LastPercent = -1;
292     const size_t TotalSteps = Study.Measurements.capacity();
293     const size_t Steps = Study.Measurements.size();
294     const size_t Percent = 100 * Steps / TotalSteps;
295     if (Percent == LastPercent)
296       return;
297     LastPercent = Percent;
298     size_t I = 0;
299     errs() << '[';
300     for (; I <= Percent; ++I)
301       errs() << '#';
302     for (; I <= 100; ++I)
303       errs() << '_';
304     errs() << "] " << Percent << '%' << '\r';
305   }
306 
307   void runTrials(const BenchmarkOptions &Options,
308                  std::function<unsigned()> SizeSampler,
309                  std::function<unsigned()> OffsetSampler) {
310     Harness B(BufferSize, BatchParameterCount, SizeSampler, OffsetSampler);
311     for (size_t i = 0; i < NumTrials; ++i) {
312       const BenchmarkResult Result = benchmark(Options, B, B.functor());
313       Study.Measurements.push_back(Result.BestGuess);
314       reportProgress();
315     }
316   }
317 
318   void runSweepMode() {
319     Study.Measurements.reserve(NumTrials * SweepMaxSize);
320 
321     BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
322     BO.MinDuration = std::chrono::milliseconds(1);
323     BO.InitialIterations = 100;
324 
325     for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
326       const auto SizeSampler = [Size]() { return Size; };
327       runTrials(BO, SizeSampler, geOffsetSampler());
328     }
329   }
330 
331   void runDistributionMode() {
332     Study.Measurements.reserve(NumTrials);
333 
334     BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
335     BO.MinDuration = std::chrono::milliseconds(10);
336     BO.InitialIterations = BatchParameterCount * 10;
337 
338     runTrials(BO, getSizeSampler(), geOffsetSampler());
339   }
340 };
341 
342 void writeStudy(const Study &S) {
343   std::error_code EC;
344   raw_fd_ostream FOS(Output, EC);
345   if (EC)
346     report_fatal_error(Twine("Could not open file: ")
347                            .concat(EC.message())
348                            .concat(", ")
349                            .concat(Output));
350   json::OStream JOS(FOS);
351   serializeToJson(S, JOS);
352   FOS << "\n";
353 }
354 
355 void main() {
356   checkRequirements();
357   MemfunctionBenchmark MB;
358   writeStudy(MB.run());
359 }
360 
361 } // namespace libc_benchmarks
362 } // namespace llvm
363 
364 int main(int argc, char **argv) {
365   llvm::cl::ParseCommandLineOptions(argc, argv);
366 #ifndef NDEBUG
367   static_assert(
368       false,
369       "For reproducibility benchmarks should not be compiled in DEBUG mode.");
370 #endif
371   llvm::libc_benchmarks::main();
372   return EXIT_SUCCESS;
373 }
374