1 //===-- Benchmark memory specific tools -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // This file complements the `benchmark` header with memory specific tools and
10 // benchmarking facilities.
11 
12 #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
13 #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
14 
#include "LibcBenchmark.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Alignment.h"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <random>
#include <string>
#include <vector>
20 
21 namespace llvm {
22 namespace libc_benchmarks {
23 
24 //--------------
25 // Configuration
26 //--------------
27 
28 struct StudyConfiguration {
29   // One of 'memcpy', 'memset', 'memcmp'.
30   // The underlying implementation is always the llvm libc one.
31   // e.g. 'memcpy' will test '__llvm_libc::memcpy'
32   std::string Function;
33 
34   // The number of trials to run for this benchmark.
35   // If in SweepMode, each individual sizes are measured 'NumTrials' time.
36   // i.e 'NumTrials' measurements for 0, 'NumTrials' measurements for 1 ...
37   uint32_t NumTrials = 1;
38 
39   // Toggles between Sweep Mode and Distribution Mode (default).
40   // See 'SweepModeMaxSize' and 'SizeDistributionName' below.
41   bool IsSweepMode = false;
42 
43   // Maximum size to use when measuring a ramp of size values (SweepMode).
44   // The benchmark measures all sizes from 0 to SweepModeMaxSize.
45   // Note: in sweep mode the same size is sampled several times in a row this
46   // will allow the processor to learn it and optimize the branching pattern.
47   // The resulting measurement is likely to be idealized.
48   uint32_t SweepModeMaxSize = 0; // inclusive
49 
50   // The name of the distribution to be used to randomize the size parameter.
51   // This is used when SweepMode is false (default).
52   std::string SizeDistributionName;
53 
54   // This parameter allows to control how the buffers are accessed during
55   // benchmark:
56   // None : Use a fixed address that is at least cache line aligned,
57   //    1 : Use random address,
58   //   >1 : Use random address aligned to value.
59   MaybeAlign AccessAlignment = None;
60 
61   // When Function == 'memcmp', this is the buffers mismatch position.
62   //  0 : Buffers always compare equal,
63   // >0 : Buffers compare different at byte N-1.
64   uint32_t MemcmpMismatchAt = 0;
65 };
66 
67 struct Runtime {
68   // Details about the Host (cpu name, cpu frequency, cache hierarchy).
69   HostState Host;
70 
71   // The framework will populate this value so all data accessed during the
72   // benchmark will stay in L1 data cache. This includes bookkeeping data.
73   uint32_t BufferSize = 0;
74 
75   // This is the number of distinct parameters used in a single batch.
76   // The framework always tests a batch of randomized parameter to prevent the
77   // cpu from learning branching patterns.
78   uint32_t BatchParameterCount = 0;
79 
80   // The benchmark options that were used to perform the measurement.
81   // This is decided by the framework.
82   BenchmarkOptions BenchmarkOptions;
83 };
84 
85 //--------
86 // Results
87 //--------
88 
89 // The root object containing all the data (configuration and measurements).
90 struct Study {
91   std::string StudyName;
92   Runtime Runtime;
93   StudyConfiguration Configuration;
94   std::vector<Duration> Measurements;
95 };
96 
97 //------
98 // Utils
99 //------
100 
// Provides an aligned, dynamically allocated buffer.
//
// The storage is obtained from `aligned_alloc` in the constructor and released
// with `free` in the destructor. The object is the sole owner of that storage,
// hence it is not copyable: a copy would share the pointer and free it twice.
class AlignedBuffer {
  char *const Buffer = nullptr;
  size_t Size = 0;

public:
  // Alignment, in bytes, requested for the start of the buffer.
  static constexpr size_t Alignment = 1024;

private:
  // Returns `Value` rounded up to the next multiple of `Alignment`.
  // `aligned_alloc` requires the requested size to be an integral multiple of
  // the alignment, so the allocation size must go through this helper.
  static constexpr size_t roundUpToAlignment(size_t Value) {
    return (Value + Alignment - 1) / Alignment * Alignment;
  }

public:
  // Allocates storage able to hold at least `Size` bytes. The allocation is
  // padded up to a multiple of `Alignment`, but `end()` still designates the
  // position `Size` bytes after `begin()`.
  // Allocation failure is not checked here: `begin()` is null in that case.
  explicit AlignedBuffer(size_t Size)
      : Buffer(static_cast<char *>(
            aligned_alloc(Alignment, roundUpToAlignment(Size)))),
        Size(Size) {}
  ~AlignedBuffer() { free(Buffer); }

  // The buffer is uniquely owned, copying would lead to a double free.
  AlignedBuffer(const AlignedBuffer &) = delete;
  AlignedBuffer &operator=(const AlignedBuffer &) = delete;

  // Pointer to the byte at position `Index`.
  inline char *operator+(size_t Index) { return Buffer + Index; }
  inline const char *operator+(size_t Index) const { return Buffer + Index; }
  // Reference to the byte at position `Index`; no bounds checking is done.
  inline char &operator[](size_t Index) { return Buffer[Index]; }
  inline const char &operator[](size_t Index) const { return Buffer[Index]; }
  // Range covering the `Size` bytes requested at construction.
  inline char *begin() { return Buffer; }
  inline char *end() { return Buffer + Size; }
  inline const char *begin() const { return Buffer; }
  inline const char *end() const { return Buffer + Size; }
};
121 
122 // Helper to generate random buffer offsets that satisfy the configuration
123 // constraints.
124 class OffsetDistribution {
125   std::uniform_int_distribution<uint32_t> Distribution;
126   uint32_t Factor;
127 
128 public:
129   explicit OffsetDistribution(size_t BufferSize, size_t MaxSizeValue,
130                               MaybeAlign AccessAlignment);
131 
132   template <class Generator> uint32_t operator()(Generator &G) {
133     return Distribution(G) * Factor;
134   }
135 };
136 
137 // Helper to generate random buffer offsets that satisfy the configuration
138 // constraints. It is specifically designed to benchmark `memcmp` functions
139 // where we may want the Nth byte to differ.
140 class MismatchOffsetDistribution {
141   std::uniform_int_distribution<size_t> MismatchIndexSelector;
142   llvm::SmallVector<uint32_t, 16> MismatchIndices;
143   const uint32_t MismatchAt;
144 
145 public:
146   explicit MismatchOffsetDistribution(size_t BufferSize, size_t MaxSizeValue,
147                                       size_t MismatchAt);
148 
149   explicit operator bool() const { return !MismatchIndices.empty(); }
150 
151   const llvm::SmallVectorImpl<uint32_t> &getMismatchIndices() const {
152     return MismatchIndices;
153   }
154 
155   template <class Generator> uint32_t operator()(Generator &G, uint32_t Size) {
156     const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)];
157     // We need to position the offset so that a mismatch occurs at MismatchAt.
158     if (Size >= MismatchAt)
159       return MismatchIndex - MismatchAt;
160     // Size is too small to trigger the mismatch.
161     return MismatchIndex - Size - 1;
162   }
163 };
164 
165 } // namespace libc_benchmarks
166 } // namespace llvm
167 
168 #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H
169