1 //===-- Analyze benchmark JSON files --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // This code analyzes the json file produced by the `automemcpy` binary. 9 // 10 // As a remainder, `automemcpy` will benchmark each autogenerated memory 11 // functions against one of the predefined distributions available in the 12 // `libc/benchmarks/distributions` folder. 13 // 14 // It works as follows: 15 // - Reads one or more json files. 16 // - If there are several runs for the same function and distribution, picks the 17 // median throughput (aka `BytesPerSecond`). 18 // - Aggregates the throughput per distributions and scores them from worst (0) 19 // to best (1). 20 // - Each distribution categorizes each function into one of the following 21 // categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE, 22 // BAD. 23 // - A process similar to the Majority Judgment voting system is used to `elect` 24 // the best function. The histogram of grades is returned so we can 25 // distinguish between functions with the same final grade. In the following 26 // example both functions grade EXCELLENT but we may prefer the second one. 27 // 28 // | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ... 29 // |------------|-----------|-----------|------|----------| ... 30 // | Function_1 | 7 | 1 | 2 | | ... 31 // | Function_2 | 6 | 4 | | | ... 32 33 #include "automemcpy/ResultAnalyzer.h" 34 #include "llvm/ADT/StringRef.h" 35 #include <numeric> 36 #include <unordered_map> 37 38 namespace llvm { 39 40 namespace automemcpy { 41 42 StringRef Grade::getString(const GradeEnum &GE) { 43 switch (GE) { 44 case EXCELLENT: 45 return "EXCELLENT"; 46 case VERY_GOOD: 47 return "VERY_GOOD"; 48 case GOOD: 49 return "GOOD"; 50 case PASSABLE: 51 return "PASSABLE"; 52 case INADEQUATE: 53 return "INADEQUATE"; 54 case MEDIOCRE: 55 return "MEDIOCRE"; 56 case BAD: 57 return "BAD"; 58 case ARRAY_SIZE: 59 report_fatal_error("logic error"); 60 } 61 } 62 63 Grade::GradeEnum Grade::judge(double Score) { 64 if (Score >= 6. / 7) 65 return EXCELLENT; 66 if (Score >= 5. / 7) 67 return VERY_GOOD; 68 if (Score >= 4. / 7) 69 return GOOD; 70 if (Score >= 3. / 7) 71 return PASSABLE; 72 if (Score >= 2. / 7) 73 return INADEQUATE; 74 if (Score >= 1. / 7) 75 return MEDIOCRE; 76 return BAD; 77 } 78 79 static double computeUnbiasedSampleVariance(const std::vector<double> &Samples, 80 const double SampleMean) { 81 assert(!Samples.empty()); 82 if (Samples.size() == 1) 83 return 0; 84 double DiffSquaresSum = 0; 85 for (const double S : Samples) { 86 const double Diff = S - SampleMean; 87 DiffSquaresSum += Diff * Diff; 88 } 89 return DiffSquaresSum / (Samples.size() - 1); 90 } 91 92 static void processPerDistributionData(PerDistributionData &Data) { 93 auto &Samples = Data.BytesPerSecondSamples; 94 assert(!Samples.empty()); 95 // Sample Mean 96 const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0); 97 Data.BytesPerSecondMean = Sum / Samples.size(); 98 // Unbiased Sample Variance 99 Data.BytesPerSecondVariance = 100 computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean); 101 // Median 102 const size_t HalfSize = Samples.size() / 2; 103 std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end()); 104 Data.BytesPerSecondMedian = Samples[HalfSize]; 105 } 106 107 std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) { 108 std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions; 109 for (const auto &S : Samples) { 110 if (S.Type != SampleType::ITERATION) 111 break; 112 auto &Function = Functions[S.Id.Function]; 113 auto &Data = Function.PerDistributionData[S.Id.Distribution.Name]; 114 Data.BytesPerSecondSamples.push_back(S.BytesPerSecond); 115 } 116 117 std::vector<FunctionData> Output; 118 for (auto &[FunctionId, Function] : Functions) { 119 Function.Id = FunctionId; 120 for (auto &Pair : Function.PerDistributionData) 121 processPerDistributionData(Pair.second); 122 Output.push_back(std::move(Function)); 123 } 124 return Output; 125 } 126 127 void fillScores(MutableArrayRef<FunctionData> Functions) { 128 // A key to bucket throughput per function type and distribution. 129 struct Key { 130 FunctionType Type; 131 StringRef Distribution; 132 133 COMPARABLE_AND_HASHABLE(Key, Type, Distribution) 134 }; 135 136 // Tracks minimum and maximum values. 137 struct MinMax { 138 double Min = std::numeric_limits<double>::max(); 139 double Max = std::numeric_limits<double>::min(); 140 void update(double Value) { 141 if (Value < Min) 142 Min = Value; 143 if (Value > Max) 144 Max = Value; 145 } 146 double normalize(double Value) const { return (Value - Min) / (Max - Min); } 147 }; 148 149 std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax; 150 for (const auto &Function : Functions) { 151 const FunctionType Type = Function.Id.Type; 152 for (const auto &Pair : Function.PerDistributionData) { 153 const auto &Distribution = Pair.getKey(); 154 const double Throughput = Pair.getValue().BytesPerSecondMedian; 155 const Key K{Type, Distribution}; 156 ThroughputMinMax[K].update(Throughput); 157 } 158 } 159 160 for (auto &Function : Functions) { 161 const FunctionType Type = Function.Id.Type; 162 for (const auto &Pair : Function.PerDistributionData) { 163 const auto &Distribution = Pair.getKey(); 164 const double Throughput = Pair.getValue().BytesPerSecondMedian; 165 const Key K{Type, Distribution}; 166 Function.PerDistributionData[Distribution].Score = 167 ThroughputMinMax[K].normalize(Throughput); 168 } 169 } 170 } 171 172 void castVotes(MutableArrayRef<FunctionData> Functions) { 173 for (FunctionData &Function : Functions) { 174 Function.ScoresGeoMean = 1.0; 175 for (const auto &Pair : Function.PerDistributionData) { 176 const StringRef Distribution = Pair.getKey(); 177 const double Score = Pair.getValue().Score; 178 Function.ScoresGeoMean *= Score; 179 const auto G = Grade::judge(Score); 180 ++(Function.GradeHisto[G]); 181 Function.PerDistributionData[Distribution].Grade = G; 182 } 183 } 184 185 for (FunctionData &Function : Functions) { 186 const auto &GradeHisto = Function.GradeHisto; 187 const size_t Votes = 188 std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U); 189 const size_t MedianVote = Votes / 2; 190 size_t CountedVotes = 0; 191 Grade::GradeEnum MedianGrade = Grade::BAD; 192 for (size_t I = 0; I < GradeHisto.size(); ++I) { 193 CountedVotes += GradeHisto[I]; 194 if (CountedVotes > MedianVote) { 195 MedianGrade = Grade::GradeEnum(I); 196 break; 197 } 198 } 199 Function.FinalGrade = MedianGrade; 200 } 201 } 202 203 } // namespace automemcpy 204 } // namespace llvm 205