1 //===-- Analyze benchmark JSON files --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // This code analyzes the json file produced by the `automemcpy` binary. 9 // 10 // As a remainder, `automemcpy` will benchmark each autogenerated memory 11 // functions against one of the predefined distributions available in the 12 // `libc/benchmarks/distributions` folder. 13 // 14 // It works as follows: 15 // - Reads one or more json files. 16 // - If there are several runs for the same function and distribution, picks the 17 // median throughput (aka `BytesPerSecond`). 18 // - Aggregates the throughput per distributions and scores them from worst (0) 19 // to best (1). 20 // - Each distribution categorizes each function into one of the following 21 // categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE, 22 // BAD. 23 // - A process similar to the Majority Judgment voting system is used to `elect` 24 // the best function. The histogram of grades is returned so we can 25 // distinguish between functions with the same final grade. In the following 26 // example both functions grade EXCELLENT but we may prefer the second one. 27 // 28 // | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ... 29 // |------------|-----------|-----------|------|----------| ... 30 // | Function_1 | 7 | 1 | 2 | | ... 31 // | Function_2 | 6 | 4 | | | ... 32 33 #include "automemcpy/ResultAnalyzer.h" 34 #include "llvm/ADT/StringRef.h" 35 #include <numeric> 36 #include <unordered_map> 37 38 namespace llvm { 39 40 namespace automemcpy { 41 42 StringRef Grade::getString(const GradeEnum &GE) { 43 switch (GE) { 44 case EXCELLENT: 45 return "EXCELLENT"; 46 case VERY_GOOD: 47 return "VERY_GOOD"; 48 case GOOD: 49 return "GOOD"; 50 case PASSABLE: 51 return "PASSABLE"; 52 case INADEQUATE: 53 return "INADEQUATE"; 54 case MEDIOCRE: 55 return "MEDIOCRE"; 56 case BAD: 57 return "BAD"; 58 case ARRAY_SIZE: 59 report_fatal_error("logic error"); 60 } 61 } 62 63 Grade::GradeEnum Grade::judge(double Score) { 64 if (Score >= 6. / 7) 65 return EXCELLENT; 66 if (Score >= 5. / 7) 67 return VERY_GOOD; 68 if (Score >= 4. / 7) 69 return GOOD; 70 if (Score >= 3. / 7) 71 return PASSABLE; 72 if (Score >= 2. / 7) 73 return INADEQUATE; 74 if (Score >= 1. / 7) 75 return MEDIOCRE; 76 return BAD; 77 } 78 79 std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) { 80 std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher> 81 BucketedSamples; 82 for (const auto &S : Samples) 83 BucketedSamples[S.Id].push_back(S.BytesPerSecond); 84 std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher> 85 Throughputs; 86 for (auto &Pair : BucketedSamples) { 87 const auto &Id = Pair.first; 88 auto &Values = Pair.second; 89 const size_t HalfSize = Values.size() / 2; 90 std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end()); 91 const double MedianValue = Values[HalfSize]; 92 Throughputs[Id.Function][Id.Distribution.Name] = MedianValue; 93 } 94 std::vector<FunctionData> Output; 95 for (auto &Pair : Throughputs) { 96 FunctionData Data; 97 Data.Id = Pair.first; 98 for (const auto &Pair : Pair.second) 99 Data.PerDistributionData[Pair.getKey()].MedianBytesPerSecond = 100 Pair.getValue(); 101 Output.push_back(std::move(Data)); 102 } 103 return Output; 104 } 105 106 void fillScores(MutableArrayRef<FunctionData> Functions) { 107 // A key to bucket throughput per function type and distribution. 108 struct Key { 109 FunctionType Type; 110 StringRef Distribution; 111 112 COMPARABLE_AND_HASHABLE(Key, Type, Distribution) 113 }; 114 115 // Tracks minimum and maximum values. 116 struct MinMax { 117 double Min = std::numeric_limits<double>::max(); 118 double Max = std::numeric_limits<double>::min(); 119 void update(double Value) { 120 if (Value < Min) 121 Min = Value; 122 if (Value > Max) 123 Max = Value; 124 } 125 double normalize(double Value) const { return (Value - Min) / (Max - Min); } 126 }; 127 128 std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax; 129 for (const auto &Function : Functions) { 130 const FunctionType Type = Function.Id.Type; 131 for (const auto &Pair : Function.PerDistributionData) { 132 const auto &Distribution = Pair.getKey(); 133 const double Throughput = Pair.getValue().MedianBytesPerSecond; 134 const Key K{Type, Distribution}; 135 ThroughputMinMax[K].update(Throughput); 136 } 137 } 138 139 for (auto &Function : Functions) { 140 const FunctionType Type = Function.Id.Type; 141 for (const auto &Pair : Function.PerDistributionData) { 142 const auto &Distribution = Pair.getKey(); 143 const double Throughput = Pair.getValue().MedianBytesPerSecond; 144 const Key K{Type, Distribution}; 145 Function.PerDistributionData[Distribution].Score = 146 ThroughputMinMax[K].normalize(Throughput); 147 } 148 } 149 } 150 151 void castVotes(MutableArrayRef<FunctionData> Functions) { 152 for (FunctionData &Function : Functions) 153 for (const auto &Pair : Function.PerDistributionData) { 154 const StringRef Distribution = Pair.getKey(); 155 const double Score = Pair.getValue().Score; 156 const auto G = Grade::judge(Score); 157 ++(Function.GradeHisto[G]); 158 Function.PerDistributionData[Distribution].Grade = G; 159 } 160 161 for (FunctionData &Function : Functions) { 162 const auto &GradeHisto = Function.GradeHisto; 163 const size_t Votes = 164 std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U); 165 const size_t MedianVote = Votes / 2; 166 size_t CountedVotes = 0; 167 Grade::GradeEnum MedianGrade = Grade::BAD; 168 for (size_t I = 0; I < GradeHisto.size(); ++I) { 169 CountedVotes += GradeHisto[I]; 170 if (CountedVotes > MedianVote) { 171 MedianGrade = Grade::GradeEnum(I); 172 break; 173 } 174 } 175 Function.FinalGrade = MedianGrade; 176 } 177 } 178 179 } // namespace automemcpy 180 } // namespace llvm 181