1 //===-- Analyze benchmark JSON files --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This code analyzes the json file produced by the `automemcpy` binary.
9 //
10 // As a remainder, `automemcpy` will benchmark each autogenerated memory
11 // functions against one of the predefined distributions available in the
12 // `libc/benchmarks/distributions` folder.
13 //
14 // It works as follows:
15 // - Reads one or more json files.
16 // - If there are several runs for the same function and distribution, picks the
17 //   median throughput (aka `BytesPerSecond`).
18 // - Aggregates the throughput per distributions and scores them from worst (0)
19 //   to best (1).
20 // - Each distribution categorizes each function into one of the following
21 //   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
22 //   BAD.
23 // - A process similar to the Majority Judgment voting system is used to `elect`
24 //   the best function. The histogram of grades is returned so we can
25 //   distinguish between functions with the same final grade. In the following
26 //   example both functions grade EXCELLENT but we may prefer the second one.
27 //
28 //   |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
29 //   |------------|-----------|-----------|------|----------| ...
30 //   | Function_1 |     7     |     1     |   2  |          | ...
31 //   | Function_2 |     6     |     4     |      |          | ...
32 
33 #include "automemcpy/ResultAnalyzer.h"
34 #include "llvm/ADT/StringRef.h"
35 #include <numeric>
36 #include <unordered_map>
37 
38 namespace llvm {
39 
40 namespace automemcpy {
41 
42 StringRef Grade::getString(const GradeEnum &GE) {
43   switch (GE) {
44   case EXCELLENT:
45     return "EXCELLENT";
46   case VERY_GOOD:
47     return "VERY_GOOD";
48   case GOOD:
49     return "GOOD";
50   case PASSABLE:
51     return "PASSABLE";
52   case INADEQUATE:
53     return "INADEQUATE";
54   case MEDIOCRE:
55     return "MEDIOCRE";
56   case BAD:
57     return "BAD";
58   case ARRAY_SIZE:
59     report_fatal_error("logic error");
60   }
61 }
62 
63 Grade::GradeEnum Grade::judge(double Score) {
64   if (Score >= 6. / 7)
65     return EXCELLENT;
66   if (Score >= 5. / 7)
67     return VERY_GOOD;
68   if (Score >= 4. / 7)
69     return GOOD;
70   if (Score >= 3. / 7)
71     return PASSABLE;
72   if (Score >= 2. / 7)
73     return INADEQUATE;
74   if (Score >= 1. / 7)
75     return MEDIOCRE;
76   return BAD;
77 }
78 
79 std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
80   std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher>
81       BucketedSamples;
82   for (const auto &S : Samples)
83     BucketedSamples[S.Id].push_back(S.BytesPerSecond);
84   std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
85       Throughputs;
86   for (auto &Pair : BucketedSamples) {
87     const auto &Id = Pair.first;
88     auto &Values = Pair.second;
89     const size_t HalfSize = Values.size() / 2;
90     std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end());
91     const double MedianValue = Values[HalfSize];
92     Throughputs[Id.Function][Id.Distribution.Name] = MedianValue;
93   }
94   std::vector<FunctionData> Output;
95   for (auto &Pair : Throughputs) {
96     FunctionData Data;
97     Data.Id = Pair.first;
98     for (const auto &Pair : Pair.second)
99       Data.PerDistributionData[Pair.getKey()].MedianBytesPerSecond =
100           Pair.getValue();
101     Output.push_back(std::move(Data));
102   }
103   return Output;
104 }
105 
106 void fillScores(MutableArrayRef<FunctionData> Functions) {
107   // A key to bucket throughput per function type and distribution.
108   struct Key {
109     FunctionType Type;
110     StringRef Distribution;
111 
112     COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
113   };
114 
115   // Tracks minimum and maximum values.
116   struct MinMax {
117     double Min = std::numeric_limits<double>::max();
118     double Max = std::numeric_limits<double>::min();
119     void update(double Value) {
120       if (Value < Min)
121         Min = Value;
122       if (Value > Max)
123         Max = Value;
124     }
125     double normalize(double Value) const { return (Value - Min) / (Max - Min); }
126   };
127 
128   std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
129   for (const auto &Function : Functions) {
130     const FunctionType Type = Function.Id.Type;
131     for (const auto &Pair : Function.PerDistributionData) {
132       const auto &Distribution = Pair.getKey();
133       const double Throughput = Pair.getValue().MedianBytesPerSecond;
134       const Key K{Type, Distribution};
135       ThroughputMinMax[K].update(Throughput);
136     }
137   }
138 
139   for (auto &Function : Functions) {
140     const FunctionType Type = Function.Id.Type;
141     for (const auto &Pair : Function.PerDistributionData) {
142       const auto &Distribution = Pair.getKey();
143       const double Throughput = Pair.getValue().MedianBytesPerSecond;
144       const Key K{Type, Distribution};
145       Function.PerDistributionData[Distribution].Score =
146           ThroughputMinMax[K].normalize(Throughput);
147     }
148   }
149 }
150 
151 void castVotes(MutableArrayRef<FunctionData> Functions) {
152   for (FunctionData &Function : Functions)
153     for (const auto &Pair : Function.PerDistributionData) {
154       const StringRef Distribution = Pair.getKey();
155       const double Score = Pair.getValue().Score;
156       const auto G = Grade::judge(Score);
157       ++(Function.GradeHisto[G]);
158       Function.PerDistributionData[Distribution].Grade = G;
159     }
160 
161   for (FunctionData &Function : Functions) {
162     const auto &GradeHisto = Function.GradeHisto;
163     const size_t Votes =
164         std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
165     const size_t MedianVote = Votes / 2;
166     size_t CountedVotes = 0;
167     Grade::GradeEnum MedianGrade = Grade::BAD;
168     for (size_t I = 0; I < GradeHisto.size(); ++I) {
169       CountedVotes += GradeHisto[I];
170       if (CountedVotes > MedianVote) {
171         MedianGrade = Grade::GradeEnum(I);
172         break;
173       }
174     }
175     Function.FinalGrade = MedianGrade;
176   }
177 }
178 
179 } // namespace automemcpy
180 } // namespace llvm
181