1 //===-------------- llvm-remark-size-diff/RemarkSizeDiff.cpp --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Diffs instruction count and stack size remarks between two remark files.
11 ///
12 /// This is intended for use by compiler developers who want to see how their
13 /// changes impact program code size.
14 ///
15 /// TODO: Add structured output (JSON, or YAML, or something...)
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm-c/Remarks.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/Remarks/Remark.h"
24 #include "llvm/Remarks/RemarkParser.h"
25 #include "llvm/Remarks/RemarkSerializer.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/FormatVariadic.h"
30 #include "llvm/Support/InitLLVM.h"
31 #include "llvm/Support/MemoryBuffer.h"
32 #include "llvm/Support/ToolOutputFile.h"
33 #include "llvm/Support/WithColor.h"
34 #include "llvm/Support/raw_ostream.h"
35 
36 using namespace llvm;
37 
/// Remark formats selectable through the "parser" command-line option.
enum ParserFormatOptions { yaml, bitstream };
/// Option category that all of this tool's options are registered under.
static cl::OptionCategory SizeDiffCategory("llvm-remark-size-diff options");
/// Required positional argument naming the first remarks file ("A").
static cl::opt<std::string> InputFileNameA(cl::Positional, cl::Required,
                                           cl::cat(SizeDiffCategory),
                                           cl::desc("remarks_a"));
/// Required positional argument naming the second remarks file ("B").
static cl::opt<std::string> InputFileNameB(cl::Positional, cl::Required,
                                           cl::cat(SizeDiffCategory),
                                           cl::desc("remarks_b"));
/// "-o <file>": where the diff is written. Initialized to "-".
static cl::opt<std::string> OutputFilename("o", cl::init("-"),
                                           cl::cat(SizeDiffCategory),
                                           cl::desc("Output"),
                                           cl::value_desc("file"));
/// "-parser=<yaml|bitstream>": format used to parse both input files.
/// Initialized to bitstream.
static cl::opt<ParserFormatOptions>
    ParserFormat("parser", cl::cat(SizeDiffCategory), cl::init(bitstream),
                 cl::desc("Set the remark parser format:"),
                 cl::values(clEnumVal(yaml, "YAML format"),
                            clEnumVal(bitstream, "Bitstream format")));
55 
/// Size information gathered from the remarks of a single function.
///
/// Named fields read better than a std::pair; both values start at zero so a
/// default-constructed object stands for "no remarks seen".
struct InstCountAndStackSize {
  /// Value taken from the function's instruction count remark.
  int64_t InstCount{0};
  /// Value taken from the function's stack size remark.
  int64_t StackSize{0};
};
62 
/// Identifies the remark file(s) in which a function appeared.
enum FilesPresent {
  A,   ///< Only in the first file.
  B,   ///< Only in the second file.
  BOTH ///< In both files.
};
65 
66 /// Contains the data from the remarks in file A and file B for some function.
67 /// E.g. instruction count, stack size...
68 struct FunctionDiff {
69   /// Function name from the remark.
70   std::string FuncName;
71   // Idx 0 = A, Idx 1 = B.
72   int64_t InstCount[2] = {0, 0};
73   int64_t StackSize[2] = {0, 0};
74 
75   // Calculate diffs between the first and second files.
76   int64_t getInstDiff() const { return InstCount[1] - InstCount[0]; }
77   int64_t getStackDiff() const { return StackSize[1] - StackSize[0]; }
78 
79   // Accessors for the remarks from the first file.
80   int64_t getInstCountA() const { return InstCount[0]; }
81   int64_t getStackSizeA() const { return StackSize[0]; }
82 
83   // Accessors for the remarks from the second file.
84   int64_t getInstCountB() const { return InstCount[1]; }
85   int64_t getStackSizeB() const { return StackSize[1]; }
86 
87   /// \returns which files this function was present in.
88   FilesPresent getFilesPresent() const {
89     if (getInstCountA() == 0)
90       return B;
91     if (getInstCountB() == 0)
92       return A;
93     return BOTH;
94   }
95 
96   FunctionDiff(StringRef FuncName, const InstCountAndStackSize &A,
97                const InstCountAndStackSize &B)
98       : FuncName(FuncName) {
99     InstCount[0] = A.InstCount;
100     InstCount[1] = B.InstCount;
101     StackSize[0] = A.StackSize;
102     StackSize[1] = B.StackSize;
103   }
104 };
105 
106 /// Organizes the diffs into 3 categories:
107 /// - Functions which only appeared in the first file
108 /// - Functions which only appeared in the second file
109 /// - Functions which appeared in both files
110 struct DiffsCategorizedByFilesPresent {
111   /// Diffs for functions which only appeared in the first file.
112   SmallVector<FunctionDiff> OnlyInA;
113 
114   /// Diffs for functions which only appeared in the second file.
115   SmallVector<FunctionDiff> OnlyInB;
116 
117   /// Diffs for functions which appeared in both files.
118   SmallVector<FunctionDiff> InBoth;
119 
120   /// Add a diff to the appropriate list.
121   void addDiff(FunctionDiff &FD) {
122     switch (FD.getFilesPresent()) {
123     case A:
124       OnlyInA.push_back(FD);
125       break;
126     case B:
127       OnlyInB.push_back(FD);
128       break;
129     case BOTH:
130       InBoth.push_back(FD);
131       break;
132     }
133   }
134 };
135 
136 static void printFunctionDiff(const FunctionDiff &FD, llvm::raw_ostream &OS) {
137   // Describe which files the function had remarks in.
138   FilesPresent FP = FD.getFilesPresent();
139   const std::string &FuncName = FD.FuncName;
140   const int64_t InstDiff = FD.getInstDiff();
141   assert(InstDiff && "Shouldn't get functions with no size change?");
142   const int64_t StackDiff = FD.getStackDiff();
143   // Output an indicator denoting which files the function was present in.
144   switch (FP) {
145   case FilesPresent::A:
146     OS << "-- ";
147     break;
148   case FilesPresent::B:
149     OS << "++ ";
150     break;
151   case FilesPresent::BOTH:
152     OS << "== ";
153     break;
154   }
155   // Output an indicator denoting if a function changed in size.
156   if (InstDiff > 0)
157     OS << "> ";
158   else
159     OS << "< ";
160   OS << FuncName << ", ";
161   OS << InstDiff << " instrs, ";
162   OS << StackDiff << " stack B";
163   OS << "\n";
164 }
165 
166 /// Print an item in the summary section.
167 ///
168 /// \p TotalA - Total count of the metric in file A.
169 /// \p TotalB - Total count of the metric in file B.
170 /// \p Metric - Name of the metric we want to print (e.g. instruction
171 /// count).
172 /// \p OS - The output stream.
173 static void printSummaryItem(int64_t TotalA, int64_t TotalB, StringRef Metric,
174                              llvm::raw_ostream &OS) {
175   OS << "  " << Metric << ": ";
176   int64_t TotalDiff = TotalB - TotalA;
177   if (TotalDiff == 0) {
178     OS << "None\n";
179     return;
180   }
181   OS << TotalDiff << " (" << formatv("{0:p}", TotalDiff / (double)TotalA)
182      << ")\n";
183 }
184 
185 /// Print all contents of \p Diff and a high-level summary of the differences.
186 static void printDiffsCategorizedByFilesPresent(
187     DiffsCategorizedByFilesPresent &DiffsByFilesPresent,
188     llvm::raw_ostream &OS) {
189   int64_t InstrsA = 0;
190   int64_t InstrsB = 0;
191   int64_t StackA = 0;
192   int64_t StackB = 0;
193   // Helper lambda to sort + print a list of diffs.
194   auto PrintDiffList = [&](SmallVector<FunctionDiff> &FunctionDiffList) {
195     if (FunctionDiffList.empty())
196       return;
197     stable_sort(FunctionDiffList,
198                 [](const FunctionDiff &LHS, const FunctionDiff &RHS) {
199                   return LHS.getInstDiff() < RHS.getInstDiff();
200                 });
201     for (const auto &FuncDiff : FunctionDiffList) {
202       // If there is a difference in instruction count, then print out info for
203       // the function.
204       if (FuncDiff.getInstDiff())
205         printFunctionDiff(FuncDiff, OS);
206       InstrsA += FuncDiff.getInstCountA();
207       InstrsB += FuncDiff.getInstCountB();
208       StackA += FuncDiff.getStackSizeA();
209       StackB += FuncDiff.getStackSizeB();
210     }
211   };
212   PrintDiffList(DiffsByFilesPresent.OnlyInA);
213   PrintDiffList(DiffsByFilesPresent.OnlyInB);
214   PrintDiffList(DiffsByFilesPresent.InBoth);
215   OS << "\n### Summary ###\n";
216   OS << "Total change: \n";
217   printSummaryItem(InstrsA, InstrsB, "instruction count", OS);
218   printSummaryItem(StackA, StackB, "stack byte usage", OS);
219 }
220 
221 /// Collects an expected integer value from a given argument index in a remark.
222 ///
223 /// \p Remark - The remark.
224 /// \p ArgIdx - The index where the integer value should be found.
225 /// \p ExpectedKeyName - The expected key name for the index
226 /// (e.g. "InstructionCount")
227 ///
228 /// \returns the integer value at the index if it exists, and the key-value pair
229 /// is what is expected. Otherwise, returns an Error.
230 static Expected<int64_t> getIntValFromKey(const remarks::Remark &Remark,
231                                           unsigned ArgIdx,
232                                           StringRef ExpectedKeyName) {
233   auto KeyName = Remark.Args[ArgIdx].Key;
234   if (KeyName != ExpectedKeyName)
235     return createStringError(
236         inconvertibleErrorCode(),
237         Twine("Unexpected key at argument index " + std::to_string(ArgIdx) +
238               ": Expected '" + ExpectedKeyName + "', got '" + KeyName + "'"));
239   long long Val;
240   auto ValStr = Remark.Args[ArgIdx].Val;
241   if (getAsSignedInteger(ValStr, 0, Val))
242     return createStringError(
243         inconvertibleErrorCode(),
244         Twine("Could not convert string to signed integer: " + ValStr));
245   return static_cast<int64_t>(Val);
246 }
247 
248 /// Collects relevant size information from \p Remark if it is an size-related
249 /// remark of some kind (e.g. instruction count). Otherwise records nothing.
250 ///
251 /// \p Remark - The remark.
252 /// \p FuncNameToSizeInfo - Maps function names to relevant size info.
253 /// \p NumInstCountRemarksParsed - Keeps track of the number of instruction
254 /// count remarks parsed. We need at least 1 in both files to produce a diff.
255 static Error processRemark(const remarks::Remark &Remark,
256                            StringMap<InstCountAndStackSize> &FuncNameToSizeInfo,
257                            unsigned &NumInstCountRemarksParsed) {
258   const auto &RemarkName = Remark.RemarkName;
259   const auto &PassName = Remark.PassName;
260   // Collect remarks which contain the number of instructions in a function.
261   if (PassName == "asm-printer" && RemarkName == "InstructionCount") {
262     // Expecting the 0-th argument to have the key "NumInstructions" and an
263     // integer value.
264     auto MaybeInstCount =
265         getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumInstructions");
266     if (!MaybeInstCount)
267       return MaybeInstCount.takeError();
268     FuncNameToSizeInfo[Remark.FunctionName].InstCount = *MaybeInstCount;
269     ++NumInstCountRemarksParsed;
270   }
271   // Collect remarks which contain the stack size of a function.
272   else if (PassName == "prologepilog" && RemarkName == "StackSize") {
273     // Expecting the 0-th argument to have the key "NumStackBytes" and an
274     // integer value.
275     auto MaybeStackSize =
276         getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumStackBytes");
277     if (!MaybeStackSize)
278       return MaybeStackSize.takeError();
279     FuncNameToSizeInfo[Remark.FunctionName].StackSize = *MaybeStackSize;
280   }
281   // Either we collected a remark, or it's something we don't care about. In
282   // both cases, this is a success.
283   return Error::success();
284 }
285 
286 /// Process all of the size-related remarks in a file.
287 ///
288 /// \param[in] InputFileName - Name of file to read from.
289 /// \param[in, out] FuncNameToSizeInfo - Maps function names to relevant
290 /// size info.
291 static Error readFileAndProcessRemarks(
292     StringRef InputFileName,
293     StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) {
294   auto Buf = MemoryBuffer::getFile(InputFileName);
295   if (auto EC = Buf.getError())
296     return createStringError(
297         EC, Twine("Cannot open file '" + InputFileName + "': " + EC.message()));
298   auto MaybeParser = remarks::createRemarkParserFromMeta(
299       ParserFormat == bitstream ? remarks::Format::Bitstream
300                                 : remarks::Format::YAML,
301       (*Buf)->getBuffer());
302   if (!MaybeParser)
303     return MaybeParser.takeError();
304   auto &Parser = **MaybeParser;
305   auto MaybeRemark = Parser.next();
306   unsigned NumInstCountRemarksParsed = 0;
307   for (; MaybeRemark; MaybeRemark = Parser.next()) {
308     if (auto E = processRemark(**MaybeRemark, FuncNameToSizeInfo,
309                                NumInstCountRemarksParsed))
310       return E;
311   }
312   auto E = MaybeRemark.takeError();
313   if (!E.isA<remarks::EndOfFileError>())
314     return E;
315   consumeError(std::move(E));
316   // We need at least one instruction count remark in each file to produce a
317   // meaningful diff.
318   if (NumInstCountRemarksParsed == 0)
319     return createStringError(
320         inconvertibleErrorCode(),
321         "File '" + InputFileName +
322             "' did not contain any instruction-count remarks!");
323   return Error::success();
324 }
325 
326 /// Wrapper function for readFileAndProcessRemarks which handles errors.
327 ///
328 /// \param[in] InputFileName - Name of file to read from.
329 /// \param[out] FuncNameToSizeInfo - Populated with information from size
330 /// remarks in the input file.
331 ///
332 /// \returns true if readFileAndProcessRemarks returned no errors. False
333 /// otherwise.
334 static bool tryReadFileAndProcessRemarks(
335     StringRef InputFileName,
336     StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) {
337   if (Error E = readFileAndProcessRemarks(InputFileName, FuncNameToSizeInfo)) {
338     handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) {
339       PE.log(WithColor::error());
340       errs() << '\n';
341     });
342     return false;
343   }
344   return true;
345 }
346 
347 /// Populates \p FuncDiffs with the difference between \p
348 /// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB.
349 ///
350 /// \param[in] FuncNameToSizeInfoA - Size info collected from the first
351 /// remarks file.
352 /// \param[in] FuncNameToSizeInfoB - Size info collected from
353 /// the second remarks file.
354 /// \param[out] D - Filled with the diff between \p FuncNameToSizeInfoA and
355 /// \p FuncNameToSizeInfoB.
356 static void
357 computeDiff(const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoA,
358             const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoB,
359             DiffsCategorizedByFilesPresent &DiffsByFilesPresent) {
360   SmallSet<std::string, 10> FuncNames;
361   for (const auto &FuncName : FuncNameToSizeInfoA.keys())
362     FuncNames.insert(FuncName.str());
363   for (const auto &FuncName : FuncNameToSizeInfoB.keys())
364     FuncNames.insert(FuncName.str());
365   for (const std::string &FuncName : FuncNames) {
366     const auto &SizeInfoA = FuncNameToSizeInfoA.lookup(FuncName);
367     const auto &SizeInfoB = FuncNameToSizeInfoB.lookup(FuncName);
368     FunctionDiff FuncDiff(FuncName, SizeInfoA, SizeInfoB);
369     DiffsByFilesPresent.addDiff(FuncDiff);
370   }
371 }
372 
373 /// Attempt to get the output stream for writing the diff.
374 static ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() {
375   if (OutputFilename == "")
376     OutputFilename = "-";
377   std::error_code EC;
378   auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC,
379                                               sys::fs::OF_TextWithCRLF);
380   if (!EC)
381     return std::move(Out);
382   return EC;
383 }
384 
385 /// Output all diffs in \p DiffsByFilesPresent.
386 /// \returns Error::success() on success, and an Error otherwise.
387 static Error
388 outputAllDiffs(DiffsCategorizedByFilesPresent &DiffsByFilesPresent) {
389   auto MaybeOF = getOutputStream();
390   if (std::error_code EC = MaybeOF.getError())
391     return errorCodeToError(EC);
392   std::unique_ptr<ToolOutputFile> OF = std::move(*MaybeOF);
393   printDiffsCategorizedByFilesPresent(DiffsByFilesPresent, OF->os());
394   OF->keep();
395   return Error::success();
396 }
397 
398 /// Boolean wrapper for outputDiff which handles errors.
399 static bool
400 tryOutputAllDiffs(DiffsCategorizedByFilesPresent &DiffsByFilesPresent) {
401   if (Error E = outputAllDiffs(DiffsByFilesPresent)) {
402     handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) {
403       PE.log(WithColor::error());
404       errs() << '\n';
405     });
406     return false;
407   }
408   return true;
409 }
410 
411 int main(int argc, const char **argv) {
412   InitLLVM X(argc, argv);
413   cl::HideUnrelatedOptions(SizeDiffCategory);
414   cl::ParseCommandLineOptions(argc, argv,
415                               "Diff instruction count and stack size remarks "
416                               "between two remark files.\n");
417   StringMap<InstCountAndStackSize> FuncNameToSizeInfoA;
418   StringMap<InstCountAndStackSize> FuncNameToSizeInfoB;
419   if (!tryReadFileAndProcessRemarks(InputFileNameA, FuncNameToSizeInfoA) ||
420       !tryReadFileAndProcessRemarks(InputFileNameB, FuncNameToSizeInfoB))
421     return 1;
422   DiffsCategorizedByFilesPresent DiffsByFilesPresent;
423   computeDiff(FuncNameToSizeInfoA, FuncNameToSizeInfoB, DiffsByFilesPresent);
424   if (!tryOutputAllDiffs(DiffsByFilesPresent))
425     return 1;
426 }
427