1 //===-------------- llvm-remark-size-diff/RemarkSizeDiff.cpp --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Diffs instruction count and stack size remarks between two remark files. 11 /// 12 /// This is intended for use by compiler developers who want to see how their 13 /// changes impact program code size. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm-c/Remarks.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallSet.h" 21 #include "llvm/Remarks/Remark.h" 22 #include "llvm/Remarks/RemarkParser.h" 23 #include "llvm/Remarks/RemarkSerializer.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Compiler.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/FormatVariadic.h" 29 #include "llvm/Support/InitLLVM.h" 30 #include "llvm/Support/JSON.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Support/ToolOutputFile.h" 33 #include "llvm/Support/WithColor.h" 34 #include "llvm/Support/raw_ostream.h" 35 36 using namespace llvm; 37 38 enum ParserFormatOptions { yaml, bitstream }; 39 enum ReportStyleOptions { human_output, json_output }; 40 static cl::OptionCategory SizeDiffCategory("llvm-remark-size-diff options"); 41 static cl::opt<std::string> InputFileNameA(cl::Positional, cl::Required, 42 cl::cat(SizeDiffCategory), 43 cl::desc("remarks_a")); 44 static cl::opt<std::string> InputFileNameB(cl::Positional, cl::Required, 45 cl::cat(SizeDiffCategory), 46 cl::desc("remarks_b")); 47 static cl::opt<std::string> OutputFilename("o", cl::init("-"), 48 cl::cat(SizeDiffCategory), 49 cl::desc("Output"), 50 cl::value_desc("file")); 51 static cl::opt<ParserFormatOptions> 52 ParserFormat("parser", cl::cat(SizeDiffCategory), cl::init(bitstream), 53 cl::desc("Set the remark parser format:"), 54 cl::values(clEnumVal(yaml, "YAML format"), 55 clEnumVal(bitstream, "Bitstream format"))); 56 static cl::opt<ReportStyleOptions> ReportStyle( 57 "report_style", cl::cat(SizeDiffCategory), 58 cl::init(ReportStyleOptions::human_output), 59 cl::desc("Choose the report output format:"), 60 cl::values(clEnumValN(human_output, "human", "Human-readable format"), 61 clEnumValN(json_output, "json", "JSON format"))); 62 static cl::opt<bool> PrettyPrint("pretty", cl::cat(SizeDiffCategory), 63 cl::init(false), 64 cl::desc("Pretty-print JSON")); 65 66 /// Contains information from size remarks. 67 // This is a little nicer to read than a std::pair. 68 struct InstCountAndStackSize { 69 int64_t InstCount = 0; 70 int64_t StackSize = 0; 71 }; 72 73 /// Represents which files a function appeared in. 74 enum FilesPresent { A, B, BOTH }; 75 76 /// Contains the data from the remarks in file A and file B for some function. 77 /// E.g. instruction count, stack size... 78 struct FunctionDiff { 79 /// Function name from the remark. 80 std::string FuncName; 81 // Idx 0 = A, Idx 1 = B. 82 int64_t InstCount[2] = {0, 0}; 83 int64_t StackSize[2] = {0, 0}; 84 85 // Calculate diffs between the first and second files. 86 int64_t getInstDiff() const { return InstCount[1] - InstCount[0]; } 87 int64_t getStackDiff() const { return StackSize[1] - StackSize[0]; } 88 89 // Accessors for the remarks from the first file. 90 int64_t getInstCountA() const { return InstCount[0]; } 91 int64_t getStackSizeA() const { return StackSize[0]; } 92 93 // Accessors for the remarks from the second file. 94 int64_t getInstCountB() const { return InstCount[1]; } 95 int64_t getStackSizeB() const { return StackSize[1]; } 96 97 /// \returns which files this function was present in. 98 FilesPresent getFilesPresent() const { 99 if (getInstCountA() == 0) 100 return B; 101 if (getInstCountB() == 0) 102 return A; 103 return BOTH; 104 } 105 106 FunctionDiff(StringRef FuncName, const InstCountAndStackSize &A, 107 const InstCountAndStackSize &B) 108 : FuncName(FuncName) { 109 InstCount[0] = A.InstCount; 110 InstCount[1] = B.InstCount; 111 StackSize[0] = A.StackSize; 112 StackSize[1] = B.StackSize; 113 } 114 }; 115 116 /// Organizes the diffs into 3 categories: 117 /// - Functions which only appeared in the first file 118 /// - Functions which only appeared in the second file 119 /// - Functions which appeared in both files 120 struct DiffsCategorizedByFilesPresent { 121 /// Diffs for functions which only appeared in the first file. 122 SmallVector<FunctionDiff> OnlyInA; 123 124 /// Diffs for functions which only appeared in the second file. 125 SmallVector<FunctionDiff> OnlyInB; 126 127 /// Diffs for functions which appeared in both files. 128 SmallVector<FunctionDiff> InBoth; 129 130 /// Add a diff to the appropriate list. 131 void addDiff(FunctionDiff &FD) { 132 switch (FD.getFilesPresent()) { 133 case A: 134 OnlyInA.push_back(FD); 135 break; 136 case B: 137 OnlyInB.push_back(FD); 138 break; 139 case BOTH: 140 InBoth.push_back(FD); 141 break; 142 } 143 } 144 }; 145 146 static void printFunctionDiff(const FunctionDiff &FD, llvm::raw_ostream &OS) { 147 // Describe which files the function had remarks in. 148 FilesPresent FP = FD.getFilesPresent(); 149 const std::string &FuncName = FD.FuncName; 150 const int64_t InstDiff = FD.getInstDiff(); 151 assert(InstDiff && "Shouldn't get functions with no size change?"); 152 const int64_t StackDiff = FD.getStackDiff(); 153 // Output an indicator denoting which files the function was present in. 154 switch (FP) { 155 case FilesPresent::A: 156 OS << "-- "; 157 break; 158 case FilesPresent::B: 159 OS << "++ "; 160 break; 161 case FilesPresent::BOTH: 162 OS << "== "; 163 break; 164 } 165 // Output an indicator denoting if a function changed in size. 166 if (InstDiff > 0) 167 OS << "> "; 168 else 169 OS << "< "; 170 OS << FuncName << ", "; 171 OS << InstDiff << " instrs, "; 172 OS << StackDiff << " stack B"; 173 OS << "\n"; 174 } 175 176 /// Print an item in the summary section. 177 /// 178 /// \p TotalA - Total count of the metric in file A. 179 /// \p TotalB - Total count of the metric in file B. 180 /// \p Metric - Name of the metric we want to print (e.g. instruction 181 /// count). 182 /// \p OS - The output stream. 183 static void printSummaryItem(int64_t TotalA, int64_t TotalB, StringRef Metric, 184 llvm::raw_ostream &OS) { 185 OS << " " << Metric << ": "; 186 int64_t TotalDiff = TotalB - TotalA; 187 if (TotalDiff == 0) { 188 OS << "None\n"; 189 return; 190 } 191 OS << TotalDiff << " (" << formatv("{0:p}", TotalDiff / (double)TotalA) 192 << ")\n"; 193 } 194 195 /// Print all contents of \p Diff and a high-level summary of the differences. 196 static void printDiffsCategorizedByFilesPresent( 197 DiffsCategorizedByFilesPresent &DiffsByFilesPresent, 198 llvm::raw_ostream &OS) { 199 int64_t InstrsA = 0; 200 int64_t InstrsB = 0; 201 int64_t StackA = 0; 202 int64_t StackB = 0; 203 // Helper lambda to sort + print a list of diffs. 204 auto PrintDiffList = [&](SmallVector<FunctionDiff> &FunctionDiffList) { 205 if (FunctionDiffList.empty()) 206 return; 207 stable_sort(FunctionDiffList, 208 [](const FunctionDiff &LHS, const FunctionDiff &RHS) { 209 return LHS.getInstDiff() < RHS.getInstDiff(); 210 }); 211 for (const auto &FuncDiff : FunctionDiffList) { 212 // If there is a difference in instruction count, then print out info for 213 // the function. 214 if (FuncDiff.getInstDiff()) 215 printFunctionDiff(FuncDiff, OS); 216 InstrsA += FuncDiff.getInstCountA(); 217 InstrsB += FuncDiff.getInstCountB(); 218 StackA += FuncDiff.getStackSizeA(); 219 StackB += FuncDiff.getStackSizeB(); 220 } 221 }; 222 PrintDiffList(DiffsByFilesPresent.OnlyInA); 223 PrintDiffList(DiffsByFilesPresent.OnlyInB); 224 PrintDiffList(DiffsByFilesPresent.InBoth); 225 OS << "\n### Summary ###\n"; 226 OS << "Total change: \n"; 227 printSummaryItem(InstrsA, InstrsB, "instruction count", OS); 228 printSummaryItem(StackA, StackB, "stack byte usage", OS); 229 } 230 231 /// Collects an expected integer value from a given argument index in a remark. 232 /// 233 /// \p Remark - The remark. 234 /// \p ArgIdx - The index where the integer value should be found. 235 /// \p ExpectedKeyName - The expected key name for the index 236 /// (e.g. "InstructionCount") 237 /// 238 /// \returns the integer value at the index if it exists, and the key-value pair 239 /// is what is expected. Otherwise, returns an Error. 240 static Expected<int64_t> getIntValFromKey(const remarks::Remark &Remark, 241 unsigned ArgIdx, 242 StringRef ExpectedKeyName) { 243 auto KeyName = Remark.Args[ArgIdx].Key; 244 if (KeyName != ExpectedKeyName) 245 return createStringError( 246 inconvertibleErrorCode(), 247 Twine("Unexpected key at argument index " + std::to_string(ArgIdx) + 248 ": Expected '" + ExpectedKeyName + "', got '" + KeyName + "'")); 249 long long Val; 250 auto ValStr = Remark.Args[ArgIdx].Val; 251 if (getAsSignedInteger(ValStr, 0, Val)) 252 return createStringError( 253 inconvertibleErrorCode(), 254 Twine("Could not convert string to signed integer: " + ValStr)); 255 return static_cast<int64_t>(Val); 256 } 257 258 /// Collects relevant size information from \p Remark if it is an size-related 259 /// remark of some kind (e.g. instruction count). Otherwise records nothing. 260 /// 261 /// \p Remark - The remark. 262 /// \p FuncNameToSizeInfo - Maps function names to relevant size info. 263 /// \p NumInstCountRemarksParsed - Keeps track of the number of instruction 264 /// count remarks parsed. We need at least 1 in both files to produce a diff. 265 static Error processRemark(const remarks::Remark &Remark, 266 StringMap<InstCountAndStackSize> &FuncNameToSizeInfo, 267 unsigned &NumInstCountRemarksParsed) { 268 const auto &RemarkName = Remark.RemarkName; 269 const auto &PassName = Remark.PassName; 270 // Collect remarks which contain the number of instructions in a function. 271 if (PassName == "asm-printer" && RemarkName == "InstructionCount") { 272 // Expecting the 0-th argument to have the key "NumInstructions" and an 273 // integer value. 274 auto MaybeInstCount = 275 getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumInstructions"); 276 if (!MaybeInstCount) 277 return MaybeInstCount.takeError(); 278 FuncNameToSizeInfo[Remark.FunctionName].InstCount = *MaybeInstCount; 279 ++NumInstCountRemarksParsed; 280 } 281 // Collect remarks which contain the stack size of a function. 282 else if (PassName == "prologepilog" && RemarkName == "StackSize") { 283 // Expecting the 0-th argument to have the key "NumStackBytes" and an 284 // integer value. 285 auto MaybeStackSize = 286 getIntValFromKey(Remark, /*ArgIdx = */ 0, "NumStackBytes"); 287 if (!MaybeStackSize) 288 return MaybeStackSize.takeError(); 289 FuncNameToSizeInfo[Remark.FunctionName].StackSize = *MaybeStackSize; 290 } 291 // Either we collected a remark, or it's something we don't care about. In 292 // both cases, this is a success. 293 return Error::success(); 294 } 295 296 /// Process all of the size-related remarks in a file. 297 /// 298 /// \param[in] InputFileName - Name of file to read from. 299 /// \param[in, out] FuncNameToSizeInfo - Maps function names to relevant 300 /// size info. 301 static Error readFileAndProcessRemarks( 302 StringRef InputFileName, 303 StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) { 304 auto Buf = MemoryBuffer::getFile(InputFileName); 305 if (auto EC = Buf.getError()) 306 return createStringError( 307 EC, Twine("Cannot open file '" + InputFileName + "': " + EC.message())); 308 auto MaybeParser = remarks::createRemarkParserFromMeta( 309 ParserFormat == bitstream ? remarks::Format::Bitstream 310 : remarks::Format::YAML, 311 (*Buf)->getBuffer()); 312 if (!MaybeParser) 313 return MaybeParser.takeError(); 314 auto &Parser = **MaybeParser; 315 auto MaybeRemark = Parser.next(); 316 unsigned NumInstCountRemarksParsed = 0; 317 for (; MaybeRemark; MaybeRemark = Parser.next()) { 318 if (auto E = processRemark(**MaybeRemark, FuncNameToSizeInfo, 319 NumInstCountRemarksParsed)) 320 return E; 321 } 322 auto E = MaybeRemark.takeError(); 323 if (!E.isA<remarks::EndOfFileError>()) 324 return E; 325 consumeError(std::move(E)); 326 // We need at least one instruction count remark in each file to produce a 327 // meaningful diff. 328 if (NumInstCountRemarksParsed == 0) 329 return createStringError( 330 inconvertibleErrorCode(), 331 "File '" + InputFileName + 332 "' did not contain any instruction-count remarks!"); 333 return Error::success(); 334 } 335 336 /// Wrapper function for readFileAndProcessRemarks which handles errors. 337 /// 338 /// \param[in] InputFileName - Name of file to read from. 339 /// \param[out] FuncNameToSizeInfo - Populated with information from size 340 /// remarks in the input file. 341 /// 342 /// \returns true if readFileAndProcessRemarks returned no errors. False 343 /// otherwise. 344 static bool tryReadFileAndProcessRemarks( 345 StringRef InputFileName, 346 StringMap<InstCountAndStackSize> &FuncNameToSizeInfo) { 347 if (Error E = readFileAndProcessRemarks(InputFileName, FuncNameToSizeInfo)) { 348 handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) { 349 PE.log(WithColor::error()); 350 errs() << '\n'; 351 }); 352 return false; 353 } 354 return true; 355 } 356 357 /// Populates \p FuncDiffs with the difference between \p 358 /// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB. 359 /// 360 /// \param[in] FuncNameToSizeInfoA - Size info collected from the first 361 /// remarks file. 362 /// \param[in] FuncNameToSizeInfoB - Size info collected from 363 /// the second remarks file. 364 /// \param[out] DiffsByFilesPresent - Filled with the diff between \p 365 /// FuncNameToSizeInfoA and \p FuncNameToSizeInfoB. 366 static void 367 computeDiff(const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoA, 368 const StringMap<InstCountAndStackSize> &FuncNameToSizeInfoB, 369 DiffsCategorizedByFilesPresent &DiffsByFilesPresent) { 370 SmallSet<std::string, 10> FuncNames; 371 for (const auto &FuncName : FuncNameToSizeInfoA.keys()) 372 FuncNames.insert(FuncName.str()); 373 for (const auto &FuncName : FuncNameToSizeInfoB.keys()) 374 FuncNames.insert(FuncName.str()); 375 for (const std::string &FuncName : FuncNames) { 376 const auto &SizeInfoA = FuncNameToSizeInfoA.lookup(FuncName); 377 const auto &SizeInfoB = FuncNameToSizeInfoB.lookup(FuncName); 378 FunctionDiff FuncDiff(FuncName, SizeInfoA, SizeInfoB); 379 DiffsByFilesPresent.addDiff(FuncDiff); 380 } 381 } 382 383 /// Attempt to get the output stream for writing the diff. 384 static ErrorOr<std::unique_ptr<ToolOutputFile>> getOutputStream() { 385 if (OutputFilename == "") 386 OutputFilename = "-"; 387 std::error_code EC; 388 auto Out = std::make_unique<ToolOutputFile>(OutputFilename, EC, 389 sys::fs::OF_TextWithCRLF); 390 if (!EC) 391 return std::move(Out); 392 return EC; 393 } 394 395 /// \return a json::Array representing all FunctionDiffs in \p FunctionDiffs. 396 /// \p WhichFiles represents which files the functions in \p FunctionDiffs 397 /// appeared in (A, B, or both). 398 json::Array 399 getFunctionDiffListAsJSON(const SmallVector<FunctionDiff> &FunctionDiffs, 400 const FilesPresent &WhichFiles) { 401 json::Array FunctionDiffsAsJSON; 402 int64_t InstCountA, InstCountB, StackSizeA, StackSizeB; 403 for (auto &Diff : FunctionDiffs) { 404 InstCountA = InstCountB = StackSizeA = StackSizeB = 0; 405 switch (WhichFiles) { 406 case BOTH: 407 LLVM_FALLTHROUGH; 408 case A: 409 InstCountA = Diff.getInstCountA(); 410 StackSizeA = Diff.getStackSizeA(); 411 if (WhichFiles != BOTH) 412 break; 413 LLVM_FALLTHROUGH; 414 case B: 415 InstCountB = Diff.getInstCountB(); 416 StackSizeB = Diff.getStackSizeB(); 417 break; 418 } 419 // Each metric we care about is represented like: 420 // "Val": [A, B] 421 // This allows any consumer of the JSON to calculate the diff using B - A. 422 // This is somewhat wasteful for OnlyInA and OnlyInB (we only need A or B). 423 // However, this should make writing consuming tools easier, since the tool 424 // writer doesn't need to think about slightly different formats in each 425 // section. 426 json::Object FunctionObject({{"FunctionName", Diff.FuncName}, 427 {"InstCount", {InstCountA, InstCountB}}, 428 {"StackSize", {StackSizeA, StackSizeB}}}); 429 FunctionDiffsAsJSON.push_back(std::move(FunctionObject)); 430 } 431 return FunctionDiffsAsJSON; 432 } 433 434 /// Output all diffs in \p DiffsByFilesPresent as a JSON report. This is 435 /// intended for consumption by external tools. 436 /// 437 /// \p InputFileNameA - File A used to produce the report. 438 /// \p InputFileNameB - File B used ot produce the report. 439 /// \p OS - Output stream. 440 /// 441 /// JSON output includes: 442 /// - \p InputFileNameA and \p InputFileNameB under "Files". 443 /// - Functions present in both files under "InBoth". 444 /// - Functions present only in A in "OnlyInA". 445 /// - Functions present only in B in "OnlyInB". 446 /// - Instruction count and stack size differences for each function. 447 /// 448 /// Differences are represented using [count_a, count_b]. The actual difference 449 /// can be computed via count_b - count_a. 450 static void 451 outputJSONForAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB, 452 const DiffsCategorizedByFilesPresent &DiffsByFilesPresent, 453 llvm::raw_ostream &OS) { 454 json::Object Output; 455 // Include file names in the report. 456 json::Object Files( 457 {{"A", InputFileNameA.str()}, {"B", InputFileNameB.str()}}); 458 Output["Files"] = std::move(Files); 459 Output["OnlyInA"] = getFunctionDiffListAsJSON(DiffsByFilesPresent.OnlyInA, A); 460 Output["OnlyInB"] = getFunctionDiffListAsJSON(DiffsByFilesPresent.OnlyInB, B); 461 Output["InBoth"] = 462 getFunctionDiffListAsJSON(DiffsByFilesPresent.InBoth, BOTH); 463 json::OStream JOS(OS, PrettyPrint ? 2 : 0); 464 JOS.value(std::move(Output)); 465 OS << '\n'; 466 } 467 468 /// Output all diffs in \p DiffsByFilesPresent using the desired output style. 469 /// \returns Error::success() on success, and an Error otherwise. 470 /// \p InputFileNameA - Name of input file A; may be used in the report. 471 /// \p InputFileNameB - Name of input file B; may be used in the report. 472 static Error 473 outputAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB, 474 DiffsCategorizedByFilesPresent &DiffsByFilesPresent) { 475 auto MaybeOF = getOutputStream(); 476 if (std::error_code EC = MaybeOF.getError()) 477 return errorCodeToError(EC); 478 std::unique_ptr<ToolOutputFile> OF = std::move(*MaybeOF); 479 switch (ReportStyle) { 480 case human_output: 481 printDiffsCategorizedByFilesPresent(DiffsByFilesPresent, OF->os()); 482 break; 483 case json_output: 484 outputJSONForAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent, 485 OF->os()); 486 break; 487 } 488 OF->keep(); 489 return Error::success(); 490 } 491 492 /// Boolean wrapper for outputDiff which handles errors. 493 static bool 494 tryOutputAllDiffs(StringRef InputFileNameA, StringRef InputFileNameB, 495 DiffsCategorizedByFilesPresent &DiffsByFilesPresent) { 496 if (Error E = 497 outputAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent)) { 498 handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) { 499 PE.log(WithColor::error()); 500 errs() << '\n'; 501 }); 502 return false; 503 } 504 return true; 505 } 506 507 int main(int argc, const char **argv) { 508 InitLLVM X(argc, argv); 509 cl::HideUnrelatedOptions(SizeDiffCategory); 510 cl::ParseCommandLineOptions(argc, argv, 511 "Diff instruction count and stack size remarks " 512 "between two remark files.\n"); 513 StringMap<InstCountAndStackSize> FuncNameToSizeInfoA; 514 StringMap<InstCountAndStackSize> FuncNameToSizeInfoB; 515 if (!tryReadFileAndProcessRemarks(InputFileNameA, FuncNameToSizeInfoA) || 516 !tryReadFileAndProcessRemarks(InputFileNameB, FuncNameToSizeInfoB)) 517 return 1; 518 DiffsCategorizedByFilesPresent DiffsByFilesPresent; 519 computeDiff(FuncNameToSizeInfoA, FuncNameToSizeInfoB, DiffsByFilesPresent); 520 if (!tryOutputAllDiffs(InputFileNameA, InputFileNameB, DiffsByFilesPresent)) 521 return 1; 522 } 523