10b57cec5SDimitry Andric //===- CoverageMappingWriter.cpp - Code coverage mapping writer -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains support for writing coverage mapping data for
100b57cec5SDimitry Andric // instrumentation based coverage.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
145ffd83dbSDimitry Andric #include "llvm/ProfileData/InstrProf.h"
150b57cec5SDimitry Andric #include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
160b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
185ffd83dbSDimitry Andric #include "llvm/Support/Compression.h"
190b57cec5SDimitry Andric #include "llvm/Support/LEB128.h"
200b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
210b57cec5SDimitry Andric #include <algorithm>
220b57cec5SDimitry Andric #include <cassert>
230b57cec5SDimitry Andric #include <limits>
240b57cec5SDimitry Andric #include <vector>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric using namespace llvm;
270b57cec5SDimitry Andric using namespace coverage;
280b57cec5SDimitry Andric 
CoverageFilenamesSectionWriter(ArrayRef<std::string> Filenames)298bcb0991SDimitry Andric CoverageFilenamesSectionWriter::CoverageFilenamesSectionWriter(
30*5f7ddb14SDimitry Andric     ArrayRef<std::string> Filenames)
318bcb0991SDimitry Andric     : Filenames(Filenames) {
328bcb0991SDimitry Andric #ifndef NDEBUG
338bcb0991SDimitry Andric   StringSet<> NameSet;
348bcb0991SDimitry Andric   for (StringRef Name : Filenames)
358bcb0991SDimitry Andric     assert(NameSet.insert(Name).second && "Duplicate filename");
368bcb0991SDimitry Andric #endif
378bcb0991SDimitry Andric }
388bcb0991SDimitry Andric 
write(raw_ostream & OS,bool Compress)395ffd83dbSDimitry Andric void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) {
405ffd83dbSDimitry Andric   std::string FilenamesStr;
415ffd83dbSDimitry Andric   {
425ffd83dbSDimitry Andric     raw_string_ostream FilenamesOS{FilenamesStr};
430b57cec5SDimitry Andric     for (const auto &Filename : Filenames) {
445ffd83dbSDimitry Andric       encodeULEB128(Filename.size(), FilenamesOS);
455ffd83dbSDimitry Andric       FilenamesOS << Filename;
460b57cec5SDimitry Andric     }
470b57cec5SDimitry Andric   }
480b57cec5SDimitry Andric 
495ffd83dbSDimitry Andric   SmallString<128> CompressedStr;
505ffd83dbSDimitry Andric   bool doCompression =
515ffd83dbSDimitry Andric       Compress && zlib::isAvailable() && DoInstrProfNameCompression;
525ffd83dbSDimitry Andric   if (doCompression) {
535ffd83dbSDimitry Andric     auto E =
545ffd83dbSDimitry Andric         zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression);
555ffd83dbSDimitry Andric     if (E)
565ffd83dbSDimitry Andric       report_bad_alloc_error("Failed to zlib compress coverage data");
575ffd83dbSDimitry Andric   }
585ffd83dbSDimitry Andric 
595ffd83dbSDimitry Andric   // ::= <num-filenames>
605ffd83dbSDimitry Andric   //     <uncompressed-len>
615ffd83dbSDimitry Andric   //     <compressed-len-or-zero>
625ffd83dbSDimitry Andric   //     (<compressed-filenames> | <uncompressed-filenames>)
635ffd83dbSDimitry Andric   encodeULEB128(Filenames.size(), OS);
645ffd83dbSDimitry Andric   encodeULEB128(FilenamesStr.size(), OS);
655ffd83dbSDimitry Andric   encodeULEB128(doCompression ? CompressedStr.size() : 0U, OS);
66*5f7ddb14SDimitry Andric   OS << (doCompression ? CompressedStr.str() : StringRef(FilenamesStr));
675ffd83dbSDimitry Andric }
685ffd83dbSDimitry Andric 
690b57cec5SDimitry Andric namespace {
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric /// Gather only the expressions that are used by the mapping
720b57cec5SDimitry Andric /// regions in this function.
730b57cec5SDimitry Andric class CounterExpressionsMinimizer {
740b57cec5SDimitry Andric   ArrayRef<CounterExpression> Expressions;
750b57cec5SDimitry Andric   SmallVector<CounterExpression, 16> UsedExpressions;
760b57cec5SDimitry Andric   std::vector<unsigned> AdjustedExpressionIDs;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric public:
CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,ArrayRef<CounterMappingRegion> MappingRegions)790b57cec5SDimitry Andric   CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
800b57cec5SDimitry Andric                               ArrayRef<CounterMappingRegion> MappingRegions)
810b57cec5SDimitry Andric       : Expressions(Expressions) {
820b57cec5SDimitry Andric     AdjustedExpressionIDs.resize(Expressions.size(), 0);
83af732203SDimitry Andric     for (const auto &I : MappingRegions) {
840b57cec5SDimitry Andric       mark(I.Count);
85af732203SDimitry Andric       mark(I.FalseCount);
86af732203SDimitry Andric     }
87af732203SDimitry Andric     for (const auto &I : MappingRegions) {
880b57cec5SDimitry Andric       gatherUsed(I.Count);
89af732203SDimitry Andric       gatherUsed(I.FalseCount);
90af732203SDimitry Andric     }
910b57cec5SDimitry Andric   }
920b57cec5SDimitry Andric 
mark(Counter C)930b57cec5SDimitry Andric   void mark(Counter C) {
940b57cec5SDimitry Andric     if (!C.isExpression())
950b57cec5SDimitry Andric       return;
960b57cec5SDimitry Andric     unsigned ID = C.getExpressionID();
970b57cec5SDimitry Andric     AdjustedExpressionIDs[ID] = 1;
980b57cec5SDimitry Andric     mark(Expressions[ID].LHS);
990b57cec5SDimitry Andric     mark(Expressions[ID].RHS);
1000b57cec5SDimitry Andric   }
1010b57cec5SDimitry Andric 
gatherUsed(Counter C)1020b57cec5SDimitry Andric   void gatherUsed(Counter C) {
1030b57cec5SDimitry Andric     if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()])
1040b57cec5SDimitry Andric       return;
1050b57cec5SDimitry Andric     AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size();
1060b57cec5SDimitry Andric     const auto &E = Expressions[C.getExpressionID()];
1070b57cec5SDimitry Andric     UsedExpressions.push_back(E);
1080b57cec5SDimitry Andric     gatherUsed(E.LHS);
1090b57cec5SDimitry Andric     gatherUsed(E.RHS);
1100b57cec5SDimitry Andric   }
1110b57cec5SDimitry Andric 
getExpressions() const1120b57cec5SDimitry Andric   ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; }
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric   /// Adjust the given counter to correctly transition from the old
1150b57cec5SDimitry Andric   /// expression ids to the new expression ids.
adjust(Counter C) const1160b57cec5SDimitry Andric   Counter adjust(Counter C) const {
1170b57cec5SDimitry Andric     if (C.isExpression())
1180b57cec5SDimitry Andric       C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]);
1190b57cec5SDimitry Andric     return C;
1200b57cec5SDimitry Andric   }
1210b57cec5SDimitry Andric };
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric } // end anonymous namespace
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric /// Encode the counter.
1260b57cec5SDimitry Andric ///
1270b57cec5SDimitry Andric /// The encoding uses the following format:
1280b57cec5SDimitry Andric /// Low 2 bits - Tag:
1290b57cec5SDimitry Andric ///   Counter::Zero(0) - A Counter with kind Counter::Zero
1300b57cec5SDimitry Andric ///   Counter::CounterValueReference(1) - A counter with kind
1310b57cec5SDimitry Andric ///     Counter::CounterValueReference
1320b57cec5SDimitry Andric ///   Counter::Expression(2) + CounterExpression::Subtract(0) -
1330b57cec5SDimitry Andric ///     A counter with kind Counter::Expression and an expression
1340b57cec5SDimitry Andric ///     with kind CounterExpression::Subtract
1350b57cec5SDimitry Andric ///   Counter::Expression(2) + CounterExpression::Add(1) -
1360b57cec5SDimitry Andric ///     A counter with kind Counter::Expression and an expression
1370b57cec5SDimitry Andric ///     with kind CounterExpression::Add
1380b57cec5SDimitry Andric /// Remaining bits - Counter/Expression ID.
encodeCounter(ArrayRef<CounterExpression> Expressions,Counter C)1390b57cec5SDimitry Andric static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions,
1400b57cec5SDimitry Andric                               Counter C) {
1410b57cec5SDimitry Andric   unsigned Tag = unsigned(C.getKind());
1420b57cec5SDimitry Andric   if (C.isExpression())
1430b57cec5SDimitry Andric     Tag += Expressions[C.getExpressionID()].Kind;
1440b57cec5SDimitry Andric   unsigned ID = C.getCounterID();
1450b57cec5SDimitry Andric   assert(ID <=
1460b57cec5SDimitry Andric          (std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits));
1470b57cec5SDimitry Andric   return Tag | (ID << Counter::EncodingTagBits);
1480b57cec5SDimitry Andric }
1490b57cec5SDimitry Andric 
writeCounter(ArrayRef<CounterExpression> Expressions,Counter C,raw_ostream & OS)1500b57cec5SDimitry Andric static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C,
1510b57cec5SDimitry Andric                          raw_ostream &OS) {
1520b57cec5SDimitry Andric   encodeULEB128(encodeCounter(Expressions, C), OS);
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
write(raw_ostream & OS)1550b57cec5SDimitry Andric void CoverageMappingWriter::write(raw_ostream &OS) {
1560b57cec5SDimitry Andric   // Check that we don't have any bogus regions.
1570b57cec5SDimitry Andric   assert(all_of(MappingRegions,
1580b57cec5SDimitry Andric                 [](const CounterMappingRegion &CMR) {
1590b57cec5SDimitry Andric                   return CMR.startLoc() <= CMR.endLoc();
1600b57cec5SDimitry Andric                 }) &&
1610b57cec5SDimitry Andric          "Source region does not begin before it ends");
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   // Sort the regions in an ascending order by the file id and the starting
1640b57cec5SDimitry Andric   // location. Sort by region kinds to ensure stable order for tests.
1650b57cec5SDimitry Andric   llvm::stable_sort(MappingRegions, [](const CounterMappingRegion &LHS,
1660b57cec5SDimitry Andric                                        const CounterMappingRegion &RHS) {
1670b57cec5SDimitry Andric     if (LHS.FileID != RHS.FileID)
1680b57cec5SDimitry Andric       return LHS.FileID < RHS.FileID;
1690b57cec5SDimitry Andric     if (LHS.startLoc() != RHS.startLoc())
1700b57cec5SDimitry Andric       return LHS.startLoc() < RHS.startLoc();
1710b57cec5SDimitry Andric     return LHS.Kind < RHS.Kind;
1720b57cec5SDimitry Andric   });
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   // Write out the fileid -> filename mapping.
1750b57cec5SDimitry Andric   encodeULEB128(VirtualFileMapping.size(), OS);
1760b57cec5SDimitry Andric   for (const auto &FileID : VirtualFileMapping)
1770b57cec5SDimitry Andric     encodeULEB128(FileID, OS);
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric   // Write out the expressions.
1800b57cec5SDimitry Andric   CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions);
1810b57cec5SDimitry Andric   auto MinExpressions = Minimizer.getExpressions();
1820b57cec5SDimitry Andric   encodeULEB128(MinExpressions.size(), OS);
1830b57cec5SDimitry Andric   for (const auto &E : MinExpressions) {
1840b57cec5SDimitry Andric     writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS);
1850b57cec5SDimitry Andric     writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS);
1860b57cec5SDimitry Andric   }
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric   // Write out the mapping regions.
1890b57cec5SDimitry Andric   // Split the regions into subarrays where each region in a
1900b57cec5SDimitry Andric   // subarray has a fileID which is the index of that subarray.
1910b57cec5SDimitry Andric   unsigned PrevLineStart = 0;
1920b57cec5SDimitry Andric   unsigned CurrentFileID = ~0U;
1930b57cec5SDimitry Andric   for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) {
1940b57cec5SDimitry Andric     if (I->FileID != CurrentFileID) {
1950b57cec5SDimitry Andric       // Ensure that all file ids have at least one mapping region.
1960b57cec5SDimitry Andric       assert(I->FileID == (CurrentFileID + 1));
1970b57cec5SDimitry Andric       // Find the number of regions with this file id.
1980b57cec5SDimitry Andric       unsigned RegionCount = 1;
1990b57cec5SDimitry Andric       for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J)
2000b57cec5SDimitry Andric         ++RegionCount;
2010b57cec5SDimitry Andric       // Start a new region sub-array.
2020b57cec5SDimitry Andric       encodeULEB128(RegionCount, OS);
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric       CurrentFileID = I->FileID;
2050b57cec5SDimitry Andric       PrevLineStart = 0;
2060b57cec5SDimitry Andric     }
2070b57cec5SDimitry Andric     Counter Count = Minimizer.adjust(I->Count);
208af732203SDimitry Andric     Counter FalseCount = Minimizer.adjust(I->FalseCount);
2090b57cec5SDimitry Andric     switch (I->Kind) {
2100b57cec5SDimitry Andric     case CounterMappingRegion::CodeRegion:
2110b57cec5SDimitry Andric     case CounterMappingRegion::GapRegion:
2120b57cec5SDimitry Andric       writeCounter(MinExpressions, Count, OS);
2130b57cec5SDimitry Andric       break;
2140b57cec5SDimitry Andric     case CounterMappingRegion::ExpansionRegion: {
2150b57cec5SDimitry Andric       assert(Count.isZero());
2160b57cec5SDimitry Andric       assert(I->ExpandedFileID <=
2170b57cec5SDimitry Andric              (std::numeric_limits<unsigned>::max() >>
2180b57cec5SDimitry Andric               Counter::EncodingCounterTagAndExpansionRegionTagBits));
2190b57cec5SDimitry Andric       // Mark an expansion region with a set bit that follows the counter tag,
2200b57cec5SDimitry Andric       // and pack the expanded file id into the remaining bits.
2210b57cec5SDimitry Andric       unsigned EncodedTagExpandedFileID =
2220b57cec5SDimitry Andric           (1 << Counter::EncodingTagBits) |
2230b57cec5SDimitry Andric           (I->ExpandedFileID
2240b57cec5SDimitry Andric            << Counter::EncodingCounterTagAndExpansionRegionTagBits);
2250b57cec5SDimitry Andric       encodeULEB128(EncodedTagExpandedFileID, OS);
2260b57cec5SDimitry Andric       break;
2270b57cec5SDimitry Andric     }
2280b57cec5SDimitry Andric     case CounterMappingRegion::SkippedRegion:
2290b57cec5SDimitry Andric       assert(Count.isZero());
2300b57cec5SDimitry Andric       encodeULEB128(unsigned(I->Kind)
2310b57cec5SDimitry Andric                         << Counter::EncodingCounterTagAndExpansionRegionTagBits,
2320b57cec5SDimitry Andric                     OS);
2330b57cec5SDimitry Andric       break;
234af732203SDimitry Andric     case CounterMappingRegion::BranchRegion:
235af732203SDimitry Andric       encodeULEB128(unsigned(I->Kind)
236af732203SDimitry Andric                         << Counter::EncodingCounterTagAndExpansionRegionTagBits,
237af732203SDimitry Andric                     OS);
238af732203SDimitry Andric       writeCounter(MinExpressions, Count, OS);
239af732203SDimitry Andric       writeCounter(MinExpressions, FalseCount, OS);
240af732203SDimitry Andric       break;
2410b57cec5SDimitry Andric     }
2420b57cec5SDimitry Andric     assert(I->LineStart >= PrevLineStart);
2430b57cec5SDimitry Andric     encodeULEB128(I->LineStart - PrevLineStart, OS);
2440b57cec5SDimitry Andric     encodeULEB128(I->ColumnStart, OS);
2450b57cec5SDimitry Andric     assert(I->LineEnd >= I->LineStart);
2460b57cec5SDimitry Andric     encodeULEB128(I->LineEnd - I->LineStart, OS);
2470b57cec5SDimitry Andric     encodeULEB128(I->ColumnEnd, OS);
2480b57cec5SDimitry Andric     PrevLineStart = I->LineStart;
2490b57cec5SDimitry Andric   }
2500b57cec5SDimitry Andric   // Ensure that all file ids have at least one mapping region.
2510b57cec5SDimitry Andric   assert(CurrentFileID == (VirtualFileMapping.size() - 1));
2520b57cec5SDimitry Andric }
253