1 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements GCOV-style profiling. When this pass is run it emits
10 // "gcno" files next to the existing source, and instruments the code that runs
// to record the edges between blocks that run and emit a complementary "gcda"
12 // file on exit.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "CFGMST.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Sequence.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/Analysis/BlockFrequencyInfo.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/Analysis/EHPersonalities.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/DebugInfo.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/InstIterator.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/CRC.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/FileSystem.h"
42 #include "llvm/Support/Path.h"
43 #include "llvm/Support/Regex.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Instrumentation.h"
46 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
47 #include "llvm/Transforms/Utils/ModuleUtils.h"
48 #include <algorithm>
49 #include <memory>
50 #include <string>
51 #include <utility>
52 
53 using namespace llvm;
54 namespace endian = llvm::support::endian;
55 
56 #define DEBUG_TYPE "insert-gcov-profiling"
57 
// Constants used when serializing GCOV notes records.
enum : uint32_t {
  // Arc flag bit passed along with an edge.
  // NOTE(review): presumably marks arcs that lie on the spanning tree and so
  // carry no counter -- confirm against the code that calls
  // GCOVBlock::addEdge.
  GCOV_ARC_ON_TREE = 1 << 0,

  // Record tags. Each one is written as the first word of its record by the
  // corresponding writeOut() routine.
  GCOV_TAG_FUNCTION = 0x01000000,
  GCOV_TAG_BLOCKS = 0x01410000,
  GCOV_TAG_ARCS = 0x01430000,
  GCOV_TAG_LINES = 0x01450000,
};
66 
// Version string consumed by GCOVOptions::getDefault(). It must be exactly
// four characters long, otherwise getDefault() reports a fatal error; the four
// characters are copied verbatim into GCOVOptions::Version.
static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
                                               cl::init("408*"), cl::Hidden,
                                               cl::ValueRequired);

// Copied into GCOVOptions::Atomic by GCOVOptions::getDefault().
static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
                                   cl::desc("Make counter updates atomic"));
73 
74 // Returns the number of words which will be used to represent this string.
75 static unsigned wordsOfString(StringRef s) {
76   // Length + NUL-terminated string + 0~3 padding NULs.
77   return (s.size() / 4) + 2;
78 }
79 
80 GCOVOptions GCOVOptions::getDefault() {
81   GCOVOptions Options;
82   Options.EmitNotes = true;
83   Options.EmitData = true;
84   Options.NoRedZone = false;
85   Options.Atomic = AtomicCounter;
86 
87   if (DefaultGCOVVersion.size() != 4) {
88     llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
89                              DefaultGCOVVersion);
90   }
91   memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
92   return Options;
93 }
94 
95 namespace {
96 class GCOVFunction;
97 
98 class GCOVProfiler {
99 public:
100   GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
101   GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
102   bool
103   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
104               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
105               std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
106 
107   void write(uint32_t i) {
108     char Bytes[4];
109     endian::write32(Bytes, i, Endian);
110     os->write(Bytes, 4);
111   }
112   void writeString(StringRef s) {
113     write(wordsOfString(s) - 1);
114     os->write(s.data(), s.size());
115     os->write_zeros(4 - s.size() % 4);
116   }
117   void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
118 
119 private:
120   // Create the .gcno files for the Module based on DebugInfo.
121   bool
122   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
123                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
124                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
125                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
126 
127   void emitGlobalConstructor(
128       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
129 
130   bool isFunctionInstrumented(const Function &F);
131   std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
132   static bool doesFilenameMatchARegex(StringRef Filename,
133                                       std::vector<Regex> &Regexes);
134 
135   // Get pointers to the functions in the runtime library.
136   FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
137   FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
138   FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
139   FunctionCallee getSummaryInfoFunc();
140   FunctionCallee getEndFileFunc();
141 
142   // Add the function to write out all our counters to the global destructor
143   // list.
144   Function *
145   insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
146   Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
147 
148   bool AddFlushBeforeForkAndExec();
149 
150   enum class GCovFileType { GCNO, GCDA };
151   std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
152 
153   GCOVOptions Options;
154   support::endianness Endian;
155   raw_ostream *os;
156 
157   // Checksum, produced by hash of EdgeDestinations
158   SmallVector<uint32_t, 4> FileChecksums;
159 
160   Module *M = nullptr;
161   std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
162   LLVMContext *Ctx = nullptr;
163   SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
164   std::vector<Regex> FilterRe;
165   std::vector<Regex> ExcludeRe;
166   DenseSet<const BasicBlock *> ExecBlocks;
167   StringMap<bool> InstrumentedFiles;
168 };
169 
170 class GCOVProfilerLegacyPass : public ModulePass {
171 public:
172   static char ID;
173   GCOVProfilerLegacyPass()
174       : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {}
175   GCOVProfilerLegacyPass(const GCOVOptions &Opts)
176       : ModulePass(ID), Profiler(Opts) {
177     initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
178   }
179   StringRef getPassName() const override { return "GCOV Profiler"; }
180 
181   bool runOnModule(Module &M) override {
182     auto GetBFI = [this](Function &F) {
183       return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
184     };
185     auto GetBPI = [this](Function &F) {
186       return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
187     };
188     auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
189       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
190     };
191     return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI);
192   }
193 
194   void getAnalysisUsage(AnalysisUsage &AU) const override {
195     AU.addRequired<BlockFrequencyInfoWrapperPass>();
196     AU.addRequired<TargetLibraryInfoWrapperPass>();
197   }
198 
199 private:
200   GCOVProfiler Profiler;
201 };
202 
203 struct BBInfo {
204   BBInfo *Group;
205   uint32_t Index;
206   uint32_t Rank = 0;
207 
208   BBInfo(unsigned Index) : Group(this), Index(Index) {}
209   const std::string infoString() const {
210     return (Twine("Index=") + Twine(Index)).str();
211   }
212 };
213 
214 struct Edge {
215   // This class implements the CFG edges. Note the CFG can be a multi-graph.
216   // So there might be multiple edges with same SrcBB and DestBB.
217   const BasicBlock *SrcBB;
218   const BasicBlock *DestBB;
219   uint64_t Weight;
220   BasicBlock *Place = nullptr;
221   uint32_t SrcNumber, DstNumber;
222   bool InMST = false;
223   bool Removed = false;
224   bool IsCritical = false;
225 
226   Edge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
227       : SrcBB(Src), DestBB(Dest), Weight(W) {}
228 
229   // Return the information string of an edge.
230   const std::string infoString() const {
231     return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
232             (IsCritical ? "c" : " ") + "  W=" + Twine(Weight))
233         .str();
234   }
235 };
236 }
237 
// Storage for the legacy pass identifier that GCOVProfilerLegacyPass hands to
// the ModulePass constructor.
char GCOVProfilerLegacyPass::ID = 0;
// Legacy pass registration under the name "insert-gcov-profiling". The
// dependency lines name the analysis wrapper passes whose results
// GCOVProfilerLegacyPass::runOnModule requests via getAnalysis.
INITIALIZE_PASS_BEGIN(
    GCOVProfilerLegacyPass, "insert-gcov-profiling",
    "Insert instrumentation for GCOV profiling", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    GCOVProfilerLegacyPass, "insert-gcov-profiling",
    "Insert instrumentation for GCOV profiling", false, false)
248 
249 ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
250   return new GCOVProfilerLegacyPass(Options);
251 }
252 
253 static StringRef getFunctionName(const DISubprogram *SP) {
254   if (!SP->getLinkageName().empty())
255     return SP->getLinkageName();
256   return SP->getName();
257 }
258 
259 /// Extract a filename for a DISubprogram.
260 ///
261 /// Prefer relative paths in the coverage notes. Clang also may split
262 /// up absolute paths into a directory and filename component. When
263 /// the relative path doesn't exist, reconstruct the absolute path.
264 static SmallString<128> getFilename(const DISubprogram *SP) {
265   SmallString<128> Path;
266   StringRef RelPath = SP->getFilename();
267   if (sys::fs::exists(RelPath))
268     Path = RelPath;
269   else
270     sys::path::append(Path, SP->getDirectory(), SP->getFilename());
271   return Path;
272 }
273 
274 namespace {
275   class GCOVRecord {
276   protected:
277     GCOVProfiler *P;
278 
279     GCOVRecord(GCOVProfiler *P) : P(P) {}
280 
281     void write(uint32_t i) { P->write(i); }
282     void writeString(StringRef s) { P->writeString(s); }
283     void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
284   };
285 
286   class GCOVFunction;
287   class GCOVBlock;
288 
289   // Constructed only by requesting it from a GCOVBlock, this object stores a
290   // list of line numbers and a single filename, representing lines that belong
291   // to the block.
292   class GCOVLines : public GCOVRecord {
293    public:
294     void addLine(uint32_t Line) {
295       assert(Line != 0 && "Line zero is not a valid real line number.");
296       Lines.push_back(Line);
297     }
298 
299     uint32_t length() const {
300       return 1 + wordsOfString(Filename) + Lines.size();
301     }
302 
303     void writeOut() {
304       write(0);
305       writeString(Filename);
306       for (int i = 0, e = Lines.size(); i != e; ++i)
307         write(Lines[i]);
308     }
309 
310     GCOVLines(GCOVProfiler *P, StringRef F)
311         : GCOVRecord(P), Filename(std::string(F)) {}
312 
313   private:
314     std::string Filename;
315     SmallVector<uint32_t, 32> Lines;
316   };
317 
318 
319   // Represent a basic block in GCOV. Each block has a unique number in the
320   // function, number of lines belonging to each block, and a set of edges to
321   // other blocks.
322   class GCOVBlock : public GCOVRecord {
323    public:
324     GCOVLines &getFile(StringRef Filename) {
325       return LinesByFile.try_emplace(Filename, P, Filename).first->second;
326     }
327 
328     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
329       OutEdges.emplace_back(&Successor, Flags);
330     }
331 
332     void writeOut() {
333       uint32_t Len = 3;
334       SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
335       for (auto &I : LinesByFile) {
336         Len += I.second.length();
337         SortedLinesByFile.push_back(&I);
338       }
339 
340       write(GCOV_TAG_LINES);
341       write(Len);
342       write(Number);
343 
344       llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
345                                        StringMapEntry<GCOVLines> *RHS) {
346         return LHS->getKey() < RHS->getKey();
347       });
348       for (auto &I : SortedLinesByFile)
349         I->getValue().writeOut();
350       write(0);
351       write(0);
352     }
353 
354     GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
355       // Only allow copy before edges and lines have been added. After that,
356       // there are inter-block pointers (eg: edges) that won't take kindly to
357       // blocks being copied or moved around.
358       assert(LinesByFile.empty());
359       assert(OutEdges.empty());
360     }
361 
362     uint32_t Number;
363     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
364 
365   private:
366     friend class GCOVFunction;
367 
368     GCOVBlock(GCOVProfiler *P, uint32_t Number)
369         : GCOVRecord(P), Number(Number) {}
370 
371     StringMap<GCOVLines> LinesByFile;
372   };
373 
374   // A function has a unique identifier, a checksum (we leave as zero) and a
375   // set of blocks and a map of edges between blocks. This is the only GCOV
376   // object users can construct, the blocks and lines will be rooted here.
377   class GCOVFunction : public GCOVRecord {
378   public:
379     GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
380                  unsigned EndLine, uint32_t Ident, int Version)
381         : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
382           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
383       LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
384       bool ExitBlockBeforeBody = Version >= 48;
385       uint32_t i = ExitBlockBeforeBody ? 2 : 1;
386       for (BasicBlock &BB : *F)
387         Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
388       if (!ExitBlockBeforeBody)
389         ReturnBlock.Number = i;
390 
391       std::string FunctionNameAndLine;
392       raw_string_ostream FNLOS(FunctionNameAndLine);
393       FNLOS << getFunctionName(SP) << SP->getLine();
394       FNLOS.flush();
395       FuncChecksum = hash_value(FunctionNameAndLine);
396     }
397 
398     GCOVBlock &getBlock(const BasicBlock *BB) {
399       return Blocks.find(BB)->second;
400     }
401 
402     GCOVBlock &getEntryBlock() { return EntryBlock; }
403     GCOVBlock &getReturnBlock() {
404       return ReturnBlock;
405     }
406 
407     uint32_t getFuncChecksum() const {
408       return FuncChecksum;
409     }
410 
411     void writeOut(uint32_t CfgChecksum) {
412       write(GCOV_TAG_FUNCTION);
413       SmallString<128> Filename = getFilename(SP);
414       uint32_t BlockLen =
415           2 + (Version >= 47) + wordsOfString(getFunctionName(SP));
416       if (Version < 80)
417         BlockLen += wordsOfString(Filename) + 1;
418       else
419         BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90);
420 
421       write(BlockLen);
422       write(Ident);
423       write(FuncChecksum);
424       if (Version >= 47)
425         write(CfgChecksum);
426       writeString(getFunctionName(SP));
427       if (Version < 80) {
428         writeString(Filename);
429         write(SP->getLine());
430       } else {
431         write(SP->isArtificial()); // artificial
432         writeString(Filename);
433         write(SP->getLine()); // start_line
434         write(0);             // start_column
435         // EndLine is the last line with !dbg. It is not the } line as in GCC,
436         // but good enough.
437         write(EndLine);
438         if (Version >= 90)
439           write(0); // end_column
440       }
441 
442       // Emit count of blocks.
443       write(GCOV_TAG_BLOCKS);
444       if (Version < 80) {
445         write(Blocks.size() + 2);
446         for (int i = Blocks.size() + 2; i; --i)
447           write(0);
448       } else {
449         write(1);
450         write(Blocks.size() + 2);
451       }
452       LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
453 
454       // Emit edges between blocks.
455       const uint32_t Outgoing = EntryBlock.OutEdges.size();
456       if (Outgoing) {
457         write(GCOV_TAG_ARCS);
458         write(Outgoing * 2 + 1);
459         write(EntryBlock.Number);
460         for (const auto &E : EntryBlock.OutEdges) {
461           write(E.first->Number);
462           write(E.second);
463         }
464       }
465       std::vector<GCOVBlock *> Sorted;
466       Sorted.reserve(Blocks.size());
467       for (auto &It : Blocks)
468         Sorted.push_back(&It.second);
469       llvm::sort(Sorted, [](GCOVBlock *x, GCOVBlock *y) {
470         return x->Number < y->Number;
471       });
472       for (GCOVBlock &Block : make_pointee_range(Sorted)) {
473         if (Block.OutEdges.empty()) continue;
474 
475         write(GCOV_TAG_ARCS);
476         write(Block.OutEdges.size() * 2 + 1);
477         write(Block.Number);
478         for (const auto &E : Block.OutEdges) {
479           write(E.first->Number);
480           write(E.second);
481         }
482       }
483 
484       // Emit lines for each block.
485       for (GCOVBlock &Block : make_pointee_range(Sorted))
486         Block.writeOut();
487     }
488 
489   public:
490     const DISubprogram *SP;
491     unsigned EndLine;
492     uint32_t Ident;
493     uint32_t FuncChecksum;
494     int Version;
495     DenseMap<BasicBlock *, GCOVBlock> Blocks;
496     GCOVBlock EntryBlock;
497     GCOVBlock ReturnBlock;
498   };
499 }
500 
501 // RegexesStr is a string containing differents regex separated by a semi-colon.
502 // For example "foo\..*$;bar\..*$".
503 std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
504   std::vector<Regex> Regexes;
505   while (!RegexesStr.empty()) {
506     std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
507     if (!HeadTail.first.empty()) {
508       Regex Re(HeadTail.first);
509       std::string Err;
510       if (!Re.isValid(Err)) {
511         Ctx->emitError(Twine("Regex ") + HeadTail.first +
512                        " is not valid: " + Err);
513       }
514       Regexes.emplace_back(std::move(Re));
515     }
516     RegexesStr = HeadTail.second;
517   }
518   return Regexes;
519 }
520 
521 bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
522                                            std::vector<Regex> &Regexes) {
523   for (Regex &Re : Regexes)
524     if (Re.match(Filename))
525       return true;
526   return false;
527 }
528 
529 bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
530   if (FilterRe.empty() && ExcludeRe.empty()) {
531     return true;
532   }
533   SmallString<128> Filename = getFilename(F.getSubprogram());
534   auto It = InstrumentedFiles.find(Filename);
535   if (It != InstrumentedFiles.end()) {
536     return It->second;
537   }
538 
539   SmallString<256> RealPath;
540   StringRef RealFilename;
541 
542   // Path can be
543   // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
544   // such a case we must get the real_path.
545   if (sys::fs::real_path(Filename, RealPath)) {
546     // real_path can fail with path like "foo.c".
547     RealFilename = Filename;
548   } else {
549     RealFilename = RealPath;
550   }
551 
552   bool ShouldInstrument;
553   if (FilterRe.empty()) {
554     ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
555   } else if (ExcludeRe.empty()) {
556     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
557   } else {
558     ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
559                        !doesFilenameMatchARegex(RealFilename, ExcludeRe);
560   }
561   InstrumentedFiles[Filename] = ShouldInstrument;
562   return ShouldInstrument;
563 }
564 
565 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
566                                      GCovFileType OutputType) {
567   bool Notes = OutputType == GCovFileType::GCNO;
568 
569   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
570     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
571       MDNode *N = GCov->getOperand(i);
572       bool ThreeElement = N->getNumOperands() == 3;
573       if (!ThreeElement && N->getNumOperands() != 2)
574         continue;
575       if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
576         continue;
577 
578       if (ThreeElement) {
579         // These nodes have no mangling to apply, it's stored mangled in the
580         // bitcode.
581         MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
582         MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
583         if (!NotesFile || !DataFile)
584           continue;
585         return std::string(Notes ? NotesFile->getString()
586                                  : DataFile->getString());
587       }
588 
589       MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
590       if (!GCovFile)
591         continue;
592 
593       SmallString<128> Filename = GCovFile->getString();
594       sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
595       return std::string(Filename.str());
596     }
597   }
598 
599   SmallString<128> Filename = CU->getFilename();
600   sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
601   StringRef FName = sys::path::filename(Filename);
602   SmallString<128> CurPath;
603   if (sys::fs::current_path(CurPath))
604     return std::string(FName);
605   sys::path::append(CurPath, FName);
606   return std::string(CurPath.str());
607 }
608 
609 bool GCOVProfiler::runOnModule(
610     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
611     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
612     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
613   this->M = &M;
614   this->GetTLI = std::move(GetTLI);
615   Ctx = &M.getContext();
616 
617   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
618   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
619     return false;
620 
621   bool HasExecOrFork = AddFlushBeforeForkAndExec();
622 
623   FilterRe = createRegexesFromString(Options.Filter);
624   ExcludeRe = createRegexesFromString(Options.Exclude);
625   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
626   return true;
627 }
628 
629 PreservedAnalyses GCOVProfilerPass::run(Module &M,
630                                         ModuleAnalysisManager &AM) {
631 
632   GCOVProfiler Profiler(GCOVOpts);
633   FunctionAnalysisManager &FAM =
634       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
635 
636   auto GetBFI = [&FAM](Function &F) {
637     return &FAM.getResult<BlockFrequencyAnalysis>(F);
638   };
639   auto GetBPI = [&FAM](Function &F) {
640     return &FAM.getResult<BranchProbabilityAnalysis>(F);
641   };
642   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
643     return FAM.getResult<TargetLibraryAnalysis>(F);
644   };
645 
646   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
647     return PreservedAnalyses::all();
648 
649   return PreservedAnalyses::none();
650 }
651 
652 static bool functionHasLines(const Function &F, unsigned &EndLine) {
653   // Check whether this function actually has any source lines. Not only
654   // do these waste space, they also can crash gcov.
655   EndLine = 0;
656   for (auto &BB : F) {
657     for (auto &I : BB) {
658       // Debug intrinsic locations correspond to the location of the
659       // declaration, not necessarily any statements or expressions.
660       if (isa<DbgInfoIntrinsic>(&I)) continue;
661 
662       const DebugLoc &Loc = I.getDebugLoc();
663       if (!Loc)
664         continue;
665 
666       // Artificial lines such as calls to the global constructors.
667       if (Loc.getLine() == 0) continue;
668       EndLine = std::max(EndLine, Loc.getLine());
669 
670       return true;
671     }
672   }
673   return false;
674 }
675 
676 static bool isUsingScopeBasedEH(Function &F) {
677   if (!F.hasPersonalityFn()) return false;
678 
679   EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
680   return isScopedEHPersonality(Personality);
681 }
682 
683 bool GCOVProfiler::AddFlushBeforeForkAndExec() {
684   SmallVector<CallInst *, 2> Forks;
685   SmallVector<CallInst *, 2> Execs;
686   for (auto &F : M->functions()) {
687     auto *TLI = &GetTLI(F);
688     for (auto &I : instructions(F)) {
689       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
690         if (Function *Callee = CI->getCalledFunction()) {
691           LibFunc LF;
692           if (TLI->getLibFunc(*Callee, LF)) {
693             if (LF == LibFunc_fork) {
694 #if !defined(_WIN32)
695               Forks.push_back(CI);
696 #endif
697             } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
698                        LF == LibFunc_execlp || LF == LibFunc_execv ||
699                        LF == LibFunc_execvp || LF == LibFunc_execve ||
700                        LF == LibFunc_execvpe || LF == LibFunc_execvP) {
701               Execs.push_back(CI);
702             }
703           }
704         }
705       }
706     }
707   }
708 
709   for (auto F : Forks) {
710     IRBuilder<> Builder(F);
711     BasicBlock *Parent = F->getParent();
712     auto NextInst = ++F->getIterator();
713 
714     // We've a fork so just reset the counters in the child process
715     FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
716     FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy);
717     F->setCalledFunction(GCOVFork);
718 
719     // We split just after the fork to have a counter for the lines after
720     // Anyway there's a bug:
721     // void foo() { fork(); }
722     // void bar() { foo(); blah(); }
723     // then "blah();" will be called 2 times but showed as 1
724     // because "blah()" belongs to the same block as "foo();"
725     Parent->splitBasicBlock(NextInst);
726 
727     // back() is a br instruction with a debug location
728     // equals to the one from NextAfterFork
729     // So to avoid to have two debug locs on two blocks just change it
730     DebugLoc Loc = F->getDebugLoc();
731     Parent->back().setDebugLoc(Loc);
732   }
733 
734   for (auto E : Execs) {
735     IRBuilder<> Builder(E);
736     BasicBlock *Parent = E->getParent();
737     auto NextInst = ++E->getIterator();
738 
739     // Since the process is replaced by a new one we need to write out gcdas
740     // No need to reset the counters since they'll be lost after the exec**
741     FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
742     FunctionCallee WriteoutF =
743         M->getOrInsertFunction("llvm_writeout_files", FTy);
744     Builder.CreateCall(WriteoutF);
745 
746     DebugLoc Loc = E->getDebugLoc();
747     Builder.SetInsertPoint(&*NextInst);
748     // If the exec** fails we must reset the counters since they've been
749     // dumped
750     FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
751     Builder.CreateCall(ResetF)->setDebugLoc(Loc);
752     ExecBlocks.insert(Parent);
753     Parent->splitBasicBlock(NextInst);
754     Parent->back().setDebugLoc(Loc);
755   }
756 
757   return !Forks.empty() || !Execs.empty();
758 }
759 
760 static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
761                               const DenseSet<const BasicBlock *> &ExecBlocks) {
762   if (E.InMST || E.Removed)
763     return nullptr;
764 
765   BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
766   BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
767   // For a fake edge, instrument the real BB.
768   if (SrcBB == nullptr)
769     return DestBB;
770   if (DestBB == nullptr)
771     return SrcBB;
772 
773   auto CanInstrument = [](BasicBlock *BB) -> BasicBlock * {
774     // There are basic blocks (such as catchswitch) cannot be instrumented.
775     // If the returned first insertion point is the end of BB, skip this BB.
776     if (BB->getFirstInsertionPt() == BB->end())
777       return nullptr;
778     return BB;
779   };
780 
781   // Instrument the SrcBB if it has a single successor,
782   // otherwise, the DestBB if this is not a critical edge.
783   Instruction *TI = SrcBB->getTerminator();
784   if (TI->getNumSuccessors() <= 1 && !ExecBlocks.count(SrcBB))
785     return CanInstrument(SrcBB);
786   if (!E.IsCritical)
787     return CanInstrument(DestBB);
788 
789   // Some IndirectBr critical edges cannot be split by the previous
790   // SplitIndirectBrCriticalEdges call. Bail out.
791   const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
792   BasicBlock *InstrBB =
793       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
794   if (!InstrBB)
795     return nullptr;
796 
797   MST.addEdge(SrcBB, InstrBB, 0);
798   MST.addEdge(InstrBB, DestBB, 0).InMST = true;
799   E.Removed = true;
800 
801   return CanInstrument(InstrBB);
802 }
803 
804 #ifndef NDEBUG
805 static void dumpEdges(CFGMST<Edge, BBInfo> &MST, GCOVFunction &GF) {
806   size_t ID = 0;
807   for (auto &E : make_pointee_range(MST.AllEdges)) {
808     GCOVBlock &Src = E.SrcBB ? GF.getBlock(E.SrcBB) : GF.getEntryBlock();
809     GCOVBlock &Dst = E.DestBB ? GF.getBlock(E.DestBB) : GF.getReturnBlock();
810     dbgs() << "  Edge " << ID++ << ": " << Src.Number << "->" << Dst.Number
811            << E.infoString() << "\n";
812   }
813 }
814 #endif
815 
// Builds the gcov function/block/edge records for the functions of the module
// and, depending on Options, writes a .gcno notes file per compile unit
// (Options.EmitNotes) and inserts the edge counters plus the global
// constructor (Options.EmitData).
//
// CUNode        - named metadata node whose operands are the DICompileUnits
//                 to process; CUs for which getDWOId() is non-zero are
//                 skipped.
// HasExecOrFork - when true, InMST is cleared on every edge, which disables
//                 the spanning tree optimization.
// GetBFI/GetBPI - callbacks producing the per-function analyses handed to
//                 SplitIndirectBrCriticalEdges and CFGMST.
// GetTLI        - not referenced in this function body.
//
// Always returns true.
bool GCOVProfiler::emitProfileNotes(
    NamedMDNode *CUNode, bool HasExecOrFork,
    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
  // Decode the first three characters of Options.Version into an integer.
  // When the first character is a letter ('A' or above) it contributes
  // hundreds and the next two characters are decimal digits; otherwise the
  // first and third characters form a two-digit number and the second
  // character is not used.
  int Version;
  {
    uint8_t c3 = Options.Version[0];
    uint8_t c2 = Options.Version[1];
    uint8_t c1 = Options.Version[2];
    Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
                        : (c3 - '0') * 10 + c1 - '0';
  }

  // Cleared after the first processed CU so that the counters and the global
  // constructor are emitted only once.
  bool EmitGCDA = Options.EmitData;
  for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
    // Each compile unit gets its own .gcno file. This means that whether we run
    // this pass over the original .o's as they're produced, or run it after
    // LTO, we'll generate the same .gcno files.

    auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));

    // Skip module skeleton (and module) CUs.
    if (CU->getDWOId())
      continue;

    // Bytes of every out-edge destination block number seen in this
    // iteration; their CRC becomes the file stamp below.
    std::vector<uint8_t> EdgeDestinations;
    // One (counter array, subprogram) pair per instrumented function.
    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;

    Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
                                                 : support::endianness::big;
    unsigned FunctionIdent = 0;
    for (auto &F : M->functions()) {
      DISubprogram *SP = F.getSubprogram();
      unsigned EndLine;
      // Functions without a subprogram, without lines, or filtered out by
      // isFunctionInstrumented are not profiled.
      if (!SP) continue;
      if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
        continue;
      // TODO: Functions using scope-based EH are currently not supported.
      if (isUsingScopeBasedEH(F)) continue;

      // Add the function line number to the lines of the entry block
      // to have a counter for the function definition.
      uint32_t Line = SP->getLine();
      auto Filename = getFilename(SP);

      BranchProbabilityInfo *BPI = GetBPI(F);
      BlockFrequencyInfo *BFI = GetBFI(F);

      // Split indirectbr critical edges here before computing the MST rather
      // than later in getInstrBB() to avoid invalidating it.
      SplitIndirectBrCriticalEdges(F, BPI, BFI);

      CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);

      // getInstrBB can split basic blocks and push elements to AllEdges.
      // The index range is therefore fixed to the size before the loop, and
      // edges are accessed by index rather than by iterator.
      for (size_t I : llvm::seq<size_t>(0, MST.AllEdges.size())) {
        auto &E = *MST.AllEdges[I];
        // For now, disable spanning tree optimization when fork or exec* is
        // used.
        if (HasExecOrFork)
          E.InMST = false;
        E.Place = getInstrBB(MST, E, ExecBlocks);
      }
      // Basic blocks in F are finalized at this point.
      BasicBlock &EntryBlock = F.getEntryBlock();
      Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
                                                     FunctionIdent++, Version));
      GCOVFunction &Func = *Funcs.back();

      // Some non-tree edges are IndirectBr which cannot be split. Ignore them
      // as well.
      llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
        return E->Removed || (!E->InMST && !E->Place);
      });
      // Move the edges that have a counter location (non-null Place) to the
      // front; Measured is how many there are.
      const size_t Measured =
          llvm::partition(MST.AllEdges,
                          [](std::unique_ptr<Edge> &E) { return E->Place; }) -
          MST.AllEdges.begin();
      // Record the gcov block numbers of both endpoints of each measured
      // edge. A null SrcBB/DestBB stands for the function's entry/return
      // block respectively.
      for (size_t I : llvm::seq<size_t>(0, Measured)) {
        Edge &E = *MST.AllEdges[I];
        GCOVBlock &Src =
            E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
        GCOVBlock &Dst =
            E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
        E.SrcNumber = Src.Number;
        E.DstNumber = Dst.Number;
      }
      // Order the measured edges by (source number, destination number). The
      // resulting index of an edge is also its counter index below.
      std::stable_sort(
          MST.AllEdges.begin(), MST.AllEdges.begin() + Measured,
          [](const std::unique_ptr<Edge> &L, const std::unique_ptr<Edge> &R) {
            return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
                                                : L->DstNumber < R->DstNumber;
          });

      // Register every remaining edge with its source block. Edges without a
      // counter location are flagged GCOV_ARC_ON_TREE.
      for (const Edge &E : make_pointee_range(MST.AllEdges)) {
        GCOVBlock &Src =
            E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
        GCOVBlock &Dst =
            E.DestBB ? Func.getBlock(E.DestBB) : Func.getReturnBlock();
        Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
      }

      // Artificial functions such as global initializers do not get the
      // definition line attributed to their entry block.
      if (!SP->isArtificial())
        Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);

      LLVM_DEBUG(dumpEdges(MST, Func));

      for (auto &GB : Func.Blocks) {
        const BasicBlock &BB = *GB.first;
        auto &Block = GB.second;
        // Append each successor's block number to EdgeDestinations one byte
        // at a time, least significant byte first; at least one byte is
        // always written.
        for (auto Succ : Block.OutEdges) {
          uint32_t Idx = Succ.first->Number;
          do EdgeDestinations.push_back(Idx & 255);
          while ((Idx >>= 8) > 0);
        }

        for (auto &I : BB) {
          // Debug intrinsic locations correspond to the location of the
          // declaration, not necessarily any statements or expressions.
          if (isa<DbgInfoIntrinsic>(&I)) continue;

          const DebugLoc &Loc = I.getDebugLoc();
          if (!Loc)
            continue;

          // Artificial lines such as calls to the global constructors.
          if (Loc.getLine() == 0 || Loc.isImplicitCode())
            continue;

          // Skip an instruction whose line equals the previously seen line.
          // Line starts out as the function's own line and is reset to 0 at
          // the end of every block.
          if (Line == Loc.getLine()) continue;
          Line = Loc.getLine();
          // Only lines whose scope resolves to this function's subprogram
          // are recorded.
          if (SP != getDISubprogram(Loc.getScope()))
            continue;

          GCOVLines &Lines = Block.getFile(Filename);
          Lines.addLine(Loc.getLine());
        }
        Line = 0;
      }
      if (EmitGCDA) {
        DISubprogram *SP = F.getSubprogram();
        // One zero-initialized i64 counter per measured edge, stored in an
        // internal global array.
        ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
        GlobalVariable *Counters = new GlobalVariable(
            *M, CounterTy, false, GlobalValue::InternalLinkage,
            Constant::getNullValue(CounterTy), "__llvm_gcov_ctr");
        CountersBySP.emplace_back(Counters, SP);

        // Insert the increment of counter I at the first insertion point of
        // the block chosen for edge I: an atomic monotonic add when
        // Options.Atomic is set, otherwise a plain load/add/store.
        for (size_t I : llvm::seq<size_t>(0, Measured)) {
          const Edge &E = *MST.AllEdges[I];
          IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
          Value *V = Builder.CreateConstInBoundsGEP2_64(
              Counters->getValueType(), Counters, 0, I);
          if (Options.Atomic) {
            Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
                                    AtomicOrdering::Monotonic);
          } else {
            Value *Count =
                Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
            Count = Builder.CreateAdd(Count, Builder.getInt64(1));
            Builder.CreateStore(Count, V);
          }
        }
      }
    }

    // The file stamp is the JamCRC of the collected edge destination bytes.
    // It is remembered in FileChecksums, which insertCounterWriteout reads.
    char Tmp[4];
    JamCRC JC;
    JC.update(EdgeDestinations);
    uint32_t Stamp = JC.getCRC();
    FileChecksums.push_back(Stamp);

    if (Options.EmitNotes) {
      std::error_code EC;
      raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
                         sys::fs::OF_None);
      if (EC) {
        Ctx->emitError(
            Twine("failed to open coverage notes file for writing: ") +
            EC.message());
        // Moves on to the next CU; this also bypasses the
        // emitGlobalConstructor call at the bottom of this iteration.
        continue;
      }
      os = &out;
      // File header: magic followed by the four version characters. For
      // little-endian targets both are written byte-reversed.
      if (Endian == support::endianness::big) {
        out.write("gcno", 4);
        out.write(Options.Version, 4);
      } else {
        out.write("oncg", 4);
        std::reverse_copy(Options.Version, Options.Version + 4, Tmp);
        out.write(Tmp, 4);
      }
      write(Stamp);
      // Extra header fields that are only written for newer versions.
      if (Version >= 90)
        writeString(""); // unuseful current_working_directory
      if (Version >= 80)
        write(0); // unuseful has_unexecuted_blocks

      for (auto &Func : Funcs)
        Func->writeOut(Stamp);

      // Two zero words are written after the function records.
      write(0);
      write(0);
      out.close();
    }

    // Emit the writeout/reset functions and the constructor once, for the
    // first CU that reaches this point.
    if (EmitGCDA) {
      emitGlobalConstructor(CountersBySP);
      EmitGCDA = false;
    }
  }
  return true;
}
1029 
1030 void GCOVProfiler::emitGlobalConstructor(
1031     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
1032   Function *WriteoutF = insertCounterWriteout(CountersBySP);
1033   Function *ResetF = insertReset(CountersBySP);
1034 
1035   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
1036   // be executed at exit and the "__llvm_gcov_flush" function to be executed
1037   // when "__gcov_flush" is called.
1038   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1039   Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
1040                                  "__llvm_gcov_init", M);
1041   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1042   F->setLinkage(GlobalValue::InternalLinkage);
1043   F->addFnAttr(Attribute::NoInline);
1044   if (Options.NoRedZone)
1045     F->addFnAttr(Attribute::NoRedZone);
1046 
1047   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
1048   IRBuilder<> Builder(BB);
1049 
1050   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1051   auto *PFTy = PointerType::get(FTy, 0);
1052   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
1053 
1054   // Initialize the environment and register the local writeout, flush and
1055   // reset functions.
1056   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
1057   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
1058   Builder.CreateRetVoid();
1059 
1060   appendToGlobalCtors(*M, F, 0);
1061 }
1062 
1063 FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
1064   Type *Args[] = {
1065       Type::getInt8PtrTy(*Ctx), // const char *orig_filename
1066       Type::getInt32Ty(*Ctx),   // uint32_t version
1067       Type::getInt32Ty(*Ctx),   // uint32_t checksum
1068   };
1069   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1070   AttributeList AL;
1071   if (auto AK = TLI->getExtAttrForI32Param(false))
1072     AL = AL.addParamAttribute(*Ctx, 2, AK);
1073   FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
1074   return Res;
1075 }
1076 
1077 FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
1078   Type *Args[] = {
1079     Type::getInt32Ty(*Ctx),    // uint32_t ident
1080     Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
1081     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
1082   };
1083   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1084   AttributeList AL;
1085   if (auto AK = TLI->getExtAttrForI32Param(false)) {
1086     AL = AL.addParamAttribute(*Ctx, 0, AK);
1087     AL = AL.addParamAttribute(*Ctx, 1, AK);
1088     AL = AL.addParamAttribute(*Ctx, 2, AK);
1089   }
1090   return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
1091 }
1092 
1093 FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
1094   Type *Args[] = {
1095     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
1096     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
1097   };
1098   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
1099   AttributeList AL;
1100   if (auto AK = TLI->getExtAttrForI32Param(false))
1101     AL = AL.addParamAttribute(*Ctx, 0, AK);
1102   return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
1103 }
1104 
1105 FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
1106   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1107   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
1108 }
1109 
1110 FunctionCallee GCOVProfiler::getEndFileFunc() {
1111   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1112   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
1113 }
1114 
// Creates (or reuses) the "__llvm_gcov_writeout" function and fills in its
// body. The body walks a constant table with one entry per compile unit and,
// for every entry, calls llvm_gcda_start_file, then llvm_gcda_emit_function
// and llvm_gcda_emit_arcs once per counter array, and finally
// llvm_gcda_summary_info and llvm_gcda_end_file.
//
// CountersBySP - (counter array, subprogram) pairs; only the counter array
//                (the pair's first member) is read here.
//
// Returns the writeout function. If the module has no "llvm.dbg.cu" metadata
// or no compile unit survives the skip check, the function only contains a
// `ret void`.
Function *GCOVProfiler::insertCounterWriteout(
    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
  FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
  if (!WriteoutF)
    WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
                                 "__llvm_gcov_writeout", M);
  WriteoutF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  WriteoutF->addFnAttr(Attribute::NoInline);
  if (Options.NoRedZone)
    WriteoutF->addFnAttr(Attribute::NoRedZone);

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
  IRBuilder<> Builder(BB);

  auto *TLI = &GetTLI(*WriteoutF);

  // Declarations of the functions called from the generated loops.
  FunctionCallee StartFile = getStartFileFunc(TLI);
  FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
  FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
  FunctionCallee SummaryInfo = getSummaryInfoFunc();
  FunctionCallee EndFile = getEndFileFunc();

  NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
  if (!CUNodes) {
    Builder.CreateRetVoid();
    return WriteoutF;
  }

  // Collect the relevant data into a large constant data structure that we can
  // walk to write out everything.
  // Each *_args_ty struct mirrors the parameter list of the corresponding
  // callee; file_info bundles the start-file arguments, the number of
  // counters, and pointers to the two per-counter argument arrays.
  StructType *StartFileCallArgsTy = StructType::create(
      {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
      "start_file_args_ty");
  StructType *EmitFunctionCallArgsTy = StructType::create(
      {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
      "emit_function_args_ty");
  StructType *EmitArcsCallArgsTy = StructType::create(
      {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
      "emit_arcs_args_ty");
  StructType *FileInfoTy =
      StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
                          EmitFunctionCallArgsTy->getPointerTo(),
                          EmitArcsCallArgsTy->getPointerTo()},
                         "file_info");

  Constant *Zero32 = Builder.getInt32(0);
  // Build an explicit array of two zeros for use in ConstantExpr GEP building.
  Constant *TwoZero32s[] = {Zero32, Zero32};

  SmallVector<Constant *, 8> FileInfos;
  for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
    auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));

    // Skip module skeleton (and module) CUs.
    if (CU->getDWOId())
      continue;

    std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
    // NOTE(review): FileChecksums is indexed here by the CU operand index i,
    // while emitProfileNotes appends to it only for CUs it does not skip.
    // Confirm the two stay in step when a CU is skipped or when the module
    // has more than one CU.
    uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
    // Arguments of the start-file call: file name, the four version
    // characters read as a big-endian 32-bit value, and the checksum.
    auto *StartFileCallArgs = ConstantStruct::get(
        StartFileCallArgsTy,
        {Builder.CreateGlobalStringPtr(FilenameGcda),
         Builder.getInt32(endian::read32be(Options.Version)),
         Builder.getInt32(CfgChecksum)});

    // One entry per counter array: the function's identifier (its index j),
    // its checksums, the number of arcs and a pointer to the first counter.
    SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
    SmallVector<Constant *, 8> EmitArcsCallArgsArray;
    for (int j : llvm::seq<int>(0, CountersBySP.size())) {
      uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
      EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
          EmitFunctionCallArgsTy,
          {Builder.getInt32(j),
           Builder.getInt32(FuncChecksum),
           Builder.getInt32(CfgChecksum)}));

      GlobalVariable *GV = CountersBySP[j].first;
      unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
      EmitArcsCallArgsArray.push_back(ConstantStruct::get(
          EmitArcsCallArgsTy,
          {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
                                       GV->getValueType(), GV, TwoZero32s)}));
    }
    // Create global arrays for the two emit calls.
    int CountersSize = CountersBySP.size();
    assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
           "Mismatched array size!");
    assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
           "Mismatched array size!");
    auto *EmitFunctionCallArgsArrayTy =
        ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
    auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
        *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
        GlobalValue::InternalLinkage,
        ConstantArray::get(EmitFunctionCallArgsArrayTy,
                           EmitFunctionCallArgsArray),
        Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
    auto *EmitArcsCallArgsArrayTy =
        ArrayType::get(EmitArcsCallArgsTy, CountersSize);
    EmitFunctionCallArgsArrayGV->setUnnamedAddr(
        GlobalValue::UnnamedAddr::Global);
    auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
        *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
        GlobalValue::InternalLinkage,
        ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
        Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
    EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

    // The file_info entry for this CU points at the first element of each of
    // the two arrays created above.
    FileInfos.push_back(ConstantStruct::get(
        FileInfoTy,
        {StartFileCallArgs, Builder.getInt32(CountersSize),
         ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
                                                EmitFunctionCallArgsArrayGV,
                                                TwoZero32s),
         ConstantExpr::getInBoundsGetElementPtr(
             EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
  }

  // If we didn't find anything to actually emit, bail on out.
  if (FileInfos.empty()) {
    Builder.CreateRetVoid();
    return WriteoutF;
  }

  // To simplify code, we cap the number of file infos we write out to fit
  // easily in a 32-bit signed integer. This gives consistent behavior between
  // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
  // operations on 32-bit systems. It also seems unreasonable to try to handle
  // more than 2 billion files.
  if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
    FileInfos.resize(INT_MAX);

  // Create a global for the entire data structure so we can walk it more
  // easily.
  auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
  auto *FileInfoArrayGV = new GlobalVariable(
      *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
      ConstantArray::get(FileInfoArrayTy, FileInfos),
      "__llvm_internal_gcov_emit_file_info");
  FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

  // Create the CFG for walking this data structure.
  // Shape: entry -> file.loop.header -> (counter.loop.header)* ->
  // file.loop.latch -> file.loop.header or exit.
  auto *FileLoopHeader =
      BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
  auto *CounterLoopHeader =
      BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
  auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
  auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);

  // We always have at least one file, so just branch to the header.
  Builder.CreateBr(FileLoopHeader);

  // The index into the files structure is our loop induction variable.
  Builder.SetInsertPoint(FileLoopHeader);
  PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                                  "file_idx");
  IV->addIncoming(Builder.getInt32(0), BB);
  auto *FileInfoPtr = Builder.CreateInBoundsGEP(
      FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
  auto *StartFileCallArgsPtr =
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
  // Load the three start-file arguments from the current file_info entry and
  // call the start-file function with them.
  auto *StartFileCall = Builder.CreateCall(
      StartFile,
      {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 0),
                          "filename"),
       Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 1),
                          "version"),
       Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                  StartFileCallArgsPtr, 2),
                          "stamp")});
  // Mirror on the call site the parameter attribute that getStartFileFunc
  // puts on the declaration.
  if (auto AK = TLI->getExtAttrForI32Param(false))
    StartFileCall->addParamAttr(2, AK);
  auto *NumCounters = Builder.CreateLoad(
      FileInfoTy->getElementType(1),
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
  auto *EmitFunctionCallArgsArray =
      Builder.CreateLoad(FileInfoTy->getElementType(2),
                         Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
                         "emit_function_args");
  auto *EmitArcsCallArgsArray = Builder.CreateLoad(
      FileInfoTy->getElementType(3),
      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
  // Enter the counter loop only if this file has at least one counter;
  // otherwise go straight to the latch.
  auto *EnterCounterLoopCond =
      Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
  Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);

  // Inner loop over the counter arrays of the current file; JV is its
  // induction variable.
  Builder.SetInsertPoint(CounterLoopHeader);
  auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                               "ctr_idx");
  JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
  auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
      EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
  auto *EmitFunctionCall = Builder.CreateCall(
      EmitFunction,
      {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 0),
                          "ident"),
       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 1),
                          "func_checkssum"),
       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                  EmitFunctionCallArgsPtr, 2),
                          "cfg_checksum")});
  // All three i32 arguments of the call get the extension attribute, if the
  // target has one.
  if (auto AK = TLI->getExtAttrForI32Param(false)) {
    EmitFunctionCall->addParamAttr(0, AK);
    EmitFunctionCall->addParamAttr(1, AK);
    EmitFunctionCall->addParamAttr(2, AK);
  }
  auto *EmitArcsCallArgsPtr =
      Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
  auto *EmitArcsCall = Builder.CreateCall(
      EmitArcs,
      {Builder.CreateLoad(
           EmitArcsCallArgsTy->getElementType(0),
           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
           "num_counters"),
       Builder.CreateLoad(
           EmitArcsCallArgsTy->getElementType(1),
           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
           "counters")});
  if (auto AK = TLI->getExtAttrForI32Param(false))
    EmitArcsCall->addParamAttr(0, AK);
  // Advance to the next counter array, looping back while the incremented
  // index is still below the number of counters.
  auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
  auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
  Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
  JV->addIncoming(NextJV, CounterLoopHeader);

  // Per-file epilogue: summary and end-of-file calls, then advance the file
  // index and loop while it is below the number of file infos.
  Builder.SetInsertPoint(FileLoopLatch);
  Builder.CreateCall(SummaryInfo, {});
  Builder.CreateCall(EndFile, {});
  auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
  auto *FileLoopCond =
      Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
  Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
  IV->addIncoming(NextIV, FileLoopLatch);

  Builder.SetInsertPoint(ExitBB);
  Builder.CreateRetVoid();

  return WriteoutF;
}
1364 
1365 Function *GCOVProfiler::insertReset(
1366     ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
1367   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
1368   Function *ResetF = M->getFunction("__llvm_gcov_reset");
1369   if (!ResetF)
1370     ResetF = Function::Create(FTy, GlobalValue::InternalLinkage,
1371                               "__llvm_gcov_reset", M);
1372   ResetF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1373   ResetF->addFnAttr(Attribute::NoInline);
1374   if (Options.NoRedZone)
1375     ResetF->addFnAttr(Attribute::NoRedZone);
1376 
1377   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
1378   IRBuilder<> Builder(Entry);
1379 
1380   // Zero out the counters.
1381   for (const auto &I : CountersBySP) {
1382     GlobalVariable *GV = I.first;
1383     Constant *Null = Constant::getNullValue(GV->getValueType());
1384     Builder.CreateStore(Null, GV);
1385   }
1386 
1387   Type *RetTy = ResetF->getReturnType();
1388   if (RetTy->isVoidTy())
1389     Builder.CreateRetVoid();
1390   else if (RetTy->isIntegerTy())
1391     // Used if __llvm_gcov_reset was implicitly declared.
1392     Builder.CreateRet(ConstantInt::get(RetTy, 0));
1393   else
1394     report_fatal_error("invalid return type for __llvm_gcov_reset");
1395 
1396   return ResetF;
1397 }
1398