1 //===--- CoverageMappingGen.cpp - Coverage mapping generation ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Instrumentation-based code coverage mapping generator
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CoverageMappingGen.h"
14 #include "CodeGenFunction.h"
15 #include "clang/AST/StmtVisitor.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Frontend/FrontendDiagnostic.h"
19 #include "clang/Lex/Lexer.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ProfileData/Coverage/CoverageMapping.h"
24 #include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
25 #include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
26 #include "llvm/ProfileData/InstrProfReader.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/Path.h"
29 
30 // This selects the coverage mapping format defined when `InstrProfData.inc`
31 // is textually included.
32 #define COVMAP_V3
33 
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm::coverage;
37 
38 CoverageSourceInfo *
39 CoverageMappingModuleGen::setUpCoverageCallbacks(Preprocessor &PP) {
40   CoverageSourceInfo *CoverageInfo = new CoverageSourceInfo();
41   PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(CoverageInfo));
42   PP.addCommentHandler(CoverageInfo);
43   PP.setPreprocessToken(true);
44   PP.setTokenWatcher([CoverageInfo](clang::Token Tok) {
45     // Update previous token location.
46     CoverageInfo->PrevTokLoc = Tok.getLocation();
47     if (Tok.getKind() != clang::tok::eod)
48       CoverageInfo->updateNextTokLoc(Tok.getLocation());
49   });
50   return CoverageInfo;
51 }
52 
53 void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range, SourceLocation) {
54   SkippedRanges.push_back({Range});
55 }
56 
57 bool CoverageSourceInfo::HandleComment(Preprocessor &PP, SourceRange Range) {
58   SkippedRanges.push_back({Range, PrevTokLoc});
59   AfterComment = true;
60   return false;
61 }
62 
63 void CoverageSourceInfo::updateNextTokLoc(SourceLocation Loc) {
64   if (AfterComment) {
65     SkippedRanges.back().NextTokLoc = Loc;
66     AfterComment = false;
67   }
68 }
69 
70 namespace {
71 
72 /// A region of source code that can be mapped to a counter.
73 class SourceMappingRegion {
74   Counter Count;
75 
76   /// The region's starting location.
77   Optional<SourceLocation> LocStart;
78 
79   /// The region's ending location.
80   Optional<SourceLocation> LocEnd;
81 
82   /// Whether this region should be emitted after its parent is emitted.
83   bool DeferRegion;
84 
85   /// Whether this region is a gap region. The count from a gap region is set
86   /// as the line execution count if there are no other regions on the line.
87   bool GapRegion;
88 
89 public:
90   SourceMappingRegion(Counter Count, Optional<SourceLocation> LocStart,
91                       Optional<SourceLocation> LocEnd, bool DeferRegion = false,
92                       bool GapRegion = false)
93       : Count(Count), LocStart(LocStart), LocEnd(LocEnd),
94         DeferRegion(DeferRegion), GapRegion(GapRegion) {}
95 
96   const Counter &getCounter() const { return Count; }
97 
98   void setCounter(Counter C) { Count = C; }
99 
100   bool hasStartLoc() const { return LocStart.hasValue(); }
101 
102   void setStartLoc(SourceLocation Loc) { LocStart = Loc; }
103 
104   SourceLocation getBeginLoc() const {
105     assert(LocStart && "Region has no start location");
106     return *LocStart;
107   }
108 
109   bool hasEndLoc() const { return LocEnd.hasValue(); }
110 
111   void setEndLoc(SourceLocation Loc) {
112     assert(Loc.isValid() && "Setting an invalid end location");
113     LocEnd = Loc;
114   }
115 
116   SourceLocation getEndLoc() const {
117     assert(LocEnd && "Region has no end location");
118     return *LocEnd;
119   }
120 
121   bool isDeferred() const { return DeferRegion; }
122 
123   void setDeferred(bool Deferred) { DeferRegion = Deferred; }
124 
125   bool isGap() const { return GapRegion; }
126 
127   void setGap(bool Gap) { GapRegion = Gap; }
128 };
129 
130 /// Spelling locations for the start and end of a source region.
131 struct SpellingRegion {
132   /// The line where the region starts.
133   unsigned LineStart;
134 
135   /// The column where the region starts.
136   unsigned ColumnStart;
137 
138   /// The line where the region ends.
139   unsigned LineEnd;
140 
141   /// The column where the region ends.
142   unsigned ColumnEnd;
143 
144   SpellingRegion(SourceManager &SM, SourceLocation LocStart,
145                  SourceLocation LocEnd) {
146     LineStart = SM.getSpellingLineNumber(LocStart);
147     ColumnStart = SM.getSpellingColumnNumber(LocStart);
148     LineEnd = SM.getSpellingLineNumber(LocEnd);
149     ColumnEnd = SM.getSpellingColumnNumber(LocEnd);
150   }
151 
152   SpellingRegion(SourceManager &SM, SourceMappingRegion &R)
153       : SpellingRegion(SM, R.getBeginLoc(), R.getEndLoc()) {}
154 
155   /// Check if the start and end locations appear in source order, i.e
156   /// top->bottom, left->right.
157   bool isInSourceOrder() const {
158     return (LineStart < LineEnd) ||
159            (LineStart == LineEnd && ColumnStart <= ColumnEnd);
160   }
161 };
162 
163 /// Provides the common functionality for the different
164 /// coverage mapping region builders.
165 class CoverageMappingBuilder {
166 public:
167   CoverageMappingModuleGen &CVM;
168   SourceManager &SM;
169   const LangOptions &LangOpts;
170 
171 private:
172   /// Map of clang's FileIDs to IDs used for coverage mapping.
173   llvm::SmallDenseMap<FileID, std::pair<unsigned, SourceLocation>, 8>
174       FileIDMapping;
175 
176 public:
177   /// The coverage mapping regions for this function
178   llvm::SmallVector<CounterMappingRegion, 32> MappingRegions;
179   /// The source mapping regions for this function.
180   std::vector<SourceMappingRegion> SourceRegions;
181 
182   /// A set of regions which can be used as a filter.
183   ///
184   /// It is produced by emitExpansionRegions() and is used in
185   /// emitSourceRegions() to suppress producing code regions if
186   /// the same area is covered by expansion regions.
187   typedef llvm::SmallSet<std::pair<SourceLocation, SourceLocation>, 8>
188       SourceRegionFilter;
189 
190   CoverageMappingBuilder(CoverageMappingModuleGen &CVM, SourceManager &SM,
191                          const LangOptions &LangOpts)
192       : CVM(CVM), SM(SM), LangOpts(LangOpts) {}
193 
194   /// Return the precise end location for the given token.
195   SourceLocation getPreciseTokenLocEnd(SourceLocation Loc) {
196     // We avoid getLocForEndOfToken here, because it doesn't do what we want for
197     // macro locations, which we just treat as expanded files.
198     unsigned TokLen =
199         Lexer::MeasureTokenLength(SM.getSpellingLoc(Loc), SM, LangOpts);
200     return Loc.getLocWithOffset(TokLen);
201   }
202 
203   /// Return the start location of an included file or expanded macro.
204   SourceLocation getStartOfFileOrMacro(SourceLocation Loc) {
205     if (Loc.isMacroID())
206       return Loc.getLocWithOffset(-SM.getFileOffset(Loc));
207     return SM.getLocForStartOfFile(SM.getFileID(Loc));
208   }
209 
210   /// Return the end location of an included file or expanded macro.
211   SourceLocation getEndOfFileOrMacro(SourceLocation Loc) {
212     if (Loc.isMacroID())
213       return Loc.getLocWithOffset(SM.getFileIDSize(SM.getFileID(Loc)) -
214                                   SM.getFileOffset(Loc));
215     return SM.getLocForEndOfFile(SM.getFileID(Loc));
216   }
217 
218   /// Find out where the current file is included or macro is expanded.
219   SourceLocation getIncludeOrExpansionLoc(SourceLocation Loc) {
220     return Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin()
221                            : SM.getIncludeLoc(SM.getFileID(Loc));
222   }
223 
224   /// Return true if \c Loc is a location in a built-in macro.
225   bool isInBuiltin(SourceLocation Loc) {
226     return SM.getBufferName(SM.getSpellingLoc(Loc)) == "<built-in>";
227   }
228 
229   /// Check whether \c Loc is included or expanded from \c Parent.
230   bool isNestedIn(SourceLocation Loc, FileID Parent) {
231     do {
232       Loc = getIncludeOrExpansionLoc(Loc);
233       if (Loc.isInvalid())
234         return false;
235     } while (!SM.isInFileID(Loc, Parent));
236     return true;
237   }
238 
239   /// Get the start of \c S ignoring macro arguments and builtin macros.
240   SourceLocation getStart(const Stmt *S) {
241     SourceLocation Loc = S->getBeginLoc();
242     while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
243       Loc = SM.getImmediateExpansionRange(Loc).getBegin();
244     return Loc;
245   }
246 
247   /// Get the end of \c S ignoring macro arguments and builtin macros.
248   SourceLocation getEnd(const Stmt *S) {
249     SourceLocation Loc = S->getEndLoc();
250     while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
251       Loc = SM.getImmediateExpansionRange(Loc).getBegin();
252     return getPreciseTokenLocEnd(Loc);
253   }
254 
255   /// Find the set of files we have regions for and assign IDs
256   ///
257   /// Fills \c Mapping with the virtual file mapping needed to write out
258   /// coverage and collects the necessary file information to emit source and
259   /// expansion regions.
260   void gatherFileIDs(SmallVectorImpl<unsigned> &Mapping) {
261     FileIDMapping.clear();
262 
263     llvm::SmallSet<FileID, 8> Visited;
264     SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs;
265     for (const auto &Region : SourceRegions) {
266       SourceLocation Loc = Region.getBeginLoc();
267       FileID File = SM.getFileID(Loc);
268       if (!Visited.insert(File).second)
269         continue;
270 
271       // Do not map FileID's associated with system headers.
272       if (SM.isInSystemHeader(SM.getSpellingLoc(Loc)))
273         continue;
274 
275       unsigned Depth = 0;
276       for (SourceLocation Parent = getIncludeOrExpansionLoc(Loc);
277            Parent.isValid(); Parent = getIncludeOrExpansionLoc(Parent))
278         ++Depth;
279       FileLocs.push_back(std::make_pair(Loc, Depth));
280     }
281     llvm::stable_sort(FileLocs, llvm::less_second());
282 
283     for (const auto &FL : FileLocs) {
284       SourceLocation Loc = FL.first;
285       FileID SpellingFile = SM.getDecomposedSpellingLoc(Loc).first;
286       auto Entry = SM.getFileEntryForID(SpellingFile);
287       if (!Entry)
288         continue;
289 
290       FileIDMapping[SM.getFileID(Loc)] = std::make_pair(Mapping.size(), Loc);
291       Mapping.push_back(CVM.getFileID(Entry));
292     }
293   }
294 
295   /// Get the coverage mapping file ID for \c Loc.
296   ///
297   /// If such file id doesn't exist, return None.
298   Optional<unsigned> getCoverageFileID(SourceLocation Loc) {
299     auto Mapping = FileIDMapping.find(SM.getFileID(Loc));
300     if (Mapping != FileIDMapping.end())
301       return Mapping->second.first;
302     return None;
303   }
304 
305   /// This shrinks the skipped range if it spans a line that contains a
306   /// non-comment token. If shrinking the skipped range would make it empty,
307   /// this returns None.
308   Optional<SpellingRegion> adjustSkippedRange(SourceManager &SM,
309                                               SourceLocation LocStart,
310                                               SourceLocation LocEnd,
311                                               SourceLocation PrevTokLoc,
312                                               SourceLocation NextTokLoc) {
313     SpellingRegion SR{SM, LocStart, LocEnd};
314     // If Range begin location is invalid, it's not a comment region.
315     if (PrevTokLoc.isInvalid())
316       return SR;
317     unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc);
318     unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc);
319     SpellingRegion newSR(SR);
320     if (SM.isWrittenInSameFile(LocStart, PrevTokLoc) &&
321         SR.LineStart == PrevTokLine) {
322       newSR.LineStart = SR.LineStart + 1;
323       newSR.ColumnStart = 1;
324     }
325     if (SM.isWrittenInSameFile(LocEnd, NextTokLoc) &&
326         SR.LineEnd == NextTokLine) {
327       newSR.LineEnd = SR.LineEnd - 1;
328       newSR.ColumnEnd = SR.ColumnStart + 1;
329     }
330     if (newSR.isInSourceOrder())
331       return newSR;
332     return None;
333   }
334 
335   /// Gather all the regions that were skipped by the preprocessor
336   /// using the constructs like #if or comments.
337   void gatherSkippedRegions() {
338     /// An array of the minimum lineStarts and the maximum lineEnds
339     /// for mapping regions from the appropriate source files.
340     llvm::SmallVector<std::pair<unsigned, unsigned>, 8> FileLineRanges;
341     FileLineRanges.resize(
342         FileIDMapping.size(),
343         std::make_pair(std::numeric_limits<unsigned>::max(), 0));
344     for (const auto &R : MappingRegions) {
345       FileLineRanges[R.FileID].first =
346           std::min(FileLineRanges[R.FileID].first, R.LineStart);
347       FileLineRanges[R.FileID].second =
348           std::max(FileLineRanges[R.FileID].second, R.LineEnd);
349     }
350 
351     auto SkippedRanges = CVM.getSourceInfo().getSkippedRanges();
352     for (auto &I : SkippedRanges) {
353       SourceRange Range = I.Range;
354       auto LocStart = Range.getBegin();
355       auto LocEnd = Range.getEnd();
356       assert(SM.isWrittenInSameFile(LocStart, LocEnd) &&
357              "region spans multiple files");
358 
359       auto CovFileID = getCoverageFileID(LocStart);
360       if (!CovFileID)
361         continue;
362       Optional<SpellingRegion> SR =
363           adjustSkippedRange(SM, LocStart, LocEnd, I.PrevTokLoc, I.NextTokLoc);
364       if (!SR.hasValue())
365         continue;
366       auto Region = CounterMappingRegion::makeSkipped(
367           *CovFileID, SR->LineStart, SR->ColumnStart, SR->LineEnd,
368           SR->ColumnEnd);
369       // Make sure that we only collect the regions that are inside
370       // the source code of this function.
371       if (Region.LineStart >= FileLineRanges[*CovFileID].first &&
372           Region.LineEnd <= FileLineRanges[*CovFileID].second)
373         MappingRegions.push_back(Region);
374     }
375   }
376 
377   /// Generate the coverage counter mapping regions from collected
378   /// source regions.
379   void emitSourceRegions(const SourceRegionFilter &Filter) {
380     for (const auto &Region : SourceRegions) {
381       assert(Region.hasEndLoc() && "incomplete region");
382 
383       SourceLocation LocStart = Region.getBeginLoc();
384       assert(SM.getFileID(LocStart).isValid() && "region in invalid file");
385 
386       // Ignore regions from system headers.
387       if (SM.isInSystemHeader(SM.getSpellingLoc(LocStart)))
388         continue;
389 
390       auto CovFileID = getCoverageFileID(LocStart);
391       // Ignore regions that don't have a file, such as builtin macros.
392       if (!CovFileID)
393         continue;
394 
395       SourceLocation LocEnd = Region.getEndLoc();
396       assert(SM.isWrittenInSameFile(LocStart, LocEnd) &&
397              "region spans multiple files");
398 
399       // Don't add code regions for the area covered by expansion regions.
400       // This not only suppresses redundant regions, but sometimes prevents
401       // creating regions with wrong counters if, for example, a statement's
402       // body ends at the end of a nested macro.
403       if (Filter.count(std::make_pair(LocStart, LocEnd)))
404         continue;
405 
406       // Find the spelling locations for the mapping region.
407       SpellingRegion SR{SM, LocStart, LocEnd};
408       assert(SR.isInSourceOrder() && "region start and end out of order");
409 
410       if (Region.isGap()) {
411         MappingRegions.push_back(CounterMappingRegion::makeGapRegion(
412             Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart,
413             SR.LineEnd, SR.ColumnEnd));
414       } else {
415         MappingRegions.push_back(CounterMappingRegion::makeRegion(
416             Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart,
417             SR.LineEnd, SR.ColumnEnd));
418       }
419     }
420   }
421 
422   /// Generate expansion regions for each virtual file we've seen.
423   SourceRegionFilter emitExpansionRegions() {
424     SourceRegionFilter Filter;
425     for (const auto &FM : FileIDMapping) {
426       SourceLocation ExpandedLoc = FM.second.second;
427       SourceLocation ParentLoc = getIncludeOrExpansionLoc(ExpandedLoc);
428       if (ParentLoc.isInvalid())
429         continue;
430 
431       auto ParentFileID = getCoverageFileID(ParentLoc);
432       if (!ParentFileID)
433         continue;
434       auto ExpandedFileID = getCoverageFileID(ExpandedLoc);
435       assert(ExpandedFileID && "expansion in uncovered file");
436 
437       SourceLocation LocEnd = getPreciseTokenLocEnd(ParentLoc);
438       assert(SM.isWrittenInSameFile(ParentLoc, LocEnd) &&
439              "region spans multiple files");
440       Filter.insert(std::make_pair(ParentLoc, LocEnd));
441 
442       SpellingRegion SR{SM, ParentLoc, LocEnd};
443       assert(SR.isInSourceOrder() && "region start and end out of order");
444       MappingRegions.push_back(CounterMappingRegion::makeExpansion(
445           *ParentFileID, *ExpandedFileID, SR.LineStart, SR.ColumnStart,
446           SR.LineEnd, SR.ColumnEnd));
447     }
448     return Filter;
449   }
450 };
451 
452 /// Creates unreachable coverage regions for the functions that
453 /// are not emitted.
454 struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
455   EmptyCoverageMappingBuilder(CoverageMappingModuleGen &CVM, SourceManager &SM,
456                               const LangOptions &LangOpts)
457       : CoverageMappingBuilder(CVM, SM, LangOpts) {}
458 
459   void VisitDecl(const Decl *D) {
460     if (!D->hasBody())
461       return;
462     auto Body = D->getBody();
463     SourceLocation Start = getStart(Body);
464     SourceLocation End = getEnd(Body);
465     if (!SM.isWrittenInSameFile(Start, End)) {
466       // Walk up to find the common ancestor.
467       // Correct the locations accordingly.
468       FileID StartFileID = SM.getFileID(Start);
469       FileID EndFileID = SM.getFileID(End);
470       while (StartFileID != EndFileID && !isNestedIn(End, StartFileID)) {
471         Start = getIncludeOrExpansionLoc(Start);
472         assert(Start.isValid() &&
473                "Declaration start location not nested within a known region");
474         StartFileID = SM.getFileID(Start);
475       }
476       while (StartFileID != EndFileID) {
477         End = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(End));
478         assert(End.isValid() &&
479                "Declaration end location not nested within a known region");
480         EndFileID = SM.getFileID(End);
481       }
482     }
483     SourceRegions.emplace_back(Counter(), Start, End);
484   }
485 
486   /// Write the mapping data to the output stream
487   void write(llvm::raw_ostream &OS) {
488     SmallVector<unsigned, 16> FileIDMapping;
489     gatherFileIDs(FileIDMapping);
490     emitSourceRegions(SourceRegionFilter());
491 
492     if (MappingRegions.empty())
493       return;
494 
495     CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions);
496     Writer.write(OS);
497   }
498 };
499 
500 /// A StmtVisitor that creates coverage mapping regions which map
501 /// from the source code locations to the PGO counters.
502 struct CounterCoverageMappingBuilder
503     : public CoverageMappingBuilder,
504       public ConstStmtVisitor<CounterCoverageMappingBuilder> {
505   /// The map of statements to count values.
506   llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
507 
508   /// A stack of currently live regions.
509   std::vector<SourceMappingRegion> RegionStack;
510 
511   /// The currently deferred region: its end location and count can be set once
512   /// its parent has been popped from the region stack.
513   Optional<SourceMappingRegion> DeferredRegion;
514 
515   CounterExpressionBuilder Builder;
516 
517   /// A location in the most recently visited file or macro.
518   ///
519   /// This is used to adjust the active source regions appropriately when
520   /// expressions cross file or macro boundaries.
521   SourceLocation MostRecentLocation;
522 
523   /// Location of the last terminated region.
524   Optional<std::pair<SourceLocation, size_t>> LastTerminatedRegion;
525 
526   /// Return a counter for the subtraction of \c RHS from \c LHS
527   Counter subtractCounters(Counter LHS, Counter RHS) {
528     return Builder.subtract(LHS, RHS);
529   }
530 
531   /// Return a counter for the sum of \c LHS and \c RHS.
532   Counter addCounters(Counter LHS, Counter RHS) {
533     return Builder.add(LHS, RHS);
534   }
535 
536   Counter addCounters(Counter C1, Counter C2, Counter C3) {
537     return addCounters(addCounters(C1, C2), C3);
538   }
539 
540   /// Return the region counter for the given statement.
541   ///
542   /// This should only be called on statements that have a dedicated counter.
543   Counter getRegionCounter(const Stmt *S) {
544     return Counter::getCounter(CounterMap[S]);
545   }
546 
547   /// Push a region onto the stack.
548   ///
549   /// Returns the index on the stack where the region was pushed. This can be
550   /// used with popRegions to exit a "scope", ending the region that was pushed.
551   size_t pushRegion(Counter Count, Optional<SourceLocation> StartLoc = None,
552                     Optional<SourceLocation> EndLoc = None) {
553     if (StartLoc) {
554       MostRecentLocation = *StartLoc;
555       completeDeferred(Count, MostRecentLocation);
556     }
557     RegionStack.emplace_back(Count, StartLoc, EndLoc);
558 
559     return RegionStack.size() - 1;
560   }
561 
562   /// Complete any pending deferred region by setting its end location and
563   /// count, and then pushing it onto the region stack.
564   size_t completeDeferred(Counter Count, SourceLocation DeferredEndLoc) {
565     size_t Index = RegionStack.size();
566     if (!DeferredRegion)
567       return Index;
568 
569     // Consume the pending region.
570     SourceMappingRegion DR = DeferredRegion.getValue();
571     DeferredRegion = None;
572 
573     // If the region ends in an expansion, find the expansion site.
574     FileID StartFile = SM.getFileID(DR.getBeginLoc());
575     if (SM.getFileID(DeferredEndLoc) != StartFile) {
576       if (isNestedIn(DeferredEndLoc, StartFile)) {
577         do {
578           DeferredEndLoc = getIncludeOrExpansionLoc(DeferredEndLoc);
579         } while (StartFile != SM.getFileID(DeferredEndLoc));
580       } else {
581         return Index;
582       }
583     }
584 
585     // The parent of this deferred region ends where the containing decl ends,
586     // so the region isn't useful.
587     if (DR.getBeginLoc() == DeferredEndLoc)
588       return Index;
589 
590     // If we're visiting statements in non-source order (e.g switch cases or
591     // a loop condition) we can't construct a sensible deferred region.
592     if (!SpellingRegion(SM, DR.getBeginLoc(), DeferredEndLoc).isInSourceOrder())
593       return Index;
594 
595     DR.setGap(true);
596     DR.setCounter(Count);
597     DR.setEndLoc(DeferredEndLoc);
598     handleFileExit(DeferredEndLoc);
599     RegionStack.push_back(DR);
600     return Index;
601   }
602 
603   /// Complete a deferred region created after a terminated region at the
604   /// top-level.
605   void completeTopLevelDeferredRegion(Counter Count,
606                                       SourceLocation DeferredEndLoc) {
607     if (DeferredRegion || !LastTerminatedRegion)
608       return;
609 
610     if (LastTerminatedRegion->second != RegionStack.size())
611       return;
612 
613     SourceLocation Start = LastTerminatedRegion->first;
614     if (SM.getFileID(Start) != SM.getMainFileID())
615       return;
616 
617     SourceMappingRegion DR = RegionStack.back();
618     DR.setStartLoc(Start);
619     DR.setDeferred(false);
620     DeferredRegion = DR;
621     completeDeferred(Count, DeferredEndLoc);
622   }
623 
624   size_t locationDepth(SourceLocation Loc) {
625     size_t Depth = 0;
626     while (Loc.isValid()) {
627       Loc = getIncludeOrExpansionLoc(Loc);
628       Depth++;
629     }
630     return Depth;
631   }
632 
  /// Pop regions from the stack into the function's list of regions.
  ///
  /// Adds all regions from \c ParentIndex to the top of the stack to the
  /// function's \c SourceRegions.
  ///
  /// Regions without an end location inherit the end of the region at
  /// \c ParentIndex. A region whose start and end are written in different
  /// files or macro expansions is split: one extra region is added per nested
  /// file/expansion and the remaining region is moved up to the common file.
  /// Stack entries without a start location are dropped; if such an entry is
  /// marked deferred, the next popped region with a start location may create
  /// a new pending \c DeferredRegion beginning at its end.
  void popRegions(size_t ParentIndex) {
    assert(RegionStack.size() >= ParentIndex && "parent not in stack");
    // Set when a deferred marker was just popped; consumed by the next region
    // that has a start location.
    bool ParentOfDeferredRegion = false;
    while (RegionStack.size() > ParentIndex) {
      SourceMappingRegion &Region = RegionStack.back();
      if (Region.hasStartLoc()) {
        SourceLocation StartLoc = Region.getBeginLoc();
        SourceLocation EndLoc = Region.hasEndLoc()
                                    ? Region.getEndLoc()
                                    : RegionStack[ParentIndex].getEndLoc();
        size_t StartDepth = locationDepth(StartLoc);
        size_t EndDepth = locationDepth(EndLoc);
        // Un-nest whichever end is deeper (both when equally deep) until the
        // two locations are written in the same file.
        while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) {
          bool UnnestStart = StartDepth >= EndDepth;
          bool UnnestEnd = EndDepth >= StartDepth;
          if (UnnestEnd) {
            // The region ends in a nested file or macro expansion. Create a
            // separate region for each expansion.
            SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc);
            assert(SM.isWrittenInSameFile(NestedLoc, EndLoc));

            if (!isRegionAlreadyAdded(NestedLoc, EndLoc))
              SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);

            // Continue from the end of the token at the include/expansion
            // site.
            EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc));
            if (EndLoc.isInvalid())
              llvm::report_fatal_error("File exit not handled before popRegions");
            EndDepth--;
          }
          if (UnnestStart) {
            // The region begins in a nested file or macro expansion. Create a
            // separate region for each expansion.
            SourceLocation NestedLoc = getEndOfFileOrMacro(StartLoc);
            assert(SM.isWrittenInSameFile(StartLoc, NestedLoc));

            if (!isRegionAlreadyAdded(StartLoc, NestedLoc))
              SourceRegions.emplace_back(Region.getCounter(), StartLoc, NestedLoc);

            // Continue from the include/expansion site itself.
            StartLoc = getIncludeOrExpansionLoc(StartLoc);
            if (StartLoc.isInvalid())
              llvm::report_fatal_error("File exit not handled before popRegions");
            StartDepth--;
          }
        }
        Region.setStartLoc(StartLoc);
        Region.setEndLoc(EndLoc);

        MostRecentLocation = EndLoc;
        // If this region happens to span an entire expansion, we need to make
        // sure we don't overlap the parent region with it.
        if (StartLoc == getStartOfFileOrMacro(StartLoc) &&
            EndLoc == getEndOfFileOrMacro(EndLoc))
          MostRecentLocation = getIncludeOrExpansionLoc(EndLoc);

        assert(SM.isWrittenInSameFile(Region.getBeginLoc(), EndLoc));
        assert(SpellingRegion(SM, Region).isInSourceOrder());
        SourceRegions.push_back(Region);

        if (ParentOfDeferredRegion) {
          ParentOfDeferredRegion = false;

          // If there's an existing deferred region, keep the old one, because
          // it means there are two consecutive returns (or a similar pattern).
          if (!DeferredRegion.hasValue() &&
              // File IDs aren't gathered within macro expansions, so it isn't
              // useful to try and create a deferred region inside of one.
              !EndLoc.isMacroID())
            // The new pending region starts where this region ends; its
            // counter and end are filled in when it is completed.
            DeferredRegion =
                SourceMappingRegion(Counter::getZero(), EndLoc, None);
        }
      } else if (Region.isDeferred()) {
        // A location-less deferred marker: remember it for the region below
        // it on the stack.
        assert(!ParentOfDeferredRegion && "Consecutive deferred regions");
        ParentOfDeferredRegion = true;
      }
      RegionStack.pop_back();

      // If the zero region pushed after the last terminated region no longer
      // exists, clear its cached information.
      if (LastTerminatedRegion &&
          RegionStack.size() < LastTerminatedRegion->second)
        LastTerminatedRegion = None;
    }
    assert(!ParentOfDeferredRegion && "Deferred region with no parent");
  }
721 
722   /// Return the currently active region.
723   SourceMappingRegion &getRegion() {
724     assert(!RegionStack.empty() && "statement has no region");
725     return RegionStack.back();
726   }
727 
728   /// Propagate counts through the children of \p S if \p VisitChildren is true.
729   /// Otherwise, only emit a count for \p S itself.
730   Counter propagateCounts(Counter TopCount, const Stmt *S,
731                           bool VisitChildren = true) {
732     SourceLocation StartLoc = getStart(S);
733     SourceLocation EndLoc = getEnd(S);
734     size_t Index = pushRegion(TopCount, StartLoc, EndLoc);
735     if (VisitChildren)
736       Visit(S);
737     Counter ExitCount = getRegion().getCounter();
738     popRegions(Index);
739 
740     // The statement may be spanned by an expansion. Make sure we handle a file
741     // exit out of this expansion before moving to the next statement.
742     if (SM.isBeforeInTranslationUnit(StartLoc, S->getBeginLoc()))
743       MostRecentLocation = EndLoc;
744 
745     return ExitCount;
746   }
747 
748   /// Check whether a region with bounds \c StartLoc and \c EndLoc
749   /// is already added to \c SourceRegions.
750   bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc) {
751     return SourceRegions.rend() !=
752            std::find_if(SourceRegions.rbegin(), SourceRegions.rend(),
753                         [&](const SourceMappingRegion &Region) {
754                           return Region.getBeginLoc() == StartLoc &&
755                                  Region.getEndLoc() == EndLoc;
756                         });
757   }
758 
759   /// Adjust the most recently visited location to \c EndLoc.
760   ///
761   /// This should be used after visiting any statements in non-source order.
762   void adjustForOutOfOrderTraversal(SourceLocation EndLoc) {
763     MostRecentLocation = EndLoc;
764     // The code region for a whole macro is created in handleFileExit() when
765     // it detects exiting of the virtual file of that macro. If we visited
766     // statements in non-source order, we might already have such a region
767     // added, for example, if a body of a loop is divided among multiple
768     // macros. Avoid adding duplicate regions in such case.
769     if (getRegion().hasEndLoc() &&
770         MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation) &&
771         isRegionAlreadyAdded(getStartOfFileOrMacro(MostRecentLocation),
772                              MostRecentLocation))
773       MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation);
774   }
775 
  /// Adjust regions and state when \c NewLoc exits a file.
  ///
  /// If moving from our most recently tracked location to \c NewLoc exits any
  /// files, this adjusts our current region stack and creates the file regions
  /// for the exited file.
  ///
  /// \param NewLoc the location being moved to. An invalid location, or one
  /// written in the same file as \c MostRecentLocation, makes this a no-op.
  /// In every other case \c MostRecentLocation ends up equal to \c NewLoc.
  void handleFileExit(SourceLocation NewLoc) {
    // Nothing to do without a destination or when no file boundary is crossed.
    if (NewLoc.isInvalid() ||
        SM.isWrittenInSameFile(MostRecentLocation, NewLoc))
      return;

    // If NewLoc is not in a file that contains MostRecentLocation, walk up to
    // find the common ancestor.
    SourceLocation LCA = NewLoc;
    FileID ParentFile = SM.getFileID(LCA);
    while (!isNestedIn(MostRecentLocation, ParentFile)) {
      LCA = getIncludeOrExpansionLoc(LCA);
      if (LCA.isInvalid() || SM.isWrittenInSameFile(LCA, MostRecentLocation)) {
        // Since there isn't a common ancestor, no file was exited. We just need
        // to adjust our location to the new file.
        MostRecentLocation = NewLoc;
        return;
      }
      ParentFile = SM.getFileID(LCA);
    }

    // Start locations that already received a file region during this call;
    // used to avoid emitting the same region twice.
    llvm::SmallSet<SourceLocation, 8> StartLocs;
    // Counter of the innermost started region that is not nested in
    // ParentFile, if the walk below reaches one.
    Optional<Counter> ParentCounter;
    // Walk the region stack from the innermost region outwards.
    for (SourceMappingRegion &I : llvm::reverse(RegionStack)) {
      if (!I.hasStartLoc())
        continue;
      SourceLocation Loc = I.getBeginLoc();
      if (!isNestedIn(Loc, ParentFile)) {
        ParentCounter = I.getCounter();
        break;
      }

      // Climb the include/expansion chain from the region's start until we
      // are directly inside ParentFile, emitting one region per level left.
      while (!SM.isInFileID(Loc, ParentFile)) {
        // The most nested region for each start location is the one with the
        // correct count. We avoid creating redundant regions by stopping once
        // we've seen this region.
        if (StartLocs.insert(Loc).second)
          SourceRegions.emplace_back(I.getCounter(), Loc,
                                     getEndOfFileOrMacro(Loc));
        Loc = getIncludeOrExpansionLoc(Loc);
      }
      // The region on the stack now restarts at the end of the token found at
      // the include/expansion location inside ParentFile.
      I.setStartLoc(getPreciseTokenLocEnd(Loc));
    }

    if (ParentCounter) {
      // If the file is contained completely by another region and doesn't
      // immediately start its own region, the whole file gets a region
      // corresponding to the parent.
      SourceLocation Loc = MostRecentLocation;
      while (isNestedIn(Loc, ParentFile)) {
        SourceLocation FileStart = getStartOfFileOrMacro(Loc);
        if (StartLocs.insert(FileStart).second) {
          SourceRegions.emplace_back(*ParentCounter, FileStart,
                                     getEndOfFileOrMacro(Loc));
          assert(SpellingRegion(SM, SourceRegions.back()).isInSourceOrder());
        }
        Loc = getIncludeOrExpansionLoc(Loc);
      }
    }

    MostRecentLocation = NewLoc;
  }
842 
843   /// Ensure that \c S is included in the current region.
844   void extendRegion(const Stmt *S) {
845     SourceMappingRegion &Region = getRegion();
846     SourceLocation StartLoc = getStart(S);
847 
848     handleFileExit(StartLoc);
849     if (!Region.hasStartLoc())
850       Region.setStartLoc(StartLoc);
851 
852     completeDeferred(Region.getCounter(), StartLoc);
853   }
854 
855   /// Mark \c S as a terminator, starting a zero region.
856   void terminateRegion(const Stmt *S) {
857     extendRegion(S);
858     SourceMappingRegion &Region = getRegion();
859     SourceLocation EndLoc = getEnd(S);
860     if (!Region.hasEndLoc())
861       Region.setEndLoc(EndLoc);
862     pushRegion(Counter::getZero());
863     auto &ZeroRegion = getRegion();
864     ZeroRegion.setDeferred(true);
865     LastTerminatedRegion = {EndLoc, RegionStack.size()};
866   }
867 
868   /// Find a valid gap range between \p AfterLoc and \p BeforeLoc.
869   Optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc,
870                                            SourceLocation BeforeLoc) {
871     // If the start and end locations of the gap are both within the same macro
872     // file, the range may not be in source order.
873     if (AfterLoc.isMacroID() || BeforeLoc.isMacroID())
874       return None;
875     if (!SM.isWrittenInSameFile(AfterLoc, BeforeLoc))
876       return None;
877     return {{AfterLoc, BeforeLoc}};
878   }
879 
880   /// Find the source range after \p AfterStmt and before \p BeforeStmt.
881   Optional<SourceRange> findGapAreaBetween(const Stmt *AfterStmt,
882                                            const Stmt *BeforeStmt) {
883     return findGapAreaBetween(getPreciseTokenLocEnd(getEnd(AfterStmt)),
884                               getStart(BeforeStmt));
885   }
886 
887   /// Emit a gap region between \p StartLoc and \p EndLoc with the given count.
888   void fillGapAreaWithCount(SourceLocation StartLoc, SourceLocation EndLoc,
889                             Counter Count) {
890     if (StartLoc == EndLoc)
891       return;
892     assert(SpellingRegion(SM, StartLoc, EndLoc).isInSourceOrder());
893     handleFileExit(StartLoc);
894     size_t Index = pushRegion(Count, StartLoc, EndLoc);
895     getRegion().setGap(true);
896     handleFileExit(EndLoc);
897     popRegions(Index);
898   }
899 
  /// Keep counts of breaks and continues inside loops.
  ///
  /// The visitors in this class push one entry before visiting a loop or
  /// switch body and pop it afterwards; break and continue statements add the
  /// current region's counter to the innermost entry.
  struct BreakContinue {
    /// Accumulated counter of the break statements seen so far.
    Counter BreakCount;
    /// Accumulated counter of the continue statements seen so far.
    Counter ContinueCount;
  };
  /// Stack of break/continue accumulators; the innermost construct is last.
  SmallVector<BreakContinue, 8> BreakContinueStack;
906 
  /// Construct a builder.
  ///
  /// \p CVM, \p SM and \p LangOpts are forwarded to the
  /// \c CoverageMappingBuilder base, \p CounterMap is kept as the
  /// statement-to-counter-index map, and there is initially no deferred
  /// region.
  CounterCoverageMappingBuilder(
      CoverageMappingModuleGen &CVM,
      llvm::DenseMap<const Stmt *, unsigned> &CounterMap, SourceManager &SM,
      const LangOptions &LangOpts)
      : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap),
        DeferredRegion(None) {}
913 
914   /// Write the mapping data to the output stream
915   void write(llvm::raw_ostream &OS) {
916     llvm::SmallVector<unsigned, 8> VirtualFileMapping;
917     gatherFileIDs(VirtualFileMapping);
918     SourceRegionFilter Filter = emitExpansionRegions();
919     assert(!DeferredRegion && "Deferred region never completed");
920     emitSourceRegions(Filter);
921     gatherSkippedRegions();
922 
923     if (MappingRegions.empty())
924       return;
925 
926     CoverageMappingWriter Writer(VirtualFileMapping, Builder.getExpressions(),
927                                  MappingRegions);
928     Writer.write(OS);
929   }
930 
931   void VisitStmt(const Stmt *S) {
932     if (S->getBeginLoc().isValid())
933       extendRegion(S);
934     for (const Stmt *Child : S->children())
935       if (Child)
936         this->Visit(Child);
937     handleFileExit(getEnd(S));
938   }
939 
940   void VisitDecl(const Decl *D) {
941     assert(!DeferredRegion && "Deferred region never completed");
942 
943     Stmt *Body = D->getBody();
944 
945     // Do not propagate region counts into system headers.
946     if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body))))
947       return;
948 
949     // Do not visit the artificial children nodes of defaulted methods. The
950     // lexer may not be able to report back precise token end locations for
951     // these children nodes (llvm.org/PR39822), and moreover users will not be
952     // able to see coverage for them.
953     bool Defaulted = false;
954     if (auto *Method = dyn_cast<CXXMethodDecl>(D))
955       Defaulted = Method->isDefaulted();
956 
957     propagateCounts(getRegionCounter(Body), Body,
958                     /*VisitChildren=*/!Defaulted);
959     assert(RegionStack.empty() && "Regions entered but never exited");
960 
961     // Discard the last uncompleted deferred region in a decl, if one exists.
962     // This prevents lines at the end of a function containing only whitespace
963     // or closing braces from being marked as uncovered.
964     DeferredRegion = None;
965   }
966 
967   void VisitReturnStmt(const ReturnStmt *S) {
968     extendRegion(S);
969     if (S->getRetValue())
970       Visit(S->getRetValue());
971     terminateRegion(S);
972   }
973 
974   void VisitCoroutineBodyStmt(const CoroutineBodyStmt *S) {
975     extendRegion(S);
976     Visit(S->getBody());
977   }
978 
979   void VisitCoreturnStmt(const CoreturnStmt *S) {
980     extendRegion(S);
981     if (S->getOperand())
982       Visit(S->getOperand());
983     terminateRegion(S);
984   }
985 
986   void VisitCXXThrowExpr(const CXXThrowExpr *E) {
987     extendRegion(E);
988     if (E->getSubExpr())
989       Visit(E->getSubExpr());
990     terminateRegion(E);
991   }
992 
993   void VisitGotoStmt(const GotoStmt *S) { terminateRegion(S); }
994 
995   void VisitLabelStmt(const LabelStmt *S) {
996     Counter LabelCount = getRegionCounter(S);
997     SourceLocation Start = getStart(S);
998     completeTopLevelDeferredRegion(LabelCount, Start);
999     completeDeferred(LabelCount, Start);
1000     // We can't extendRegion here or we risk overlapping with our new region.
1001     handleFileExit(Start);
1002     pushRegion(LabelCount, Start);
1003     Visit(S->getSubStmt());
1004   }
1005 
1006   void VisitBreakStmt(const BreakStmt *S) {
1007     assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
1008     BreakContinueStack.back().BreakCount = addCounters(
1009         BreakContinueStack.back().BreakCount, getRegion().getCounter());
1010     // FIXME: a break in a switch should terminate regions for all preceding
1011     // case statements, not just the most recent one.
1012     terminateRegion(S);
1013   }
1014 
1015   void VisitContinueStmt(const ContinueStmt *S) {
1016     assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
1017     BreakContinueStack.back().ContinueCount = addCounters(
1018         BreakContinueStack.back().ContinueCount, getRegion().getCounter());
1019     terminateRegion(S);
1020   }
1021 
1022   void VisitCallExpr(const CallExpr *E) {
1023     VisitStmt(E);
1024 
1025     // Terminate the region when we hit a noreturn function.
1026     // (This is helpful dealing with switch statements.)
1027     QualType CalleeType = E->getCallee()->getType();
1028     if (getFunctionExtInfo(*CalleeType).getNoReturn())
1029       terminateRegion(E);
1030   }
1031 
1032   void VisitWhileStmt(const WhileStmt *S) {
1033     extendRegion(S);
1034 
1035     Counter ParentCount = getRegion().getCounter();
1036     Counter BodyCount = getRegionCounter(S);
1037 
1038     // Handle the body first so that we can get the backedge count.
1039     BreakContinueStack.push_back(BreakContinue());
1040     extendRegion(S->getBody());
1041     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
1042     BreakContinue BC = BreakContinueStack.pop_back_val();
1043 
1044     // Go back to handle the condition.
1045     Counter CondCount =
1046         addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
1047     propagateCounts(CondCount, S->getCond());
1048     adjustForOutOfOrderTraversal(getEnd(S));
1049 
1050     // The body count applies to the area immediately after the increment.
1051     auto Gap = findGapAreaBetween(S->getCond(), S->getBody());
1052     if (Gap)
1053       fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
1054 
1055     Counter OutCount =
1056         addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
1057     if (OutCount != ParentCount)
1058       pushRegion(OutCount);
1059   }
1060 
1061   void VisitDoStmt(const DoStmt *S) {
1062     extendRegion(S);
1063 
1064     Counter ParentCount = getRegion().getCounter();
1065     Counter BodyCount = getRegionCounter(S);
1066 
1067     BreakContinueStack.push_back(BreakContinue());
1068     extendRegion(S->getBody());
1069     Counter BackedgeCount =
1070         propagateCounts(addCounters(ParentCount, BodyCount), S->getBody());
1071     BreakContinue BC = BreakContinueStack.pop_back_val();
1072 
1073     Counter CondCount = addCounters(BackedgeCount, BC.ContinueCount);
1074     propagateCounts(CondCount, S->getCond());
1075 
1076     Counter OutCount =
1077         addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
1078     if (OutCount != ParentCount)
1079       pushRegion(OutCount);
1080   }
1081 
1082   void VisitForStmt(const ForStmt *S) {
1083     extendRegion(S);
1084     if (S->getInit())
1085       Visit(S->getInit());
1086 
1087     Counter ParentCount = getRegion().getCounter();
1088     Counter BodyCount = getRegionCounter(S);
1089 
1090     // The loop increment may contain a break or continue.
1091     if (S->getInc())
1092       BreakContinueStack.emplace_back();
1093 
1094     // Handle the body first so that we can get the backedge count.
1095     BreakContinueStack.emplace_back();
1096     extendRegion(S->getBody());
1097     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
1098     BreakContinue BodyBC = BreakContinueStack.pop_back_val();
1099 
1100     // The increment is essentially part of the body but it needs to include
1101     // the count for all the continue statements.
1102     BreakContinue IncrementBC;
1103     if (const Stmt *Inc = S->getInc()) {
1104       propagateCounts(addCounters(BackedgeCount, BodyBC.ContinueCount), Inc);
1105       IncrementBC = BreakContinueStack.pop_back_val();
1106     }
1107 
1108     // Go back to handle the condition.
1109     Counter CondCount = addCounters(
1110         addCounters(ParentCount, BackedgeCount, BodyBC.ContinueCount),
1111         IncrementBC.ContinueCount);
1112     if (const Expr *Cond = S->getCond()) {
1113       propagateCounts(CondCount, Cond);
1114       adjustForOutOfOrderTraversal(getEnd(S));
1115     }
1116 
1117     // The body count applies to the area immediately after the increment.
1118     auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()),
1119                                   getStart(S->getBody()));
1120     if (Gap)
1121       fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
1122 
1123     Counter OutCount = addCounters(BodyBC.BreakCount, IncrementBC.BreakCount,
1124                                    subtractCounters(CondCount, BodyCount));
1125     if (OutCount != ParentCount)
1126       pushRegion(OutCount);
1127   }
1128 
1129   void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
1130     extendRegion(S);
1131     if (S->getInit())
1132       Visit(S->getInit());
1133     Visit(S->getLoopVarStmt());
1134     Visit(S->getRangeStmt());
1135 
1136     Counter ParentCount = getRegion().getCounter();
1137     Counter BodyCount = getRegionCounter(S);
1138 
1139     BreakContinueStack.push_back(BreakContinue());
1140     extendRegion(S->getBody());
1141     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
1142     BreakContinue BC = BreakContinueStack.pop_back_val();
1143 
1144     // The body count applies to the area immediately after the range.
1145     auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()),
1146                                   getStart(S->getBody()));
1147     if (Gap)
1148       fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
1149 
1150     Counter LoopCount =
1151         addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
1152     Counter OutCount =
1153         addCounters(BC.BreakCount, subtractCounters(LoopCount, BodyCount));
1154     if (OutCount != ParentCount)
1155       pushRegion(OutCount);
1156   }
1157 
1158   void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
1159     extendRegion(S);
1160     Visit(S->getElement());
1161 
1162     Counter ParentCount = getRegion().getCounter();
1163     Counter BodyCount = getRegionCounter(S);
1164 
1165     BreakContinueStack.push_back(BreakContinue());
1166     extendRegion(S->getBody());
1167     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
1168     BreakContinue BC = BreakContinueStack.pop_back_val();
1169 
1170     // The body count applies to the area immediately after the collection.
1171     auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()),
1172                                   getStart(S->getBody()));
1173     if (Gap)
1174       fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount);
1175 
1176     Counter LoopCount =
1177         addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
1178     Counter OutCount =
1179         addCounters(BC.BreakCount, subtractCounters(LoopCount, BodyCount));
1180     if (OutCount != ParentCount)
1181       pushRegion(OutCount);
1182   }
1183 
1184   void VisitSwitchStmt(const SwitchStmt *S) {
1185     extendRegion(S);
1186     if (S->getInit())
1187       Visit(S->getInit());
1188     Visit(S->getCond());
1189 
1190     BreakContinueStack.push_back(BreakContinue());
1191 
1192     const Stmt *Body = S->getBody();
1193     extendRegion(Body);
1194     if (const auto *CS = dyn_cast<CompoundStmt>(Body)) {
1195       if (!CS->body_empty()) {
1196         // Make a region for the body of the switch.  If the body starts with
1197         // a case, that case will reuse this region; otherwise, this covers
1198         // the unreachable code at the beginning of the switch body.
1199         size_t Index = pushRegion(Counter::getZero(), getStart(CS));
1200         getRegion().setGap(true);
1201         for (const auto *Child : CS->children())
1202           Visit(Child);
1203 
1204         // Set the end for the body of the switch, if it isn't already set.
1205         for (size_t i = RegionStack.size(); i != Index; --i) {
1206           if (!RegionStack[i - 1].hasEndLoc())
1207             RegionStack[i - 1].setEndLoc(getEnd(CS->body_back()));
1208         }
1209 
1210         popRegions(Index);
1211       }
1212     } else
1213       propagateCounts(Counter::getZero(), Body);
1214     BreakContinue BC = BreakContinueStack.pop_back_val();
1215 
1216     if (!BreakContinueStack.empty())
1217       BreakContinueStack.back().ContinueCount = addCounters(
1218           BreakContinueStack.back().ContinueCount, BC.ContinueCount);
1219 
1220     Counter ExitCount = getRegionCounter(S);
1221     SourceLocation ExitLoc = getEnd(S);
1222     pushRegion(ExitCount);
1223 
1224     // Ensure that handleFileExit recognizes when the end location is located
1225     // in a different file.
1226     MostRecentLocation = getStart(S);
1227     handleFileExit(ExitLoc);
1228   }
1229 
1230   void VisitSwitchCase(const SwitchCase *S) {
1231     extendRegion(S);
1232 
1233     SourceMappingRegion &Parent = getRegion();
1234 
1235     Counter Count = addCounters(Parent.getCounter(), getRegionCounter(S));
1236     // Reuse the existing region if it starts at our label. This is typical of
1237     // the first case in a switch.
1238     if (Parent.hasStartLoc() && Parent.getBeginLoc() == getStart(S))
1239       Parent.setCounter(Count);
1240     else
1241       pushRegion(Count, getStart(S));
1242 
1243     if (const auto *CS = dyn_cast<CaseStmt>(S)) {
1244       Visit(CS->getLHS());
1245       if (const Expr *RHS = CS->getRHS())
1246         Visit(RHS);
1247     }
1248     Visit(S->getSubStmt());
1249   }
1250 
1251   void VisitIfStmt(const IfStmt *S) {
1252     extendRegion(S);
1253     if (S->getInit())
1254       Visit(S->getInit());
1255 
1256     // Extend into the condition before we propagate through it below - this is
1257     // needed to handle macros that generate the "if" but not the condition.
1258     extendRegion(S->getCond());
1259 
1260     Counter ParentCount = getRegion().getCounter();
1261     Counter ThenCount = getRegionCounter(S);
1262 
1263     // Emitting a counter for the condition makes it easier to interpret the
1264     // counter for the body when looking at the coverage.
1265     propagateCounts(ParentCount, S->getCond());
1266 
1267     // The 'then' count applies to the area immediately after the condition.
1268     auto Gap = findGapAreaBetween(S->getCond(), S->getThen());
1269     if (Gap)
1270       fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount);
1271 
1272     extendRegion(S->getThen());
1273     Counter OutCount = propagateCounts(ThenCount, S->getThen());
1274 
1275     Counter ElseCount = subtractCounters(ParentCount, ThenCount);
1276     if (const Stmt *Else = S->getElse()) {
1277       // The 'else' count applies to the area immediately after the 'then'.
1278       Gap = findGapAreaBetween(S->getThen(), Else);
1279       if (Gap)
1280         fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ElseCount);
1281       extendRegion(Else);
1282       OutCount = addCounters(OutCount, propagateCounts(ElseCount, Else));
1283     } else
1284       OutCount = addCounters(OutCount, ElseCount);
1285 
1286     if (OutCount != ParentCount)
1287       pushRegion(OutCount);
1288   }
1289 
1290   void VisitCXXTryStmt(const CXXTryStmt *S) {
1291     extendRegion(S);
1292     // Handle macros that generate the "try" but not the rest.
1293     extendRegion(S->getTryBlock());
1294 
1295     Counter ParentCount = getRegion().getCounter();
1296     propagateCounts(ParentCount, S->getTryBlock());
1297 
1298     for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
1299       Visit(S->getHandler(I));
1300 
1301     Counter ExitCount = getRegionCounter(S);
1302     pushRegion(ExitCount);
1303   }
1304 
1305   void VisitCXXCatchStmt(const CXXCatchStmt *S) {
1306     propagateCounts(getRegionCounter(S), S->getHandlerBlock());
1307   }
1308 
1309   void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
1310     extendRegion(E);
1311 
1312     Counter ParentCount = getRegion().getCounter();
1313     Counter TrueCount = getRegionCounter(E);
1314 
1315     Visit(E->getCond());
1316 
1317     if (!isa<BinaryConditionalOperator>(E)) {
1318       // The 'then' count applies to the area immediately after the condition.
1319       auto Gap =
1320           findGapAreaBetween(E->getQuestionLoc(), getStart(E->getTrueExpr()));
1321       if (Gap)
1322         fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), TrueCount);
1323 
1324       extendRegion(E->getTrueExpr());
1325       propagateCounts(TrueCount, E->getTrueExpr());
1326     }
1327 
1328     extendRegion(E->getFalseExpr());
1329     propagateCounts(subtractCounters(ParentCount, TrueCount),
1330                     E->getFalseExpr());
1331   }
1332 
1333   void VisitBinLAnd(const BinaryOperator *E) {
1334     extendRegion(E->getLHS());
1335     propagateCounts(getRegion().getCounter(), E->getLHS());
1336     handleFileExit(getEnd(E->getLHS()));
1337 
1338     extendRegion(E->getRHS());
1339     propagateCounts(getRegionCounter(E), E->getRHS());
1340   }
1341 
1342   void VisitBinLOr(const BinaryOperator *E) {
1343     extendRegion(E->getLHS());
1344     propagateCounts(getRegion().getCounter(), E->getLHS());
1345     handleFileExit(getEnd(E->getLHS()));
1346 
1347     extendRegion(E->getRHS());
1348     propagateCounts(getRegionCounter(E), E->getRHS());
1349   }
1350 
  /// Intentionally empty: the lambda's children are not visited from here.
  void VisitLambdaExpr(const LambdaExpr *LE) {
    // Lambdas are treated as their own functions for now, so we shouldn't
    // propagate counts into them.
  }
1355 };
1356 
1357 std::string normalizeFilename(StringRef Filename) {
1358   llvm::SmallString<256> Path(Filename);
1359   llvm::sys::fs::make_absolute(Path);
1360   llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
1361   return std::string(Path);
1362 }
1363 
1364 } // end anonymous namespace
1365 
1366 static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
1367                  ArrayRef<CounterExpression> Expressions,
1368                  ArrayRef<CounterMappingRegion> Regions) {
1369   OS << FunctionName << ":\n";
1370   CounterMappingContext Ctx(Expressions);
1371   for (const auto &R : Regions) {
1372     OS.indent(2);
1373     switch (R.Kind) {
1374     case CounterMappingRegion::CodeRegion:
1375       break;
1376     case CounterMappingRegion::ExpansionRegion:
1377       OS << "Expansion,";
1378       break;
1379     case CounterMappingRegion::SkippedRegion:
1380       OS << "Skipped,";
1381       break;
1382     case CounterMappingRegion::GapRegion:
1383       OS << "Gap,";
1384       break;
1385     }
1386 
1387     OS << "File " << R.FileID << ", " << R.LineStart << ":" << R.ColumnStart
1388        << " -> " << R.LineEnd << ":" << R.ColumnEnd << " = ";
1389     Ctx.dump(R.Count, OS);
1390     if (R.Kind == CounterMappingRegion::ExpansionRegion)
1391       OS << " (Expanded file = " << R.ExpandedFileID << ")";
1392     OS << "\n";
1393   }
1394 }
1395 
1396 static std::string getInstrProfSection(const CodeGenModule &CGM,
1397                                        llvm::InstrProfSectKind SK) {
1398   return llvm::getInstrProfSectionName(
1399       SK, CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
1400 }
1401 
/// Emit the function record described by \p Info as a constant global in the
/// module.
///
/// The global is named "__covrec_<hex name hash>" (with a trailing "u" for
/// used functions), has hidden visibility and linkonce_odr linkage, is placed
/// in the covfun section, and is registered as used so it is not discarded.
///
/// \param Info the name hash, function hash, encoded mapping and used flag.
/// \param FilenamesRef not referenced directly in this body; presumably
/// consumed by the COVMAP_FUNC_RECORD initializers expanded from
/// InstrProfData.inc — confirm against that file.
void CoverageMappingModuleGen::emitFunctionMappingRecord(
    const FunctionInfo &Info, uint64_t FilenamesRef) {
  llvm::LLVMContext &Ctx = CGM.getLLVMContext();

  // Assign a name to the function record. This is used to merge duplicates.
  std::string FuncRecordName = "__covrec_" + llvm::utohexstr(Info.NameHash);

  // A dummy description for a function included-but-not-used in a TU can be
  // replaced by full description provided by a different TU. The two kinds of
  // descriptions play distinct roles: therefore, assign them different names
  // to prevent `linkonce_odr` merging.
  if (Info.IsUsed)
    FuncRecordName += "u";

  // Create the function record type.
  // NOTE(review): NameHash, FuncHash and CoverageMapping look unused here;
  // presumably the Init expressions expanded from InstrProfData.inc refer to
  // them by name, so they must not be renamed — confirm against that file.
  const uint64_t NameHash = Info.NameHash;
  const uint64_t FuncHash = Info.FuncHash;
  const std::string &CoverageMapping = Info.CoverageMapping;
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType,
  llvm::Type *FunctionRecordTypes[] = {
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *FunctionRecordTy =
      llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
                            /*isPacked=*/true);

  // Create the function record constant.
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
  llvm::Constant *FunctionRecordVals[] = {
      #include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *FuncRecordConstant = llvm::ConstantStruct::get(
      FunctionRecordTy, makeArrayRef(FunctionRecordVals));

  // Create the function record global.
  auto *FuncRecord = new llvm::GlobalVariable(
      CGM.getModule(), FunctionRecordTy, /*isConstant=*/true,
      llvm::GlobalValue::LinkOnceODRLinkage, FuncRecordConstant,
      FuncRecordName);
  FuncRecord->setVisibility(llvm::GlobalValue::HiddenVisibility);
  FuncRecord->setSection(getInstrProfSection(CGM, llvm::IPSK_covfun));
  FuncRecord->setAlignment(llvm::Align(8));
  // Where COMDATs are supported, the record goes into a COMDAT named after
  // itself.
  if (CGM.supportsCOMDAT())
    FuncRecord->setComdat(CGM.getModule().getOrInsertComdat(FuncRecordName));

  // Make sure the data doesn't get deleted.
  CGM.addUsedGlobal(FuncRecord);
}
1450 
1451 void CoverageMappingModuleGen::addFunctionMappingRecord(
1452     llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
1453     const std::string &CoverageMapping, bool IsUsed) {
1454   llvm::LLVMContext &Ctx = CGM.getLLVMContext();
1455   const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue);
1456   FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed});
1457 
1458   if (!IsUsed)
1459     FunctionNames.push_back(
1460         llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)));
1461 
1462   if (CGM.getCodeGenOpts().DumpCoverageMapping) {
1463     // Dump the coverage mapping data for this function by decoding the
1464     // encoded data. This allows us to dump the mapping regions which were
1465     // also processed by the CoverageMappingWriter which performs
1466     // additional minimization operations such as reducing the number of
1467     // expressions.
1468     std::vector<StringRef> Filenames;
1469     std::vector<CounterExpression> Expressions;
1470     std::vector<CounterMappingRegion> Regions;
1471     llvm::SmallVector<std::string, 16> FilenameStrs;
1472     llvm::SmallVector<StringRef, 16> FilenameRefs;
1473     FilenameStrs.resize(FileEntries.size());
1474     FilenameRefs.resize(FileEntries.size());
1475     for (const auto &Entry : FileEntries) {
1476       auto I = Entry.second;
1477       FilenameStrs[I] = normalizeFilename(Entry.first->getName());
1478       FilenameRefs[I] = FilenameStrs[I];
1479     }
1480     RawCoverageMappingReader Reader(CoverageMapping, FilenameRefs, Filenames,
1481                                     Expressions, Regions);
1482     if (Reader.read())
1483       return;
1484     dump(llvm::outs(), NameValue, Expressions, Regions);
1485   }
1486 }
1487 
1488 void CoverageMappingModuleGen::emit() {
1489   if (FunctionRecords.empty())
1490     return;
1491   llvm::LLVMContext &Ctx = CGM.getLLVMContext();
1492   auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
1493 
1494   // Create the filenames and merge them with coverage mappings
1495   llvm::SmallVector<std::string, 16> FilenameStrs;
1496   llvm::SmallVector<StringRef, 16> FilenameRefs;
1497   FilenameStrs.resize(FileEntries.size());
1498   FilenameRefs.resize(FileEntries.size());
1499   for (const auto &Entry : FileEntries) {
1500     auto I = Entry.second;
1501     FilenameStrs[I] = normalizeFilename(Entry.first->getName());
1502     FilenameRefs[I] = FilenameStrs[I];
1503   }
1504 
1505   std::string Filenames;
1506   {
1507     llvm::raw_string_ostream OS(Filenames);
1508     CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
1509   }
1510   auto *FilenamesVal =
1511       llvm::ConstantDataArray::getString(Ctx, Filenames, false);
1512   const int64_t FilenamesRef = llvm::IndexedInstrProf::ComputeHash(Filenames);
1513 
1514   // Emit the function records.
1515   for (const FunctionInfo &Info : FunctionRecords)
1516     emitFunctionMappingRecord(Info, FilenamesRef);
1517 
1518   const unsigned NRecords = 0;
1519   const size_t FilenamesSize = Filenames.size();
1520   const unsigned CoverageMappingSize = 0;
1521   llvm::Type *CovDataHeaderTypes[] = {
1522 #define COVMAP_HEADER(Type, LLVMType, Name, Init) LLVMType,
1523 #include "llvm/ProfileData/InstrProfData.inc"
1524   };
1525   auto CovDataHeaderTy =
1526       llvm::StructType::get(Ctx, makeArrayRef(CovDataHeaderTypes));
1527   llvm::Constant *CovDataHeaderVals[] = {
1528 #define COVMAP_HEADER(Type, LLVMType, Name, Init) Init,
1529 #include "llvm/ProfileData/InstrProfData.inc"
1530   };
1531   auto CovDataHeaderVal = llvm::ConstantStruct::get(
1532       CovDataHeaderTy, makeArrayRef(CovDataHeaderVals));
1533 
1534   // Create the coverage data record
1535   llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()};
1536   auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes));
1537   llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal};
1538   auto CovDataVal =
1539       llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals));
1540   auto CovData = new llvm::GlobalVariable(
1541       CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage,
1542       CovDataVal, llvm::getCoverageMappingVarName());
1543 
1544   CovData->setSection(getInstrProfSection(CGM, llvm::IPSK_covmap));
1545   CovData->setAlignment(llvm::Align(8));
1546 
1547   // Make sure the data doesn't get deleted.
1548   CGM.addUsedGlobal(CovData);
1549   // Create the deferred function records array
1550   if (!FunctionNames.empty()) {
1551     auto NamesArrTy = llvm::ArrayType::get(llvm::Type::getInt8PtrTy(Ctx),
1552                                            FunctionNames.size());
1553     auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames);
1554     // This variable will *NOT* be emitted to the object file. It is used
1555     // to pass the list of names referenced to codegen.
1556     new llvm::GlobalVariable(CGM.getModule(), NamesArrTy, true,
1557                              llvm::GlobalValue::InternalLinkage, NamesArrVal,
1558                              llvm::getCoverageUnusedNamesVarName());
1559   }
1560 }
1561 
1562 unsigned CoverageMappingModuleGen::getFileID(const FileEntry *File) {
1563   auto It = FileEntries.find(File);
1564   if (It != FileEntries.end())
1565     return It->second;
1566   unsigned FileID = FileEntries.size();
1567   FileEntries.insert(std::make_pair(File, FileID));
1568   return FileID;
1569 }
1570 
1571 void CoverageMappingGen::emitCounterMapping(const Decl *D,
1572                                             llvm::raw_ostream &OS) {
1573   assert(CounterMap);
1574   CounterCoverageMappingBuilder Walker(CVM, *CounterMap, SM, LangOpts);
1575   Walker.VisitDecl(D);
1576   Walker.write(OS);
1577 }
1578 
1579 void CoverageMappingGen::emitEmptyMapping(const Decl *D,
1580                                           llvm::raw_ostream &OS) {
1581   EmptyCoverageMappingBuilder Walker(CVM, SM, LangOpts);
1582   Walker.VisitDecl(D);
1583   Walker.write(OS);
1584 }
1585