1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
11 // It also builds the data structures and initialization code needed for
12 // updating execution counts and emitting the profile at runtime.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/InstrProfiling.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/Attributes.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/Pass.h"
40 #include "llvm/ProfileData/InstrProf.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <cstddef>
51 #include <cstdint>
52 #include <string>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "instrprof"
57 
58 // The start and end values of precise value profile range for memory
59 // intrinsic sizes
60 cl::opt<std::string> MemOPSizeRange(
61     "memop-size-range",
62     cl::desc("Set the range of size in memory intrinsic calls to be profiled "
63              "precisely, in a format of <start_val>:<end_val>"),
64     cl::init(""));
65 
66 // The value that considered to be large value in  memory intrinsic.
67 cl::opt<unsigned> MemOPSizeLarge(
68     "memop-size-large",
69     cl::desc("Set large value thresthold in memory intrinsic size profiling. "
70              "Value of 0 disables the large value profiling."),
71     cl::init(8192));
72 
73 namespace {
74 
75 cl::opt<bool> DoNameCompression("enable-name-compression",
76                                 cl::desc("Enable name string compression"),
77                                 cl::init(true));
78 
79 cl::opt<bool> DoHashBasedCounterSplit(
80     "hash-based-counter-split",
81     cl::desc("Rename counter variable of a comdat function based on cfg hash"),
82     cl::init(true));
83 
84 cl::opt<bool> ValueProfileStaticAlloc(
85     "vp-static-alloc",
86     cl::desc("Do static counter allocation for value profiler"),
87     cl::init(true));
88 
89 cl::opt<double> NumCountersPerValueSite(
90     "vp-counters-per-site",
91     cl::desc("The average number of profile counters allocated "
92              "per value profiling site."),
93     // This is set to a very small value because in real programs, only
94     // a very small percentage of value sites have non-zero targets, e.g, 1/30.
95     // For those sites with non-zero profile, the average number of targets
96     // is usually smaller than 2.
97     cl::init(1.0));
98 
99 cl::opt<bool> AtomicCounterUpdatePromoted(
100     "atomic-counter-update-promoted", cl::ZeroOrMore,
101     cl::desc("Do counter update using atomic fetch add "
102              " for promoted counters only"),
103     cl::init(false));
104 
105 // If the option is not specified, the default behavior about whether
106 // counter promotion is done depends on how instrumentaiton lowering
107 // pipeline is setup, i.e., the default value of true of this option
108 // does not mean the promotion will be done by default. Explicitly
109 // setting this option can override the default behavior.
110 cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
111                                  cl::desc("Do counter register promotion"),
112                                  cl::init(false));
113 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
114     cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
115     cl::desc("Max number counter promotions per loop to avoid"
116              " increasing register pressure too much"));
117 
118 // A debug option
119 cl::opt<int>
120     MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
121                        cl::desc("Max number of allowed counter promotions"));
122 
123 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
124     cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
125     cl::desc("The max number of exiting blocks of a loop to allow "
126              " speculative counter promotion"));
127 
128 cl::opt<bool> SpeculativeCounterPromotionToLoop(
129     cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
130     cl::desc("When the option is false, if the target block is in a loop, "
131              "the promotion will be disallowed unless the promoted counter "
132              " update can be further/iteratively promoted into an acyclic "
133              " region."));
134 
135 cl::opt<bool> IterativeCounterPromotion(
136     cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
137     cl::desc("Allow counter promotion across the whole loop nest."));
138 
139 class InstrProfilingLegacyPass : public ModulePass {
140   InstrProfiling InstrProf;
141 
142 public:
143   static char ID;
144 
145   InstrProfilingLegacyPass() : ModulePass(ID) {}
146   InstrProfilingLegacyPass(const InstrProfOptions &Options)
147       : ModulePass(ID), InstrProf(Options) {}
148 
149   StringRef getPassName() const override {
150     return "Frontend instrumentation-based coverage lowering";
151   }
152 
153   bool runOnModule(Module &M) override {
154     return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
155   }
156 
157   void getAnalysisUsage(AnalysisUsage &AU) const override {
158     AU.setPreservesCFG();
159     AU.addRequired<TargetLibraryInfoWrapperPass>();
160   }
161 };
162 
163 ///
164 /// A helper class to promote one counter RMW operation in the loop
165 /// into register update.
166 ///
167 /// RWM update for the counter will be sinked out of the loop after
168 /// the transformation.
169 ///
170 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
171 public:
172   PGOCounterPromoterHelper(
173       Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
174       BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
175       ArrayRef<Instruction *> InsertPts,
176       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
177       LoopInfo &LI)
178       : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
179         InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
180     assert(isa<LoadInst>(L));
181     assert(isa<StoreInst>(S));
182     SSA.AddAvailableValue(PH, Init);
183   }
184 
185   void doExtraRewritesBeforeFinalDeletion() const override {
186     for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
187       BasicBlock *ExitBlock = ExitBlocks[i];
188       Instruction *InsertPos = InsertPts[i];
189       // Get LiveIn value into the ExitBlock. If there are multiple
190       // predecessors, the value is defined by a PHI node in this
191       // block.
192       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
193       Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
194       IRBuilder<> Builder(InsertPos);
195       if (AtomicCounterUpdatePromoted)
196         // automic update currently can only be promoted across the current
197         // loop, not the whole loop nest.
198         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
199                                 AtomicOrdering::SequentiallyConsistent);
200       else {
201         LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
202         auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
203         auto *NewStore = Builder.CreateStore(NewVal, Addr);
204 
205         // Now update the parent loop's candidate list:
206         if (IterativeCounterPromotion) {
207           auto *TargetLoop = LI.getLoopFor(ExitBlock);
208           if (TargetLoop)
209             LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
210         }
211       }
212     }
213   }
214 
215 private:
216   Instruction *Store;
217   ArrayRef<BasicBlock *> ExitBlocks;
218   ArrayRef<Instruction *> InsertPts;
219   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
220   LoopInfo &LI;
221 };
222 
223 /// A helper class to do register promotion for all profile counter
224 /// updates in a loop.
225 ///
226 class PGOCounterPromoter {
227 public:
228   PGOCounterPromoter(
229       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
230       Loop &CurLoop, LoopInfo &LI)
231       : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
232         LI(LI) {
233 
234     SmallVector<BasicBlock *, 8> LoopExitBlocks;
235     SmallPtrSet<BasicBlock *, 8> BlockSet;
236     L.getExitBlocks(LoopExitBlocks);
237 
238     for (BasicBlock *ExitBlock : LoopExitBlocks) {
239       if (BlockSet.insert(ExitBlock).second) {
240         ExitBlocks.push_back(ExitBlock);
241         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
242       }
243     }
244   }
245 
246   bool run(int64_t *NumPromoted) {
247     // Skip 'infinite' loops:
248     if (ExitBlocks.size() == 0)
249       return false;
250     unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
251     if (MaxProm == 0)
252       return false;
253 
254     unsigned Promoted = 0;
255     for (auto &Cand : LoopToCandidates[&L]) {
256 
257       SmallVector<PHINode *, 4> NewPHIs;
258       SSAUpdater SSA(&NewPHIs);
259       Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
260 
261       PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
262                                         L.getLoopPreheader(), ExitBlocks,
263                                         InsertPts, LoopToCandidates, LI);
264       Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
265       Promoted++;
266       if (Promoted >= MaxProm)
267         break;
268 
269       (*NumPromoted)++;
270       if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
271         break;
272     }
273 
274     DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
275                  << L.getLoopDepth() << ")\n");
276     return Promoted != 0;
277   }
278 
279 private:
280   bool allowSpeculativeCounterPromotion(Loop *LP) {
281     SmallVector<BasicBlock *, 8> ExitingBlocks;
282     L.getExitingBlocks(ExitingBlocks);
283     // Not considierered speculative.
284     if (ExitingBlocks.size() == 1)
285       return true;
286     if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
287       return false;
288     return true;
289   }
290 
291   // Returns the max number of Counter Promotions for LP.
292   unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
293     // We can't insert into a catchswitch.
294     SmallVector<BasicBlock *, 8> LoopExitBlocks;
295     LP->getExitBlocks(LoopExitBlocks);
296     if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
297           return isa<CatchSwitchInst>(Exit->getTerminator());
298         }))
299       return 0;
300 
301     if (!LP->hasDedicatedExits())
302       return 0;
303 
304     BasicBlock *PH = LP->getLoopPreheader();
305     if (!PH)
306       return 0;
307 
308     SmallVector<BasicBlock *, 8> ExitingBlocks;
309     LP->getExitingBlocks(ExitingBlocks);
310     // Not considierered speculative.
311     if (ExitingBlocks.size() == 1)
312       return MaxNumOfPromotionsPerLoop;
313 
314     if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
315       return 0;
316 
317     // Whether the target block is in a loop does not matter:
318     if (SpeculativeCounterPromotionToLoop)
319       return MaxNumOfPromotionsPerLoop;
320 
321     // Now check the target block:
322     unsigned MaxProm = MaxNumOfPromotionsPerLoop;
323     for (auto *TargetBlock : LoopExitBlocks) {
324       auto *TargetLoop = LI.getLoopFor(TargetBlock);
325       if (!TargetLoop)
326         continue;
327       unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
328       unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
329       MaxProm =
330           std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
331                                 PendingCandsInTarget);
332     }
333     return MaxProm;
334   }
335 
336   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
337   SmallVector<BasicBlock *, 8> ExitBlocks;
338   SmallVector<Instruction *, 8> InsertPts;
339   Loop &L;
340   LoopInfo &LI;
341 };
342 
343 } // end anonymous namespace
344 
345 PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
346   auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
347   if (!run(M, TLI))
348     return PreservedAnalyses::all();
349 
350   return PreservedAnalyses::none();
351 }
352 
353 char InstrProfilingLegacyPass::ID = 0;
354 INITIALIZE_PASS_BEGIN(
355     InstrProfilingLegacyPass, "instrprof",
356     "Frontend instrumentation-based coverage lowering.", false, false)
357 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
358 INITIALIZE_PASS_END(
359     InstrProfilingLegacyPass, "instrprof",
360     "Frontend instrumentation-based coverage lowering.", false, false)
361 
362 ModulePass *
363 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
364   return new InstrProfilingLegacyPass(Options);
365 }
366 
367 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
368   InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
369   if (Inc)
370     return Inc;
371   return dyn_cast<InstrProfIncrementInst>(Instr);
372 }
373 
374 bool InstrProfiling::lowerIntrinsics(Function *F) {
375   bool MadeChange = false;
376   PromotionCandidates.clear();
377   for (BasicBlock &BB : *F) {
378     for (auto I = BB.begin(), E = BB.end(); I != E;) {
379       auto Instr = I++;
380       InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
381       if (Inc) {
382         lowerIncrement(Inc);
383         MadeChange = true;
384       } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
385         lowerValueProfileInst(Ind);
386         MadeChange = true;
387       }
388     }
389   }
390 
391   if (!MadeChange)
392     return false;
393 
394   promoteCounterLoadStores(F);
395   return true;
396 }
397 
398 bool InstrProfiling::isCounterPromotionEnabled() const {
399   if (DoCounterPromotion.getNumOccurrences() > 0)
400     return DoCounterPromotion;
401 
402   return Options.DoCounterPromotion;
403 }
404 
405 void InstrProfiling::promoteCounterLoadStores(Function *F) {
406   if (!isCounterPromotionEnabled())
407     return;
408 
409   DominatorTree DT(*F);
410   LoopInfo LI(DT);
411   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
412 
413   for (const auto &LoadStore : PromotionCandidates) {
414     auto *CounterLoad = LoadStore.first;
415     auto *CounterStore = LoadStore.second;
416     BasicBlock *BB = CounterLoad->getParent();
417     Loop *ParentLoop = LI.getLoopFor(BB);
418     if (!ParentLoop)
419       continue;
420     LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
421   }
422 
423   SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
424 
425   // Do a post-order traversal of the loops so that counter updates can be
426   // iteratively hoisted outside the loop nest.
427   for (auto *Loop : llvm::reverse(Loops)) {
428     PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
429     Promoter.run(&TotalCountersPromoted);
430   }
431 }
432 
433 /// Check if the module contains uses of any profiling intrinsics.
434 static bool containsProfilingIntrinsics(Module &M) {
435   if (auto *F = M.getFunction(
436           Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
437     if (!F->use_empty())
438       return true;
439   if (auto *F = M.getFunction(
440           Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
441     if (!F->use_empty())
442       return true;
443   if (auto *F = M.getFunction(
444           Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
445     if (!F->use_empty())
446       return true;
447   return false;
448 }
449 
450 bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
451   // Improve compile time by avoiding linear scans when there is no work.
452   GlobalVariable *CoverageNamesVar =
453       M.getNamedGlobal(getCoverageUnusedNamesVarName());
454   if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
455     return false;
456 
457   bool MadeChange = false;
458 
459   this->M = &M;
460   this->TLI = &TLI;
461   NamesVar = nullptr;
462   NamesSize = 0;
463   ProfileDataMap.clear();
464   UsedVars.clear();
465   getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
466                               MemOPSizeRangeLast);
467   TT = Triple(M.getTargetTriple());
468 
469   // We did not know how many value sites there would be inside
470   // the instrumented function. This is counting the number of instrumented
471   // target value sites to enter it as field in the profile data variable.
472   for (Function &F : M) {
473     InstrProfIncrementInst *FirstProfIncInst = nullptr;
474     for (BasicBlock &BB : F)
475       for (auto I = BB.begin(), E = BB.end(); I != E; I++)
476         if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
477           computeNumValueSiteCounts(Ind);
478         else if (FirstProfIncInst == nullptr)
479           FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
480 
481     // Value profiling intrinsic lowering requires per-function profile data
482     // variable to be created first.
483     if (FirstProfIncInst != nullptr)
484       static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
485   }
486 
487   for (Function &F : M)
488     MadeChange |= lowerIntrinsics(&F);
489 
490   if (CoverageNamesVar) {
491     lowerCoverageData(CoverageNamesVar);
492     MadeChange = true;
493   }
494 
495   if (!MadeChange)
496     return false;
497 
498   emitVNodes();
499   emitNameData();
500   emitRegistration();
501   emitRuntimeHook();
502   emitUses();
503   emitInitialization();
504   return true;
505 }
506 
507 static Constant *getOrInsertValueProfilingCall(Module &M,
508                                                const TargetLibraryInfo &TLI,
509                                                bool IsRange = false) {
510   LLVMContext &Ctx = M.getContext();
511   auto *ReturnTy = Type::getVoidTy(M.getContext());
512 
513   Constant *Res;
514   if (!IsRange) {
515     Type *ParamTypes[] = {
516 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
517 #include "llvm/ProfileData/InstrProfData.inc"
518     };
519     auto *ValueProfilingCallTy =
520         FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
521     Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
522                                 ValueProfilingCallTy);
523   } else {
524     Type *RangeParamTypes[] = {
525 #define VALUE_RANGE_PROF 1
526 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
527 #include "llvm/ProfileData/InstrProfData.inc"
528 #undef VALUE_RANGE_PROF
529     };
530     auto *ValueRangeProfilingCallTy =
531         FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
532     Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
533                                 ValueRangeProfilingCallTy);
534   }
535 
536   if (Function *FunRes = dyn_cast<Function>(Res)) {
537     if (auto AK = TLI.getExtAttrForI32Param(false))
538       FunRes->addParamAttr(2, AK);
539   }
540   return Res;
541 }
542 
543 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
544   GlobalVariable *Name = Ind->getName();
545   uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
546   uint64_t Index = Ind->getIndex()->getZExtValue();
547   auto It = ProfileDataMap.find(Name);
548   if (It == ProfileDataMap.end()) {
549     PerFunctionProfileData PD;
550     PD.NumValueSites[ValueKind] = Index + 1;
551     ProfileDataMap[Name] = PD;
552   } else if (It->second.NumValueSites[ValueKind] <= Index)
553     It->second.NumValueSites[ValueKind] = Index + 1;
554 }
555 
556 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
557   GlobalVariable *Name = Ind->getName();
558   auto It = ProfileDataMap.find(Name);
559   assert(It != ProfileDataMap.end() && It->second.DataVar &&
560          "value profiling detected in function with no counter incerement");
561 
562   GlobalVariable *DataVar = It->second.DataVar;
563   uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
564   uint64_t Index = Ind->getIndex()->getZExtValue();
565   for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
566     Index += It->second.NumValueSites[Kind];
567 
568   IRBuilder<> Builder(Ind);
569   bool IsRange = (Ind->getValueKind()->getZExtValue() ==
570                   llvm::InstrProfValueKind::IPVK_MemOPSize);
571   CallInst *Call = nullptr;
572   if (!IsRange) {
573     Value *Args[3] = {Ind->getTargetValue(),
574                       Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
575                       Builder.getInt32(Index)};
576     Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
577   } else {
578     Value *Args[6] = {
579         Ind->getTargetValue(),
580         Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
581         Builder.getInt32(Index),
582         Builder.getInt64(MemOPSizeRangeStart),
583         Builder.getInt64(MemOPSizeRangeLast),
584         Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
585     Call =
586         Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
587   }
588   if (auto AK = TLI->getExtAttrForI32Param(false))
589     Call->addParamAttr(2, AK);
590   Ind->replaceAllUsesWith(Call);
591   Ind->eraseFromParent();
592 }
593 
594 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
595   GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
596 
597   IRBuilder<> Builder(Inc);
598   uint64_t Index = Inc->getIndex()->getZExtValue();
599   Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
600   Value *Load = Builder.CreateLoad(Addr, "pgocount");
601   auto *Count = Builder.CreateAdd(Load, Inc->getStep());
602   auto *Store = Builder.CreateStore(Count, Addr);
603   Inc->replaceAllUsesWith(Store);
604   if (isCounterPromotionEnabled())
605     PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
606   Inc->eraseFromParent();
607 }
608 
609 void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
610   ConstantArray *Names =
611       cast<ConstantArray>(CoverageNamesVar->getInitializer());
612   for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
613     Constant *NC = Names->getOperand(I);
614     Value *V = NC->stripPointerCasts();
615     assert(isa<GlobalVariable>(V) && "Missing reference to function name");
616     GlobalVariable *Name = cast<GlobalVariable>(V);
617 
618     Name->setLinkage(GlobalValue::PrivateLinkage);
619     ReferencedNames.push_back(Name);
620     NC->dropAllReferences();
621   }
622   CoverageNamesVar->eraseFromParent();
623 }
624 
625 /// Get the name of a profiling variable for a particular function.
626 static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
627   StringRef NamePrefix = getInstrProfNameVarPrefix();
628   StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
629   Function *F = Inc->getParent()->getParent();
630   Module *M = F->getParent();
631   if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
632       !canRenameComdatFunc(*F))
633     return (Prefix + Name).str();
634   uint64_t FuncHash = Inc->getHash()->getZExtValue();
635   SmallVector<char, 24> HashPostfix;
636   if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
637     return (Prefix + Name).str();
638   return (Prefix + Name + "." + Twine(FuncHash)).str();
639 }
640 
641 static inline bool shouldRecordFunctionAddr(Function *F) {
642   // Check the linkage
643   bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
644   if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
645       !HasAvailableExternallyLinkage)
646     return true;
647 
648   // A function marked 'alwaysinline' with available_externally linkage can't
649   // have its address taken. Doing so would create an undefined external ref to
650   // the function, which would fail to link.
651   if (HasAvailableExternallyLinkage &&
652       F->hasFnAttribute(Attribute::AlwaysInline))
653     return false;
654 
655   // Prohibit function address recording if the function is both internal and
656   // COMDAT. This avoids the profile data variable referencing internal symbols
657   // in COMDAT.
658   if (F->hasLocalLinkage() && F->hasComdat())
659     return false;
660 
661   // Check uses of this function for other than direct calls or invokes to it.
662   // Inline virtual functions have linkeOnceODR linkage. When a key method
663   // exists, the vtable will only be emitted in the TU where the key method
664   // is defined. In a TU where vtable is not available, the function won't
665   // be 'addresstaken'. If its address is not recorded here, the profile data
666   // with missing address may be picked by the linker leading  to missing
667   // indirect call target info.
668   return F->hasAddressTaken() || F->hasLinkOnceLinkage();
669 }
670 
671 static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
672                                                InstrProfIncrementInst *Inc) {
673   if (!needsComdatForCounter(F, M))
674     return nullptr;
675 
676   // COFF format requires a COMDAT section to have a key symbol with the same
677   // name. The linker targeting COFF also requires that the COMDAT
678   // a section is associated to must precede the associating section. For this
679   // reason, we must choose the counter var's name as the name of the comdat.
680   StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
681                                 ? getInstrProfCountersVarPrefix()
682                                 : getInstrProfComdatPrefix());
683   return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
684 }
685 
686 static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
687   // Don't do this for Darwin.  compiler-rt uses linker magic.
688   if (Triple(M.getTargetTriple()).isOSDarwin())
689     return false;
690 
691   // Use linker script magic to get data/cnts/name start/end.
692   if (Triple(M.getTargetTriple()).isOSLinux() ||
693       Triple(M.getTargetTriple()).isOSFreeBSD() ||
694       Triple(M.getTargetTriple()).isPS4CPU())
695     return false;
696 
697   return true;
698 }
699 
700 GlobalVariable *
701 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
702   GlobalVariable *NamePtr = Inc->getName();
703   auto It = ProfileDataMap.find(NamePtr);
704   PerFunctionProfileData PD;
705   if (It != ProfileDataMap.end()) {
706     if (It->second.RegionCounters)
707       return It->second.RegionCounters;
708     PD = It->second;
709   }
710 
711   // Move the name variable to the right section. Place them in a COMDAT group
712   // if the associated function is a COMDAT. This will make sure that
713   // only one copy of counters of the COMDAT function will be emitted after
714   // linking.
715   Function *Fn = Inc->getParent()->getParent();
716   Comdat *ProfileVarsComdat = nullptr;
717   ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
718 
719   uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
720   LLVMContext &Ctx = M->getContext();
721   ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
722 
723   // Create the counters variable.
724   auto *CounterPtr =
725       new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
726                          Constant::getNullValue(CounterTy),
727                          getVarName(Inc, getInstrProfCountersVarPrefix()));
728   CounterPtr->setVisibility(NamePtr->getVisibility());
729   CounterPtr->setSection(
730       getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
731   CounterPtr->setAlignment(8);
732   CounterPtr->setComdat(ProfileVarsComdat);
733 
734   auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
735   // Allocate statically the array of pointers to value profile nodes for
736   // the current function.
737   Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
738   if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
739     uint64_t NS = 0;
740     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
741       NS += PD.NumValueSites[Kind];
742     if (NS) {
743       ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
744 
745       auto *ValuesVar =
746           new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
747                              Constant::getNullValue(ValuesTy),
748                              getVarName(Inc, getInstrProfValuesVarPrefix()));
749       ValuesVar->setVisibility(NamePtr->getVisibility());
750       ValuesVar->setSection(
751           getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
752       ValuesVar->setAlignment(8);
753       ValuesVar->setComdat(ProfileVarsComdat);
754       ValuesPtrExpr =
755           ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
756     }
757   }
758 
759   // Create data variable.
760   auto *Int16Ty = Type::getInt16Ty(Ctx);
761   auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
762   Type *DataTypes[] = {
763 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
764 #include "llvm/ProfileData/InstrProfData.inc"
765   };
766   auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
767 
768   Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
769                                ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
770                                : ConstantPointerNull::get(Int8PtrTy);
771 
772   Constant *Int16ArrayVals[IPVK_Last + 1];
773   for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
774     Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
775 
776   Constant *DataVals[] = {
777 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
778 #include "llvm/ProfileData/InstrProfData.inc"
779   };
780   auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
781                                   ConstantStruct::get(DataTy, DataVals),
782                                   getVarName(Inc, getInstrProfDataVarPrefix()));
783   Data->setVisibility(NamePtr->getVisibility());
784   Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
785   Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
786   Data->setComdat(ProfileVarsComdat);
787 
788   PD.RegionCounters = CounterPtr;
789   PD.DataVar = Data;
790   ProfileDataMap[NamePtr] = PD;
791 
792   // Mark the data variable as used so that it isn't stripped out.
793   UsedVars.push_back(Data);
794   // Now that the linkage set by the FE has been passed to the data and counter
795   // variables, reset Name variable's linkage and visibility to private so that
796   // it can be removed later by the compiler.
797   NamePtr->setLinkage(GlobalValue::PrivateLinkage);
798   // Collect the referenced names to be used by emitNameData.
799   ReferencedNames.push_back(NamePtr);
800 
801   return CounterPtr;
802 }
803 
804 void InstrProfiling::emitVNodes() {
805   if (!ValueProfileStaticAlloc)
806     return;
807 
808   // For now only support this on platforms that do
809   // not require runtime registration to discover
810   // named section start/end.
811   if (needsRuntimeRegistrationOfSectionRange(*M))
812     return;
813 
814   size_t TotalNS = 0;
815   for (auto &PD : ProfileDataMap) {
816     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
817       TotalNS += PD.second.NumValueSites[Kind];
818   }
819 
820   if (!TotalNS)
821     return;
822 
823   uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
824 // Heuristic for small programs with very few total value sites.
825 // The default value of vp-counters-per-site is chosen based on
826 // the observation that large apps usually have a low percentage
827 // of value sites that actually have any profile data, and thus
828 // the average number of counters per site is low. For small
829 // apps with very few sites, this may not be true. Bump up the
830 // number of counters in this case.
831 #define INSTR_PROF_MIN_VAL_COUNTS 10
832   if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
833     NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
834 
835   auto &Ctx = M->getContext();
836   Type *VNodeTypes[] = {
837 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
838 #include "llvm/ProfileData/InstrProfData.inc"
839   };
840   auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
841 
842   ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
843   auto *VNodesVar = new GlobalVariable(
844       *M, VNodesTy, false, GlobalValue::PrivateLinkage,
845       Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
846   VNodesVar->setSection(
847       getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
848   UsedVars.push_back(VNodesVar);
849 }
850 
851 void InstrProfiling::emitNameData() {
852   std::string UncompressedData;
853 
854   if (ReferencedNames.empty())
855     return;
856 
857   std::string CompressedNameStr;
858   if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
859                                           DoNameCompression)) {
860     report_fatal_error(toString(std::move(E)), false);
861   }
862 
863   auto &Ctx = M->getContext();
864   auto *NamesVal = ConstantDataArray::getString(
865       Ctx, StringRef(CompressedNameStr), false);
866   NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
867                                 GlobalValue::PrivateLinkage, NamesVal,
868                                 getInstrProfNamesVarName());
869   NamesSize = CompressedNameStr.size();
870   NamesVar->setSection(
871       getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
872   UsedVars.push_back(NamesVar);
873 
874   for (auto *NamePtr : ReferencedNames)
875     NamePtr->eraseFromParent();
876 }
877 
878 void InstrProfiling::emitRegistration() {
879   if (!needsRuntimeRegistrationOfSectionRange(*M))
880     return;
881 
882   // Construct the function.
883   auto *VoidTy = Type::getVoidTy(M->getContext());
884   auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
885   auto *Int64Ty = Type::getInt64Ty(M->getContext());
886   auto *RegisterFTy = FunctionType::get(VoidTy, false);
887   auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
888                                      getInstrProfRegFuncsName(), M);
889   RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
890   if (Options.NoRedZone)
891     RegisterF->addFnAttr(Attribute::NoRedZone);
892 
893   auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
894   auto *RuntimeRegisterF =
895       Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
896                        getInstrProfRegFuncName(), M);
897 
898   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
899   for (Value *Data : UsedVars)
900     if (Data != NamesVar)
901       IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
902 
903   if (NamesVar) {
904     Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
905     auto *NamesRegisterTy =
906         FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
907     auto *NamesRegisterF =
908         Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
909                          getInstrProfNamesRegFuncName(), M);
910     IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
911                                     IRB.getInt64(NamesSize)});
912   }
913 
914   IRB.CreateRetVoid();
915 }
916 
917 void InstrProfiling::emitRuntimeHook() {
918   // We expect the linker to be invoked with -u<hook_var> flag for linux,
919   // for which case there is no need to emit the user function.
920   if (Triple(M->getTargetTriple()).isOSLinux())
921     return;
922 
923   // If the module's provided its own runtime, we don't need to do anything.
924   if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
925     return;
926 
927   // Declare an external variable that will pull in the runtime initialization.
928   auto *Int32Ty = Type::getInt32Ty(M->getContext());
929   auto *Var =
930       new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
931                          nullptr, getInstrProfRuntimeHookVarName());
932 
933   // Make a function that uses it.
934   auto *User = Function::Create(FunctionType::get(Int32Ty, false),
935                                 GlobalValue::LinkOnceODRLinkage,
936                                 getInstrProfRuntimeHookVarUseFuncName(), M);
937   User->addFnAttr(Attribute::NoInline);
938   if (Options.NoRedZone)
939     User->addFnAttr(Attribute::NoRedZone);
940   User->setVisibility(GlobalValue::HiddenVisibility);
941   if (Triple(M->getTargetTriple()).supportsCOMDAT())
942     User->setComdat(M->getOrInsertComdat(User->getName()));
943 
944   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
945   auto *Load = IRB.CreateLoad(Var);
946   IRB.CreateRet(Load);
947 
948   // Mark the user variable as used so that it isn't stripped out.
949   UsedVars.push_back(User);
950 }
951 
952 void InstrProfiling::emitUses() {
953   if (!UsedVars.empty())
954     appendToUsed(*M, UsedVars);
955 }
956 
957 void InstrProfiling::emitInitialization() {
958   StringRef InstrProfileOutput = Options.InstrProfileOutput;
959 
960   if (!InstrProfileOutput.empty()) {
961     // Create variable for profile name.
962     Constant *ProfileNameConst =
963         ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
964     GlobalVariable *ProfileNameVar = new GlobalVariable(
965         *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
966         ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
967     if (TT.supportsCOMDAT()) {
968       ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
969       ProfileNameVar->setComdat(M->getOrInsertComdat(
970           StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
971     }
972   }
973 
974   Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
975   if (!RegisterF)
976     return;
977 
978   // Create the initialization function.
979   auto *VoidTy = Type::getVoidTy(M->getContext());
980   auto *F = Function::Create(FunctionType::get(VoidTy, false),
981                              GlobalValue::InternalLinkage,
982                              getInstrProfInitFuncName(), M);
983   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
984   F->addFnAttr(Attribute::NoInline);
985   if (Options.NoRedZone)
986     F->addFnAttr(Attribute::NoRedZone);
987 
988   // Add the basic block and the necessary calls.
989   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
990   if (RegisterF)
991     IRB.CreateCall(RegisterF, {});
992   IRB.CreateRetVoid();
993 
994   appendToGlobalCtors(*M, F, 0);
995 }
996