1 //===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the transformation that promotes indirect calls to
11 // conditional direct calls when the indirect-call value profile metadata is
12 // available.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Analysis/BlockFrequencyInfo.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
23 #include "llvm/Analysis/IndirectCallSiteVisitor.h"
24 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/CallSite.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/LLVMContext.h"
35 #include "llvm/IR/MDBuilder.h"
36 #include "llvm/IR/PassManager.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/Pass.h"
39 #include "llvm/PassRegistry.h"
40 #include "llvm/PassSupport.h"
41 #include "llvm/ProfileData/InstrProf.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/MathExtras.h"
47 #include "llvm/Transforms/Instrumentation.h"
48 #include "llvm/Transforms/PGOInstrumentation.h"
49 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
50 #include <cassert>
51 #include <cstdint>
52 #include <vector>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "pgo-icall-prom"
57 
58 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
59 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
60 
61 // Command line option to disable indirect-call promotion with the default as
62 // false. This is for debug purpose.
63 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
64                                 cl::desc("Disable indirect call promotion"));
65 
66 // Set the cutoff value for the promotion. If the value is other than 0, we
67 // stop the transformation once the total number of promotions equals the cutoff
68 // value.
69 // For debug use only.
70 static cl::opt<unsigned>
71     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
72               cl::desc("Max number of promotions for this compilation"));
73 
74 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
75 // For debug use only.
76 static cl::opt<unsigned>
77     ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
78               cl::desc("Skip Callsite up to this number for this compilation"));
79 
80 // Set if the pass is called in LTO optimization. The difference for LTO mode
81 // is the pass won't prefix the source module name to the internal linkage
82 // symbols.
83 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
84                                 cl::desc("Run indirect-call promotion in LTO "
85                                          "mode"));
86 
87 // Set if the pass is called in SamplePGO mode. The difference for SamplePGO
88 // mode is it will add prof metadatato the created direct call.
89 static cl::opt<bool>
90     ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
91                      cl::desc("Run indirect-call promotion in SamplePGO mode"));
92 
93 // If the option is set to true, only call instructions will be considered for
94 // transformation -- invoke instructions will be ignored.
95 static cl::opt<bool>
96     ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
97                 cl::desc("Run indirect-call promotion for call instructions "
98                          "only"));
99 
100 // If the option is set to true, only invoke instructions will be considered for
101 // transformation -- call instructions will be ignored.
102 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
103                                    cl::Hidden,
104                                    cl::desc("Run indirect-call promotion for "
105                                             "invoke instruction only"));
106 
107 // Dump the function level IR if the transformation happened in this
108 // function. For debug use only.
109 static cl::opt<bool>
110     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
111                  cl::desc("Dump IR after transformation happens"));
112 
113 namespace {
114 class PGOIndirectCallPromotionLegacyPass : public ModulePass {
115 public:
116   static char ID;
117 
118   PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
119       : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
120     initializePGOIndirectCallPromotionLegacyPassPass(
121         *PassRegistry::getPassRegistry());
122   }
123 
124   StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
125 
126 private:
127   bool runOnModule(Module &M) override;
128 
129   // If this pass is called in LTO. We need to special handling the PGOFuncName
130   // for the static variables due to LTO's internalization.
131   bool InLTO;
132 
133   // If this pass is called in SamplePGO. We need to add the prof metadata to
134   // the promoted direct call.
135   bool SamplePGO;
136 };
137 } // end anonymous namespace
138 
139 char PGOIndirectCallPromotionLegacyPass::ID = 0;
140 INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
141                 "Use PGO instrumentation profile to promote indirect calls to "
142                 "direct calls.",
143                 false, false)
144 
145 ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
146                                                            bool SamplePGO) {
147   return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
148 }
149 
150 namespace {
151 // The class for main data structure to promote indirect calls to conditional
152 // direct calls.
153 class ICallPromotionFunc {
154 private:
155   Function &F;
156   Module *M;
157 
158   // Symtab that maps indirect call profile values to function names and
159   // defines.
160   InstrProfSymtab *Symtab;
161 
162   bool SamplePGO;
163 
164   OptimizationRemarkEmitter &ORE;
165 
166   // A struct that records the direct target and it's call count.
167   struct PromotionCandidate {
168     Function *TargetFunction;
169     uint64_t Count;
170     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
171   };
172 
173   // Check if the indirect-call call site should be promoted. Return the number
174   // of promotions. Inst is the candidate indirect call, ValueDataRef
175   // contains the array of value profile data for profiled targets,
176   // TotalCount is the total profiled count of call executions, and
177   // NumCandidates is the number of candidate entries in ValueDataRef.
178   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
179       Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
180       uint64_t TotalCount, uint32_t NumCandidates);
181 
182   // Promote a list of targets for one indirect-call callsite. Return
183   // the number of promotions.
184   uint32_t tryToPromote(Instruction *Inst,
185                         const std::vector<PromotionCandidate> &Candidates,
186                         uint64_t &TotalCount);
187 
188   // Noncopyable
189   ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
190   ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
191 
192 public:
193   ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
194                      bool SamplePGO, OptimizationRemarkEmitter &ORE)
195       : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
196 
197   bool processFunction();
198 };
199 } // end anonymous namespace
200 
201 bool llvm::isLegalToPromote(Instruction *Inst, Function *F,
202                             const char **Reason) {
203   // Check the return type.
204   Type *CallRetType = Inst->getType();
205   if (!CallRetType->isVoidTy()) {
206     Type *FuncRetType = F->getReturnType();
207     if (FuncRetType != CallRetType &&
208         !CastInst::isBitCastable(FuncRetType, CallRetType)) {
209       if (Reason)
210         *Reason = "Return type mismatch";
211       return false;
212     }
213   }
214 
215   // Check if the arguments are compatible with the parameters
216   FunctionType *DirectCalleeType = F->getFunctionType();
217   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
218   CallSite CS(Inst);
219   unsigned ArgNum = CS.arg_size();
220 
221   if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) {
222     if (Reason)
223       *Reason = "The number of arguments mismatch";
224     return false;
225   }
226 
227   for (unsigned I = 0; I < ParamNum; ++I) {
228     Type *PTy = DirectCalleeType->getFunctionParamType(I);
229     Type *ATy = CS.getArgument(I)->getType();
230     if (PTy == ATy)
231       continue;
232     if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) {
233       if (Reason)
234         *Reason = "Argument type mismatch";
235       return false;
236     }
237   }
238 
239   DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
240                << F->getName() << "\n");
241   return true;
242 }
243 
244 // Indirect-call promotion heuristic. The direct targets are sorted based on
245 // the count. Stop at the first target that is not promoted.
246 std::vector<ICallPromotionFunc::PromotionCandidate>
247 ICallPromotionFunc::getPromotionCandidatesForCallSite(
248     Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
249     uint64_t TotalCount, uint32_t NumCandidates) {
250   std::vector<PromotionCandidate> Ret;
251 
252   DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
253                << " Num_targets: " << ValueDataRef.size()
254                << " Num_candidates: " << NumCandidates << "\n");
255   NumOfPGOICallsites++;
256   if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
257     DEBUG(dbgs() << " Skip: User options.\n");
258     return Ret;
259   }
260 
261   for (uint32_t I = 0; I < NumCandidates; I++) {
262     uint64_t Count = ValueDataRef[I].Count;
263     assert(Count <= TotalCount);
264     uint64_t Target = ValueDataRef[I].Value;
265     DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
266                  << "  Target_func: " << Target << "\n");
267 
268     if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
269       DEBUG(dbgs() << " Not promote: User options.\n");
270       ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
271                << " Not promote: User options");
272       break;
273     }
274     if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
275       DEBUG(dbgs() << " Not promote: User option.\n");
276       ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
277                << " Not promote: User options");
278       break;
279     }
280     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
281       DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
282       ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst)
283                << " Not promote: Cutoff reached");
284       break;
285     }
286 
287     Function *TargetFunction = Symtab->getFunction(Target);
288     if (TargetFunction == nullptr) {
289       DEBUG(dbgs() << " Not promote: Cannot find the target\n");
290       ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst)
291                << "Cannot promote indirect call: target not found");
292       break;
293     }
294 
295     const char *Reason = nullptr;
296     if (!isLegalToPromote(Inst, TargetFunction, &Reason)) {
297       using namespace ore;
298       ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst)
299                << "Cannot promote indirect call to "
300                << NV("TargetFunction", TargetFunction) << " with count of "
301                << NV("Count", Count) << ": " << Reason);
302       break;
303     }
304 
305     Ret.push_back(PromotionCandidate(TargetFunction, Count));
306     TotalCount -= Count;
307   }
308   return Ret;
309 }
310 
311 // Create a diamond structure for If_Then_Else. Also update the profile
312 // count. Do the fix-up for the invoke instruction.
313 static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
314                              uint64_t Count, uint64_t TotalCount,
315                              BasicBlock **DirectCallBB,
316                              BasicBlock **IndirectCallBB,
317                              BasicBlock **MergeBB) {
318   CallSite CS(Inst);
319   Value *OrigCallee = CS.getCalledValue();
320 
321   IRBuilder<> BBBuilder(Inst);
322   LLVMContext &Ctx = Inst->getContext();
323   Value *BCI1 =
324       BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
325   Value *BCI2 =
326       BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
327   Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
328 
329   uint64_t ElseCount = TotalCount - Count;
330   uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
331   uint64_t Scale = calculateCountScale(MaxCount);
332   MDBuilder MDB(Inst->getContext());
333   MDNode *BranchWeights = MDB.createBranchWeights(
334       scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
335   TerminatorInst *ThenTerm, *ElseTerm;
336   SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
337                                 BranchWeights);
338   *DirectCallBB = ThenTerm->getParent();
339   (*DirectCallBB)->setName("if.true.direct_targ");
340   *IndirectCallBB = ElseTerm->getParent();
341   (*IndirectCallBB)->setName("if.false.orig_indirect");
342   *MergeBB = Inst->getParent();
343   (*MergeBB)->setName("if.end.icp");
344 
345   // Special handing of Invoke instructions.
346   InvokeInst *II = dyn_cast<InvokeInst>(Inst);
347   if (!II)
348     return;
349 
350   // We don't need branch instructions for invoke.
351   ThenTerm->eraseFromParent();
352   ElseTerm->eraseFromParent();
353 
354   // Add jump from Merge BB to the NormalDest. This is needed for the newly
355   // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
356   BranchInst::Create(II->getNormalDest(), *MergeBB);
357 }
358 
359 // Find the PHI in BB that have the CallResult as the operand.
360 static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
361   BasicBlock *From = Inst->getParent();
362   for (auto &I : *BB) {
363     PHINode *PHI = dyn_cast<PHINode>(&I);
364     if (!PHI)
365       continue;
366     int IX = PHI->getBasicBlockIndex(From);
367     if (IX == -1)
368       continue;
369     Value *V = PHI->getIncomingValue(IX);
370     if (dyn_cast<Instruction>(V) == Inst)
371       return true;
372   }
373   return false;
374 }
375 
376 // This method fixes up PHI nodes in BB where BB is the UnwindDest of an
377 // invoke instruction. In BB, there may be PHIs with incoming block being
378 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
379 // instructions to its own BB, OrigBB is no longer the predecessor block of BB.
380 // Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
381 // so the PHI node's incoming BBs need to be fixed up accordingly.
382 static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
383                                   BasicBlock *OrigBB,
384                                   BasicBlock *IndirectCallBB,
385                                   BasicBlock *DirectCallBB) {
386   for (auto &I : *BB) {
387     PHINode *PHI = dyn_cast<PHINode>(&I);
388     if (!PHI)
389       continue;
390     int IX = PHI->getBasicBlockIndex(OrigBB);
391     if (IX == -1)
392       continue;
393     Value *V = PHI->getIncomingValue(IX);
394     PHI->addIncoming(V, IndirectCallBB);
395     PHI->setIncomingBlock(IX, DirectCallBB);
396   }
397 }
398 
399 // This method fixes up PHI nodes in BB where BB is the NormalDest of an
400 // invoke instruction. In BB, there may be PHIs with incoming block being
401 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
402 // instructions to its own BB, a new incoming edge will be added to the original
403 // NormalDstBB from the IndirectCallBB.
404 static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
405                                       BasicBlock *OrigBB,
406                                       BasicBlock *IndirectCallBB,
407                                       Instruction *NewInst) {
408   for (auto &I : *BB) {
409     PHINode *PHI = dyn_cast<PHINode>(&I);
410     if (!PHI)
411       continue;
412     int IX = PHI->getBasicBlockIndex(OrigBB);
413     if (IX == -1)
414       continue;
415     Value *V = PHI->getIncomingValue(IX);
416     if (dyn_cast<Instruction>(V) == Inst) {
417       PHI->setIncomingBlock(IX, IndirectCallBB);
418       PHI->addIncoming(NewInst, OrigBB);
419       continue;
420     }
421     PHI->addIncoming(V, IndirectCallBB);
422   }
423 }
424 
425 // Add a bitcast instruction to the direct-call return value if needed.
426 static Instruction *insertCallRetCast(const Instruction *Inst,
427                                       Instruction *DirectCallInst,
428                                       Function *DirectCallee) {
429   if (Inst->getType()->isVoidTy())
430     return DirectCallInst;
431 
432   Type *CallRetType = Inst->getType();
433   Type *FuncRetType = DirectCallee->getReturnType();
434   if (FuncRetType == CallRetType)
435     return DirectCallInst;
436 
437   BasicBlock *InsertionBB;
438   if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
439     InsertionBB = CI->getParent();
440   else
441     InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
442 
443   return (new BitCastInst(DirectCallInst, CallRetType, "",
444                           InsertionBB->getTerminator()));
445 }
446 
447 // Create a DirectCall instruction in the DirectCallBB.
448 // Parameter Inst is the indirect-call (invoke) instruction.
449 // DirectCallee is the decl of the direct-call (invoke) target.
450 // DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
451 // MergeBB is the bottom BB of the if-then-else-diamond after the
452 // transformation. For invoke instruction, the edges from DirectCallBB and
453 // IndirectCallBB to MergeBB are removed before this call (during
454 // createIfThenElse).
455 static Instruction *createDirectCallInst(const Instruction *Inst,
456                                          Function *DirectCallee,
457                                          BasicBlock *DirectCallBB,
458                                          BasicBlock *MergeBB) {
459   Instruction *NewInst = Inst->clone();
460   if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
461     CI->setCalledFunction(DirectCallee);
462     CI->mutateFunctionType(DirectCallee->getFunctionType());
463   } else {
464     // Must be an invoke instruction. Direct invoke's normal destination is
465     // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
466     // Also since IndirectCallBB does not have an edge to MergeBB, there is no
467     // need to insert new PHIs into MergeBB.
468     InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
469     assert(II);
470     II->setCalledFunction(DirectCallee);
471     II->mutateFunctionType(DirectCallee->getFunctionType());
472     II->setNormalDest(MergeBB);
473   }
474 
475   DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
476                                      NewInst);
477 
478   // Clear the value profile data.
479   NewInst->setMetadata(LLVMContext::MD_prof, nullptr);
480   CallSite NewCS(NewInst);
481   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
482   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
483   for (unsigned I = 0; I < ParamNum; ++I) {
484     Type *ATy = NewCS.getArgument(I)->getType();
485     Type *PTy = DirectCalleeType->getParamType(I);
486     if (ATy != PTy) {
487       BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
488       NewCS.setArgument(I, BI);
489     }
490   }
491 
492   return insertCallRetCast(Inst, NewInst, DirectCallee);
493 }
494 
495 // Create a PHI to unify the return values of calls.
496 static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
497                              Function *DirectCallee) {
498   if (Inst->getType()->isVoidTy())
499     return;
500 
501   BasicBlock *RetValBB = CallResult->getParent();
502 
503   BasicBlock *PHIBB;
504   if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
505     RetValBB = II->getNormalDest();
506 
507   PHIBB = RetValBB->getSingleSuccessor();
508   if (getCallRetPHINode(PHIBB, Inst))
509     return;
510 
511   PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
512   PHIBB->getInstList().push_front(CallRetPHI);
513   Inst->replaceAllUsesWith(CallRetPHI);
514   CallRetPHI->addIncoming(Inst, Inst->getParent());
515   CallRetPHI->addIncoming(CallResult, RetValBB);
516 }
517 
518 // This function does the actual indirect-call promotion transformation:
519 // For an indirect-call like:
520 //     Ret = (*Foo)(Args);
521 // It transforms to:
522 //     if (Foo == DirectCallee)
523 //        Ret1 = DirectCallee(Args);
524 //     else
525 //        Ret2 = (*Foo)(Args);
526 //     Ret = phi(Ret1, Ret2);
527 // It adds type casts for the args do not match the parameters and the return
528 // value. Branch weights metadata also updated.
529 // If \p AttachProfToDirectCall is true, a prof metadata is attached to the
530 // new direct call to contain \p Count. This is used by SamplePGO inliner to
531 // check callsite hotness.
532 // Returns the promoted direct call instruction.
533 Instruction *llvm::promoteIndirectCall(Instruction *Inst,
534                                        Function *DirectCallee, uint64_t Count,
535                                        uint64_t TotalCount,
536                                        bool AttachProfToDirectCall,
537                                        OptimizationRemarkEmitter *ORE) {
538   assert(DirectCallee != nullptr);
539   BasicBlock *BB = Inst->getParent();
540   // Just to suppress the non-debug build warning.
541   (void)BB;
542   DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
543   DEBUG(dbgs() << *BB << "\n");
544 
545   BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
546   createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
547                    &IndirectCallBB, &MergeBB);
548 
549   Instruction *NewInst =
550       createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
551 
552   if (AttachProfToDirectCall) {
553     SmallVector<uint32_t, 1> Weights;
554     Weights.push_back(Count);
555     MDBuilder MDB(NewInst->getContext());
556     dyn_cast<Instruction>(NewInst->stripPointerCasts())
557         ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
558   }
559 
560   // Move Inst from MergeBB to IndirectCallBB.
561   Inst->removeFromParent();
562   IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
563                                        Inst);
564 
565   if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
566     // At this point, the original indirect invoke instruction has the original
567     // UnwindDest and NormalDest. For the direct invoke instruction, the
568     // NormalDest points to MergeBB, and MergeBB jumps to the original
569     // NormalDest. MergeBB might have a new bitcast instruction for the return
570     // value. The PHIs are with the original NormalDest. Since we now have two
571     // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
572     //
573     // UnwindDest will not use the return value. So pass nullptr here.
574     fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
575                           DirectCallBB);
576     // We don't need to update the operand from NormalDest for DirectCallBB.
577     // Pass nullptr here.
578     fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
579                               IndirectCallBB, NewInst);
580   }
581 
582   insertCallRetPHI(Inst, NewInst, DirectCallee);
583 
584   DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
585   DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
586 
587   using namespace ore;
588   if (ORE)
589     ORE->emit(OptimizationRemark(DEBUG_TYPE, "Promoted", Inst)
590               << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
591               << " with count " << NV("Count", Count) << " out of "
592               << NV("TotalCount", TotalCount));
593   return NewInst;
594 }
595 
596 // Promote indirect-call to conditional direct-call for one callsite.
597 uint32_t ICallPromotionFunc::tryToPromote(
598     Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
599     uint64_t &TotalCount) {
600   uint32_t NumPromoted = 0;
601 
602   for (auto &C : Candidates) {
603     uint64_t Count = C.Count;
604     promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO,
605                         &ORE);
606     assert(TotalCount >= Count);
607     TotalCount -= Count;
608     NumOfPGOICallPromotion++;
609     NumPromoted++;
610   }
611   return NumPromoted;
612 }
613 
614 // Traverse all the indirect-call callsite and get the value profile
615 // annotation to perform indirect-call promotion.
616 bool ICallPromotionFunc::processFunction() {
617   bool Changed = false;
618   ICallPromotionAnalysis ICallAnalysis;
619   for (auto &I : findIndirectCallSites(F)) {
620     uint32_t NumVals, NumCandidates;
621     uint64_t TotalCount;
622     auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
623         I, NumVals, TotalCount, NumCandidates);
624     if (!NumCandidates)
625       continue;
626     auto PromotionCandidates = getPromotionCandidatesForCallSite(
627         I, ICallProfDataRef, TotalCount, NumCandidates);
628     uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
629     if (NumPromoted == 0)
630       continue;
631 
632     Changed = true;
633     // Adjust the MD.prof metadata. First delete the old one.
634     I->setMetadata(LLVMContext::MD_prof, nullptr);
635     // If all promoted, we don't need the MD.prof metadata.
636     if (TotalCount == 0 || NumPromoted == NumVals)
637       continue;
638     // Otherwise we need update with the un-promoted records back.
639     annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
640                       IPVK_IndirectCallTarget, NumCandidates);
641   }
642   return Changed;
643 }
644 
645 // A wrapper function that does the actual work.
646 static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO,
647                                  ModuleAnalysisManager *AM = nullptr) {
648   if (DisableICP)
649     return false;
650   InstrProfSymtab Symtab;
651   if (Error E = Symtab.create(M, InLTO)) {
652     std::string SymtabFailure = toString(std::move(E));
653     DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n");
654     (void)SymtabFailure;
655     return false;
656   }
657   bool Changed = false;
658   for (auto &F : M) {
659     if (F.isDeclaration())
660       continue;
661     if (F.hasFnAttribute(Attribute::OptimizeNone))
662       continue;
663 
664     std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
665     OptimizationRemarkEmitter *ORE;
666     if (AM) {
667       auto &FAM =
668           AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
669       ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
670     } else {
671       OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
672       ORE = OwnedORE.get();
673     }
674 
675     ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE);
676     bool FuncChanged = ICallPromotion.processFunction();
677     if (ICPDUMPAFTER && FuncChanged) {
678       DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
679       DEBUG(dbgs() << "\n");
680     }
681     Changed |= FuncChanged;
682     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
683       DEBUG(dbgs() << " Stop: Cutoff reached.\n");
684       break;
685     }
686   }
687   return Changed;
688 }
689 
690 bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
691   // Command-line option has the priority for InLTO.
692   return promoteIndirectCalls(M, InLTO | ICPLTOMode,
693                               SamplePGO | ICPSamplePGOMode);
694 }
695 
696 PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
697                                                 ModuleAnalysisManager &AM) {
698   if (!promoteIndirectCalls(M, InLTO | ICPLTOMode, SamplePGO | ICPSamplePGOMode,
699                             &AM))
700     return PreservedAnalyses::all();
701 
702   return PreservedAnalyses::none();
703 }
704