1 //===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===//
2 //
3 //                      The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the transformation that promotes indirect calls to
11 // conditional direct calls when the indirect-call value profile metadata is
12 // available.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "IndirectCallSiteVisitor.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Analysis/CFG.h"
21 #include "llvm/IR/CallSite.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/InstIterator.h"
25 #include "llvm/IR/InstVisitor.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/MDBuilder.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Pass.h"
31 #include "llvm/ProfileData/InstrProfReader.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Transforms/Instrumentation.h"
34 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
35 #include <string>
36 #include <utility>
37 #include <vector>
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "icall-promotion"
42 
43 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
44 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
45 
46 // Command line option to disable indirect-call promotion with the default as
47 // false. This is for debug purpose.
48 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
49                                 cl::desc("Disable indirect call promotion"));
50 
51 // The minimum call count for the direct-call target to be considered as the
52 // promotion candidate.
53 static cl::opt<unsigned>
54     ICPCountThreshold("icp-count-threshold", cl::Hidden, cl::ZeroOrMore,
55                       cl::init(1000),
56                       cl::desc("The minimum count to the direct call target "
57                                "for the promotion"));
58 
59 // The percent threshold for the direct-call target (this call site vs the
60 // total call count) for it to be considered as the promotion target.
61 static cl::opt<unsigned>
62     ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
63                         cl::ZeroOrMore,
64                         cl::desc("The percentage threshold for the promotion"));
65 
66 // Set the maximum number of targets to promote for a single indirect-call
67 // callsite.
68 static cl::opt<unsigned>
69     MaxNumPromotions("icp-max-prom", cl::init(2), cl::Hidden, cl::ZeroOrMore,
70                      cl::desc("Max number of promotions for a single indirect "
71                               "call callsite"));
72 
73 // Set the cutoff value for the promotion. If the value is other than 0, we
74 // stop the transformation once the total number of promotions equals the cutoff
75 // value.
76 // For debug use only.
77 static cl::opt<unsigned>
78     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
79               cl::desc("Max number of promotions for this compilaiton"));
80 
81 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
82 // For debug use only.
83 static cl::opt<unsigned>
84     ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
85               cl::desc("Skip Callsite up to this number for this compilaiton"));
86 
87 // Set if the pass is called in LTO optimization. The difference for LTO mode
88 // is the pass won't prefix the source module name to the internal linkage
89 // symbols.
90 static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
91                                 cl::desc("Run indirect-call promotion in LTO "
92                                          "mode"));
93 // If the option is set to true, only call instructions will be considered for
94 // transformation -- invoke instructions will be ignored.
95 static cl::opt<bool>
96     ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
97                 cl::desc("Run indirect-call promotion for call instructions "
98                          "only"));
99 
100 // If the option is set to true, only invoke instructions will be considered for
101 // transformation -- call instructions will be ignored.
102 static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
103                                    cl::Hidden,
104                                    cl::desc("Run indirect-call promotion for "
105                                             "invoke instruction only"));
106 
107 // Dump the function level IR if the transformation happened in this
108 // function. For debug use only.
109 static cl::opt<bool>
110     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
111                  cl::desc("Dump IR after transformation happens"));
112 
113 namespace {
114 class PGOIndirectCallPromotion : public ModulePass {
115 public:
116   static char ID;
117 
118   PGOIndirectCallPromotion(bool InLTO = false) : ModulePass(ID), InLTO(InLTO) {
119     initializePGOIndirectCallPromotionPass(*PassRegistry::getPassRegistry());
120   }
121 
122   const char *getPassName() const override {
123     return "PGOIndirectCallPromotion";
124   }
125 
126 private:
127   bool runOnModule(Module &M) override;
128 
129   // If this pass is called in LTO. We need to special handling the PGOFuncName
130   // for the static variables due to LTO's internalization.
131   bool InLTO;
132 };
133 } // end anonymous namespace
134 
135 char PGOIndirectCallPromotion::ID = 0;
136 INITIALIZE_PASS(PGOIndirectCallPromotion, "pgo-icall-prom",
137                 "Use PGO instrumentation profile to promote indirect calls to "
138                 "direct calls.",
139                 false, false)
140 
141 ModulePass *llvm::createPGOIndirectCallPromotionPass(bool InLTO) {
142   return new PGOIndirectCallPromotion(InLTO);
143 }
144 
145 // The class for main data structure to promote indirect calls to conditional
146 // direct calls.
147 class ICallPromotionFunc {
148 private:
149   Function &F;
150   Module *M;
151 
152   // Symtab that maps indirect call profile values to function names and
153   // defines.
154   InstrProfSymtab *Symtab;
155 
156   // Allocate space to read the profile annotation.
157   std::unique_ptr<InstrProfValueData[]> ValueDataArray;
158 
159   // Count is the call count for the direct-call target and
160   // TotalCount is the call count for the indirect-call callsite.
161   // Return true we should promote this indirect-call target.
162   bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount);
163 
164   enum TargetStatus {
165     OK,                   // Should be able to promote.
166     NotAvailableInModule, // Cannot find the target in current module.
167     ReturnTypeMismatch,   // Return type mismatch b/w target and indirect-call.
168     NumArgsMismatch,      // Number of arguments does not match.
169     ArgTypeMismatch       // Type mismatch in the arguments (cannot bitcast).
170   };
171 
172   // Test if we can legally promote this direct-call of Target.
173   TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
174                                 Function *&F);
175 
176   // A struct that records the direct target and it's call count.
177   struct PromotionCandidate {
178     Function *TargetFunction;
179     uint64_t Count;
180     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
181   };
182 
183   // Check if the indirect-call call site should be promoted. Return the number
184   // of promotions.
185   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
186       Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
187       uint64_t TotalCount);
188 
189   // Main function that transforms Inst (either a indirect-call instruction, or
190   // an invoke instruction , to a conditional call to F. This is like:
191   //     if (Inst.CalledValue == F)
192   //        F(...);
193   //     else
194   //        Inst(...);
195   //     end
196   // TotalCount is the profile count value that the instruction executes.
197   // Count is the profile count value that F is the target function.
198   // These two values are being used to update the branch weight.
199   void promote(Instruction *Inst, Function *F, uint64_t Count,
200                uint64_t TotalCount);
201 
202   // Promote a list of targets for one indirect-call callsite. Return
203   // the number of promotions.
204   uint32_t tryToPromote(Instruction *Inst,
205                         const std::vector<PromotionCandidate> &Candidates,
206                         uint64_t &TotalCount);
207 
208   static const char *StatusToString(const TargetStatus S) {
209     switch (S) {
210     case OK:
211       return "OK to promote";
212     case NotAvailableInModule:
213       return "Cannot find the target";
214     case ReturnTypeMismatch:
215       return "Return type mismatch";
216     case NumArgsMismatch:
217       return "The number of arguments mismatch";
218     case ArgTypeMismatch:
219       return "Argument Type mismatch";
220     }
221     llvm_unreachable("Should not reach here");
222   }
223 
224   // Noncopyable
225   ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
226   ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
227 
228 public:
229   ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
230       : F(Func), M(Modu), Symtab(Symtab) {
231     ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MaxNumPromotions);
232   }
233   bool processFunction();
234 };
235 
236 bool ICallPromotionFunc::isPromotionProfitable(uint64_t Count,
237                                                uint64_t TotalCount) {
238   if (Count < ICPCountThreshold)
239     return false;
240 
241   unsigned Percentage = (Count * 100) / TotalCount;
242   return (Percentage >= ICPPercentThreshold);
243 }
244 
245 ICallPromotionFunc::TargetStatus
246 ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
247                                      Function *&TargetFunction) {
248   Function *DirectCallee = Symtab->getFunction(Target);
249   if (DirectCallee == nullptr)
250     return NotAvailableInModule;
251   // Check the return type.
252   Type *CallRetType = Inst->getType();
253   if (!CallRetType->isVoidTy()) {
254     Type *FuncRetType = DirectCallee->getReturnType();
255     if (FuncRetType != CallRetType &&
256         !CastInst::isBitCastable(FuncRetType, CallRetType))
257       return ReturnTypeMismatch;
258   }
259 
260   // Check if the arguments are compatible with the parameters
261   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
262   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
263   CallSite CS(Inst);
264   unsigned ArgNum = CS.arg_size();
265 
266   if (ParamNum != ArgNum && !DirectCalleeType->isVarArg())
267     return NumArgsMismatch;
268 
269   for (unsigned I = 0; I < ParamNum; ++I) {
270     Type *PTy = DirectCalleeType->getFunctionParamType(I);
271     Type *ATy = CS.getArgument(I)->getType();
272     if (PTy == ATy)
273       continue;
274     if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
275       return ArgTypeMismatch;
276   }
277 
278   DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
279                << Symtab->getFuncName(Target) << "\n");
280   TargetFunction = DirectCallee;
281   return OK;
282 }
283 
284 // Indirect-call promotion heuristic. The direct targets are sorted based on
285 // the count. Stop at the first target that is not promoted.
286 std::vector<ICallPromotionFunc::PromotionCandidate>
287 ICallPromotionFunc::getPromotionCandidatesForCallSite(
288     Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
289     uint64_t TotalCount) {
290   uint32_t NumVals = ValueDataRef.size();
291   std::vector<PromotionCandidate> Ret;
292 
293   DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
294                << " Num_targets: " << NumVals << "\n");
295   NumOfPGOICallsites++;
296   if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
297     DEBUG(dbgs() << " Skip: User options.\n");
298     return Ret;
299   }
300 
301   for (uint32_t I = 0; I < MaxNumPromotions && I < NumVals; I++) {
302     uint64_t Count = ValueDataRef[I].Count;
303     assert(Count <= TotalCount);
304     uint64_t Target = ValueDataRef[I].Value;
305     DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
306                  << "  Target_func: " << Target << "\n");
307 
308     if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
309       DEBUG(dbgs() << " Not promote: User options.\n");
310       break;
311     }
312     if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
313       DEBUG(dbgs() << " Not promote: User option.\n");
314       break;
315     }
316     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
317       DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
318       break;
319     }
320     if (!isPromotionProfitable(Count, TotalCount)) {
321       DEBUG(dbgs() << " Not promote: Cold target.\n");
322       break;
323     }
324     Function *TargetFunction = nullptr;
325     TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
326     if (Status != OK) {
327       StringRef TargetFuncName = Symtab->getFuncName(Target);
328       const char *Reason = StatusToString(Status);
329       DEBUG(dbgs() << " Not promote: " << Reason << "\n");
330       emitOptimizationRemarkMissed(
331           F.getContext(), "PGOIndirectCallPromotion", F, Inst->getDebugLoc(),
332           Twine("Cannot promote indirect call to ") +
333               (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
334               Twine(" with count of ") + Twine(Count) + ": " + Reason);
335       break;
336     }
337     Ret.push_back(PromotionCandidate(TargetFunction, Count));
338     TotalCount -= Count;
339   }
340   return Ret;
341 }
342 
343 // Create a diamond structure for If_Then_Else. Also update the profile
344 // count. Do the fix-up for the invoke instruction.
345 static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
346                              uint64_t Count, uint64_t TotalCount,
347                              BasicBlock **DirectCallBB,
348                              BasicBlock **IndirectCallBB,
349                              BasicBlock **MergeBB) {
350   CallSite CS(Inst);
351   Value *OrigCallee = CS.getCalledValue();
352 
353   IRBuilder<> BBBuilder(Inst);
354   LLVMContext &Ctx = Inst->getContext();
355   Value *BCI1 =
356       BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
357   Value *BCI2 =
358       BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
359   Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
360 
361   uint64_t ElseCount = TotalCount - Count;
362   uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
363   uint64_t Scale = calculateCountScale(MaxCount);
364   MDBuilder MDB(Inst->getContext());
365   MDNode *BranchWeights = MDB.createBranchWeights(
366       scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
367   TerminatorInst *ThenTerm, *ElseTerm;
368   SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
369                                 BranchWeights);
370   *DirectCallBB = ThenTerm->getParent();
371   (*DirectCallBB)->setName("if.true.direct_targ");
372   *IndirectCallBB = ElseTerm->getParent();
373   (*IndirectCallBB)->setName("if.false.orig_indirect");
374   *MergeBB = Inst->getParent();
375   (*MergeBB)->setName("if.end.icp");
376 
377   // Special handing of Invoke instructions.
378   InvokeInst *II = dyn_cast<InvokeInst>(Inst);
379   if (!II)
380     return;
381 
382   // We don't need branch instructions for invoke.
383   ThenTerm->eraseFromParent();
384   ElseTerm->eraseFromParent();
385 
386   // Add jump from Merge BB to the NormalDest. This is needed for the newly
387   // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
388   BranchInst::Create(II->getNormalDest(), *MergeBB);
389 }
390 
391 // Find the PHI in BB that have the CallResult as the operand.
392 static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
393   BasicBlock *From = Inst->getParent();
394   for (auto &I : *BB) {
395     PHINode *PHI = dyn_cast<PHINode>(&I);
396     if (!PHI)
397       continue;
398     int IX = PHI->getBasicBlockIndex(From);
399     if (IX == -1)
400       continue;
401     Value *V = PHI->getIncomingValue(IX);
402     if (dyn_cast<Instruction>(V) == Inst)
403       return true;
404   }
405   return false;
406 }
407 
408 // This method fixes up PHI nodes in BB where BB is the UnwindDest of an
409 // invoke instruction. In BB, there may be PHIs with incoming block being
410 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
411 // instructions to its own BB, OrigBB is no longer the predecessor block of BB.
412 // Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
413 // so the PHI node's incoming BBs need to be fixed up accordingly.
414 static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
415                                   BasicBlock *OrigBB,
416                                   BasicBlock *IndirectCallBB,
417                                   BasicBlock *DirectCallBB) {
418   for (auto &I : *BB) {
419     PHINode *PHI = dyn_cast<PHINode>(&I);
420     if (!PHI)
421       continue;
422     int IX = PHI->getBasicBlockIndex(OrigBB);
423     if (IX == -1)
424       continue;
425     Value *V = PHI->getIncomingValue(IX);
426     PHI->addIncoming(V, IndirectCallBB);
427     PHI->setIncomingBlock(IX, DirectCallBB);
428   }
429 }
430 
431 // This method fixes up PHI nodes in BB where BB is the NormalDest of an
432 // invoke instruction. In BB, there may be PHIs with incoming block being
433 // OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
434 // instructions to its own BB, a new incoming edge will be added to the original
435 // NormalDstBB from the IndirectCallBB.
436 static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
437                                       BasicBlock *OrigBB,
438                                       BasicBlock *IndirectCallBB,
439                                       Instruction *NewInst) {
440   for (auto &I : *BB) {
441     PHINode *PHI = dyn_cast<PHINode>(&I);
442     if (!PHI)
443       continue;
444     int IX = PHI->getBasicBlockIndex(OrigBB);
445     if (IX == -1)
446       continue;
447     Value *V = PHI->getIncomingValue(IX);
448     if (dyn_cast<Instruction>(V) == Inst) {
449       PHI->setIncomingBlock(IX, IndirectCallBB);
450       PHI->addIncoming(NewInst, OrigBB);
451       continue;
452     }
453     PHI->addIncoming(V, IndirectCallBB);
454   }
455 }
456 
457 // Add a bitcast instruction to the direct-call return value if needed.
458 // Add a bitcast instruction to the direct-call return value if needed.
459 static Instruction *insertCallRetCast(const Instruction *Inst,
460                                       Instruction *DirectCallInst,
461                                       Function *DirectCallee) {
462   if (Inst->getType()->isVoidTy())
463     return DirectCallInst;
464 
465   Type *CallRetType = Inst->getType();
466   Type *FuncRetType = DirectCallee->getReturnType();
467   if (FuncRetType == CallRetType)
468     return DirectCallInst;
469 
470   BasicBlock *InsertionBB;
471   if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
472     InsertionBB = CI->getParent();
473   else
474     InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
475 
476   return (new BitCastInst(DirectCallInst, CallRetType, "",
477                           InsertionBB->getTerminator()));
478 }
479 
480 // Create a DirectCall instruction in the DirectCallBB.
481 // Parameter Inst is the indirect-call (invoke) instruction.
482 // DirectCallee is the decl of the direct-call (invoke) target.
483 // DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
484 // MergeBB is the bottom BB of the if-then-else-diamond after the
485 // transformation. For invoke instruction, the edges from DirectCallBB and
486 // IndirectCallBB to MergeBB are removed before this call (during
487 // createIfThenElse).
488 static Instruction *createDirectCallInst(const Instruction *Inst,
489                                          Function *DirectCallee,
490                                          BasicBlock *DirectCallBB,
491                                          BasicBlock *MergeBB) {
492   Instruction *NewInst = Inst->clone();
493   if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
494     CI->setCalledFunction(DirectCallee);
495     CI->mutateFunctionType(DirectCallee->getFunctionType());
496   } else {
497     // Must be an invoke instruction. Direct invoke's normal destination is
498     // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
499     // Also since IndirectCallBB does not have an edge to MergeBB, there is no
500     // need to insert new PHIs into MergeBB.
501     InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
502     assert(II);
503     II->setCalledFunction(DirectCallee);
504     II->mutateFunctionType(DirectCallee->getFunctionType());
505     II->setNormalDest(MergeBB);
506   }
507 
508   DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
509                                      NewInst);
510 
511   // Clear the value profile data.
512   NewInst->setMetadata(LLVMContext::MD_prof, 0);
513   CallSite NewCS(NewInst);
514   FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
515   unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
516   for (unsigned I = 0; I < ParamNum; ++I) {
517     Type *ATy = NewCS.getArgument(I)->getType();
518     Type *PTy = DirectCalleeType->getParamType(I);
519     if (ATy != PTy) {
520       BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
521       NewCS.setArgument(I, BI);
522     }
523   }
524 
525   return insertCallRetCast(Inst, NewInst, DirectCallee);
526 }
527 
528 // Create a PHI to unify the return values of calls.
529 static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
530                              Function *DirectCallee) {
531   if (Inst->getType()->isVoidTy())
532     return;
533 
534   BasicBlock *RetValBB = CallResult->getParent();
535 
536   BasicBlock *PHIBB;
537   if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
538     RetValBB = II->getNormalDest();
539 
540   PHIBB = RetValBB->getSingleSuccessor();
541   if (getCallRetPHINode(PHIBB, Inst))
542     return;
543 
544   PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
545   PHIBB->getInstList().push_front(CallRetPHI);
546   Inst->replaceAllUsesWith(CallRetPHI);
547   CallRetPHI->addIncoming(Inst, Inst->getParent());
548   CallRetPHI->addIncoming(CallResult, RetValBB);
549 }
550 
551 // This function does the actual indirect-call promotion transformation:
552 // For an indirect-call like:
553 //     Ret = (*Foo)(Args);
554 // It transforms to:
555 //     if (Foo == DirectCallee)
556 //        Ret1 = DirectCallee(Args);
557 //     else
558 //        Ret2 = (*Foo)(Args);
559 //     Ret = phi(Ret1, Ret2);
560 // It adds type casts for the args do not match the parameters and the return
561 // value. Branch weights metadata also updated.
562 void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
563                                  uint64_t Count, uint64_t TotalCount) {
564   assert(DirectCallee != nullptr);
565   BasicBlock *BB = Inst->getParent();
566   // Just to suppress the non-debug build warning.
567   (void)BB;
568   DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
569   DEBUG(dbgs() << *BB << "\n");
570 
571   BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
572   createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
573                    &IndirectCallBB, &MergeBB);
574 
575   Instruction *NewInst =
576       createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
577 
578   // Move Inst from MergeBB to IndirectCallBB.
579   Inst->removeFromParent();
580   IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
581                                        Inst);
582 
583   if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
584     // At this point, the original indirect invoke instruction has the original
585     // UnwindDest and NormalDest. For the direct invoke instruction, the
586     // NormalDest points to MergeBB, and MergeBB jumps to the original
587     // NormalDest. MergeBB might have a new bitcast instruction for the return
588     // value. The PHIs are with the original NormalDest. Since we now have two
589     // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
590     //
591     // UnwindDest will not use the return value. So pass nullptr here.
592     fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
593                           DirectCallBB);
594     // We don't need to update the operand from NormalDest for DirectCallBB.
595     // Pass nullptr here.
596     fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
597                               IndirectCallBB, NewInst);
598   }
599 
600   insertCallRetPHI(Inst, NewInst, DirectCallee);
601 
602   DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
603   DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
604 
605   emitOptimizationRemark(
606       F.getContext(), "PGOIndirectCallPromotion", F, Inst->getDebugLoc(),
607       Twine("Promote indirect call to ") + DirectCallee->getName() +
608           " with count " + Twine(Count) + " out of " + Twine(TotalCount));
609 }
610 
611 // Promote indirect-call to conditional direct-call for one callsite.
612 uint32_t ICallPromotionFunc::tryToPromote(
613     Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
614     uint64_t &TotalCount) {
615   uint32_t NumPromoted = 0;
616 
617   for (auto &C : Candidates) {
618     uint64_t Count = C.Count;
619     promote(Inst, C.TargetFunction, Count, TotalCount);
620     assert(TotalCount >= Count);
621     TotalCount -= Count;
622     NumOfPGOICallPromotion++;
623     NumPromoted++;
624   }
625   return NumPromoted;
626 }
627 
628 // Traverse all the indirect-call callsite and get the value profile
629 // annotation to perform indirect-call promotion.
630 bool ICallPromotionFunc::processFunction() {
631   bool Changed = false;
632   for (auto &I : findIndirectCallSites(F)) {
633     uint32_t NumVals;
634     uint64_t TotalCount;
635     bool Res =
636         getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions,
637                                  ValueDataArray.get(), NumVals, TotalCount);
638     if (!Res)
639       continue;
640     ArrayRef<InstrProfValueData> ValueDataArrayRef(ValueDataArray.get(),
641                                                    NumVals);
642     auto PromotionCandidates =
643         getPromotionCandidatesForCallSite(I, ValueDataArrayRef, TotalCount);
644     uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
645     if (NumPromoted == 0)
646       continue;
647 
648     Changed = true;
649     // Adjust the MD.prof metadata. First delete the old one.
650     I->setMetadata(LLVMContext::MD_prof, 0);
651     // If all promoted, we don't need the MD.prof metadata.
652     if (TotalCount == 0 || NumPromoted == NumVals)
653       continue;
654     // Otherwise we need update with the un-promoted records back.
655     annotateValueSite(*M, *I, ValueDataArrayRef.slice(NumPromoted), TotalCount,
656                       IPVK_IndirectCallTarget, MaxNumPromotions);
657   }
658   return Changed;
659 }
660 
661 // A wrapper function that does the actual work.
662 static bool promoteIndirectCalls(Module &M, bool InLTO) {
663   if (DisableICP)
664     return false;
665   InstrProfSymtab Symtab;
666   Symtab.create(M, InLTO);
667   bool Changed = false;
668   for (auto &F : M) {
669     if (F.isDeclaration())
670       continue;
671     if (F.hasFnAttribute(Attribute::OptimizeNone))
672       continue;
673     ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
674     bool FuncChanged = ICallPromotion.processFunction();
675     if (ICPDUMPAFTER && FuncChanged) {
676       DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
677       DEBUG(dbgs() << "\n");
678     }
679     Changed |= FuncChanged;
680     if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
681       DEBUG(dbgs() << " Stop: Cutoff reached.\n");
682       break;
683     }
684   }
685   return Changed;
686 }
687 
688 bool PGOIndirectCallPromotion::runOnModule(Module &M) {
689   // Command-line option has the priority for InLTO.
690   InLTO |= ICPLTOMode;
691   return promoteIndirectCalls(M, InLTO);
692 }
693