1 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements some loop unrolling utilities. It does not define any
11 // actual pass or policy, but provides a single function to perform loop
12 // unrolling.
13 //
14 // The process of unrolling can produce extraneous basic blocks linked with
15 // unconditional branches.  This will be corrected in the future.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/Transforms/Utils/UnrollLoop.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/InstructionSimplify.h"
24 #include "llvm/Analysis/LoopIterator.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
27 #include "llvm/Analysis/ScalarEvolution.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36 #include "llvm/Transforms/Utils/Cloning.h"
37 #include "llvm/Transforms/Utils/Local.h"
38 #include "llvm/Transforms/Utils/LoopSimplify.h"
39 #include "llvm/Transforms/Utils/LoopUtils.h"
40 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "loop-unroll"
44 
45 // TODO: Should these be here or in LoopUnroll?
46 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
47 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
48 
49 static cl::opt<bool>
50 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
51                     cl::desc("Allow runtime unrolled loops to be unrolled "
52                              "with epilog instead of prolog."));
53 
54 static cl::opt<bool>
55 UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
56                     cl::desc("Verify domtree after unrolling"),
57 #ifdef NDEBUG
58     cl::init(false)
59 #else
60     cl::init(true)
61 #endif
62                     );
63 
64 /// Convert the instruction operands from referencing the current values into
65 /// those specified by VMap.
66 static inline void remapInstruction(Instruction *I,
67                                     ValueToValueMapTy &VMap) {
68   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
69     Value *Op = I->getOperand(op);
70     ValueToValueMapTy::iterator It = VMap.find(Op);
71     if (It != VMap.end())
72       I->setOperand(op, It->second);
73   }
74 
75   if (PHINode *PN = dyn_cast<PHINode>(I)) {
76     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
77       ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
78       if (It != VMap.end())
79         PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
80     }
81   }
82 }
83 
84 /// Folds a basic block into its predecessor if it only has one predecessor, and
85 /// that predecessor only has one successor.
86 /// The LoopInfo Analysis that is passed will be kept consistent.  If folding is
87 /// successful references to the containing loop must be removed from
88 /// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have
89 /// references to the eliminated BB.  The argument ForgottenLoops contains a set
90 /// of loops that have already been forgotten to prevent redundant, expensive
91 /// calls to ScalarEvolution::forgetLoop.  Returns the new combined block.
92 static BasicBlock *
93 foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
94                          SmallPtrSetImpl<Loop *> &ForgottenLoops,
95                          DominatorTree *DT) {
96   // Merge basic blocks into their predecessor if there is only one distinct
97   // pred, and if there is only one distinct successor of the predecessor, and
98   // if there are no PHI nodes.
99   BasicBlock *OnlyPred = BB->getSinglePredecessor();
100   if (!OnlyPred) return nullptr;
101 
102   if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
103     return nullptr;
104 
105   DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
106 
107   // Resolve any PHI nodes at the start of the block.  They are all
108   // guaranteed to have exactly one entry if they exist, unless there are
109   // multiple duplicate (but guaranteed to be equal) entries for the
110   // incoming edges.  This occurs when there are multiple edges from
111   // OnlyPred to OnlySucc.
112   FoldSingleEntryPHINodes(BB);
113 
114   // Delete the unconditional branch from the predecessor...
115   OnlyPred->getInstList().pop_back();
116 
117   // Make all PHI nodes that referred to BB now refer to Pred as their
118   // source...
119   BB->replaceAllUsesWith(OnlyPred);
120 
121   // Move all definitions in the successor to the predecessor...
122   OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
123 
124   // OldName will be valid until erased.
125   StringRef OldName = BB->getName();
126 
127   // Erase the old block and update dominator info.
128   if (DT)
129     if (DomTreeNode *DTN = DT->getNode(BB)) {
130       DomTreeNode *PredDTN = DT->getNode(OnlyPred);
131       SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
132       for (auto *DI : Children)
133         DT->changeImmediateDominator(DI, PredDTN);
134 
135       DT->eraseNode(BB);
136     }
137 
138   // ScalarEvolution holds references to loop exit blocks.
139   if (SE) {
140     if (Loop *L = LI->getLoopFor(BB)) {
141       if (ForgottenLoops.insert(L).second)
142         SE->forgetLoop(L);
143     }
144   }
145   LI->removeBlock(BB);
146 
147   // Inherit predecessor's name if it exists...
148   if (!OldName.empty() && !OnlyPred->hasName())
149     OnlyPred->setName(OldName);
150 
151   BB->eraseFromParent();
152 
153   return OnlyPred;
154 }
155 
156 /// Check if unrolling created a situation where we need to insert phi nodes to
157 /// preserve LCSSA form.
158 /// \param Blocks is a vector of basic blocks representing unrolled loop.
159 /// \param L is the outer loop.
160 /// It's possible that some of the blocks are in L, and some are not. In this
161 /// case, if there is a use is outside L, and definition is inside L, we need to
162 /// insert a phi-node, otherwise LCSSA will be broken.
163 /// The function is just a helper function for llvm::UnrollLoop that returns
164 /// true if this situation occurs, indicating that LCSSA needs to be fixed.
165 static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
166                                      LoopInfo *LI) {
167   for (BasicBlock *BB : Blocks) {
168     if (LI->getLoopFor(BB) == L)
169       continue;
170     for (Instruction &I : *BB) {
171       for (Use &U : I.operands()) {
172         if (auto Def = dyn_cast<Instruction>(U)) {
173           Loop *DefLoop = LI->getLoopFor(Def->getParent());
174           if (!DefLoop)
175             continue;
176           if (DefLoop->contains(L))
177             return true;
178         }
179       }
180     }
181   }
182   return false;
183 }
184 
185 /// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
186 /// and adds a mapping from the original loop to the new loop to NewLoops.
187 /// Returns nullptr if no new loop was created and a pointer to the
188 /// original loop OriginalBB was part of otherwise.
189 const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
190                                            BasicBlock *ClonedBB, LoopInfo *LI,
191                                            NewLoopsMap &NewLoops) {
192   // Figure out which loop New is in.
193   const Loop *OldLoop = LI->getLoopFor(OriginalBB);
194   assert(OldLoop && "Should (at least) be in the loop being unrolled!");
195 
196   Loop *&NewLoop = NewLoops[OldLoop];
197   if (!NewLoop) {
198     // Found a new sub-loop.
199     assert(OriginalBB == OldLoop->getHeader() &&
200            "Header should be first in RPO");
201 
202     Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
203     assert(NewLoopParent &&
204            "Expected parent loop before sub-loop in RPO");
205     NewLoop = new Loop;
206     NewLoopParent->addChildLoop(NewLoop);
207     NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
208     return OldLoop;
209   } else {
210     NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
211     return nullptr;
212   }
213 }
214 
215 /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
216 /// if unrolling was successful, or false if the loop was unmodified. Unrolling
217 /// can only fail when the loop's latch block is not terminated by a conditional
218 /// branch instruction. However, if the trip count (and multiple) are not known,
219 /// loop unrolling will mostly produce more code that is no faster.
220 ///
221 /// TripCount is the upper bound of the iteration on which control exits
222 /// LatchBlock. Control may exit the loop prior to TripCount iterations either
223 /// via an early branch in other loop block or via LatchBlock terminator. This
224 /// is relaxed from the general definition of trip count which is the number of
225 /// times the loop header executes. Note that UnrollLoop assumes that the loop
226 /// counter test is in LatchBlock in order to remove unnecesssary instances of
227 /// the test.  If control can exit the loop from the LatchBlock's terminator
228 /// prior to TripCount iterations, flag PreserveCondBr needs to be set.
229 ///
230 /// PreserveCondBr indicates whether the conditional branch of the LatchBlock
231 /// needs to be preserved.  It is needed when we use trip count upper bound to
232 /// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
233 /// conditional branch needs to be preserved.
234 ///
235 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
236 /// execute without exiting the loop.
237 ///
238 /// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
239 /// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
240 /// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
241 /// iterations before branching into the unrolled loop.  UnrollLoop will not
242 /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
243 /// AllowExpensiveTripCount is false.
244 ///
245 /// If we want to perform PGO-based loop peeling, PeelCount is set to the
246 /// number of iterations we want to peel off.
247 ///
248 /// The LoopInfo Analysis that is passed will be kept consistent.
249 ///
250 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
251 /// DominatorTree if they are non-null.
252 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
253                       bool AllowRuntime, bool AllowExpensiveTripCount,
254                       bool PreserveCondBr, bool PreserveOnlyFirst,
255                       unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
256                       ScalarEvolution *SE, DominatorTree *DT,
257                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
258                       bool PreserveLCSSA) {
259 
260   BasicBlock *Preheader = L->getLoopPreheader();
261   if (!Preheader) {
262     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
263     return false;
264   }
265 
266   BasicBlock *LatchBlock = L->getLoopLatch();
267   if (!LatchBlock) {
268     DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
269     return false;
270   }
271 
272   // Loops with indirectbr cannot be cloned.
273   if (!L->isSafeToClone()) {
274     DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
275     return false;
276   }
277 
278   BasicBlock *Header = L->getHeader();
279   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
280 
281   if (!BI || BI->isUnconditional()) {
282     // The loop-rotate pass can be helpful to avoid this in many cases.
283     DEBUG(dbgs() <<
284              "  Can't unroll; loop not terminated by a conditional branch.\n");
285     return false;
286   }
287 
288   if (Header->hasAddressTaken()) {
289     // The loop-rotate pass can be helpful to avoid this in many cases.
290     DEBUG(dbgs() <<
291           "  Won't unroll loop: address of header block is taken.\n");
292     return false;
293   }
294 
295   if (TripCount != 0)
296     DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
297   if (TripMultiple != 1)
298     DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");
299 
300   // Effectively "DCE" unrolled iterations that are beyond the tripcount
301   // and will never be executed.
302   if (TripCount != 0 && Count > TripCount)
303     Count = TripCount;
304 
305   // Don't enter the unroll code if there is nothing to do.
306   if (TripCount == 0 && Count < 2 && PeelCount == 0)
307     return false;
308 
309   assert(Count > 0);
310   assert(TripMultiple > 0);
311   assert(TripCount == 0 || TripCount % TripMultiple == 0);
312 
313   // Are we eliminating the loop control altogether?
314   bool CompletelyUnroll = Count == TripCount;
315   SmallVector<BasicBlock *, 4> ExitBlocks;
316   L->getExitBlocks(ExitBlocks);
317   std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
318 
319   // Go through all exits of L and see if there are any phi-nodes there. We just
320   // conservatively assume that they're inserted to preserve LCSSA form, which
321   // means that complete unrolling might break this form. We need to either fix
322   // it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
323   // now we just recompute LCSSA for the outer loop, but it should be possible
324   // to fix it in-place.
325   bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
326                         any_of(ExitBlocks, [](const BasicBlock *BB) {
327                           return isa<PHINode>(BB->begin());
328                         });
329 
330   // We assume a run-time trip count if the compiler cannot
331   // figure out the loop trip count and the unroll-runtime
332   // flag is specified.
333   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
334 
335   assert((!RuntimeTripCount || !PeelCount) &&
336          "Did not expect runtime trip-count unrolling "
337          "and peeling for the same loop");
338 
339   if (PeelCount)
340     peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
341 
342   // Loops containing convergent instructions must have a count that divides
343   // their TripMultiple.
344   DEBUG(
345       {
346         bool HasConvergent = false;
347         for (auto &BB : L->blocks())
348           for (auto &I : *BB)
349             if (auto CS = CallSite(&I))
350               HasConvergent |= CS.isConvergent();
351         assert((!HasConvergent || TripMultiple % Count == 0) &&
352                "Unroll count must divide trip multiple if loop contains a "
353                "convergent operation.");
354       });
355 
356   if (RuntimeTripCount && TripMultiple % Count != 0 &&
357       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
358                                   UnrollRuntimeEpilog, LI, SE, DT,
359                                   PreserveLCSSA)) {
360     if (Force)
361       RuntimeTripCount = false;
362     else
363       return false;
364   }
365 
366   // Notify ScalarEvolution that the loop will be substantially changed,
367   // if not outright eliminated.
368   if (SE)
369     SE->forgetLoop(L);
370 
371   // If we know the trip count, we know the multiple...
372   unsigned BreakoutTrip = 0;
373   if (TripCount != 0) {
374     BreakoutTrip = TripCount % Count;
375     TripMultiple = 0;
376   } else {
377     // Figure out what multiple to use.
378     BreakoutTrip = TripMultiple =
379       (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
380   }
381 
382   using namespace ore;
383   // Report the unrolling decision.
384   if (CompletelyUnroll) {
385     DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
386           << " with trip count " << TripCount << "!\n");
387     ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
388                                  L->getHeader())
389               << "completely unrolled loop with "
390               << NV("UnrollCount", TripCount) << " iterations");
391   } else if (PeelCount) {
392     DEBUG(dbgs() << "PEELING loop %" << Header->getName()
393                  << " with iteration count " << PeelCount << "!\n");
394     ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
395                                  L->getHeader())
396               << " peeled loop by " << NV("PeelCount", PeelCount)
397               << " iterations");
398   } else {
399     OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
400                             L->getHeader());
401     Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count);
402 
403     DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
404           << " by " << Count);
405     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
406       DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
407       ORE->emit(Diag << " with a breakout at trip "
408                      << NV("BreakoutTrip", BreakoutTrip));
409     } else if (TripMultiple != 1) {
410       DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
411       ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple)
412                      << " trips per branch");
413     } else if (RuntimeTripCount) {
414       DEBUG(dbgs() << " with run-time trip count");
415       ORE->emit(Diag << " with run-time trip count");
416     }
417     DEBUG(dbgs() << "!\n");
418   }
419 
420   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
421   BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
422 
423   // For the first iteration of the loop, we should use the precloned values for
424   // PHI nodes.  Insert associations now.
425   ValueToValueMapTy LastValueMap;
426   std::vector<PHINode*> OrigPHINode;
427   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
428     OrigPHINode.push_back(cast<PHINode>(I));
429   }
430 
431   std::vector<BasicBlock*> Headers;
432   std::vector<BasicBlock*> Latches;
433   Headers.push_back(Header);
434   Latches.push_back(LatchBlock);
435 
436   // The current on-the-fly SSA update requires blocks to be processed in
437   // reverse postorder so that LastValueMap contains the correct value at each
438   // exit.
439   LoopBlocksDFS DFS(L);
440   DFS.perform(LI);
441 
442   // Stash the DFS iterators before adding blocks to the loop.
443   LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
444   LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
445 
446   std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
447 
448   // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
449   // might break loop-simplified form for these loops (as they, e.g., would
450   // share the same exit blocks). We'll keep track of loops for which we can
451   // break this so that later we can re-simplify them.
452   SmallSetVector<Loop *, 4> LoopsToSimplify;
453   for (Loop *SubLoop : *L)
454     LoopsToSimplify.insert(SubLoop);
455 
456   for (unsigned It = 1; It != Count; ++It) {
457     std::vector<BasicBlock*> NewBlocks;
458     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
459     NewLoops[L] = L;
460 
461     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
462       ValueToValueMapTy VMap;
463       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
464       Header->getParent()->getBasicBlockList().push_back(New);
465 
466       // Tell LI about New.
467       if (*BB == Header) {
468         assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
469         L->addBasicBlockToLoop(New, *LI);
470       } else {
471         const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
472         if (OldLoop) {
473           LoopsToSimplify.insert(NewLoops[OldLoop]);
474 
475           // Forget the old loop, since its inputs may have changed.
476           if (SE)
477             SE->forgetLoop(OldLoop);
478         }
479       }
480 
481       if (*BB == Header)
482         // Loop over all of the PHI nodes in the block, changing them to use
483         // the incoming values from the previous block.
484         for (PHINode *OrigPHI : OrigPHINode) {
485           PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
486           Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
487           if (Instruction *InValI = dyn_cast<Instruction>(InVal))
488             if (It > 1 && L->contains(InValI))
489               InVal = LastValueMap[InValI];
490           VMap[OrigPHI] = InVal;
491           New->getInstList().erase(NewPHI);
492         }
493 
494       // Update our running map of newest clones
495       LastValueMap[*BB] = New;
496       for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
497            VI != VE; ++VI)
498         LastValueMap[VI->first] = VI->second;
499 
500       // Add phi entries for newly created values to all exit blocks.
501       for (BasicBlock *Succ : successors(*BB)) {
502         if (L->contains(Succ))
503           continue;
504         for (BasicBlock::iterator BBI = Succ->begin();
505              PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
506           Value *Incoming = phi->getIncomingValueForBlock(*BB);
507           ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
508           if (It != LastValueMap.end())
509             Incoming = It->second;
510           phi->addIncoming(Incoming, New);
511         }
512       }
513       // Keep track of new headers and latches as we create them, so that
514       // we can insert the proper branches later.
515       if (*BB == Header)
516         Headers.push_back(New);
517       if (*BB == LatchBlock)
518         Latches.push_back(New);
519 
520       NewBlocks.push_back(New);
521       UnrolledLoopBlocks.push_back(New);
522 
523       // Update DomTree: since we just copy the loop body, and each copy has a
524       // dedicated entry block (copy of the header block), this header's copy
525       // dominates all copied blocks. That means, dominance relations in the
526       // copied body are the same as in the original body.
527       if (DT) {
528         if (*BB == Header)
529           DT->addNewBlock(New, Latches[It - 1]);
530         else {
531           auto BBDomNode = DT->getNode(*BB);
532           auto BBIDom = BBDomNode->getIDom();
533           BasicBlock *OriginalBBIDom = BBIDom->getBlock();
534           DT->addNewBlock(
535               New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
536         }
537       }
538     }
539 
540     // Remap all instructions in the most recent iteration
541     for (BasicBlock *NewBlock : NewBlocks) {
542       for (Instruction &I : *NewBlock) {
543         ::remapInstruction(&I, LastValueMap);
544         if (auto *II = dyn_cast<IntrinsicInst>(&I))
545           if (II->getIntrinsicID() == Intrinsic::assume)
546             AC->registerAssumption(II);
547       }
548     }
549   }
550 
551   // Loop over the PHI nodes in the original block, setting incoming values.
552   for (PHINode *PN : OrigPHINode) {
553     if (CompletelyUnroll) {
554       PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
555       Header->getInstList().erase(PN);
556     }
557     else if (Count > 1) {
558       Value *InVal = PN->removeIncomingValue(LatchBlock, false);
559       // If this value was defined in the loop, take the value defined by the
560       // last iteration of the loop.
561       if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
562         if (L->contains(InValI))
563           InVal = LastValueMap[InVal];
564       }
565       assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
566       PN->addIncoming(InVal, Latches.back());
567     }
568   }
569 
570   // Now that all the basic blocks for the unrolled iterations are in place,
571   // set up the branches to connect them.
572   for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
573     // The original branch was replicated in each unrolled iteration.
574     BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
575 
576     // The branch destination.
577     unsigned j = (i + 1) % e;
578     BasicBlock *Dest = Headers[j];
579     bool NeedConditional = true;
580 
581     if (RuntimeTripCount && j != 0) {
582       NeedConditional = false;
583     }
584 
585     // For a complete unroll, make the last iteration end with a branch
586     // to the exit block.
587     if (CompletelyUnroll) {
588       if (j == 0)
589         Dest = LoopExit;
590       // If using trip count upper bound to completely unroll, we need to keep
591       // the conditional branch except the last one because the loop may exit
592       // after any iteration.
593       assert(NeedConditional &&
594              "NeedCondition cannot be modified by both complete "
595              "unrolling and runtime unrolling");
596       NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
597     } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
598       // If we know the trip count or a multiple of it, we can safely use an
599       // unconditional branch for some iterations.
600       NeedConditional = false;
601     }
602 
603     if (NeedConditional) {
604       // Update the conditional branch's successor for the following
605       // iteration.
606       Term->setSuccessor(!ContinueOnTrue, Dest);
607     } else {
608       // Remove phi operands at this loop exit
609       if (Dest != LoopExit) {
610         BasicBlock *BB = Latches[i];
611         for (BasicBlock *Succ: successors(BB)) {
612           if (Succ == Headers[i])
613             continue;
614           for (BasicBlock::iterator BBI = Succ->begin();
615                PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
616             Phi->removeIncomingValue(BB, false);
617           }
618         }
619       }
620       // Replace the conditional branch with an unconditional one.
621       BranchInst::Create(Dest, Term);
622       Term->eraseFromParent();
623     }
624   }
625 
626   // Update dominators of blocks we might reach through exits.
627   // Immediate dominator of such block might change, because we add more
628   // routes which can lead to the exit: we can now reach it from the copied
629   // iterations too.
630   if (DT && Count > 1) {
631     for (auto *BB : OriginalLoopBlocks) {
632       auto *BBDomNode = DT->getNode(BB);
633       SmallVector<BasicBlock *, 16> ChildrenToUpdate;
634       for (auto *ChildDomNode : BBDomNode->getChildren()) {
635         auto *ChildBB = ChildDomNode->getBlock();
636         if (!L->contains(ChildBB))
637           ChildrenToUpdate.push_back(ChildBB);
638       }
639       BasicBlock *NewIDom;
640       if (BB == LatchBlock) {
641         // The latch is special because we emit unconditional branches in
642         // some cases where the original loop contained a conditional branch.
643         // Since the latch is always at the bottom of the loop, if the latch
644         // dominated an exit before unrolling, the new dominator of that exit
645         // must also be a latch.  Specifically, the dominator is the first
646         // latch which ends in a conditional branch, or the last latch if
647         // there is no such latch.
648         NewIDom = Latches.back();
649         for (BasicBlock *IterLatch : Latches) {
650           TerminatorInst *Term = IterLatch->getTerminator();
651           if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
652             NewIDom = IterLatch;
653             break;
654           }
655         }
656       } else {
657         // The new idom of the block will be the nearest common dominator
658         // of all copies of the previous idom. This is equivalent to the
659         // nearest common dominator of the previous idom and the first latch,
660         // which dominates all copies of the previous idom.
661         NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
662       }
663       for (auto *ChildBB : ChildrenToUpdate)
664         DT->changeImmediateDominator(ChildBB, NewIDom);
665     }
666   }
667 
668   if (DT && UnrollVerifyDomtree)
669     DT->verifyDomTree();
670 
671   // Merge adjacent basic blocks, if possible.
672   SmallPtrSet<Loop *, 4> ForgottenLoops;
673   for (BasicBlock *Latch : Latches) {
674     BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
675     if (Term->isUnconditional()) {
676       BasicBlock *Dest = Term->getSuccessor(0);
677       if (BasicBlock *Fold =
678               foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) {
679         // Dest has been folded into Fold. Update our worklists accordingly.
680         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
681         UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
682                                              UnrolledLoopBlocks.end(), Dest),
683                                  UnrolledLoopBlocks.end());
684       }
685     }
686   }
687 
688   // Simplify any new induction variables in the partially unrolled loop.
689   if (SE && !CompletelyUnroll && Count > 1) {
690     SmallVector<WeakVH, 16> DeadInsts;
691     simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
692 
693     // Aggressively clean up dead instructions that simplifyLoopIVs already
694     // identified. Any remaining should be cleaned up below.
695     while (!DeadInsts.empty())
696       if (Instruction *Inst =
697               dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
698         RecursivelyDeleteTriviallyDeadInstructions(Inst);
699   }
700 
701   // At this point, the code is well formed.  We now do a quick sweep over the
702   // inserted code, doing constant propagation and dead code elimination as we
703   // go.
704   const DataLayout &DL = Header->getModule()->getDataLayout();
705   const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
706   for (BasicBlock *BB : NewLoopBlocks) {
707     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
708       Instruction *Inst = &*I++;
709 
710       if (Value *V = SimplifyInstruction(Inst, DL))
711         if (LI->replacementPreservesLCSSAForm(Inst, V))
712           Inst->replaceAllUsesWith(V);
713       if (isInstructionTriviallyDead(Inst))
714         BB->getInstList().erase(Inst);
715     }
716   }
717 
718   // TODO: after peeling or unrolling, previously loop variant conditions are
719   // likely to fold to constants, eagerly propagating those here will require
720   // fewer cleanup passes to be run.  Alternatively, a LoopEarlyCSE might be
721   // appropriate.
722 
723   NumCompletelyUnrolled += CompletelyUnroll;
724   ++NumUnrolled;
725 
726   Loop *OuterL = L->getParentLoop();
727   // Update LoopInfo if the loop is completely removed.
728   if (CompletelyUnroll)
729     LI->markAsRemoved(L);
730 
731   // After complete unrolling most of the blocks should be contained in OuterL.
732   // However, some of them might happen to be out of OuterL (e.g. if they
733   // precede a loop exit). In this case we might need to insert PHI nodes in
734   // order to preserve LCSSA form.
735   // We don't need to check this if we already know that we need to fix LCSSA
736   // form.
737   // TODO: For now we just recompute LCSSA for the outer loop in this case, but
738   // it should be possible to fix it in-place.
739   if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
740     NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
741 
742   // If we have a pass and a DominatorTree we should re-simplify impacted loops
743   // to ensure subsequent analyses can rely on this form. We want to simplify
744   // at least one layer outside of the loop that was unrolled so that any
745   // changes to the parent loop exposed by the unrolling are considered.
746   if (DT) {
747     if (OuterL) {
748       // OuterL includes all loops for which we can break loop-simplify, so
749       // it's sufficient to simplify only it (it'll recursively simplify inner
750       // loops too).
751       // TODO: That potentially might be compile-time expensive. We should try
752       // to fix the loop-simplified form incrementally.
753       simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
754 
755       // LCSSA must be performed on the outermost affected loop. The unrolled
756       // loop's last loop latch is guaranteed to be in the outermost loop after
757       // LoopInfo's been updated by markAsRemoved.
758       Loop *LatchLoop = LI->getLoopFor(Latches.back());
759       if (!OuterL->contains(LatchLoop))
760         while (OuterL->getParentLoop() != LatchLoop)
761           OuterL = OuterL->getParentLoop();
762 
763       if (NeedToFixLCSSA)
764         formLCSSARecursively(*OuterL, *DT, LI, SE);
765       else
766         assert(OuterL->isLCSSAForm(*DT) &&
767                "Loops should be in LCSSA form after loop-unroll.");
768     } else {
769       // Simplify loops for which we might've broken loop-simplify form.
770       for (Loop *SubLoop : LoopsToSimplify)
771         simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
772     }
773   }
774 
775   return true;
776 }
777 
778 /// Given an llvm.loop loop id metadata node, returns the loop hint metadata
779 /// node with the given name (for example, "llvm.loop.unroll.count"). If no
780 /// such metadata node exists, then nullptr is returned.
781 MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
782   // First operand should refer to the loop id itself.
783   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
784   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
785 
786   for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
787     MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
788     if (!MD)
789       continue;
790 
791     MDString *S = dyn_cast<MDString>(MD->getOperand(0));
792     if (!S)
793       continue;
794 
795     if (Name.equals(S->getString()))
796       return MD;
797   }
798   return nullptr;
799 }
800