1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/IVDescriptors.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/ErrorHandling.h"
40 #include "llvm/Support/GenericDomTreeConstruction.h"
41 #include "llvm/Support/GraphWriter.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
44 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
45 #include <cassert>
46 #include <iterator>
47 #include <string>
48 #include <vector>
49 
50 using namespace llvm;
51 extern cl::opt<bool> EnableVPlanNativePath;
52 
53 #define DEBUG_TYPE "vplan"
54 
55 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
56 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
57   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
58   VPSlotTracker SlotTracker(
59       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
60   V.print(OS, SlotTracker);
61   return OS;
62 }
63 #endif
64 
65 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
66                                 const ElementCount &VF) const {
67   switch (LaneKind) {
68   case VPLane::Kind::ScalableLast:
69     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
70     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
71                              Builder.getInt32(VF.getKnownMinValue() - Lane));
72   case VPLane::Kind::First:
73     return Builder.getInt32(Lane);
74   }
75   llvm_unreachable("Unknown lane kind");
76 }
77 
78 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
79     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
80   if (Def)
81     Def->addDefinedValue(this);
82 }
83 
84 VPValue::~VPValue() {
85   assert(Users.empty() && "trying to delete a VPValue with remaining users");
86   if (Def)
87     Def->removeDefinedValue(this);
88 }
89 
90 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
91 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
92   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
93     R->print(OS, "", SlotTracker);
94   else
95     printAsOperand(OS, SlotTracker);
96 }
97 
98 void VPValue::dump() const {
99   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
100   VPSlotTracker SlotTracker(
101       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
102   print(dbgs(), SlotTracker);
103   dbgs() << "\n";
104 }
105 
106 void VPDef::dump() const {
107   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
108   VPSlotTracker SlotTracker(
109       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
110   print(dbgs(), "", SlotTracker);
111   dbgs() << "\n";
112 }
113 #endif
114 
115 // Get the top-most entry block of \p Start. This is the entry block of the
116 // containing VPlan. This function is templated to support both const and non-const blocks
117 template <typename T> static T *getPlanEntry(T *Start) {
118   T *Next = Start;
119   T *Current = Start;
120   while ((Next = Next->getParent()))
121     Current = Next;
122 
123   SmallSetVector<T *, 8> WorkList;
124   WorkList.insert(Current);
125 
126   for (unsigned i = 0; i < WorkList.size(); i++) {
127     T *Current = WorkList[i];
128     if (Current->getNumPredecessors() == 0)
129       return Current;
130     auto &Predecessors = Current->getPredecessors();
131     WorkList.insert(Predecessors.begin(), Predecessors.end());
132   }
133 
134   llvm_unreachable("VPlan without any entry node without predecessors");
135 }
136 
137 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
138 
139 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
140 
141 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
142 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
143   const VPBlockBase *Block = this;
144   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
145     Block = Region->getEntry();
146   return cast<VPBasicBlock>(Block);
147 }
148 
149 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
150   VPBlockBase *Block = this;
151   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
152     Block = Region->getEntry();
153   return cast<VPBasicBlock>(Block);
154 }
155 
156 void VPBlockBase::setPlan(VPlan *ParentPlan) {
157   assert(ParentPlan->getEntry() == this &&
158          "Can only set plan on its entry block.");
159   Plan = ParentPlan;
160 }
161 
162 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
163 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
164   const VPBlockBase *Block = this;
165   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
166     Block = Region->getExit();
167   return cast<VPBasicBlock>(Block);
168 }
169 
170 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
171   VPBlockBase *Block = this;
172   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
173     Block = Region->getExit();
174   return cast<VPBasicBlock>(Block);
175 }
176 
177 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
178   if (!Successors.empty() || !Parent)
179     return this;
180   assert(Parent->getExit() == this &&
181          "Block w/o successors not the exit of its parent.");
182   return Parent->getEnclosingBlockWithSuccessors();
183 }
184 
185 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
186   if (!Predecessors.empty() || !Parent)
187     return this;
188   assert(Parent->getEntry() == this &&
189          "Block w/o predecessors not the entry of its parent.");
190   return Parent->getEnclosingBlockWithPredecessors();
191 }
192 
193 VPValue *VPBlockBase::getCondBit() {
194   return CondBitUser.getSingleOperandOrNull();
195 }
196 
197 const VPValue *VPBlockBase::getCondBit() const {
198   return CondBitUser.getSingleOperandOrNull();
199 }
200 
201 void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); }
202 
203 VPValue *VPBlockBase::getPredicate() {
204   return PredicateUser.getSingleOperandOrNull();
205 }
206 
207 const VPValue *VPBlockBase::getPredicate() const {
208   return PredicateUser.getSingleOperandOrNull();
209 }
210 
211 void VPBlockBase::setPredicate(VPValue *CV) {
212   PredicateUser.resetSingleOpUser(CV);
213 }
214 
215 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
216   SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
217 
218   for (VPBlockBase *Block : Blocks)
219     delete Block;
220 }
221 
222 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
223   iterator It = begin();
224   while (It != end() && It->isPhi())
225     It++;
226   return It;
227 }
228 
229 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
230   if (!Def->getDef())
231     return Def->getLiveInIRValue();
232 
233   if (hasScalarValue(Def, Instance)) {
234     return Data
235         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
236   }
237 
238   assert(hasVectorValue(Def, Instance.Part));
239   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
240   if (!VecPart->getType()->isVectorTy()) {
241     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
242     return VecPart;
243   }
244   // TODO: Cache created scalar values.
245   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
246   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
247   // set(Def, Extract, Instance);
248   return Extract;
249 }
250 
251 BasicBlock *
252 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
253   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
254   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
255   BasicBlock *PrevBB = CFG.PrevBB;
256   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
257                                          PrevBB->getParent(), CFG.LastBB);
258   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
259 
260   // Hook up the new basic block to its predecessors.
261   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
262     VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
263     auto &PredVPSuccessors = PredVPBB->getSuccessors();
264     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
265 
266     // In outer loop vectorization scenario, the predecessor BBlock may not yet
267     // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
268     // vectorization. We do not encounter this case in inner loop vectorization
269     // as we start out by building a loop skeleton with the vector loop header
270     // and latch blocks. As a result, we never enter this function for the
271     // header block in the non VPlan-native path.
272     if (!PredBB) {
273       assert(EnableVPlanNativePath &&
274              "Unexpected null predecessor in non VPlan-native path");
275       CFG.VPBBsToFix.push_back(PredVPBB);
276       continue;
277     }
278 
279     assert(PredBB && "Predecessor basic-block not found building successor.");
280     auto *PredBBTerminator = PredBB->getTerminator();
281     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
282     if (isa<UnreachableInst>(PredBBTerminator)) {
283       assert(PredVPSuccessors.size() == 1 &&
284              "Predecessor ending w/o branch must have single successor.");
285       PredBBTerminator->eraseFromParent();
286       BranchInst::Create(NewBB, PredBB);
287     } else {
288       assert(PredVPSuccessors.size() == 2 &&
289              "Predecessor ending with branch must have two successors.");
290       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
291       assert(!PredBBTerminator->getSuccessor(idx) &&
292              "Trying to reset an existing successor block.");
293       PredBBTerminator->setSuccessor(idx, NewBB);
294     }
295   }
296   return NewBB;
297 }
298 
299 void VPBasicBlock::execute(VPTransformState *State) {
300   bool Replica = State->Instance && !State->Instance->isFirstIteration();
301   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
302   VPBlockBase *SingleHPred = nullptr;
303   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
304 
305   // 1. Create an IR basic block, or reuse the last one if possible.
306   // The last IR basic block is reused, as an optimization, in three cases:
307   // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
308   // B. when the current VPBB has a single (hierarchical) predecessor which
309   //    is PrevVPBB and the latter has a single (hierarchical) successor; and
310   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
311   //    is the exit of this region from a previous instance, or the predecessor
312   //    of this region.
313   if (PrevVPBB && /* A */
314       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
315         SingleHPred->getExitBasicBlock() == PrevVPBB &&
316         PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
317       !(Replica && getPredecessors().empty())) {       /* C */
318     NewBB = createEmptyBasicBlock(State->CFG);
319     State->Builder.SetInsertPoint(NewBB);
320     // Temporarily terminate with unreachable until CFG is rewired.
321     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
322     State->Builder.SetInsertPoint(Terminator);
323     // Register NewBB in its loop. In innermost loops its the same for all BB's.
324     Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
325     L->addBasicBlockToLoop(NewBB, *State->LI);
326     State->CFG.PrevBB = NewBB;
327   }
328 
329   // 2. Fill the IR basic block with IR instructions.
330   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
331                     << " in BB:" << NewBB->getName() << '\n');
332 
333   State->CFG.VPBB2IRBB[this] = NewBB;
334   State->CFG.PrevVPBB = this;
335 
336   for (VPRecipeBase &Recipe : Recipes)
337     Recipe.execute(*State);
338 
339   VPValue *CBV;
340   if (EnableVPlanNativePath && (CBV = getCondBit())) {
341     assert(CBV->getUnderlyingValue() &&
342            "Unexpected null underlying value for condition bit");
343 
344     // Condition bit value in a VPBasicBlock is used as the branch selector. In
345     // the VPlan-native path case, since all branches are uniform we generate a
346     // branch instruction using the condition value from vector lane 0 and dummy
347     // successors. The successors are fixed later when the successor blocks are
348     // visited.
349     Value *NewCond = State->get(CBV, {0, 0});
350 
351     // Replace the temporary unreachable terminator with the new conditional
352     // branch.
353     auto *CurrentTerminator = NewBB->getTerminator();
354     assert(isa<UnreachableInst>(CurrentTerminator) &&
355            "Expected to replace unreachable terminator with conditional "
356            "branch.");
357     auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
358     CondBr->setSuccessor(0, nullptr);
359     ReplaceInstWithInst(CurrentTerminator, CondBr);
360   }
361 
362   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
363 }
364 
365 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
366   for (VPRecipeBase &R : Recipes) {
367     for (auto *Def : R.definedValues())
368       Def->replaceAllUsesWith(NewValue);
369 
370     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
371       R.setOperand(I, NewValue);
372   }
373 }
374 
375 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
376   assert((SplitAt == end() || SplitAt->getParent() == this) &&
377          "can only split at a position in the same block");
378 
379   SmallVector<VPBlockBase *, 2> Succs(successors());
380   // First, disconnect the current block from its successors.
381   for (VPBlockBase *Succ : Succs)
382     VPBlockUtils::disconnectBlocks(this, Succ);
383 
384   // Create new empty block after the block to split.
385   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
386   VPBlockUtils::insertBlockAfter(SplitBlock, this);
387 
388   // Add successors for block to split to new block.
389   for (VPBlockBase *Succ : Succs)
390     VPBlockUtils::connectBlocks(SplitBlock, Succ);
391 
392   // Finally, move the recipes starting at SplitAt to new block.
393   for (VPRecipeBase &ToMove :
394        make_early_inc_range(make_range(SplitAt, this->end())))
395     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
396 
397   return SplitBlock;
398 }
399 
400 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
401 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
402   if (getSuccessors().empty()) {
403     O << Indent << "No successors\n";
404   } else {
405     O << Indent << "Successor(s): ";
406     ListSeparator LS;
407     for (auto *Succ : getSuccessors())
408       O << LS << Succ->getName();
409     O << '\n';
410   }
411 }
412 
413 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
414                          VPSlotTracker &SlotTracker) const {
415   O << Indent << getName() << ":\n";
416   if (const VPValue *Pred = getPredicate()) {
417     O << Indent << "BlockPredicate:";
418     Pred->printAsOperand(O, SlotTracker);
419     if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
420       O << " (" << PredInst->getParent()->getName() << ")";
421     O << '\n';
422   }
423 
424   auto RecipeIndent = Indent + "  ";
425   for (const VPRecipeBase &Recipe : *this) {
426     Recipe.print(O, RecipeIndent, SlotTracker);
427     O << '\n';
428   }
429 
430   printSuccessors(O, Indent);
431 
432   if (const VPValue *CBV = getCondBit()) {
433     O << Indent << "CondBit: ";
434     CBV->printAsOperand(O, SlotTracker);
435     if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
436       O << " (" << CBI->getParent()->getName() << ")";
437     O << '\n';
438   }
439 }
440 #endif
441 
442 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
443   for (VPBlockBase *Block : depth_first(Entry))
444     // Drop all references in VPBasicBlocks and replace all uses with
445     // DummyValue.
446     Block->dropAllReferences(NewValue);
447 }
448 
449 void VPRegionBlock::execute(VPTransformState *State) {
450   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
451 
452   if (!isReplicator()) {
453     // Visit the VPBlocks connected to "this", starting from it.
454     for (VPBlockBase *Block : RPOT) {
455       if (EnableVPlanNativePath) {
456         // The inner loop vectorization path does not represent loop preheader
457         // and exit blocks as part of the VPlan. In the VPlan-native path, skip
458         // vectorizing loop preheader block. In future, we may replace this
459         // check with the check for loop preheader.
460         if (Block->getNumPredecessors() == 0)
461           continue;
462 
463         // Skip vectorizing loop exit block. In future, we may replace this
464         // check with the check for loop exit.
465         if (Block->getNumSuccessors() == 0)
466           continue;
467       }
468 
469       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
470       Block->execute(State);
471     }
472     return;
473   }
474 
475   assert(!State->Instance && "Replicating a Region with non-null instance.");
476 
477   // Enter replicating mode.
478   State->Instance = VPIteration(0, 0);
479 
480   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
481     State->Instance->Part = Part;
482     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
483     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
484          ++Lane) {
485       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
486       // Visit the VPBlocks connected to \p this, starting from it.
487       for (VPBlockBase *Block : RPOT) {
488         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
489         Block->execute(State);
490       }
491     }
492   }
493 
494   // Exit replicating mode.
495   State->Instance.reset();
496 }
497 
498 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
499 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
500                           VPSlotTracker &SlotTracker) const {
501   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
502   auto NewIndent = Indent + "  ";
503   for (auto *BlockBase : depth_first(Entry)) {
504     O << '\n';
505     BlockBase->print(O, NewIndent, SlotTracker);
506   }
507   O << Indent << "}\n";
508 
509   printSuccessors(O, Indent);
510 }
511 #endif
512 
513 bool VPRecipeBase::mayWriteToMemory() const {
514   switch (getVPDefID()) {
515   case VPWidenMemoryInstructionSC: {
516     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
517   }
518   case VPReplicateSC:
519   case VPWidenCallSC:
520     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
521         ->mayWriteToMemory();
522   case VPBranchOnMaskSC:
523     return false;
524   case VPWidenIntOrFpInductionSC:
525   case VPWidenCanonicalIVSC:
526   case VPWidenPHISC:
527   case VPBlendSC:
528   case VPWidenSC:
529   case VPWidenGEPSC:
530   case VPReductionSC:
531   case VPWidenSelectSC: {
532     const Instruction *I =
533         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
534     (void)I;
535     assert((!I || !I->mayWriteToMemory()) &&
536            "underlying instruction may write to memory");
537     return false;
538   }
539   default:
540     return true;
541   }
542 }
543 
544 bool VPRecipeBase::mayReadFromMemory() const {
545   switch (getVPDefID()) {
546   case VPWidenMemoryInstructionSC: {
547     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
548   }
549   case VPReplicateSC:
550   case VPWidenCallSC:
551     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
552         ->mayReadFromMemory();
553   case VPBranchOnMaskSC:
554     return false;
555   case VPWidenIntOrFpInductionSC:
556   case VPWidenCanonicalIVSC:
557   case VPWidenPHISC:
558   case VPBlendSC:
559   case VPWidenSC:
560   case VPWidenGEPSC:
561   case VPReductionSC:
562   case VPWidenSelectSC: {
563     const Instruction *I =
564         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
565     (void)I;
566     assert((!I || !I->mayReadFromMemory()) &&
567            "underlying instruction may read from memory");
568     return false;
569   }
570   default:
571     return true;
572   }
573 }
574 
575 bool VPRecipeBase::mayHaveSideEffects() const {
576   switch (getVPDefID()) {
577   case VPBranchOnMaskSC:
578     return false;
579   case VPWidenIntOrFpInductionSC:
580   case VPWidenCanonicalIVSC:
581   case VPWidenPHISC:
582   case VPBlendSC:
583   case VPWidenSC:
584   case VPWidenGEPSC:
585   case VPReductionSC:
586   case VPWidenSelectSC:
587   case VPScalarIVStepsSC: {
588     const Instruction *I =
589         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
590     (void)I;
591     assert((!I || !I->mayHaveSideEffects()) &&
592            "underlying instruction has side-effects");
593     return false;
594   }
595   case VPReplicateSC: {
596     auto *R = cast<VPReplicateRecipe>(this);
597     return R->getUnderlyingInstr()->mayHaveSideEffects();
598   }
599   default:
600     return true;
601   }
602 }
603 
604 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
605   assert(!Parent && "Recipe already in some VPBasicBlock");
606   assert(InsertPos->getParent() &&
607          "Insertion position not in any VPBasicBlock");
608   Parent = InsertPos->getParent();
609   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
610 }
611 
612 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
613                                 iplist<VPRecipeBase>::iterator I) {
614   assert(!Parent && "Recipe already in some VPBasicBlock");
615   assert(I == BB.end() || I->getParent() == &BB);
616   Parent = &BB;
617   BB.getRecipeList().insert(I, this);
618 }
619 
620 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
621   assert(!Parent && "Recipe already in some VPBasicBlock");
622   assert(InsertPos->getParent() &&
623          "Insertion position not in any VPBasicBlock");
624   Parent = InsertPos->getParent();
625   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
626 }
627 
628 void VPRecipeBase::removeFromParent() {
629   assert(getParent() && "Recipe not in any VPBasicBlock");
630   getParent()->getRecipeList().remove(getIterator());
631   Parent = nullptr;
632 }
633 
634 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
635   assert(getParent() && "Recipe not in any VPBasicBlock");
636   return getParent()->getRecipeList().erase(getIterator());
637 }
638 
639 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
640   removeFromParent();
641   insertAfter(InsertPos);
642 }
643 
644 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
645                               iplist<VPRecipeBase>::iterator I) {
646   removeFromParent();
647   insertBefore(BB, I);
648 }
649 
650 void VPInstruction::generateInstruction(VPTransformState &State,
651                                         unsigned Part) {
652   IRBuilderBase &Builder = State.Builder;
653   Builder.SetCurrentDebugLocation(DL);
654 
655   if (Instruction::isBinaryOp(getOpcode())) {
656     Value *A = State.get(getOperand(0), Part);
657     Value *B = State.get(getOperand(1), Part);
658     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
659     State.set(this, V, Part);
660     return;
661   }
662 
663   switch (getOpcode()) {
664   case VPInstruction::Not: {
665     Value *A = State.get(getOperand(0), Part);
666     Value *V = Builder.CreateNot(A);
667     State.set(this, V, Part);
668     break;
669   }
670   case VPInstruction::ICmpULE: {
671     Value *IV = State.get(getOperand(0), Part);
672     Value *TC = State.get(getOperand(1), Part);
673     Value *V = Builder.CreateICmpULE(IV, TC);
674     State.set(this, V, Part);
675     break;
676   }
677   case Instruction::Select: {
678     Value *Cond = State.get(getOperand(0), Part);
679     Value *Op1 = State.get(getOperand(1), Part);
680     Value *Op2 = State.get(getOperand(2), Part);
681     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
682     State.set(this, V, Part);
683     break;
684   }
685   case VPInstruction::ActiveLaneMask: {
686     // Get first lane of vector induction variable.
687     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
688     // Get the original loop tripcount.
689     Value *ScalarTC = State.get(getOperand(1), Part);
690 
691     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
692     auto *PredTy = VectorType::get(Int1Ty, State.VF);
693     Instruction *Call = Builder.CreateIntrinsic(
694         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
695         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
696     State.set(this, Call, Part);
697     break;
698   }
699   case VPInstruction::FirstOrderRecurrenceSplice: {
700     // Generate code to combine the previous and current values in vector v3.
701     //
702     //   vector.ph:
703     //     v_init = vector(..., ..., ..., a[-1])
704     //     br vector.body
705     //
706     //   vector.body
707     //     i = phi [0, vector.ph], [i+4, vector.body]
708     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
709     //     v2 = a[i, i+1, i+2, i+3];
710     //     v3 = vector(v1(3), v2(0, 1, 2))
711 
712     // For the first part, use the recurrence phi (v1), otherwise v2.
713     auto *V1 = State.get(getOperand(0), 0);
714     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
715     if (!PartMinus1->getType()->isVectorTy()) {
716       State.set(this, PartMinus1, Part);
717     } else {
718       Value *V2 = State.get(getOperand(1), Part);
719       State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
720     }
721     break;
722   }
723 
724   case VPInstruction::CanonicalIVIncrement:
725   case VPInstruction::CanonicalIVIncrementNUW: {
726     Value *Next = nullptr;
727     if (Part == 0) {
728       bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
729       auto *Phi = State.get(getOperand(0), 0);
730       // The loop step is equal to the vectorization factor (num of SIMD
731       // elements) times the unroll factor (num of SIMD instructions).
732       Value *Step =
733           createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
734       Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
735     } else {
736       Next = State.get(this, 0);
737     }
738 
739     State.set(this, Next, Part);
740     break;
741   }
742   case VPInstruction::BranchOnCount: {
743     if (Part != 0)
744       break;
745     // First create the compare.
746     Value *IV = State.get(getOperand(0), Part);
747     Value *TC = State.get(getOperand(1), Part);
748     Value *Cond = Builder.CreateICmpEQ(IV, TC);
749 
750     // Now create the branch.
751     auto *Plan = getParent()->getPlan();
752     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
753     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
754     if (Header->empty()) {
755       assert(EnableVPlanNativePath &&
756              "empty entry block only expected in VPlanNativePath");
757       Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
758     }
759     // TODO: Once the exit block is modeled in VPlan, use it instead of going
760     // through State.CFG.LastBB.
761     BasicBlock *Exit =
762         cast<BranchInst>(State.CFG.LastBB->getTerminator())->getSuccessor(0);
763 
764     Builder.CreateCondBr(Cond, Exit, State.CFG.VPBB2IRBB[Header]);
765     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
766     break;
767   }
768   default:
769     llvm_unreachable("Unsupported opcode for instruction");
770   }
771 }
772 
773 void VPInstruction::execute(VPTransformState &State) {
774   assert(!State.Instance && "VPInstruction executing an Instance");
775   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
776   State.Builder.setFastMathFlags(FMF);
777   for (unsigned Part = 0; Part < State.UF; ++Part)
778     generateInstruction(State, Part);
779 }
780 
781 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
782 void VPInstruction::dump() const {
783   VPSlotTracker SlotTracker(getParent()->getPlan());
784   print(dbgs(), "", SlotTracker);
785 }
786 
787 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
788                           VPSlotTracker &SlotTracker) const {
789   O << Indent << "EMIT ";
790 
791   if (hasResult()) {
792     printAsOperand(O, SlotTracker);
793     O << " = ";
794   }
795 
796   switch (getOpcode()) {
797   case VPInstruction::Not:
798     O << "not";
799     break;
800   case VPInstruction::ICmpULE:
801     O << "icmp ule";
802     break;
803   case VPInstruction::SLPLoad:
804     O << "combined load";
805     break;
806   case VPInstruction::SLPStore:
807     O << "combined store";
808     break;
809   case VPInstruction::ActiveLaneMask:
810     O << "active lane mask";
811     break;
812   case VPInstruction::FirstOrderRecurrenceSplice:
813     O << "first-order splice";
814     break;
815   case VPInstruction::CanonicalIVIncrement:
816     O << "VF * UF + ";
817     break;
818   case VPInstruction::CanonicalIVIncrementNUW:
819     O << "VF * UF +(nuw) ";
820     break;
821   case VPInstruction::BranchOnCount:
822     O << "branch-on-count ";
823     break;
824   default:
825     O << Instruction::getOpcodeName(getOpcode());
826   }
827 
828   O << FMF;
829 
830   for (const VPValue *Operand : operands()) {
831     O << " ";
832     Operand->printAsOperand(O, SlotTracker);
833   }
834 
835   if (DL) {
836     O << ", !dbg ";
837     DL.print(O);
838   }
839 }
840 #endif
841 
842 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
843   // Make sure the VPInstruction is a floating-point operation.
844   assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
845           Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
846           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
847           Opcode == Instruction::FCmp) &&
848          "this op can't take fast-math flags");
849   FMF = FMFNew;
850 }
851 
852 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
853                              Value *CanonicalIVStartValue,
854                              VPTransformState &State) {
855   // Check if the trip count is needed, and if so build it.
856   if (TripCount && TripCount->getNumUsers()) {
857     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
858       State.set(TripCount, TripCountV, Part);
859   }
860 
861   // Check if the backedge taken count is needed, and if so build it.
862   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
863     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
864     auto *TCMO = Builder.CreateSub(TripCountV,
865                                    ConstantInt::get(TripCountV->getType(), 1),
866                                    "trip.count.minus.1");
867     auto VF = State.VF;
868     Value *VTCMO =
869         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
870     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
871       State.set(BackedgeTakenCount, VTCMO, Part);
872   }
873 
874   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
875     State.set(&VectorTripCount, VectorTripCountV, Part);
876 
877   // When vectorizing the epilogue loop, the canonical induction start value
878   // needs to be changed from zero to the value after the main vector loop.
879   if (CanonicalIVStartValue) {
880     VPValue *VPV = new VPValue(CanonicalIVStartValue);
881     addExternalDef(VPV);
882     auto *IV = getCanonicalIV();
883     assert(all_of(IV->users(),
884                   [](const VPUser *U) {
885                     if (isa<VPScalarIVStepsRecipe>(U))
886                       return true;
887                     auto *VPI = cast<VPInstruction>(U);
888                     return VPI->getOpcode() ==
889                                VPInstruction::CanonicalIVIncrement ||
890                            VPI->getOpcode() ==
891                                VPInstruction::CanonicalIVIncrementNUW;
892                   }) &&
893            "the canonical IV should only be used by its increments or "
894            "ScalarIVSteps when "
895            "resetting the start value");
896     IV->setOperand(0, VPV);
897   }
898 }
899 
900 /// Generate the code inside the body of the vectorized loop. Assumes a single
901 /// LoopVectorBody basic-block was created for this. Introduce additional
902 /// basic-blocks as needed, and fill them all.
903 void VPlan::execute(VPTransformState *State) {
904   // 0. Set the reverse mapping from VPValues to Values for code generation.
905   for (auto &Entry : Value2VPValue)
906     State->VPValue2Value[Entry.second] = Entry.first;
907 
908   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
909   State->CFG.VectorPreHeader = VectorPreHeaderBB;
910   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
911   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
912 
913   // 1. Make room to generate basic-blocks inside loop body if needed.
914   BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
915       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
916   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
917   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
918   // Remove the edge between Header and Latch to allow other connections.
919   // Temporarily terminate with unreachable until CFG is rewired.
920   // Note: this asserts the generated code's assumption that
921   // getFirstInsertionPt() can be dereferenced into an Instruction.
922   VectorHeaderBB->getTerminator()->eraseFromParent();
923   State->Builder.SetInsertPoint(VectorHeaderBB);
924   UnreachableInst *Terminator = State->Builder.CreateUnreachable();
925   State->Builder.SetInsertPoint(Terminator);
926 
927   // 2. Generate code in loop body.
928   State->CFG.PrevVPBB = nullptr;
929   State->CFG.PrevBB = VectorHeaderBB;
930   State->CFG.LastBB = VectorLatchBB;
931 
932   for (VPBlockBase *Block : depth_first(Entry))
933     Block->execute(State);
934 
935   // Setup branch terminator successors for VPBBs in VPBBsToFix based on
936   // VPBB's successors.
937   for (auto VPBB : State->CFG.VPBBsToFix) {
938     assert(EnableVPlanNativePath &&
939            "Unexpected VPBBsToFix in non VPlan-native path");
940     BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
941     assert(BB && "Unexpected null basic block for VPBB");
942 
943     unsigned Idx = 0;
944     auto *BBTerminator = BB->getTerminator();
945 
946     for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
947       VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
948       BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
949       ++Idx;
950     }
951   }
952 
953   // 3. Merge the temporary latch created with the last basic-block filled.
954   BasicBlock *LastBB = State->CFG.PrevBB;
955   assert(isa<BranchInst>(LastBB->getTerminator()) &&
956          "Expected VPlan CFG to terminate with branch");
957 
958   // Move both the branch and check from LastBB to VectorLatchBB.
959   auto *LastBranch = cast<BranchInst>(LastBB->getTerminator());
960   LastBranch->moveBefore(VectorLatchBB->getTerminator());
961   VectorLatchBB->getTerminator()->eraseFromParent();
962   // Move condition so it is guaranteed to be next to branch. This is only done
963   // to avoid excessive test updates.
964   // TODO: Remove special handling once the increments for all inductions are
965   // modeled explicitly in VPlan.
966   cast<Instruction>(LastBranch->getCondition())->moveBefore(LastBranch);
967   // Connect LastBB to VectorLatchBB to facilitate their merge.
968   BranchInst::Create(VectorLatchBB, LastBB);
969 
970   // Merge LastBB with Latch.
971   bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
972   (void)Merged;
973   assert(Merged && "Could not merge last basic block with latch.");
974   VectorLatchBB = LastBB;
975 
976   // Fix the latch value of canonical, reduction and first-order recurrences
977   // phis in the vector loop.
978   VPBasicBlock *Header = Entry->getEntryBasicBlock();
979   if (Header->empty()) {
980     assert(EnableVPlanNativePath);
981     Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
982   }
983   for (VPRecipeBase &R : Header->phis()) {
984     // Skip phi-like recipes that generate their backedege values themselves.
985     // TODO: Model their backedge values explicitly.
986     if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
987       continue;
988 
989     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
990     // For  canonical IV, first-order recurrences and in-order reduction phis,
991     // only a single part is generated, which provides the last part from the
992     // previous iteration. For non-ordered reductions all UF parts are
993     // generated.
994     bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
995                             isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
996                             cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
997     unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
998 
999     for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1000       Value *Phi = State->get(PhiR, Part);
1001       Value *Val = State->get(PhiR->getBackedgeValue(),
1002                               SinglePartNeeded ? State->UF - 1 : Part);
1003       cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1004     }
1005   }
1006 
1007   // We do not attempt to preserve DT for outer loop vectorization currently.
1008   if (!EnableVPlanNativePath)
1009     updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
1010                         L->getExitBlock());
1011 }
1012 
1013 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1014 LLVM_DUMP_METHOD
1015 void VPlan::print(raw_ostream &O) const {
1016   VPSlotTracker SlotTracker(this);
1017 
1018   O << "VPlan '" << Name << "' {";
1019 
1020   if (VectorTripCount.getNumUsers() > 0) {
1021     O << "\nLive-in ";
1022     VectorTripCount.printAsOperand(O, SlotTracker);
1023     O << " = vector-trip-count\n";
1024   }
1025 
1026   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1027     O << "\nLive-in ";
1028     BackedgeTakenCount->printAsOperand(O, SlotTracker);
1029     O << " = backedge-taken count\n";
1030   }
1031 
1032   for (const VPBlockBase *Block : depth_first(getEntry())) {
1033     O << '\n';
1034     Block->print(O, "", SlotTracker);
1035   }
1036   O << "}\n";
1037 }
1038 
1039 LLVM_DUMP_METHOD
1040 void VPlan::printDOT(raw_ostream &O) const {
1041   VPlanPrinter Printer(O, *this);
1042   Printer.dump();
1043 }
1044 
1045 LLVM_DUMP_METHOD
1046 void VPlan::dump() const { print(dbgs()); }
1047 #endif
1048 
1049 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
1050                                 BasicBlock *LoopLatchBB,
1051                                 BasicBlock *LoopExitBB) {
1052   BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
1053   assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
1054   // The vector body may be more than a single basic-block by this point.
1055   // Update the dominator tree information inside the vector body by propagating
1056   // it from header to latch, expecting only triangular control-flow, if any.
1057   BasicBlock *PostDomSucc = nullptr;
1058   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
1059     // Get the list of successors of this block.
1060     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
1061     assert(Succs.size() <= 2 &&
1062            "Basic block in vector loop has more than 2 successors.");
1063     PostDomSucc = Succs[0];
1064     if (Succs.size() == 1) {
1065       assert(PostDomSucc->getSinglePredecessor() &&
1066              "PostDom successor has more than one predecessor.");
1067       DT->addNewBlock(PostDomSucc, BB);
1068       continue;
1069     }
1070     BasicBlock *InterimSucc = Succs[1];
1071     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
1072       PostDomSucc = Succs[1];
1073       InterimSucc = Succs[0];
1074     }
1075     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
1076            "One successor of a basic block does not lead to the other.");
1077     assert(InterimSucc->getSinglePredecessor() &&
1078            "Interim successor has more than one predecessor.");
1079     assert(PostDomSucc->hasNPredecessors(2) &&
1080            "PostDom successor has more than two predecessors.");
1081     DT->addNewBlock(InterimSucc, BB);
1082     DT->addNewBlock(PostDomSucc, BB);
1083   }
1084   // Latch block is a new dominator for the loop exit.
1085   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
1086   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
1087 }
1088 
1089 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1090 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1091   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1092          Twine(getOrCreateBID(Block));
1093 }
1094 
1095 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1096   const std::string &Name = Block->getName();
1097   if (!Name.empty())
1098     return Name;
1099   return "VPB" + Twine(getOrCreateBID(Block));
1100 }
1101 
1102 void VPlanPrinter::dump() {
1103   Depth = 1;
1104   bumpIndent(0);
1105   OS << "digraph VPlan {\n";
1106   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1107   if (!Plan.getName().empty())
1108     OS << "\\n" << DOT::EscapeString(Plan.getName());
1109   if (Plan.BackedgeTakenCount) {
1110     OS << ", where:\\n";
1111     Plan.BackedgeTakenCount->print(OS, SlotTracker);
1112     OS << " := BackedgeTakenCount";
1113   }
1114   OS << "\"]\n";
1115   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1116   OS << "edge [fontname=Courier, fontsize=30]\n";
1117   OS << "compound=true\n";
1118 
1119   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
1120     dumpBlock(Block);
1121 
1122   OS << "}\n";
1123 }
1124 
1125 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1126   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1127     dumpBasicBlock(BasicBlock);
1128   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1129     dumpRegion(Region);
1130   else
1131     llvm_unreachable("Unsupported kind of VPBlock.");
1132 }
1133 
1134 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1135                             bool Hidden, const Twine &Label) {
1136   // Due to "dot" we print an edge between two regions as an edge between the
1137   // exit basic block and the entry basic of the respective regions.
1138   const VPBlockBase *Tail = From->getExitBasicBlock();
1139   const VPBlockBase *Head = To->getEntryBasicBlock();
1140   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1141   OS << " [ label=\"" << Label << '\"';
1142   if (Tail != From)
1143     OS << " ltail=" << getUID(From);
1144   if (Head != To)
1145     OS << " lhead=" << getUID(To);
1146   if (Hidden)
1147     OS << "; splines=none";
1148   OS << "]\n";
1149 }
1150 
1151 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1152   auto &Successors = Block->getSuccessors();
1153   if (Successors.size() == 1)
1154     drawEdge(Block, Successors.front(), false, "");
1155   else if (Successors.size() == 2) {
1156     drawEdge(Block, Successors.front(), false, "T");
1157     drawEdge(Block, Successors.back(), false, "F");
1158   } else {
1159     unsigned SuccessorNumber = 0;
1160     for (auto *Successor : Successors)
1161       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1162   }
1163 }
1164 
1165 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1166   // Implement dot-formatted dump by performing plain-text dump into the
1167   // temporary storage followed by some post-processing.
1168   OS << Indent << getUID(BasicBlock) << " [label =\n";
1169   bumpIndent(1);
1170   std::string Str;
1171   raw_string_ostream SS(Str);
1172   // Use no indentation as we need to wrap the lines into quotes ourselves.
1173   BasicBlock->print(SS, "", SlotTracker);
1174 
1175   // We need to process each line of the output separately, so split
1176   // single-string plain-text dump.
1177   SmallVector<StringRef, 0> Lines;
1178   StringRef(Str).rtrim('\n').split(Lines, "\n");
1179 
1180   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1181     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1182   };
1183 
1184   // Don't need the "+" after the last line.
1185   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1186     EmitLine(Line, " +\n");
1187   EmitLine(Lines.back(), "\n");
1188 
1189   bumpIndent(-1);
1190   OS << Indent << "]\n";
1191 
1192   dumpEdges(BasicBlock);
1193 }
1194 
1195 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1196   OS << Indent << "subgraph " << getUID(Region) << " {\n";
1197   bumpIndent(1);
1198   OS << Indent << "fontname=Courier\n"
1199      << Indent << "label=\""
1200      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1201      << DOT::EscapeString(Region->getName()) << "\"\n";
1202   // Dump the blocks of the region.
1203   assert(Region->getEntry() && "Region contains no inner blocks.");
1204   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
1205     dumpBlock(Block);
1206   bumpIndent(-1);
1207   OS << Indent << "}\n";
1208   dumpEdges(Region);
1209 }
1210 
1211 void VPlanIngredient::print(raw_ostream &O) const {
1212   if (auto *Inst = dyn_cast<Instruction>(V)) {
1213     if (!Inst->getType()->isVoidTy()) {
1214       Inst->printAsOperand(O, false);
1215       O << " = ";
1216     }
1217     O << Inst->getOpcodeName() << " ";
1218     unsigned E = Inst->getNumOperands();
1219     if (E > 0) {
1220       Inst->getOperand(0)->printAsOperand(O, false);
1221       for (unsigned I = 1; I < E; ++I)
1222         Inst->getOperand(I)->printAsOperand(O << ", ", false);
1223     }
1224   } else // !Inst
1225     V->printAsOperand(O, false);
1226 }
1227 
1228 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
1229                               VPSlotTracker &SlotTracker) const {
1230   O << Indent << "WIDEN-CALL ";
1231 
1232   auto *CI = cast<CallInst>(getUnderlyingInstr());
1233   if (CI->getType()->isVoidTy())
1234     O << "void ";
1235   else {
1236     printAsOperand(O, SlotTracker);
1237     O << " = ";
1238   }
1239 
1240   O << "call @" << CI->getCalledFunction()->getName() << "(";
1241   printOperands(O, SlotTracker);
1242   O << ")";
1243 }
1244 
1245 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
1246                                 VPSlotTracker &SlotTracker) const {
1247   O << Indent << "WIDEN-SELECT ";
1248   printAsOperand(O, SlotTracker);
1249   O << " = select ";
1250   getOperand(0)->printAsOperand(O, SlotTracker);
1251   O << ", ";
1252   getOperand(1)->printAsOperand(O, SlotTracker);
1253   O << ", ";
1254   getOperand(2)->printAsOperand(O, SlotTracker);
1255   O << (InvariantCond ? " (condition is loop invariant)" : "");
1256 }
1257 
1258 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
1259                           VPSlotTracker &SlotTracker) const {
1260   O << Indent << "WIDEN ";
1261   printAsOperand(O, SlotTracker);
1262   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
1263   printOperands(O, SlotTracker);
1264 }
1265 
1266 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1267                                           VPSlotTracker &SlotTracker) const {
1268   O << Indent << "WIDEN-INDUCTION";
1269   if (getTruncInst()) {
1270     O << "\\l\"";
1271     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
1272     O << " +\n" << Indent << "\"  ";
1273     getVPValue(0)->printAsOperand(O, SlotTracker);
1274   } else
1275     O << " " << VPlanIngredient(IV);
1276 }
1277 #endif
1278 
1279 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
1280   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1281   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
1282   return StartC && StartC->isZero() && StepC && StepC->isOne();
1283 }
1284 
1285 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
1286   return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
1287 }
1288 
1289 bool VPScalarIVStepsRecipe::isCanonical() const {
1290   auto *CanIV = getCanonicalIV();
1291   // The start value of the steps-recipe must match the start value of the
1292   // canonical induction and it must step by 1.
1293   if (CanIV->getStartValue() != getStartValue())
1294     return false;
1295   auto *StepVPV = getStepValue();
1296   if (StepVPV->getDef())
1297     return false;
1298   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
1299   return StepC && StepC->isOne();
1300 }
1301 
1302 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1303 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1304                                   VPSlotTracker &SlotTracker) const {
1305   O << Indent;
1306   printAsOperand(O, SlotTracker);
1307   O << Indent << "= SCALAR-STEPS ";
1308   printOperands(O, SlotTracker);
1309 }
1310 
1311 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1312                              VPSlotTracker &SlotTracker) const {
1313   O << Indent << "WIDEN-GEP ";
1314   O << (IsPtrLoopInvariant ? "Inv" : "Var");
1315   size_t IndicesNumber = IsIndexLoopInvariant.size();
1316   for (size_t I = 0; I < IndicesNumber; ++I)
1317     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
1318 
1319   O << " ";
1320   printAsOperand(O, SlotTracker);
1321   O << " = getelementptr ";
1322   printOperands(O, SlotTracker);
1323 }
1324 
1325 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1326                              VPSlotTracker &SlotTracker) const {
1327   O << Indent << "WIDEN-PHI ";
1328 
1329   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1330   // Unless all incoming values are modeled in VPlan  print the original PHI
1331   // directly.
1332   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1333   // values as VPValues.
1334   if (getNumOperands() != OriginalPhi->getNumOperands()) {
1335     O << VPlanIngredient(OriginalPhi);
1336     return;
1337   }
1338 
1339   printAsOperand(O, SlotTracker);
1340   O << " = phi ";
1341   printOperands(O, SlotTracker);
1342 }
1343 
1344 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1345                           VPSlotTracker &SlotTracker) const {
1346   O << Indent << "BLEND ";
1347   Phi->printAsOperand(O, false);
1348   O << " =";
1349   if (getNumIncomingValues() == 1) {
1350     // Not a User of any mask: not really blending, this is a
1351     // single-predecessor phi.
1352     O << " ";
1353     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1354   } else {
1355     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1356       O << " ";
1357       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1358       O << "/";
1359       getMask(I)->printAsOperand(O, SlotTracker);
1360     }
1361   }
1362 }
1363 
1364 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1365                               VPSlotTracker &SlotTracker) const {
1366   O << Indent << "REDUCE ";
1367   printAsOperand(O, SlotTracker);
1368   O << " = ";
1369   getChainOp()->printAsOperand(O, SlotTracker);
1370   O << " +";
1371   if (isa<FPMathOperator>(getUnderlyingInstr()))
1372     O << getUnderlyingInstr()->getFastMathFlags();
1373   O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
1374   getVecOp()->printAsOperand(O, SlotTracker);
1375   if (getCondOp()) {
1376     O << ", ";
1377     getCondOp()->printAsOperand(O, SlotTracker);
1378   }
1379   O << ")";
1380 }
1381 
1382 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1383                               VPSlotTracker &SlotTracker) const {
1384   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1385 
1386   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1387     printAsOperand(O, SlotTracker);
1388     O << " = ";
1389   }
1390   O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
1391   printOperands(O, SlotTracker);
1392 
1393   if (AlsoPack)
1394     O << " (S->V)";
1395 }
1396 
1397 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1398                                 VPSlotTracker &SlotTracker) const {
1399   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1400   printAsOperand(O, SlotTracker);
1401   O << " = ";
1402   printOperands(O, SlotTracker);
1403 }
1404 
1405 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
1406                                            VPSlotTracker &SlotTracker) const {
1407   O << Indent << "WIDEN ";
1408 
1409   if (!isStore()) {
1410     printAsOperand(O, SlotTracker);
1411     O << " = ";
1412   }
1413   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1414 
1415   printOperands(O, SlotTracker);
1416 }
1417 #endif
1418 
1419 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1420   Value *Start = getStartValue()->getLiveInIRValue();
1421   PHINode *EntryPart = PHINode::Create(
1422       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
1423   EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
1424   EntryPart->setDebugLoc(DL);
1425   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1426     State.set(this, EntryPart, Part);
1427 }
1428 
1429 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1431                                    VPSlotTracker &SlotTracker) const {
1432   O << Indent << "EMIT ";
1433   printAsOperand(O, SlotTracker);
1434   O << " = CANONICAL-INDUCTION";
1435 }
1436 #endif
1437 
1438 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1439   assert(!State.Instance && "cannot be used in per-lane");
1440   const DataLayout &DL =
1441       State.CFG.VectorPreHeader->getModule()->getDataLayout();
1442   SCEVExpander Exp(SE, DL, "induction");
1443   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1444                                  State.CFG.VectorPreHeader->getTerminator());
1445 
1446   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1447     State.set(this, Res, Part);
1448 }
1449 
1450 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1451 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1452                                VPSlotTracker &SlotTracker) const {
1453   O << Indent << "EMIT ";
1454   getVPSingleValue()->printAsOperand(O, SlotTracker);
1455   O << " = EXPAND SCEV " << *Expr;
1456 }
1457 #endif
1458 
1459 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1460   Value *CanonicalIV = State.get(getOperand(0), 0);
1461   Type *STy = CanonicalIV->getType();
1462   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1463   ElementCount VF = State.VF;
1464   Value *VStart = VF.isScalar()
1465                       ? CanonicalIV
1466                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1467   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1468     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1469     if (VF.isVector()) {
1470       VStep = Builder.CreateVectorSplat(VF, VStep);
1471       VStep = Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1472     }
1473     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1474     State.set(this, CanonicalVectorIV, Part);
1475   }
1476 }
1477 
1478 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1479 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1480                                      VPSlotTracker &SlotTracker) const {
1481   O << Indent << "EMIT ";
1482   printAsOperand(O, SlotTracker);
1483   O << " = WIDEN-CANONICAL-INDUCTION ";
1484   printOperands(O, SlotTracker);
1485 }
1486 #endif
1487 
1488 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1489   auto &Builder = State.Builder;
1490   // Create a vector from the initial value.
1491   auto *VectorInit = getStartValue()->getLiveInIRValue();
1492 
1493   Type *VecTy = State.VF.isScalar()
1494                     ? VectorInit->getType()
1495                     : VectorType::get(VectorInit->getType(), State.VF);
1496 
1497   if (State.VF.isVector()) {
1498     auto *IdxTy = Builder.getInt32Ty();
1499     auto *One = ConstantInt::get(IdxTy, 1);
1500     IRBuilder<>::InsertPointGuard Guard(Builder);
1501     Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
1502     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1503     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1504     VectorInit = Builder.CreateInsertElement(
1505         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1506   }
1507 
1508   // Create a phi node for the new recurrence.
1509   PHINode *EntryPart = PHINode::Create(
1510       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
1511   EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
1512   State.set(this, EntryPart, 0);
1513 }
1514 
1515 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1516 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1517                                             VPSlotTracker &SlotTracker) const {
1518   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1519   printAsOperand(O, SlotTracker);
1520   O << " = phi ";
1521   printOperands(O, SlotTracker);
1522 }
1523 #endif
1524 
1525 void VPReductionPHIRecipe::execute(VPTransformState &State) {
1526   PHINode *PN = cast<PHINode>(getUnderlyingValue());
1527   auto &Builder = State.Builder;
1528 
1529   // In order to support recurrences we need to be able to vectorize Phi nodes.
1530   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1531   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1532   // this value when we vectorize all of the instructions that use the PHI.
1533   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1534   Type *VecTy =
1535       ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
1536 
1537   BasicBlock *HeaderBB = State.CFG.PrevBB;
1538   assert(State.LI->getLoopFor(HeaderBB)->getHeader() == HeaderBB &&
1539          "recipe must be in the vector loop header");
1540   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1541   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1542     Value *EntryPart =
1543         PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
1544     State.set(this, EntryPart, Part);
1545   }
1546 
1547   // Reductions do not have to start at zero. They can start with
1548   // any loop invariant values.
1549   VPValue *StartVPV = getStartValue();
1550   Value *StartV = StartVPV->getLiveInIRValue();
1551 
1552   Value *Iden = nullptr;
1553   RecurKind RK = RdxDesc.getRecurrenceKind();
1554   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1555       RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
1556     // MinMax reduction have the start value as their identify.
1557     if (ScalarPHI) {
1558       Iden = StartV;
1559     } else {
1560       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1561       Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
1562       StartV = Iden =
1563           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1564     }
1565   } else {
1566     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1567                                          RdxDesc.getFastMathFlags());
1568 
1569     if (!ScalarPHI) {
1570       Iden = Builder.CreateVectorSplat(State.VF, Iden);
1571       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1572       Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
1573       Constant *Zero = Builder.getInt32(0);
1574       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1575     }
1576   }
1577 
1578   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1579     Value *EntryPart = State.get(this, Part);
1580     // Make sure to add the reduction start value only to the
1581     // first unroll part.
1582     Value *StartVal = (Part == 0) ? StartV : Iden;
1583     cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
1584   }
1585 }
1586 
1587 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1588 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1589                                  VPSlotTracker &SlotTracker) const {
1590   O << Indent << "WIDEN-REDUCTION-PHI ";
1591 
1592   printAsOperand(O, SlotTracker);
1593   O << " = phi ";
1594   printOperands(O, SlotTracker);
1595 }
1596 #endif
1597 
1598 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1599 
1600 void VPValue::replaceAllUsesWith(VPValue *New) {
1601   for (unsigned J = 0; J < getNumUsers();) {
1602     VPUser *User = Users[J];
1603     unsigned NumUsers = getNumUsers();
1604     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1605       if (User->getOperand(I) == this)
1606         User->setOperand(I, New);
1607     // If a user got removed after updating the current user, the next user to
1608     // update will be moved to the current position, so we only need to
1609     // increment the index if the number of users did not change.
1610     if (NumUsers == getNumUsers())
1611       J++;
1612   }
1613 }
1614 
1615 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1616 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1617   if (const Value *UV = getUnderlyingValue()) {
1618     OS << "ir<";
1619     UV->printAsOperand(OS, false);
1620     OS << ">";
1621     return;
1622   }
1623 
1624   unsigned Slot = Tracker.getSlot(this);
1625   if (Slot == unsigned(-1))
1626     OS << "<badref>";
1627   else
1628     OS << "vp<%" << Tracker.getSlot(this) << ">";
1629 }
1630 
1631 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1632   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1633     Op->printAsOperand(O, SlotTracker);
1634   });
1635 }
1636 #endif
1637 
1638 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1639                                           Old2NewTy &Old2New,
1640                                           InterleavedAccessInfo &IAI) {
1641   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1642   for (VPBlockBase *Base : RPOT) {
1643     visitBlock(Base, Old2New, IAI);
1644   }
1645 }
1646 
1647 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1648                                          InterleavedAccessInfo &IAI) {
1649   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1650     for (VPRecipeBase &VPI : *VPBB) {
1651       if (isa<VPHeaderPHIRecipe>(&VPI))
1652         continue;
1653       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1654       auto *VPInst = cast<VPInstruction>(&VPI);
1655       auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
1656       auto *IG = IAI.getInterleaveGroup(Inst);
1657       if (!IG)
1658         continue;
1659 
1660       auto NewIGIter = Old2New.find(IG);
1661       if (NewIGIter == Old2New.end())
1662         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1663             IG->getFactor(), IG->isReverse(), IG->getAlign());
1664 
1665       if (Inst == IG->getInsertPos())
1666         Old2New[IG]->setInsertPos(VPInst);
1667 
1668       InterleaveGroupMap[VPInst] = Old2New[IG];
1669       InterleaveGroupMap[VPInst]->insertMember(
1670           VPInst, IG->getIndex(Inst),
1671           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1672                                 : IG->getFactor()));
1673     }
1674   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1675     visitRegion(Region, Old2New, IAI);
1676   else
1677     llvm_unreachable("Unsupported kind of VPBlock.");
1678 }
1679 
1680 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1681                                                  InterleavedAccessInfo &IAI) {
1682   Old2NewTy Old2New;
1683   visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
1684 }
1685 
1686 void VPSlotTracker::assignSlot(const VPValue *V) {
1687   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1688   Slots[V] = NextSlot++;
1689 }
1690 
1691 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1692 
1693   for (const VPValue *V : Plan.VPExternalDefs)
1694     assignSlot(V);
1695 
1696   assignSlot(&Plan.VectorTripCount);
1697   if (Plan.BackedgeTakenCount)
1698     assignSlot(Plan.BackedgeTakenCount);
1699 
1700   ReversePostOrderTraversal<
1701       VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
1702       RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
1703           Plan.getEntry()));
1704   for (const VPBasicBlock *VPBB :
1705        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1706     for (const VPRecipeBase &Recipe : *VPBB)
1707       for (VPValue *Def : Recipe.definedValues())
1708         assignSlot(Def);
1709 }
1710 
1711 bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1712   return all_of(Def->users(), [Def](VPUser *U) {
1713     return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
1714   });
1715 }
1716