1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/IVDescriptors.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Instructions.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/GenericDomTreeConstruction.h"
40 #include "llvm/Support/GraphWriter.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
43 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44 #include <cassert>
45 #include <string>
46 #include <vector>
47 
48 using namespace llvm;
49 extern cl::opt<bool> EnableVPlanNativePath;
50 
51 #define DEBUG_TYPE "vplan"
52 
53 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
54 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
55   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
56   VPSlotTracker SlotTracker(
57       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
58   V.print(OS, SlotTracker);
59   return OS;
60 }
61 #endif
62 
63 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
64                                 const ElementCount &VF) const {
65   switch (LaneKind) {
66   case VPLane::Kind::ScalableLast:
67     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
68     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
69                              Builder.getInt32(VF.getKnownMinValue() - Lane));
70   case VPLane::Kind::First:
71     return Builder.getInt32(Lane);
72   }
73   llvm_unreachable("Unknown lane kind");
74 }
75 
76 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
77     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
78   if (Def)
79     Def->addDefinedValue(this);
80 }
81 
82 VPValue::~VPValue() {
83   assert(Users.empty() && "trying to delete a VPValue with remaining users");
84   if (Def)
85     Def->removeDefinedValue(this);
86 }
87 
88 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
89 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
90   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
91     R->print(OS, "", SlotTracker);
92   else
93     printAsOperand(OS, SlotTracker);
94 }
95 
96 void VPValue::dump() const {
97   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
98   VPSlotTracker SlotTracker(
99       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
100   print(dbgs(), SlotTracker);
101   dbgs() << "\n";
102 }
103 
104 void VPDef::dump() const {
105   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
106   VPSlotTracker SlotTracker(
107       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
108   print(dbgs(), "", SlotTracker);
109   dbgs() << "\n";
110 }
111 #endif
112 
113 // Get the top-most entry block of \p Start. This is the entry block of the
114 // containing VPlan. This function is templated to support both const and non-const blocks
115 template <typename T> static T *getPlanEntry(T *Start) {
116   T *Next = Start;
117   T *Current = Start;
118   while ((Next = Next->getParent()))
119     Current = Next;
120 
121   SmallSetVector<T *, 8> WorkList;
122   WorkList.insert(Current);
123 
124   for (unsigned i = 0; i < WorkList.size(); i++) {
125     T *Current = WorkList[i];
126     if (Current->getNumPredecessors() == 0)
127       return Current;
128     auto &Predecessors = Current->getPredecessors();
129     WorkList.insert(Predecessors.begin(), Predecessors.end());
130   }
131 
132   llvm_unreachable("VPlan without any entry node without predecessors");
133 }
134 
135 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
136 
137 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
138 
139 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
140 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
141   const VPBlockBase *Block = this;
142   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
143     Block = Region->getEntry();
144   return cast<VPBasicBlock>(Block);
145 }
146 
147 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
148   VPBlockBase *Block = this;
149   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
150     Block = Region->getEntry();
151   return cast<VPBasicBlock>(Block);
152 }
153 
154 void VPBlockBase::setPlan(VPlan *ParentPlan) {
155   assert(ParentPlan->getEntry() == this &&
156          "Can only set plan on its entry block.");
157   Plan = ParentPlan;
158 }
159 
160 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
161 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
162   const VPBlockBase *Block = this;
163   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
164     Block = Region->getExiting();
165   return cast<VPBasicBlock>(Block);
166 }
167 
168 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
169   VPBlockBase *Block = this;
170   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
171     Block = Region->getExiting();
172   return cast<VPBasicBlock>(Block);
173 }
174 
175 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
176   if (!Successors.empty() || !Parent)
177     return this;
178   assert(Parent->getExiting() == this &&
179          "Block w/o successors not the exiting block of its parent.");
180   return Parent->getEnclosingBlockWithSuccessors();
181 }
182 
183 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
184   if (!Predecessors.empty() || !Parent)
185     return this;
186   assert(Parent->getEntry() == this &&
187          "Block w/o predecessors not the entry of its parent.");
188   return Parent->getEnclosingBlockWithPredecessors();
189 }
190 
191 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
192   SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
193 
194   for (VPBlockBase *Block : Blocks)
195     delete Block;
196 }
197 
198 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
199   iterator It = begin();
200   while (It != end() && It->isPhi())
201     It++;
202   return It;
203 }
204 
205 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
206   if (!Def->getDef())
207     return Def->getLiveInIRValue();
208 
209   if (hasScalarValue(Def, Instance)) {
210     return Data
211         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
212   }
213 
214   assert(hasVectorValue(Def, Instance.Part));
215   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
216   if (!VecPart->getType()->isVectorTy()) {
217     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
218     return VecPart;
219   }
220   // TODO: Cache created scalar values.
221   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
222   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
223   // set(Def, Extract, Instance);
224   return Extract;
225 }
226 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
227   VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
228   return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
229 }
230 
231 BasicBlock *
232 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
233   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
234   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
235   BasicBlock *PrevBB = CFG.PrevBB;
236   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
237                                          PrevBB->getParent(), CFG.ExitBB);
238   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
239 
240   // Hook up the new basic block to its predecessors.
241   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
242     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
243     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
244     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
245 
246     assert(PredBB && "Predecessor basic-block not found building successor.");
247     auto *PredBBTerminator = PredBB->getTerminator();
248     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
249 
250     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
251     if (isa<UnreachableInst>(PredBBTerminator)) {
252       assert(PredVPSuccessors.size() == 1 &&
253              "Predecessor ending w/o branch must have single successor.");
254       DebugLoc DL = PredBBTerminator->getDebugLoc();
255       PredBBTerminator->eraseFromParent();
256       auto *Br = BranchInst::Create(NewBB, PredBB);
257       Br->setDebugLoc(DL);
258     } else if (TermBr && !TermBr->isConditional()) {
259       TermBr->setSuccessor(0, NewBB);
260     } else {
261       // Set each forward successor here when it is created, excluding
262       // backedges. A backward successor is set when the branch is created.
263       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
264       assert(!TermBr->getSuccessor(idx) &&
265              "Trying to reset an existing successor block.");
266       TermBr->setSuccessor(idx, NewBB);
267     }
268   }
269   return NewBB;
270 }
271 
272 void VPBasicBlock::execute(VPTransformState *State) {
273   bool Replica = State->Instance && !State->Instance->isFirstIteration();
274   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
275   VPBlockBase *SingleHPred = nullptr;
276   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
277 
278   auto IsLoopRegion = [](VPBlockBase *BB) {
279     auto *R = dyn_cast<VPRegionBlock>(BB);
280     return R && !R->isReplicator();
281   };
282 
283   // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
284   if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
285     // ExitBB can be re-used for the exit block of the Plan.
286     NewBB = State->CFG.ExitBB;
287     State->CFG.PrevBB = NewBB;
288 
289     // Update the branch instruction in the predecessor to branch to ExitBB.
290     VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
291     VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
292     assert(PredVPB->getSingleSuccessor() == this &&
293            "predecessor must have the current block as only successor");
294     BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
295     // The Exit block of a loop is always set to be successor 0 of the Exiting
296     // block.
297     cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
298   } else if (PrevVPBB && /* A */
299              !((SingleHPred = getSingleHierarchicalPredecessor()) &&
300                SingleHPred->getExitingBasicBlock() == PrevVPBB &&
301                PrevVPBB->getSingleHierarchicalSuccessor() &&
302                (SingleHPred->getParent() == getEnclosingLoopRegion() &&
303                 !IsLoopRegion(SingleHPred))) &&         /* B */
304              !(Replica && getPredecessors().empty())) { /* C */
305     // The last IR basic block is reused, as an optimization, in three cases:
306     // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
307     // B. when the current VPBB has a single (hierarchical) predecessor which
308     //    is PrevVPBB and the latter has a single (hierarchical) successor which
309     //    both are in the same non-replicator region; and
310     // C. when the current VPBB is an entry of a region replica - where PrevVPBB
311     //    is the exiting VPBB of this region from a previous instance, or the
312     //    predecessor of this region.
313 
314     NewBB = createEmptyBasicBlock(State->CFG);
315     State->Builder.SetInsertPoint(NewBB);
316     // Temporarily terminate with unreachable until CFG is rewired.
317     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
318     // Register NewBB in its loop. In innermost loops its the same for all
319     // BB's.
320     if (State->CurrentVectorLoop)
321       State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
322     State->Builder.SetInsertPoint(Terminator);
323     State->CFG.PrevBB = NewBB;
324   }
325 
326   // 2. Fill the IR basic block with IR instructions.
327   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
328                     << " in BB:" << NewBB->getName() << '\n');
329 
330   State->CFG.VPBB2IRBB[this] = NewBB;
331   State->CFG.PrevVPBB = this;
332 
333   for (VPRecipeBase &Recipe : Recipes)
334     Recipe.execute(*State);
335 
336   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
337 }
338 
339 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
340   for (VPRecipeBase &R : Recipes) {
341     for (auto *Def : R.definedValues())
342       Def->replaceAllUsesWith(NewValue);
343 
344     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
345       R.setOperand(I, NewValue);
346   }
347 }
348 
349 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
350   assert((SplitAt == end() || SplitAt->getParent() == this) &&
351          "can only split at a position in the same block");
352 
353   SmallVector<VPBlockBase *, 2> Succs(successors());
354   // First, disconnect the current block from its successors.
355   for (VPBlockBase *Succ : Succs)
356     VPBlockUtils::disconnectBlocks(this, Succ);
357 
358   // Create new empty block after the block to split.
359   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
360   VPBlockUtils::insertBlockAfter(SplitBlock, this);
361 
362   // Add successors for block to split to new block.
363   for (VPBlockBase *Succ : Succs)
364     VPBlockUtils::connectBlocks(SplitBlock, Succ);
365 
366   // Finally, move the recipes starting at SplitAt to new block.
367   for (VPRecipeBase &ToMove :
368        make_early_inc_range(make_range(SplitAt, this->end())))
369     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
370 
371   return SplitBlock;
372 }
373 
374 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
375   VPRegionBlock *P = getParent();
376   if (P && P->isReplicator()) {
377     P = P->getParent();
378     assert(!cast<VPRegionBlock>(P)->isReplicator() &&
379            "unexpected nested replicate regions");
380   }
381   return P;
382 }
383 
384 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
385   if (VPBB->empty()) {
386     assert(
387         VPBB->getNumSuccessors() < 2 &&
388         "block with multiple successors doesn't have a recipe as terminator");
389     return false;
390   }
391 
392   const VPRecipeBase *R = &VPBB->back();
393   auto *VPI = dyn_cast<VPInstruction>(R);
394   bool IsCondBranch =
395       isa<VPBranchOnMaskRecipe>(R) ||
396       (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
397                VPI->getOpcode() == VPInstruction::BranchOnCount));
398   (void)IsCondBranch;
399 
400   if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
401     assert(IsCondBranch && "block with multiple successors not terminated by "
402                            "conditional branch recipe");
403 
404     return true;
405   }
406 
407   assert(
408       !IsCondBranch &&
409       "block with 0 or 1 successors terminated by conditional branch recipe");
410   return false;
411 }
412 
413 VPRecipeBase *VPBasicBlock::getTerminator() {
414   if (hasConditionalTerminator(this))
415     return &back();
416   return nullptr;
417 }
418 
419 const VPRecipeBase *VPBasicBlock::getTerminator() const {
420   if (hasConditionalTerminator(this))
421     return &back();
422   return nullptr;
423 }
424 
425 bool VPBasicBlock::isExiting() const {
426   return getParent()->getExitingBasicBlock() == this;
427 }
428 
429 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
430 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
431   if (getSuccessors().empty()) {
432     O << Indent << "No successors\n";
433   } else {
434     O << Indent << "Successor(s): ";
435     ListSeparator LS;
436     for (auto *Succ : getSuccessors())
437       O << LS << Succ->getName();
438     O << '\n';
439   }
440 }
441 
442 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
443                          VPSlotTracker &SlotTracker) const {
444   O << Indent << getName() << ":\n";
445 
446   auto RecipeIndent = Indent + "  ";
447   for (const VPRecipeBase &Recipe : *this) {
448     Recipe.print(O, RecipeIndent, SlotTracker);
449     O << '\n';
450   }
451 
452   printSuccessors(O, Indent);
453 }
454 #endif
455 
456 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
457   for (VPBlockBase *Block : depth_first(Entry))
458     // Drop all references in VPBasicBlocks and replace all uses with
459     // DummyValue.
460     Block->dropAllReferences(NewValue);
461 }
462 
463 void VPRegionBlock::execute(VPTransformState *State) {
464   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
465 
466   if (!isReplicator()) {
467     // Create and register the new vector loop.
468     Loop *PrevLoop = State->CurrentVectorLoop;
469     State->CurrentVectorLoop = State->LI->AllocateLoop();
470     BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
471     Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
472 
473     // Insert the new loop into the loop nest and register the new basic blocks
474     // before calling any utilities such as SCEV that require valid LoopInfo.
475     if (ParentLoop)
476       ParentLoop->addChildLoop(State->CurrentVectorLoop);
477     else
478       State->LI->addTopLevelLoop(State->CurrentVectorLoop);
479 
480     // Visit the VPBlocks connected to "this", starting from it.
481     for (VPBlockBase *Block : RPOT) {
482       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
483       Block->execute(State);
484     }
485 
486     State->CurrentVectorLoop = PrevLoop;
487     return;
488   }
489 
490   assert(!State->Instance && "Replicating a Region with non-null instance.");
491 
492   // Enter replicating mode.
493   State->Instance = VPIteration(0, 0);
494 
495   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
496     State->Instance->Part = Part;
497     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
498     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
499          ++Lane) {
500       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
501       // Visit the VPBlocks connected to \p this, starting from it.
502       for (VPBlockBase *Block : RPOT) {
503         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
504         Block->execute(State);
505       }
506     }
507   }
508 
509   // Exit replicating mode.
510   State->Instance.reset();
511 }
512 
513 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
514 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
515                           VPSlotTracker &SlotTracker) const {
516   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
517   auto NewIndent = Indent + "  ";
518   for (auto *BlockBase : depth_first(Entry)) {
519     O << '\n';
520     BlockBase->print(O, NewIndent, SlotTracker);
521   }
522   O << Indent << "}\n";
523 
524   printSuccessors(O, Indent);
525 }
526 #endif
527 
528 bool VPRecipeBase::mayWriteToMemory() const {
529   switch (getVPDefID()) {
530   case VPWidenMemoryInstructionSC: {
531     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
532   }
533   case VPReplicateSC:
534   case VPWidenCallSC:
535     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
536         ->mayWriteToMemory();
537   case VPBranchOnMaskSC:
538     return false;
539   case VPWidenIntOrFpInductionSC:
540   case VPWidenCanonicalIVSC:
541   case VPWidenPHISC:
542   case VPBlendSC:
543   case VPWidenSC:
544   case VPWidenGEPSC:
545   case VPReductionSC:
546   case VPWidenSelectSC: {
547     const Instruction *I =
548         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
549     (void)I;
550     assert((!I || !I->mayWriteToMemory()) &&
551            "underlying instruction may write to memory");
552     return false;
553   }
554   default:
555     return true;
556   }
557 }
558 
559 bool VPRecipeBase::mayReadFromMemory() const {
560   switch (getVPDefID()) {
561   case VPWidenMemoryInstructionSC: {
562     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
563   }
564   case VPReplicateSC:
565   case VPWidenCallSC:
566     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
567         ->mayReadFromMemory();
568   case VPBranchOnMaskSC:
569     return false;
570   case VPWidenIntOrFpInductionSC:
571   case VPWidenCanonicalIVSC:
572   case VPWidenPHISC:
573   case VPBlendSC:
574   case VPWidenSC:
575   case VPWidenGEPSC:
576   case VPReductionSC:
577   case VPWidenSelectSC: {
578     const Instruction *I =
579         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
580     (void)I;
581     assert((!I || !I->mayReadFromMemory()) &&
582            "underlying instruction may read from memory");
583     return false;
584   }
585   default:
586     return true;
587   }
588 }
589 
590 bool VPRecipeBase::mayHaveSideEffects() const {
591   switch (getVPDefID()) {
592   case VPBranchOnMaskSC:
593     return false;
594   case VPWidenIntOrFpInductionSC:
595   case VPWidenPointerInductionSC:
596   case VPWidenCanonicalIVSC:
597   case VPWidenPHISC:
598   case VPBlendSC:
599   case VPWidenSC:
600   case VPWidenGEPSC:
601   case VPReductionSC:
602   case VPWidenSelectSC:
603   case VPScalarIVStepsSC: {
604     const Instruction *I =
605         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
606     (void)I;
607     assert((!I || !I->mayHaveSideEffects()) &&
608            "underlying instruction has side-effects");
609     return false;
610   }
611   case VPReplicateSC: {
612     auto *R = cast<VPReplicateRecipe>(this);
613     return R->getUnderlyingInstr()->mayHaveSideEffects();
614   }
615   default:
616     return true;
617   }
618 }
619 
620 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
621   auto Lane = VPLane::getLastLaneForVF(State.VF);
622   VPValue *ExitValue = getOperand(0);
623   if (Plan.isUniformAfterVectorization(ExitValue))
624     Lane = VPLane::getFirstLane();
625   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
626                    State.Builder.GetInsertBlock());
627 }
628 
629 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
630   assert(!Parent && "Recipe already in some VPBasicBlock");
631   assert(InsertPos->getParent() &&
632          "Insertion position not in any VPBasicBlock");
633   Parent = InsertPos->getParent();
634   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
635 }
636 
637 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
638                                 iplist<VPRecipeBase>::iterator I) {
639   assert(!Parent && "Recipe already in some VPBasicBlock");
640   assert(I == BB.end() || I->getParent() == &BB);
641   Parent = &BB;
642   BB.getRecipeList().insert(I, this);
643 }
644 
645 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
646   assert(!Parent && "Recipe already in some VPBasicBlock");
647   assert(InsertPos->getParent() &&
648          "Insertion position not in any VPBasicBlock");
649   Parent = InsertPos->getParent();
650   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
651 }
652 
653 void VPRecipeBase::removeFromParent() {
654   assert(getParent() && "Recipe not in any VPBasicBlock");
655   getParent()->getRecipeList().remove(getIterator());
656   Parent = nullptr;
657 }
658 
659 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
660   assert(getParent() && "Recipe not in any VPBasicBlock");
661   return getParent()->getRecipeList().erase(getIterator());
662 }
663 
664 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
665   removeFromParent();
666   insertAfter(InsertPos);
667 }
668 
669 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
670                               iplist<VPRecipeBase>::iterator I) {
671   removeFromParent();
672   insertBefore(BB, I);
673 }
674 
675 void VPInstruction::generateInstruction(VPTransformState &State,
676                                         unsigned Part) {
677   IRBuilderBase &Builder = State.Builder;
678   Builder.SetCurrentDebugLocation(DL);
679 
680   if (Instruction::isBinaryOp(getOpcode())) {
681     Value *A = State.get(getOperand(0), Part);
682     Value *B = State.get(getOperand(1), Part);
683     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
684     State.set(this, V, Part);
685     return;
686   }
687 
688   switch (getOpcode()) {
689   case VPInstruction::Not: {
690     Value *A = State.get(getOperand(0), Part);
691     Value *V = Builder.CreateNot(A);
692     State.set(this, V, Part);
693     break;
694   }
695   case VPInstruction::ICmpULE: {
696     Value *IV = State.get(getOperand(0), Part);
697     Value *TC = State.get(getOperand(1), Part);
698     Value *V = Builder.CreateICmpULE(IV, TC);
699     State.set(this, V, Part);
700     break;
701   }
702   case Instruction::Select: {
703     Value *Cond = State.get(getOperand(0), Part);
704     Value *Op1 = State.get(getOperand(1), Part);
705     Value *Op2 = State.get(getOperand(2), Part);
706     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
707     State.set(this, V, Part);
708     break;
709   }
710   case VPInstruction::ActiveLaneMask: {
711     // Get first lane of vector induction variable.
712     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
713     // Get the original loop tripcount.
714     Value *ScalarTC = State.get(getOperand(1), Part);
715 
716     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
717     auto *PredTy = VectorType::get(Int1Ty, State.VF);
718     Instruction *Call = Builder.CreateIntrinsic(
719         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
720         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
721     State.set(this, Call, Part);
722     break;
723   }
724   case VPInstruction::FirstOrderRecurrenceSplice: {
725     // Generate code to combine the previous and current values in vector v3.
726     //
727     //   vector.ph:
728     //     v_init = vector(..., ..., ..., a[-1])
729     //     br vector.body
730     //
731     //   vector.body
732     //     i = phi [0, vector.ph], [i+4, vector.body]
733     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
734     //     v2 = a[i, i+1, i+2, i+3];
735     //     v3 = vector(v1(3), v2(0, 1, 2))
736 
737     // For the first part, use the recurrence phi (v1), otherwise v2.
738     auto *V1 = State.get(getOperand(0), 0);
739     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
740     if (!PartMinus1->getType()->isVectorTy()) {
741       State.set(this, PartMinus1, Part);
742     } else {
743       Value *V2 = State.get(getOperand(1), Part);
744       State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
745     }
746     break;
747   }
748 
749   case VPInstruction::CanonicalIVIncrement:
750   case VPInstruction::CanonicalIVIncrementNUW: {
751     Value *Next = nullptr;
752     if (Part == 0) {
753       bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
754       auto *Phi = State.get(getOperand(0), 0);
755       // The loop step is equal to the vectorization factor (num of SIMD
756       // elements) times the unroll factor (num of SIMD instructions).
757       Value *Step =
758           createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
759       Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
760     } else {
761       Next = State.get(this, 0);
762     }
763 
764     State.set(this, Next, Part);
765     break;
766   }
767   case VPInstruction::BranchOnCond: {
768     if (Part != 0)
769       break;
770     Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
771     VPRegionBlock *ParentRegion = getParent()->getParent();
772     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
773 
774     // Replace the temporary unreachable terminator with a new conditional
775     // branch, hooking it up to backward destination for exiting blocks now and
776     // to forward destination(s) later when they are created.
777     BranchInst *CondBr =
778         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
779 
780     if (getParent()->isExiting())
781       CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
782 
783     CondBr->setSuccessor(0, nullptr);
784     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
785     break;
786   }
787   case VPInstruction::BranchOnCount: {
788     if (Part != 0)
789       break;
790     // First create the compare.
791     Value *IV = State.get(getOperand(0), Part);
792     Value *TC = State.get(getOperand(1), Part);
793     Value *Cond = Builder.CreateICmpEQ(IV, TC);
794 
795     // Now create the branch.
796     auto *Plan = getParent()->getPlan();
797     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
798     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
799 
800     // Replace the temporary unreachable terminator with a new conditional
801     // branch, hooking it up to backward destination (the header) now and to the
802     // forward destination (the exit/middle block) later when it is created.
803     // Note that CreateCondBr expects a valid BB as first argument, so we need
804     // to set it to nullptr later.
805     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
806                                               State.CFG.VPBB2IRBB[Header]);
807     CondBr->setSuccessor(0, nullptr);
808     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
809     break;
810   }
811   default:
812     llvm_unreachable("Unsupported opcode for instruction");
813   }
814 }
815 
816 void VPInstruction::execute(VPTransformState &State) {
817   assert(!State.Instance && "VPInstruction executing an Instance");
818   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
819   State.Builder.setFastMathFlags(FMF);
820   for (unsigned Part = 0; Part < State.UF; ++Part)
821     generateInstruction(State, Part);
822 }
823 
824 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
825 void VPInstruction::dump() const {
826   VPSlotTracker SlotTracker(getParent()->getPlan());
827   print(dbgs(), "", SlotTracker);
828 }
829 
830 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
831                           VPSlotTracker &SlotTracker) const {
832   O << Indent << "EMIT ";
833 
834   if (hasResult()) {
835     printAsOperand(O, SlotTracker);
836     O << " = ";
837   }
838 
839   switch (getOpcode()) {
840   case VPInstruction::Not:
841     O << "not";
842     break;
843   case VPInstruction::ICmpULE:
844     O << "icmp ule";
845     break;
846   case VPInstruction::SLPLoad:
847     O << "combined load";
848     break;
849   case VPInstruction::SLPStore:
850     O << "combined store";
851     break;
852   case VPInstruction::ActiveLaneMask:
853     O << "active lane mask";
854     break;
855   case VPInstruction::FirstOrderRecurrenceSplice:
856     O << "first-order splice";
857     break;
858   case VPInstruction::CanonicalIVIncrement:
859     O << "VF * UF + ";
860     break;
861   case VPInstruction::CanonicalIVIncrementNUW:
862     O << "VF * UF +(nuw) ";
863     break;
864   case VPInstruction::BranchOnCond:
865     O << "branch-on-cond";
866     break;
867   case VPInstruction::BranchOnCount:
868     O << "branch-on-count ";
869     break;
870   default:
871     O << Instruction::getOpcodeName(getOpcode());
872   }
873 
874   O << FMF;
875 
876   for (const VPValue *Operand : operands()) {
877     O << " ";
878     Operand->printAsOperand(O, SlotTracker);
879   }
880 
881   if (DL) {
882     O << ", !dbg ";
883     DL.print(O);
884   }
885 }
886 #endif
887 
888 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
889   // Make sure the VPInstruction is a floating-point operation.
890   assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
891           Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
892           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
893           Opcode == Instruction::FCmp) &&
894          "this op can't take fast-math flags");
895   FMF = FMFNew;
896 }
897 
898 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
899                              Value *CanonicalIVStartValue,
900                              VPTransformState &State) {
901   // Check if the trip count is needed, and if so build it.
902   if (TripCount && TripCount->getNumUsers()) {
903     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
904       State.set(TripCount, TripCountV, Part);
905   }
906 
907   // Check if the backedge taken count is needed, and if so build it.
908   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
909     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
910     auto *TCMO = Builder.CreateSub(TripCountV,
911                                    ConstantInt::get(TripCountV->getType(), 1),
912                                    "trip.count.minus.1");
913     auto VF = State.VF;
914     Value *VTCMO =
915         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
916     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
917       State.set(BackedgeTakenCount, VTCMO, Part);
918   }
919 
920   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
921     State.set(&VectorTripCount, VectorTripCountV, Part);
922 
923   // When vectorizing the epilogue loop, the canonical induction start value
924   // needs to be changed from zero to the value after the main vector loop.
925   if (CanonicalIVStartValue) {
926     VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
927     auto *IV = getCanonicalIV();
928     assert(all_of(IV->users(),
929                   [](const VPUser *U) {
930                     if (isa<VPScalarIVStepsRecipe>(U))
931                       return true;
932                     auto *VPI = cast<VPInstruction>(U);
933                     return VPI->getOpcode() ==
934                                VPInstruction::CanonicalIVIncrement ||
935                            VPI->getOpcode() ==
936                                VPInstruction::CanonicalIVIncrementNUW;
937                   }) &&
938            "the canonical IV should only be used by its increments or "
939            "ScalarIVSteps when "
940            "resetting the start value");
941     IV->setOperand(0, VPV);
942   }
943 }
944 
945 /// Generate the code inside the preheader and body of the vectorized loop.
946 /// Assumes a single pre-header basic-block was created for this. Introduce
947 /// additional basic-blocks as needed, and fill them all.
948 void VPlan::execute(VPTransformState *State) {
949   // Set the reverse mapping from VPValues to Values for code generation.
950   for (auto &Entry : Value2VPValue)
951     State->VPValue2Value[Entry.second] = Entry.first;
952 
953   // Initialize CFG state.
954   State->CFG.PrevVPBB = nullptr;
955   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
956   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
957   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
958 
959   // Generate code in the loop pre-header and body.
960   for (VPBlockBase *Block : depth_first(Entry))
961     Block->execute(State);
962 
963   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
964   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
965 
966   // Fix the latch value of canonical, reduction and first-order recurrences
967   // phis in the vector loop.
968   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
969   for (VPRecipeBase &R : Header->phis()) {
970     // Skip phi-like recipes that generate their backedege values themselves.
971     if (isa<VPWidenPHIRecipe>(&R))
972       continue;
973 
974     if (isa<VPWidenPointerInductionRecipe>(&R) ||
975         isa<VPWidenIntOrFpInductionRecipe>(&R)) {
976       PHINode *Phi = nullptr;
977       if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
978         Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
979       } else {
980         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
981         // TODO: Split off the case that all users of a pointer phi are scalar
982         // from the VPWidenPointerInductionRecipe.
983         if (WidenPhi->onlyScalarsGenerated(State->VF))
984           continue;
985 
986         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
987         Phi = cast<PHINode>(GEP->getPointerOperand());
988       }
989 
990       Phi->setIncomingBlock(1, VectorLatchBB);
991 
992       // Move the last step to the end of the latch block. This ensures
993       // consistent placement of all induction updates.
994       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
995       Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
996       continue;
997     }
998 
999     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1000     // For  canonical IV, first-order recurrences and in-order reduction phis,
1001     // only a single part is generated, which provides the last part from the
1002     // previous iteration. For non-ordered reductions all UF parts are
1003     // generated.
1004     bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
1005                             isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
1006                             cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
1007     unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
1008 
1009     for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1010       Value *Phi = State->get(PhiR, Part);
1011       Value *Val = State->get(PhiR->getBackedgeValue(),
1012                               SinglePartNeeded ? State->UF - 1 : Part);
1013       cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1014     }
1015   }
1016 
1017   // We do not attempt to preserve DT for outer loop vectorization currently.
1018   if (!EnableVPlanNativePath) {
1019     BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
1020     State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
1021     updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
1022                         State->CFG.ExitBB);
1023   }
1024 }
1025 
1026 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1027 LLVM_DUMP_METHOD
1028 void VPlan::print(raw_ostream &O) const {
1029   VPSlotTracker SlotTracker(this);
1030 
1031   O << "VPlan '" << Name << "' {";
1032 
1033   if (VectorTripCount.getNumUsers() > 0) {
1034     O << "\nLive-in ";
1035     VectorTripCount.printAsOperand(O, SlotTracker);
1036     O << " = vector-trip-count\n";
1037   }
1038 
1039   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1040     O << "\nLive-in ";
1041     BackedgeTakenCount->printAsOperand(O, SlotTracker);
1042     O << " = backedge-taken count\n";
1043   }
1044 
1045   for (const VPBlockBase *Block : depth_first(getEntry())) {
1046     O << '\n';
1047     Block->print(O, "", SlotTracker);
1048   }
1049 
1050   if (!LiveOuts.empty())
1051     O << "\n";
1052   for (auto &KV : LiveOuts) {
1053     O << "Live-out ";
1054     KV.second->getPhi()->printAsOperand(O);
1055     O << " = ";
1056     KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
1057     O << "\n";
1058   }
1059 
1060   O << "}\n";
1061 }
1062 
1063 LLVM_DUMP_METHOD
1064 void VPlan::printDOT(raw_ostream &O) const {
1065   VPlanPrinter Printer(O, *this);
1066   Printer.dump();
1067 }
1068 
1069 LLVM_DUMP_METHOD
1070 void VPlan::dump() const { print(dbgs()); }
1071 #endif
1072 
1073 void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1074   assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1075   LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1076 }
1077 
1078 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
1079                                 BasicBlock *LoopLatchBB,
1080                                 BasicBlock *LoopExitBB) {
1081   // The vector body may be more than a single basic-block by this point.
1082   // Update the dominator tree information inside the vector body by propagating
1083   // it from header to latch, expecting only triangular control-flow, if any.
1084   BasicBlock *PostDomSucc = nullptr;
1085   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
1086     // Get the list of successors of this block.
1087     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
1088     assert(Succs.size() <= 2 &&
1089            "Basic block in vector loop has more than 2 successors.");
1090     PostDomSucc = Succs[0];
1091     if (Succs.size() == 1) {
1092       assert(PostDomSucc->getSinglePredecessor() &&
1093              "PostDom successor has more than one predecessor.");
1094       DT->addNewBlock(PostDomSucc, BB);
1095       continue;
1096     }
1097     BasicBlock *InterimSucc = Succs[1];
1098     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
1099       PostDomSucc = Succs[1];
1100       InterimSucc = Succs[0];
1101     }
1102     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
1103            "One successor of a basic block does not lead to the other.");
1104     assert(InterimSucc->getSinglePredecessor() &&
1105            "Interim successor has more than one predecessor.");
1106     assert(PostDomSucc->hasNPredecessors(2) &&
1107            "PostDom successor has more than two predecessors.");
1108     DT->addNewBlock(InterimSucc, BB);
1109     DT->addNewBlock(PostDomSucc, BB);
1110   }
1111   // Latch block is a new dominator for the loop exit.
1112   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
1113   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
1114 }
1115 
1116 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1117 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1118   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1119          Twine(getOrCreateBID(Block));
1120 }
1121 
1122 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1123   const std::string &Name = Block->getName();
1124   if (!Name.empty())
1125     return Name;
1126   return "VPB" + Twine(getOrCreateBID(Block));
1127 }
1128 
1129 void VPlanPrinter::dump() {
1130   Depth = 1;
1131   bumpIndent(0);
1132   OS << "digraph VPlan {\n";
1133   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1134   if (!Plan.getName().empty())
1135     OS << "\\n" << DOT::EscapeString(Plan.getName());
1136   if (Plan.BackedgeTakenCount) {
1137     OS << ", where:\\n";
1138     Plan.BackedgeTakenCount->print(OS, SlotTracker);
1139     OS << " := BackedgeTakenCount";
1140   }
1141   OS << "\"]\n";
1142   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1143   OS << "edge [fontname=Courier, fontsize=30]\n";
1144   OS << "compound=true\n";
1145 
1146   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
1147     dumpBlock(Block);
1148 
1149   OS << "}\n";
1150 }
1151 
1152 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1153   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1154     dumpBasicBlock(BasicBlock);
1155   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1156     dumpRegion(Region);
1157   else
1158     llvm_unreachable("Unsupported kind of VPBlock.");
1159 }
1160 
1161 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1162                             bool Hidden, const Twine &Label) {
1163   // Due to "dot" we print an edge between two regions as an edge between the
1164   // exiting basic block and the entry basic of the respective regions.
1165   const VPBlockBase *Tail = From->getExitingBasicBlock();
1166   const VPBlockBase *Head = To->getEntryBasicBlock();
1167   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1168   OS << " [ label=\"" << Label << '\"';
1169   if (Tail != From)
1170     OS << " ltail=" << getUID(From);
1171   if (Head != To)
1172     OS << " lhead=" << getUID(To);
1173   if (Hidden)
1174     OS << "; splines=none";
1175   OS << "]\n";
1176 }
1177 
1178 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1179   auto &Successors = Block->getSuccessors();
1180   if (Successors.size() == 1)
1181     drawEdge(Block, Successors.front(), false, "");
1182   else if (Successors.size() == 2) {
1183     drawEdge(Block, Successors.front(), false, "T");
1184     drawEdge(Block, Successors.back(), false, "F");
1185   } else {
1186     unsigned SuccessorNumber = 0;
1187     for (auto *Successor : Successors)
1188       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1189   }
1190 }
1191 
1192 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1193   // Implement dot-formatted dump by performing plain-text dump into the
1194   // temporary storage followed by some post-processing.
1195   OS << Indent << getUID(BasicBlock) << " [label =\n";
1196   bumpIndent(1);
1197   std::string Str;
1198   raw_string_ostream SS(Str);
1199   // Use no indentation as we need to wrap the lines into quotes ourselves.
1200   BasicBlock->print(SS, "", SlotTracker);
1201 
1202   // We need to process each line of the output separately, so split
1203   // single-string plain-text dump.
1204   SmallVector<StringRef, 0> Lines;
1205   StringRef(Str).rtrim('\n').split(Lines, "\n");
1206 
1207   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1208     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1209   };
1210 
1211   // Don't need the "+" after the last line.
1212   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1213     EmitLine(Line, " +\n");
1214   EmitLine(Lines.back(), "\n");
1215 
1216   bumpIndent(-1);
1217   OS << Indent << "]\n";
1218 
1219   dumpEdges(BasicBlock);
1220 }
1221 
1222 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1223   OS << Indent << "subgraph " << getUID(Region) << " {\n";
1224   bumpIndent(1);
1225   OS << Indent << "fontname=Courier\n"
1226      << Indent << "label=\""
1227      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1228      << DOT::EscapeString(Region->getName()) << "\"\n";
1229   // Dump the blocks of the region.
1230   assert(Region->getEntry() && "Region contains no inner blocks.");
1231   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
1232     dumpBlock(Block);
1233   bumpIndent(-1);
1234   OS << Indent << "}\n";
1235   dumpEdges(Region);
1236 }
1237 
1238 void VPlanIngredient::print(raw_ostream &O) const {
1239   if (auto *Inst = dyn_cast<Instruction>(V)) {
1240     if (!Inst->getType()->isVoidTy()) {
1241       Inst->printAsOperand(O, false);
1242       O << " = ";
1243     }
1244     O << Inst->getOpcodeName() << " ";
1245     unsigned E = Inst->getNumOperands();
1246     if (E > 0) {
1247       Inst->getOperand(0)->printAsOperand(O, false);
1248       for (unsigned I = 1; I < E; ++I)
1249         Inst->getOperand(I)->printAsOperand(O << ", ", false);
1250     }
1251   } else // !Inst
1252     V->printAsOperand(O, false);
1253 }
1254 
1255 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
1256                               VPSlotTracker &SlotTracker) const {
1257   O << Indent << "WIDEN-CALL ";
1258 
1259   auto *CI = cast<CallInst>(getUnderlyingInstr());
1260   if (CI->getType()->isVoidTy())
1261     O << "void ";
1262   else {
1263     printAsOperand(O, SlotTracker);
1264     O << " = ";
1265   }
1266 
1267   O << "call @" << CI->getCalledFunction()->getName() << "(";
1268   printOperands(O, SlotTracker);
1269   O << ")";
1270 }
1271 
1272 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
1273                                 VPSlotTracker &SlotTracker) const {
1274   O << Indent << "WIDEN-SELECT ";
1275   printAsOperand(O, SlotTracker);
1276   O << " = select ";
1277   getOperand(0)->printAsOperand(O, SlotTracker);
1278   O << ", ";
1279   getOperand(1)->printAsOperand(O, SlotTracker);
1280   O << ", ";
1281   getOperand(2)->printAsOperand(O, SlotTracker);
1282   O << (InvariantCond ? " (condition is loop invariant)" : "");
1283 }
1284 
1285 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
1286                           VPSlotTracker &SlotTracker) const {
1287   O << Indent << "WIDEN ";
1288   printAsOperand(O, SlotTracker);
1289   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
1290   printOperands(O, SlotTracker);
1291 }
1292 
1293 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1294                                           VPSlotTracker &SlotTracker) const {
1295   O << Indent << "WIDEN-INDUCTION";
1296   if (getTruncInst()) {
1297     O << "\\l\"";
1298     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
1299     O << " +\n" << Indent << "\"  ";
1300     getVPValue(0)->printAsOperand(O, SlotTracker);
1301   } else
1302     O << " " << VPlanIngredient(IV);
1303 
1304   O << ", ";
1305   getStepValue()->printAsOperand(O, SlotTracker);
1306 }
1307 
1308 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1309                                           VPSlotTracker &SlotTracker) const {
1310   O << Indent << "EMIT ";
1311   printAsOperand(O, SlotTracker);
1312   O << " = WIDEN-POINTER-INDUCTION ";
1313   getStartValue()->printAsOperand(O, SlotTracker);
1314   O << ", " << *IndDesc.getStep();
1315 }
1316 
1317 #endif
1318 
1319 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
1320   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1321   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
1322   return StartC && StartC->isZero() && StepC && StepC->isOne();
1323 }
1324 
1325 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
1326   return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
1327 }
1328 
1329 bool VPScalarIVStepsRecipe::isCanonical() const {
1330   auto *CanIV = getCanonicalIV();
1331   // The start value of the steps-recipe must match the start value of the
1332   // canonical induction and it must step by 1.
1333   if (CanIV->getStartValue() != getStartValue())
1334     return false;
1335   auto *StepVPV = getStepValue();
1336   if (StepVPV->getDef())
1337     return false;
1338   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
1339   return StepC && StepC->isOne();
1340 }
1341 
1342 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1343 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
1344                                   VPSlotTracker &SlotTracker) const {
1345   O << Indent;
1346   printAsOperand(O, SlotTracker);
1347   O << Indent << "= SCALAR-STEPS ";
1348   printOperands(O, SlotTracker);
1349 }
1350 
1351 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
1352                              VPSlotTracker &SlotTracker) const {
1353   O << Indent << "WIDEN-GEP ";
1354   O << (IsPtrLoopInvariant ? "Inv" : "Var");
1355   size_t IndicesNumber = IsIndexLoopInvariant.size();
1356   for (size_t I = 0; I < IndicesNumber; ++I)
1357     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
1358 
1359   O << " ";
1360   printAsOperand(O, SlotTracker);
1361   O << " = getelementptr ";
1362   printOperands(O, SlotTracker);
1363 }
1364 
1365 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1366                              VPSlotTracker &SlotTracker) const {
1367   O << Indent << "WIDEN-PHI ";
1368 
1369   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1370   // Unless all incoming values are modeled in VPlan  print the original PHI
1371   // directly.
1372   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1373   // values as VPValues.
1374   if (getNumOperands() != OriginalPhi->getNumOperands()) {
1375     O << VPlanIngredient(OriginalPhi);
1376     return;
1377   }
1378 
1379   printAsOperand(O, SlotTracker);
1380   O << " = phi ";
1381   printOperands(O, SlotTracker);
1382 }
1383 
1384 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
1385                           VPSlotTracker &SlotTracker) const {
1386   O << Indent << "BLEND ";
1387   Phi->printAsOperand(O, false);
1388   O << " =";
1389   if (getNumIncomingValues() == 1) {
1390     // Not a User of any mask: not really blending, this is a
1391     // single-predecessor phi.
1392     O << " ";
1393     getIncomingValue(0)->printAsOperand(O, SlotTracker);
1394   } else {
1395     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1396       O << " ";
1397       getIncomingValue(I)->printAsOperand(O, SlotTracker);
1398       O << "/";
1399       getMask(I)->printAsOperand(O, SlotTracker);
1400     }
1401   }
1402 }
1403 
1404 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
1405                               VPSlotTracker &SlotTracker) const {
1406   O << Indent << "REDUCE ";
1407   printAsOperand(O, SlotTracker);
1408   O << " = ";
1409   getChainOp()->printAsOperand(O, SlotTracker);
1410   O << " +";
1411   if (isa<FPMathOperator>(getUnderlyingInstr()))
1412     O << getUnderlyingInstr()->getFastMathFlags();
1413   O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
1414   getVecOp()->printAsOperand(O, SlotTracker);
1415   if (getCondOp()) {
1416     O << ", ";
1417     getCondOp()->printAsOperand(O, SlotTracker);
1418   }
1419   O << ")";
1420   if (RdxDesc->IntermediateStore)
1421     O << " (with final reduction value stored in invariant address sank "
1422          "outside of loop)";
1423 }
1424 
1425 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
1426                               VPSlotTracker &SlotTracker) const {
1427   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1428 
1429   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1430     printAsOperand(O, SlotTracker);
1431     O << " = ";
1432   }
1433   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1434     O << "call @" << CB->getCalledFunction()->getName() << "(";
1435     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
1436                     O, [&O, &SlotTracker](VPValue *Op) {
1437                       Op->printAsOperand(O, SlotTracker);
1438                     });
1439     O << ")";
1440   } else {
1441     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
1442     printOperands(O, SlotTracker);
1443   }
1444 
1445   if (AlsoPack)
1446     O << " (S->V)";
1447 }
1448 
1449 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1450                                 VPSlotTracker &SlotTracker) const {
1451   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1452   printAsOperand(O, SlotTracker);
1453   O << " = ";
1454   printOperands(O, SlotTracker);
1455 }
1456 
1457 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
1458                                            VPSlotTracker &SlotTracker) const {
1459   O << Indent << "WIDEN ";
1460 
1461   if (!isStore()) {
1462     getVPSingleValue()->printAsOperand(O, SlotTracker);
1463     O << " = ";
1464   }
1465   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1466 
1467   printOperands(O, SlotTracker);
1468 }
1469 #endif
1470 
1471 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1472   Value *Start = getStartValue()->getLiveInIRValue();
1473   PHINode *EntryPart = PHINode::Create(
1474       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
1475 
1476   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1477   EntryPart->addIncoming(Start, VectorPH);
1478   EntryPart->setDebugLoc(DL);
1479   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1480     State.set(this, EntryPart, Part);
1481 }
1482 
1483 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1484 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1485                                    VPSlotTracker &SlotTracker) const {
1486   O << Indent << "EMIT ";
1487   printAsOperand(O, SlotTracker);
1488   O << " = CANONICAL-INDUCTION";
1489 }
1490 #endif
1491 
1492 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
1493   bool IsUniform = vputils::onlyFirstLaneUsed(this);
1494   return all_of(users(),
1495                 [&](const VPUser *U) { return U->usesScalars(this); }) &&
1496          (IsUniform || !VF.isScalable());
1497 }
1498 
1499 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1500   assert(!State.Instance && "cannot be used in per-lane");
1501   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1502   SCEVExpander Exp(SE, DL, "induction");
1503 
1504   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1505                                  &*State.Builder.GetInsertPoint());
1506 
1507   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1508     State.set(this, Res, Part);
1509 }
1510 
1511 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1512 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1513                                VPSlotTracker &SlotTracker) const {
1514   O << Indent << "EMIT ";
1515   getVPSingleValue()->printAsOperand(O, SlotTracker);
1516   O << " = EXPAND SCEV " << *Expr;
1517 }
1518 #endif
1519 
1520 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1521   Value *CanonicalIV = State.get(getOperand(0), 0);
1522   Type *STy = CanonicalIV->getType();
1523   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1524   ElementCount VF = State.VF;
1525   Value *VStart = VF.isScalar()
1526                       ? CanonicalIV
1527                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1528   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1529     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1530     if (VF.isVector()) {
1531       VStep = Builder.CreateVectorSplat(VF, VStep);
1532       VStep = Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1533     }
1534     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1535     State.set(this, CanonicalVectorIV, Part);
1536   }
1537 }
1538 
1539 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1540 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1541                                      VPSlotTracker &SlotTracker) const {
1542   O << Indent << "EMIT ";
1543   printAsOperand(O, SlotTracker);
1544   O << " = WIDEN-CANONICAL-INDUCTION ";
1545   printOperands(O, SlotTracker);
1546 }
1547 #endif
1548 
1549 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1550   auto &Builder = State.Builder;
1551   // Create a vector from the initial value.
1552   auto *VectorInit = getStartValue()->getLiveInIRValue();
1553 
1554   Type *VecTy = State.VF.isScalar()
1555                     ? VectorInit->getType()
1556                     : VectorType::get(VectorInit->getType(), State.VF);
1557 
1558   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1559   if (State.VF.isVector()) {
1560     auto *IdxTy = Builder.getInt32Ty();
1561     auto *One = ConstantInt::get(IdxTy, 1);
1562     IRBuilder<>::InsertPointGuard Guard(Builder);
1563     Builder.SetInsertPoint(VectorPH->getTerminator());
1564     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1565     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1566     VectorInit = Builder.CreateInsertElement(
1567         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1568   }
1569 
1570   // Create a phi node for the new recurrence.
1571   PHINode *EntryPart = PHINode::Create(
1572       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
1573   EntryPart->addIncoming(VectorInit, VectorPH);
1574   State.set(this, EntryPart, 0);
1575 }
1576 
1577 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1578 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1579                                             VPSlotTracker &SlotTracker) const {
1580   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1581   printAsOperand(O, SlotTracker);
1582   O << " = phi ";
1583   printOperands(O, SlotTracker);
1584 }
1585 #endif
1586 
1587 void VPReductionPHIRecipe::execute(VPTransformState &State) {
1588   PHINode *PN = cast<PHINode>(getUnderlyingValue());
1589   auto &Builder = State.Builder;
1590 
1591   // In order to support recurrences we need to be able to vectorize Phi nodes.
1592   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1593   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1594   // this value when we vectorize all of the instructions that use the PHI.
1595   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1596   Type *VecTy =
1597       ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
1598 
1599   BasicBlock *HeaderBB = State.CFG.PrevBB;
1600   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1601          "recipe must be in the vector loop header");
1602   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1603   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1604     Value *EntryPart =
1605         PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
1606     State.set(this, EntryPart, Part);
1607   }
1608 
1609   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1610 
1611   // Reductions do not have to start at zero. They can start with
1612   // any loop invariant values.
1613   VPValue *StartVPV = getStartValue();
1614   Value *StartV = StartVPV->getLiveInIRValue();
1615 
1616   Value *Iden = nullptr;
1617   RecurKind RK = RdxDesc.getRecurrenceKind();
1618   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1619       RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
1620     // MinMax reduction have the start value as their identify.
1621     if (ScalarPHI) {
1622       Iden = StartV;
1623     } else {
1624       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1625       Builder.SetInsertPoint(VectorPH->getTerminator());
1626       StartV = Iden =
1627           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1628     }
1629   } else {
1630     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1631                                          RdxDesc.getFastMathFlags());
1632 
1633     if (!ScalarPHI) {
1634       Iden = Builder.CreateVectorSplat(State.VF, Iden);
1635       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1636       Builder.SetInsertPoint(VectorPH->getTerminator());
1637       Constant *Zero = Builder.getInt32(0);
1638       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1639     }
1640   }
1641 
1642   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1643     Value *EntryPart = State.get(this, Part);
1644     // Make sure to add the reduction start value only to the
1645     // first unroll part.
1646     Value *StartVal = (Part == 0) ? StartV : Iden;
1647     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1648   }
1649 }
1650 
1651 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1652 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1653                                  VPSlotTracker &SlotTracker) const {
1654   O << Indent << "WIDEN-REDUCTION-PHI ";
1655 
1656   printAsOperand(O, SlotTracker);
1657   O << " = phi ";
1658   printOperands(O, SlotTracker);
1659 }
1660 #endif
1661 
1662 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1663 
1664 void VPValue::replaceAllUsesWith(VPValue *New) {
1665   for (unsigned J = 0; J < getNumUsers();) {
1666     VPUser *User = Users[J];
1667     unsigned NumUsers = getNumUsers();
1668     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1669       if (User->getOperand(I) == this)
1670         User->setOperand(I, New);
1671     // If a user got removed after updating the current user, the next user to
1672     // update will be moved to the current position, so we only need to
1673     // increment the index if the number of users did not change.
1674     if (NumUsers == getNumUsers())
1675       J++;
1676   }
1677 }
1678 
1679 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1680 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1681   if (const Value *UV = getUnderlyingValue()) {
1682     OS << "ir<";
1683     UV->printAsOperand(OS, false);
1684     OS << ">";
1685     return;
1686   }
1687 
1688   unsigned Slot = Tracker.getSlot(this);
1689   if (Slot == unsigned(-1))
1690     OS << "<badref>";
1691   else
1692     OS << "vp<%" << Tracker.getSlot(this) << ">";
1693 }
1694 
1695 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1696   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1697     Op->printAsOperand(O, SlotTracker);
1698   });
1699 }
1700 #endif
1701 
1702 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1703                                           Old2NewTy &Old2New,
1704                                           InterleavedAccessInfo &IAI) {
1705   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1706   for (VPBlockBase *Base : RPOT) {
1707     visitBlock(Base, Old2New, IAI);
1708   }
1709 }
1710 
1711 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1712                                          InterleavedAccessInfo &IAI) {
1713   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1714     for (VPRecipeBase &VPI : *VPBB) {
1715       if (isa<VPHeaderPHIRecipe>(&VPI))
1716         continue;
1717       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1718       auto *VPInst = cast<VPInstruction>(&VPI);
1719       auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
1720       auto *IG = IAI.getInterleaveGroup(Inst);
1721       if (!IG)
1722         continue;
1723 
1724       auto NewIGIter = Old2New.find(IG);
1725       if (NewIGIter == Old2New.end())
1726         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1727             IG->getFactor(), IG->isReverse(), IG->getAlign());
1728 
1729       if (Inst == IG->getInsertPos())
1730         Old2New[IG]->setInsertPos(VPInst);
1731 
1732       InterleaveGroupMap[VPInst] = Old2New[IG];
1733       InterleaveGroupMap[VPInst]->insertMember(
1734           VPInst, IG->getIndex(Inst),
1735           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1736                                 : IG->getFactor()));
1737     }
1738   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1739     visitRegion(Region, Old2New, IAI);
1740   else
1741     llvm_unreachable("Unsupported kind of VPBlock.");
1742 }
1743 
1744 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1745                                                  InterleavedAccessInfo &IAI) {
1746   Old2NewTy Old2New;
1747   visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1748 }
1749 
1750 void VPSlotTracker::assignSlot(const VPValue *V) {
1751   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1752   Slots[V] = NextSlot++;
1753 }
1754 
1755 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1756 
1757   for (const auto &P : Plan.VPExternalDefs)
1758     assignSlot(P.second);
1759 
1760   assignSlot(&Plan.VectorTripCount);
1761   if (Plan.BackedgeTakenCount)
1762     assignSlot(Plan.BackedgeTakenCount);
1763 
1764   ReversePostOrderTraversal<
1765       VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
1766       RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
1767           Plan.getEntry()));
1768   for (const VPBasicBlock *VPBB :
1769        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1770     for (const VPRecipeBase &Recipe : *VPBB)
1771       for (VPValue *Def : Recipe.definedValues())
1772         assignSlot(Def);
1773 }
1774 
1775 bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1776   return all_of(Def->users(),
1777                 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1778 }
1779 
1780 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
1781                                                 ScalarEvolution &SE) {
1782   if (auto *E = dyn_cast<SCEVConstant>(Expr))
1783     return Plan.getOrAddExternalDef(E->getValue());
1784   if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1785     return Plan.getOrAddExternalDef(E->getValue());
1786 
1787   VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1788   VPValue *Step = new VPExpandSCEVRecipe(Expr, SE);
1789   Preheader->appendRecipe(cast<VPRecipeBase>(Step->getDef()));
1790   return Step;
1791 }
1792