1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlan.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Analysis/IVDescriptors.h"
19 #include "llvm/IR/BasicBlock.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Type.h"
24 #include "llvm/IR/Value.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
30 #include <cassert>
31 
32 using namespace llvm;
33 
34 extern cl::opt<bool> EnableVPlanNativePath;
35 
36 #define LV_NAME "loop-vectorize"
37 #define DEBUG_TYPE LV_NAME
38 
// Conservatively returns true if this recipe may write to memory. Recipe
// kinds without an explicit case are assumed to write (the safe default).
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory instruction writes iff it is a store.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Replicated instructions and widened calls defer to the underlying IR
    // instruction's memory behavior.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds never write; in asserts builds double-check that the
    // underlying instruction (if any) agrees.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}
69 
// Conservatively returns true if this recipe may read from memory. Mirrors
// mayWriteToMemory: unhandled recipe kinds are assumed to read.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory instruction reads iff it is a load (i.e. not a store).
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying IR instruction's memory behavior.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipe kinds never read; in asserts builds double-check that the
    // underlying instruction (if any) agrees.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}
100 
// Conservatively returns true if this recipe may have side effects (beyond
// reading/writing memory). Unhandled recipe kinds are assumed to have them.
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    // Side-effect free recipe kinds; in asserts builds double-check that the
    // underlying instruction (if any) agrees.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    // A replicated instruction has side effects iff its IR instruction does.
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}
128 
129 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
130   auto Lane = VPLane::getLastLaneForVF(State.VF);
131   VPValue *ExitValue = getOperand(0);
132   if (Plan.isUniformAfterVectorization(ExitValue))
133     Lane = VPLane::getFirstLane();
134   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
135                    State.Builder.GetInsertBlock());
136 }
137 
138 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
139   assert(!Parent && "Recipe already in some VPBasicBlock");
140   assert(InsertPos->getParent() &&
141          "Insertion position not in any VPBasicBlock");
142   Parent = InsertPos->getParent();
143   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
144 }
145 
146 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
147                                 iplist<VPRecipeBase>::iterator I) {
148   assert(!Parent && "Recipe already in some VPBasicBlock");
149   assert(I == BB.end() || I->getParent() == &BB);
150   Parent = &BB;
151   BB.getRecipeList().insert(I, this);
152 }
153 
154 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
155   assert(!Parent && "Recipe already in some VPBasicBlock");
156   assert(InsertPos->getParent() &&
157          "Insertion position not in any VPBasicBlock");
158   Parent = InsertPos->getParent();
159   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
160 }
161 
162 void VPRecipeBase::removeFromParent() {
163   assert(getParent() && "Recipe not in any VPBasicBlock");
164   getParent()->getRecipeList().remove(getIterator());
165   Parent = nullptr;
166 }
167 
168 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
169   assert(getParent() && "Recipe not in any VPBasicBlock");
170   return getParent()->getRecipeList().erase(getIterator());
171 }
172 
// Move this recipe (already linked into some block) to directly after
// InsertPos, possibly in a different VPBasicBlock.
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}
177 
// Move this recipe (already linked into some block) to position I in BB,
// possibly a different VPBasicBlock.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}
183 
// Emit the IR for unroll part \p Part of this VPInstruction at the current
// insertion point of State.Builder, and record the result in State so later
// recipes can retrieve it per part.
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  // All binary opcodes share one lowering: fetch both widened operands and
  // emit the matching LLVM binary operator.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);

    // Emit llvm.get.active.lane.mask producing a <VF x i1> predicate.
    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      // Scalar case: the previous value is used directly, no splice needed.
      State.set(this, PartMinus1, Part);
    } else {
      // Vector case: splice the last element of the previous part with the
      // leading elements of the current part.
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    // Only part 0 computes the increment; all other parts reuse it, since the
    // canonical IV advances by VF * UF per vector iteration.
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    // The branch is emitted once, for part 0 only.
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    // Successor 0 (the forward edge) is filled in later; clear the
    // placeholder passed to CreateCondBr.
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    // The latch compare-and-branch is emitted once, for part 0 only.
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
324 
// Generate IR for this VPInstruction across all unroll parts. Fast-math
// flags are installed on the builder for the duration (restored by the
// guard), so FP ops created in generateInstruction pick them up.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}
332 
333 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
334 void VPInstruction::dump() const {
335   VPSlotTracker SlotTracker(getParent()->getPlan());
336   print(dbgs(), "", SlotTracker);
337 }
338 
// Print a textual form of this VPInstruction: "EMIT [<result> = ] <opcode>
// <operands> [, !dbg <loc>]". VPlan-specific opcodes get custom mnemonics;
// everything else falls back to the LLVM IR opcode name.
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    // Not a VPlan-specific opcode: print the LLVM IR opcode name.
    O << Instruction::getOpcodeName(getOpcode());
  }

  // Fast-math flags (empty string when none are set).
  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
395 #endif
396 
// Set the fast-math flags to use when emitting this instruction. Only valid
// for floating-point opcodes that can carry FMF.
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}
406 
407 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "WIDEN-CALL [<result> = |void ] call @callee(args)".
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}
424 
425 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
426                                 VPSlotTracker &SlotTracker) const {
427   O << Indent << "WIDEN-SELECT ";
428   printAsOperand(O, SlotTracker);
429   O << " = select ";
430   getOperand(0)->printAsOperand(O, SlotTracker);
431   O << ", ";
432   getOperand(1)->printAsOperand(O, SlotTracker);
433   O << ", ";
434   getOperand(2)->printAsOperand(O, SlotTracker);
435   O << (InvariantCond ? " (condition is loop invariant)" : "");
436 }
437 #endif
438 
// Widen the underlying select: emit one vector select per unroll part. A
// loop-invariant condition is materialized once from the first lane and
// reused for every part.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  auto &I = *cast<SelectInst>(getUnderlyingInstr());
  State.setDebugLocFromInst(&I);

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    // Propagate metadata from the original select onto the widened one.
    State.addMetadata(Sel, &I);
  }
}
459 
// Widen the underlying instruction by emitting one vector operation per
// unroll part. Handles unary/binary ops, freeze, compares and casts;
// calls/branches/phis/GEPs/selects have dedicated recipes and must not
// reach here.
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // Gather the widened operands for this part.
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        VecOp->copyIRFlags(&I);

        // If the instruction is vectorized and was in a basic block that needed
        // predication, we can't propagate poison-generating flags (nuw/nsw,
        // exact, etc.). The control flow has been linearized and the
        // instruction is no longer guarded by the predicate, which could make
        // the flag properties to no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }

    break;
  }
  case Instruction::Freeze: {
    State.setDebugLocFromInst(&I);

    // Freeze widens element-wise: freeze the whole vector per part.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }

    break;
  }

  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);

    /// Vectorize casts.
    // With a scalar VF the destination type stays scalar; otherwise cast to
    // the corresponding vector type.
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}
588 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "WIDEN <result> = <opcode> <operands>".
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
  printOperands(O, SlotTracker);
}
596 
// Print this recipe as "WIDEN-INDUCTION <iv>, <step>". When a truncate is
// involved, the escaped "\l" line breaks keep the multi-line form readable
// in DOT graph output.
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\"  ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
611 #endif
612 
613 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
614   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
615   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
616   return StartC && StartC->isZero() && StepC && StepC->isOne();
617 }
618 
// The canonical IV phi is always this recipe's first operand.
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
  return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}
622 
623 bool VPScalarIVStepsRecipe::isCanonical() const {
624   auto *CanIV = getCanonicalIV();
625   // The start value of the steps-recipe must match the start value of the
626   // canonical induction and it must step by 1.
627   if (CanIV->getStartValue() != getStartValue())
628     return false;
629   auto *StepVPV = getStepValue();
630   if (StepVPV->getDef())
631     return false;
632   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
633   return StepC && StepC->isOne();
634 }
635 
636 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
637 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
638                                   VPSlotTracker &SlotTracker) const {
639   O << Indent;
640   printAsOperand(O, SlotTracker);
641   O << Indent << "= SCALAR-STEPS ";
642   printOperands(O, SlotTracker);
643 }
644 
// Print this recipe as "WIDEN-GEP Inv|Var[Inv|Var]... <result> =
// getelementptr <operands>", marking the pointer and each index as loop
// invariant or variant.
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (IsPtrLoopInvariant ? "Inv" : "Var");
  size_t IndicesNumber = IsIndexLoopInvariant.size();
  for (size_t I = 0; I < IndicesNumber; ++I)
    O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr ";
  printOperands(O, SlotTracker);
}
658 
// Print this recipe as "BLEND <phi> = <value>/<mask> ...", one value/mask
// pair per incoming edge; a single incoming value prints without a mask.
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
678 
// Print this recipe as "REDUCE <result> = <chain> + reduce.<op> (<vec>[,
// <cond>])", including fast-math flags for FP reductions and a note when the
// final value is stored outside the loop.
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    // Conditional (masked) reduction: also print the condition operand.
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
699 
// Print this recipe as "CLONE" (uniform) or "REPLICATE", followed by the
// cloned instruction and its operands; "(S->V)" marks a scalar that is also
// packed into a vector.
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    // For calls, print only the arguments; the last operand (presumably the
    // callee, printed separately above) is skipped.
    O << "call @" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
    printOperands(O, SlotTracker);
  }

  if (AlsoPack)
    O << " (S->V)";
}
723 
// Print this recipe as "PHI-PREDICATED-INSTRUCTION <result> = <operands>".
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}
731 
// Print this recipe as "WIDEN [<result> = ] load|store <operands>"; only
// loads produce a result to print.
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
744 #endif
745 
// Create the scalar phi for the canonical induction at the start of the
// vector loop header. Only the start incoming value is wired up here; the
// backedge value (the increment) is added when the corresponding
// CanonicalIVIncrement instruction executes. All parts share one phi.
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}
757 
758 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <result> = CANONICAL-INDUCTION".
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
765 #endif
766 
// Returns true if this pointer induction will only produce scalar values:
// all users must consume scalars, and for scalable VFs additionally only the
// first lane may be used (otherwise a vector of lanes is still required).
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  bool IsUniform = vputils::onlyFirstLaneUsed(this);
  return all_of(users(),
                [&](const VPUser *U) { return U->usesScalars(this); }) &&
         (IsUniform || !VF.isScalable());
}
773 
774 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <result> = WIDEN-POINTER-INDUCTION <start>,
// <step SCEV>".
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
783 #endif
784 
// Expand the wrapped SCEV expression to IR at the current insertion point.
// The expanded value is loop invariant, so the same IR value is registered
// for every unroll part.
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}
796 
797 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <result> = EXPAND SCEV <expr>".
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
804 #endif
805 
// Materialize a widened canonical IV: per part, splat the scalar canonical
// IV and add the per-part offset plus a step vector, yielding the vector of
// lane indices <iv+Part*VF+0, iv+Part*VF+1, ...>.
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // Offset of this part's first lane: Part * VF.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      // Splat the offset and add <0, 1, 2, ...> to number the lanes.
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}
825 
826 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <result> = WIDEN-CANONICAL-INDUCTION
// <operands>".
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
834 #endif
835 
// Create the header phi for a first-order recurrence. For vector VFs the
// initial value is placed in the last lane of an otherwise-poison vector
// (matching the splice performed by FirstOrderRecurrenceSplice). Only the
// preheader incoming value is added here; the backedge value is fixed up
// later.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    // Insert the start value into the last lane; the insert is emitted in the
    // preheader so it dominates the loop.
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}
863 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a human-readable form of this recipe, e.g.:
///   FIRST-ORDER-RECURRENCE-PHI vp<%2> = phi ir<%start>, ir<%prev>
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif
873 
/// Generate the phi(s) for a reduction: a header phi per unroll part (or a
/// single one for ordered reductions), seeded with the start value in part 0
/// and the reduction identity in all other parts.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  // In-loop reductions operate on scalar values, so they keep a scalar phi
  // even when vectorizing.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions only get a phi for the first unroll part; the remaining
  // parts chain off it serially rather than carrying their own recurrence.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity: taking
    // min/max with the start value leaves it unchanged.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      // Splat the start value in the preheader; it serves as both the
      // identity and the part-0 incoming value.
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Note: the identity splat is emitted at the current insert point
      // (before the guard takes effect); only the start-value insertion is
      // placed in the preheader.
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      // Lane 0 carries the start value; the remaining lanes hold the
      // identity so they do not perturb the final reduction.
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
937 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a human-readable form of this recipe, e.g.:
///   WIDEN-REDUCTION-PHI vp<%2> = phi ir<%start>, ir<%rdx>
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif
948 
949 void VPWidenPHIRecipe::execute(VPTransformState &State) {
950   assert(EnableVPlanNativePath &&
951          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
952 
953   // Currently we enter here in the VPlan-native path for non-induction
954   // PHIs where all control flow is uniform. We simply widen these PHIs.
955   // Create a vector phi with no operands - the vector phi operands will be
956   // set at the end of vector code generation.
957   VPBasicBlock *Parent = getParent();
958   VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
959   unsigned StartIdx = 0;
960   // For phis in header blocks of loop regions, use the index of the value
961   // coming from the preheader.
962   if (LoopRegion->getEntryBasicBlock() == Parent) {
963     for (unsigned I = 0; I < getNumOperands(); ++I) {
964       if (getIncomingBlock(I) ==
965           LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
966         StartIdx = I;
967     }
968   }
969   Value *Op0 = State.get(getOperand(StartIdx), 0);
970   Type *VecTy = Op0->getType();
971   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
972   State.set(this, VecPhi, 0);
973 }
974 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a human-readable form of this recipe, e.g.:
///   WIDEN-PHI vp<%2> = phi vp<%0>, vp<%1>
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan  print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif
995