1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlan.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Analysis/IVDescriptors.h"
19 #include "llvm/IR/BasicBlock.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Type.h"
24 #include "llvm/IR/Value.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
30 #include <cassert>
31 
32 using namespace llvm;
33 
34 extern cl::opt<bool> EnableVPlanNativePath;
35 
36 bool VPRecipeBase::mayWriteToMemory() const {
37   switch (getVPDefID()) {
38   case VPWidenMemoryInstructionSC: {
39     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
40   }
41   case VPReplicateSC:
42   case VPWidenCallSC:
43     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
44         ->mayWriteToMemory();
45   case VPBranchOnMaskSC:
46     return false;
47   case VPWidenIntOrFpInductionSC:
48   case VPWidenCanonicalIVSC:
49   case VPWidenPHISC:
50   case VPBlendSC:
51   case VPWidenSC:
52   case VPWidenGEPSC:
53   case VPReductionSC:
54   case VPWidenSelectSC: {
55     const Instruction *I =
56         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
57     (void)I;
58     assert((!I || !I->mayWriteToMemory()) &&
59            "underlying instruction may write to memory");
60     return false;
61   }
62   default:
63     return true;
64   }
65 }
66 
67 bool VPRecipeBase::mayReadFromMemory() const {
68   switch (getVPDefID()) {
69   case VPWidenMemoryInstructionSC: {
70     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
71   }
72   case VPReplicateSC:
73   case VPWidenCallSC:
74     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
75         ->mayReadFromMemory();
76   case VPBranchOnMaskSC:
77     return false;
78   case VPWidenIntOrFpInductionSC:
79   case VPWidenCanonicalIVSC:
80   case VPWidenPHISC:
81   case VPBlendSC:
82   case VPWidenSC:
83   case VPWidenGEPSC:
84   case VPReductionSC:
85   case VPWidenSelectSC: {
86     const Instruction *I =
87         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
88     (void)I;
89     assert((!I || !I->mayReadFromMemory()) &&
90            "underlying instruction may read from memory");
91     return false;
92   }
93   default:
94     return true;
95   }
96 }
97 
98 bool VPRecipeBase::mayHaveSideEffects() const {
99   switch (getVPDefID()) {
100   case VPWidenIntOrFpInductionSC:
101   case VPWidenPointerInductionSC:
102   case VPWidenCanonicalIVSC:
103   case VPWidenPHISC:
104   case VPBlendSC:
105   case VPWidenSC:
106   case VPWidenGEPSC:
107   case VPReductionSC:
108   case VPWidenSelectSC:
109   case VPScalarIVStepsSC: {
110     const Instruction *I =
111         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
112     (void)I;
113     assert((!I || !I->mayHaveSideEffects()) &&
114            "underlying instruction has side-effects");
115     return false;
116   }
117   case VPReplicateSC: {
118     auto *R = cast<VPReplicateRecipe>(this);
119     return R->getUnderlyingInstr()->mayHaveSideEffects();
120   }
121   default:
122     return true;
123   }
124 }
125 
126 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
127   auto Lane = VPLane::getLastLaneForVF(State.VF);
128   VPValue *ExitValue = getOperand(0);
129   if (Plan.isUniformAfterVectorization(ExitValue))
130     Lane = VPLane::getFirstLane();
131   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
132                    State.Builder.GetInsertBlock());
133 }
134 
135 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
136   assert(!Parent && "Recipe already in some VPBasicBlock");
137   assert(InsertPos->getParent() &&
138          "Insertion position not in any VPBasicBlock");
139   Parent = InsertPos->getParent();
140   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
141 }
142 
143 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
144                                 iplist<VPRecipeBase>::iterator I) {
145   assert(!Parent && "Recipe already in some VPBasicBlock");
146   assert(I == BB.end() || I->getParent() == &BB);
147   Parent = &BB;
148   BB.getRecipeList().insert(I, this);
149 }
150 
151 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
152   assert(!Parent && "Recipe already in some VPBasicBlock");
153   assert(InsertPos->getParent() &&
154          "Insertion position not in any VPBasicBlock");
155   Parent = InsertPos->getParent();
156   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
157 }
158 
159 void VPRecipeBase::removeFromParent() {
160   assert(getParent() && "Recipe not in any VPBasicBlock");
161   getParent()->getRecipeList().remove(getIterator());
162   Parent = nullptr;
163 }
164 
165 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
166   assert(getParent() && "Recipe not in any VPBasicBlock");
167   return getParent()->getRecipeList().erase(getIterator());
168 }
169 
170 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
171   removeFromParent();
172   insertAfter(InsertPos);
173 }
174 
175 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
176                               iplist<VPRecipeBase>::iterator I) {
177   removeFromParent();
178   insertBefore(BB, I);
179 }
180 
181 void VPInstruction::generateInstruction(VPTransformState &State,
182                                         unsigned Part) {
183   IRBuilderBase &Builder = State.Builder;
184   Builder.SetCurrentDebugLocation(DL);
185 
186   if (Instruction::isBinaryOp(getOpcode())) {
187     Value *A = State.get(getOperand(0), Part);
188     Value *B = State.get(getOperand(1), Part);
189     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
190     State.set(this, V, Part);
191     return;
192   }
193 
194   switch (getOpcode()) {
195   case VPInstruction::Not: {
196     Value *A = State.get(getOperand(0), Part);
197     Value *V = Builder.CreateNot(A);
198     State.set(this, V, Part);
199     break;
200   }
201   case VPInstruction::ICmpULE: {
202     Value *IV = State.get(getOperand(0), Part);
203     Value *TC = State.get(getOperand(1), Part);
204     Value *V = Builder.CreateICmpULE(IV, TC);
205     State.set(this, V, Part);
206     break;
207   }
208   case Instruction::Select: {
209     Value *Cond = State.get(getOperand(0), Part);
210     Value *Op1 = State.get(getOperand(1), Part);
211     Value *Op2 = State.get(getOperand(2), Part);
212     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
213     State.set(this, V, Part);
214     break;
215   }
216   case VPInstruction::ActiveLaneMask: {
217     // Get first lane of vector induction variable.
218     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
219     // Get the original loop tripcount.
220     Value *ScalarTC = State.get(getOperand(1), Part);
221 
222     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
223     auto *PredTy = VectorType::get(Int1Ty, State.VF);
224     Instruction *Call = Builder.CreateIntrinsic(
225         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
226         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
227     State.set(this, Call, Part);
228     break;
229   }
230   case VPInstruction::FirstOrderRecurrenceSplice: {
231     // Generate code to combine the previous and current values in vector v3.
232     //
233     //   vector.ph:
234     //     v_init = vector(..., ..., ..., a[-1])
235     //     br vector.body
236     //
237     //   vector.body
238     //     i = phi [0, vector.ph], [i+4, vector.body]
239     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
240     //     v2 = a[i, i+1, i+2, i+3];
241     //     v3 = vector(v1(3), v2(0, 1, 2))
242 
243     // For the first part, use the recurrence phi (v1), otherwise v2.
244     auto *V1 = State.get(getOperand(0), 0);
245     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
246     if (!PartMinus1->getType()->isVectorTy()) {
247       State.set(this, PartMinus1, Part);
248     } else {
249       Value *V2 = State.get(getOperand(1), Part);
250       State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
251     }
252     break;
253   }
254   case VPInstruction::CanonicalIVIncrement:
255   case VPInstruction::CanonicalIVIncrementNUW: {
256     Value *Next = nullptr;
257     if (Part == 0) {
258       bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
259       auto *Phi = State.get(getOperand(0), 0);
260       // The loop step is equal to the vectorization factor (num of SIMD
261       // elements) times the unroll factor (num of SIMD instructions).
262       Value *Step =
263           createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
264       Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
265     } else {
266       Next = State.get(this, 0);
267     }
268 
269     State.set(this, Next, Part);
270     break;
271   }
272   case VPInstruction::BranchOnCond: {
273     if (Part != 0)
274       break;
275 
276     Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
277     VPRegionBlock *ParentRegion = getParent()->getParent();
278     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
279 
280     // Replace the temporary unreachable terminator with a new conditional
281     // branch, hooking it up to backward destination for exiting blocks now and
282     // to forward destination(s) later when they are created.
283     BranchInst *CondBr =
284         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
285 
286     if (getParent()->isExiting())
287       CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
288 
289     CondBr->setSuccessor(0, nullptr);
290     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
291     break;
292   }
293   case VPInstruction::BranchOnCount: {
294     if (Part != 0)
295       break;
296     // First create the compare.
297     Value *IV = State.get(getOperand(0), Part);
298     Value *TC = State.get(getOperand(1), Part);
299     Value *Cond = Builder.CreateICmpEQ(IV, TC);
300 
301     // Now create the branch.
302     auto *Plan = getParent()->getPlan();
303     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
304     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
305 
306     // Replace the temporary unreachable terminator with a new conditional
307     // branch, hooking it up to backward destination (the header) now and to the
308     // forward destination (the exit/middle block) later when it is created.
309     // Note that CreateCondBr expects a valid BB as first argument, so we need
310     // to set it to nullptr later.
311     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
312                                               State.CFG.VPBB2IRBB[Header]);
313     CondBr->setSuccessor(0, nullptr);
314     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
315     break;
316   }
317   default:
318     llvm_unreachable("Unsupported opcode for instruction");
319   }
320 }
321 
322 void VPInstruction::execute(VPTransformState &State) {
323   assert(!State.Instance && "VPInstruction executing an Instance");
324   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
325   State.Builder.setFastMathFlags(FMF);
326   for (unsigned Part = 0; Part < State.UF; ++Part)
327     generateInstruction(State, Part);
328 }
329 
330 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
331 void VPInstruction::dump() const {
332   VPSlotTracker SlotTracker(getParent()->getPlan());
333   print(dbgs(), "", SlotTracker);
334 }
335 
336 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
337                           VPSlotTracker &SlotTracker) const {
338   O << Indent << "EMIT ";
339 
340   if (hasResult()) {
341     printAsOperand(O, SlotTracker);
342     O << " = ";
343   }
344 
345   switch (getOpcode()) {
346   case VPInstruction::Not:
347     O << "not";
348     break;
349   case VPInstruction::ICmpULE:
350     O << "icmp ule";
351     break;
352   case VPInstruction::SLPLoad:
353     O << "combined load";
354     break;
355   case VPInstruction::SLPStore:
356     O << "combined store";
357     break;
358   case VPInstruction::ActiveLaneMask:
359     O << "active lane mask";
360     break;
361   case VPInstruction::FirstOrderRecurrenceSplice:
362     O << "first-order splice";
363     break;
364   case VPInstruction::CanonicalIVIncrement:
365     O << "VF * UF + ";
366     break;
367   case VPInstruction::CanonicalIVIncrementNUW:
368     O << "VF * UF +(nuw) ";
369     break;
370   case VPInstruction::BranchOnCond:
371     O << "branch-on-cond";
372     break;
373   case VPInstruction::BranchOnCount:
374     O << "branch-on-count ";
375     break;
376   default:
377     O << Instruction::getOpcodeName(getOpcode());
378   }
379 
380   O << FMF;
381 
382   for (const VPValue *Operand : operands()) {
383     O << " ";
384     Operand->printAsOperand(O, SlotTracker);
385   }
386 
387   if (DL) {
388     O << ", !dbg ";
389     DL.print(O);
390   }
391 }
392 #endif
393 
394 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
395   // Make sure the VPInstruction is a floating-point operation.
396   assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
397           Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
398           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
399           Opcode == Instruction::FCmp) &&
400          "this op can't take fast-math flags");
401   FMF = FMFNew;
402 }
403 
404 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
405 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
406                               VPSlotTracker &SlotTracker) const {
407   O << Indent << "WIDEN-CALL ";
408 
409   auto *CI = cast<CallInst>(getUnderlyingInstr());
410   if (CI->getType()->isVoidTy())
411     O << "void ";
412   else {
413     printAsOperand(O, SlotTracker);
414     O << " = ";
415   }
416 
417   O << "call @" << CI->getCalledFunction()->getName() << "(";
418   printOperands(O, SlotTracker);
419   O << ")";
420 }
421 
422 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
423                                 VPSlotTracker &SlotTracker) const {
424   O << Indent << "WIDEN-SELECT ";
425   printAsOperand(O, SlotTracker);
426   O << " = select ";
427   getOperand(0)->printAsOperand(O, SlotTracker);
428   O << ", ";
429   getOperand(1)->printAsOperand(O, SlotTracker);
430   O << ", ";
431   getOperand(2)->printAsOperand(O, SlotTracker);
432   O << (InvariantCond ? " (condition is loop invariant)" : "");
433 }
434 #endif
435 
436 void VPWidenSelectRecipe::execute(VPTransformState &State) {
437   auto &I = *cast<SelectInst>(getUnderlyingInstr());
438   State.setDebugLocFromInst(&I);
439 
440   // The condition can be loop invariant but still defined inside the
441   // loop. This means that we can't just use the original 'cond' value.
442   // We have to take the 'vectorized' value and pick the first lane.
443   // Instcombine will make this a no-op.
444   auto *InvarCond =
445       InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
446 
447   for (unsigned Part = 0; Part < State.UF; ++Part) {
448     Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
449     Value *Op0 = State.get(getOperand(1), Part);
450     Value *Op1 = State.get(getOperand(2), Part);
451     Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
452     State.set(this, Sel, Part);
453     State.addMetadata(Sel, &I);
454   }
455 }
456 
457 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
458 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
459                           VPSlotTracker &SlotTracker) const {
460   O << Indent << "WIDEN ";
461   printAsOperand(O, SlotTracker);
462   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
463   printOperands(O, SlotTracker);
464 }
465 
466 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
467                                           VPSlotTracker &SlotTracker) const {
468   O << Indent << "WIDEN-INDUCTION";
469   if (getTruncInst()) {
470     O << "\\l\"";
471     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
472     O << " +\n" << Indent << "\"  ";
473     getVPValue(0)->printAsOperand(O, SlotTracker);
474   } else
475     O << " " << VPlanIngredient(IV);
476 
477   O << ", ";
478   getStepValue()->printAsOperand(O, SlotTracker);
479 }
480 #endif
481 
482 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
483   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
484   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
485   return StartC && StartC->isZero() && StepC && StepC->isOne();
486 }
487 
488 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
489   return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
490 }
491 
492 bool VPScalarIVStepsRecipe::isCanonical() const {
493   auto *CanIV = getCanonicalIV();
494   // The start value of the steps-recipe must match the start value of the
495   // canonical induction and it must step by 1.
496   if (CanIV->getStartValue() != getStartValue())
497     return false;
498   auto *StepVPV = getStepValue();
499   if (StepVPV->getDef())
500     return false;
501   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
502   return StepC && StepC->isOne();
503 }
504 
505 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
506 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
507                                   VPSlotTracker &SlotTracker) const {
508   O << Indent;
509   printAsOperand(O, SlotTracker);
510   O << Indent << "= SCALAR-STEPS ";
511   printOperands(O, SlotTracker);
512 }
513 
514 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
515                              VPSlotTracker &SlotTracker) const {
516   O << Indent << "WIDEN-GEP ";
517   O << (IsPtrLoopInvariant ? "Inv" : "Var");
518   size_t IndicesNumber = IsIndexLoopInvariant.size();
519   for (size_t I = 0; I < IndicesNumber; ++I)
520     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
521 
522   O << " ";
523   printAsOperand(O, SlotTracker);
524   O << " = getelementptr ";
525   printOperands(O, SlotTracker);
526 }
527 
528 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
529                           VPSlotTracker &SlotTracker) const {
530   O << Indent << "BLEND ";
531   Phi->printAsOperand(O, false);
532   O << " =";
533   if (getNumIncomingValues() == 1) {
534     // Not a User of any mask: not really blending, this is a
535     // single-predecessor phi.
536     O << " ";
537     getIncomingValue(0)->printAsOperand(O, SlotTracker);
538   } else {
539     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
540       O << " ";
541       getIncomingValue(I)->printAsOperand(O, SlotTracker);
542       O << "/";
543       getMask(I)->printAsOperand(O, SlotTracker);
544     }
545   }
546 }
547 
548 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
549                               VPSlotTracker &SlotTracker) const {
550   O << Indent << "REDUCE ";
551   printAsOperand(O, SlotTracker);
552   O << " = ";
553   getChainOp()->printAsOperand(O, SlotTracker);
554   O << " +";
555   if (isa<FPMathOperator>(getUnderlyingInstr()))
556     O << getUnderlyingInstr()->getFastMathFlags();
557   O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
558   getVecOp()->printAsOperand(O, SlotTracker);
559   if (getCondOp()) {
560     O << ", ";
561     getCondOp()->printAsOperand(O, SlotTracker);
562   }
563   O << ")";
564   if (RdxDesc->IntermediateStore)
565     O << " (with final reduction value stored in invariant address sank "
566          "outside of loop)";
567 }
568 
569 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
570                               VPSlotTracker &SlotTracker) const {
571   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
572 
573   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
574     printAsOperand(O, SlotTracker);
575     O << " = ";
576   }
577   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
578     O << "call @" << CB->getCalledFunction()->getName() << "(";
579     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
580                     O, [&O, &SlotTracker](VPValue *Op) {
581                       Op->printAsOperand(O, SlotTracker);
582                     });
583     O << ")";
584   } else {
585     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
586     printOperands(O, SlotTracker);
587   }
588 
589   if (AlsoPack)
590     O << " (S->V)";
591 }
592 
593 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
594                                 VPSlotTracker &SlotTracker) const {
595   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
596   printAsOperand(O, SlotTracker);
597   O << " = ";
598   printOperands(O, SlotTracker);
599 }
600 
601 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
602                                            VPSlotTracker &SlotTracker) const {
603   O << Indent << "WIDEN ";
604 
605   if (!isStore()) {
606     getVPSingleValue()->printAsOperand(O, SlotTracker);
607     O << " = ";
608   }
609   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
610 
611   printOperands(O, SlotTracker);
612 }
613 #endif
614 
615 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
616   Value *Start = getStartValue()->getLiveInIRValue();
617   PHINode *EntryPart = PHINode::Create(
618       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
619 
620   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
621   EntryPart->addIncoming(Start, VectorPH);
622   EntryPart->setDebugLoc(DL);
623   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
624     State.set(this, EntryPart, Part);
625 }
626 
627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
628 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
629                                    VPSlotTracker &SlotTracker) const {
630   O << Indent << "EMIT ";
631   printAsOperand(O, SlotTracker);
632   O << " = CANONICAL-INDUCTION";
633 }
634 #endif
635 
636 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
637   bool IsUniform = vputils::onlyFirstLaneUsed(this);
638   return all_of(users(),
639                 [&](const VPUser *U) { return U->usesScalars(this); }) &&
640          (IsUniform || !VF.isScalable());
641 }
642 
643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
644 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
645                                           VPSlotTracker &SlotTracker) const {
646   O << Indent << "EMIT ";
647   printAsOperand(O, SlotTracker);
648   O << " = WIDEN-POINTER-INDUCTION ";
649   getStartValue()->printAsOperand(O, SlotTracker);
650   O << ", " << *IndDesc.getStep();
651 }
652 #endif
653 
654 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
655   assert(!State.Instance && "cannot be used in per-lane");
656   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
657   SCEVExpander Exp(SE, DL, "induction");
658 
659   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
660                                  &*State.Builder.GetInsertPoint());
661 
662   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
663     State.set(this, Res, Part);
664 }
665 
666 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
667 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
668                                VPSlotTracker &SlotTracker) const {
669   O << Indent << "EMIT ";
670   getVPSingleValue()->printAsOperand(O, SlotTracker);
671   O << " = EXPAND SCEV " << *Expr;
672 }
673 #endif
674 
675 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
676   Value *CanonicalIV = State.get(getOperand(0), 0);
677   Type *STy = CanonicalIV->getType();
678   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
679   ElementCount VF = State.VF;
680   Value *VStart = VF.isScalar()
681                       ? CanonicalIV
682                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
683   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
684     Value *VStep = createStepForVF(Builder, STy, VF, Part);
685     if (VF.isVector()) {
686       VStep = Builder.CreateVectorSplat(VF, VStep);
687       VStep =
688           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
689     }
690     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
691     State.set(this, CanonicalVectorIV, Part);
692   }
693 }
694 
695 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
696 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
697                                      VPSlotTracker &SlotTracker) const {
698   O << Indent << "EMIT ";
699   printAsOperand(O, SlotTracker);
700   O << " = WIDEN-CANONICAL-INDUCTION ";
701   printOperands(O, SlotTracker);
702 }
703 #endif
704 
705 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
706   auto &Builder = State.Builder;
707   // Create a vector from the initial value.
708   auto *VectorInit = getStartValue()->getLiveInIRValue();
709 
710   Type *VecTy = State.VF.isScalar()
711                     ? VectorInit->getType()
712                     : VectorType::get(VectorInit->getType(), State.VF);
713 
714   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
715   if (State.VF.isVector()) {
716     auto *IdxTy = Builder.getInt32Ty();
717     auto *One = ConstantInt::get(IdxTy, 1);
718     IRBuilder<>::InsertPointGuard Guard(Builder);
719     Builder.SetInsertPoint(VectorPH->getTerminator());
720     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
721     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
722     VectorInit = Builder.CreateInsertElement(
723         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
724   }
725 
726   // Create a phi node for the new recurrence.
727   PHINode *EntryPart = PHINode::Create(
728       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
729   EntryPart->addIncoming(VectorInit, VectorPH);
730   State.set(this, EntryPart, 0);
731 }
732 
733 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
734 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
735                                             VPSlotTracker &SlotTracker) const {
736   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
737   printAsOperand(O, SlotTracker);
738   O << " = phi ";
739   printOperands(O, SlotTracker);
740 }
741 #endif
742 
743 void VPReductionPHIRecipe::execute(VPTransformState &State) {
744   PHINode *PN = cast<PHINode>(getUnderlyingValue());
745   auto &Builder = State.Builder;
746 
747   // In order to support recurrences we need to be able to vectorize Phi nodes.
748   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
749   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
750   // this value when we vectorize all of the instructions that use the PHI.
751   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
752   Type *VecTy =
753       ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
754 
755   BasicBlock *HeaderBB = State.CFG.PrevBB;
756   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
757          "recipe must be in the vector loop header");
758   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
759   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
760     Value *EntryPart =
761         PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
762     State.set(this, EntryPart, Part);
763   }
764 
765   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
766 
767   // Reductions do not have to start at zero. They can start with
768   // any loop invariant values.
769   VPValue *StartVPV = getStartValue();
770   Value *StartV = StartVPV->getLiveInIRValue();
771 
772   Value *Iden = nullptr;
773   RecurKind RK = RdxDesc.getRecurrenceKind();
774   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
775       RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
776     // MinMax reduction have the start value as their identify.
777     if (ScalarPHI) {
778       Iden = StartV;
779     } else {
780       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
781       Builder.SetInsertPoint(VectorPH->getTerminator());
782       StartV = Iden =
783           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
784     }
785   } else {
786     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
787                                          RdxDesc.getFastMathFlags());
788 
789     if (!ScalarPHI) {
790       Iden = Builder.CreateVectorSplat(State.VF, Iden);
791       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
792       Builder.SetInsertPoint(VectorPH->getTerminator());
793       Constant *Zero = Builder.getInt32(0);
794       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
795     }
796   }
797 
798   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
799     Value *EntryPart = State.get(this, Part);
800     // Make sure to add the reduction start value only to the
801     // first unroll part.
802     Value *StartVal = (Part == 0) ? StartV : Iden;
803     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
804   }
805 }
806 
807 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
808 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
809                                  VPSlotTracker &SlotTracker) const {
810   O << Indent << "WIDEN-REDUCTION-PHI ";
811 
812   printAsOperand(O, SlotTracker);
813   O << " = phi ";
814   printOperands(O, SlotTracker);
815 }
816 #endif
817 
818 void VPWidenPHIRecipe::execute(VPTransformState &State) {
819   assert(EnableVPlanNativePath &&
820          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
821 
822   // Currently we enter here in the VPlan-native path for non-induction
823   // PHIs where all control flow is uniform. We simply widen these PHIs.
824   // Create a vector phi with no operands - the vector phi operands will be
825   // set at the end of vector code generation.
826   VPBasicBlock *Parent = getParent();
827   VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
828   unsigned StartIdx = 0;
829   // For phis in header blocks of loop regions, use the index of the value
830   // coming from the preheader.
831   if (LoopRegion->getEntryBasicBlock() == Parent) {
832     for (unsigned I = 0; I < getNumOperands(); ++I) {
833       if (getIncomingBlock(I) ==
834           LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
835         StartIdx = I;
836     }
837   }
838   Value *Op0 = State.get(getOperand(StartIdx), 0);
839   Type *VecTy = Op0->getType();
840   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
841   State.set(this, VecPhi, 0);
842 }
843 
844 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
845 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
846                              VPSlotTracker &SlotTracker) const {
847   O << Indent << "WIDEN-PHI ";
848 
849   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
850   // Unless all incoming values are modeled in VPlan  print the original PHI
851   // directly.
852   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
853   // values as VPValues.
854   if (getNumOperands() != OriginalPhi->getNumOperands()) {
855     O << VPlanIngredient(OriginalPhi);
856     return;
857   }
858 
859   printAsOperand(O, SlotTracker);
860   O << " = phi ";
861   printOperands(O, SlotTracker);
862 }
863 #endif
864