1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlan.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Analysis/IVDescriptors.h"
19 #include "llvm/IR/BasicBlock.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Type.h"
24 #include "llvm/IR/Value.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
30 #include <cassert>
31 
32 using namespace llvm;
33 
34 extern cl::opt<bool> EnableVPlanNativePath;
35 
36 bool VPRecipeBase::mayWriteToMemory() const {
37   switch (getVPDefID()) {
38   case VPWidenMemoryInstructionSC: {
39     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
40   }
41   case VPReplicateSC:
42   case VPWidenCallSC:
43     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
44         ->mayWriteToMemory();
45   case VPBranchOnMaskSC:
46     return false;
47   case VPWidenIntOrFpInductionSC:
48   case VPWidenCanonicalIVSC:
49   case VPWidenPHISC:
50   case VPBlendSC:
51   case VPWidenSC:
52   case VPWidenGEPSC:
53   case VPReductionSC:
54   case VPWidenSelectSC: {
55     const Instruction *I =
56         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
57     (void)I;
58     assert((!I || !I->mayWriteToMemory()) &&
59            "underlying instruction may write to memory");
60     return false;
61   }
62   default:
63     return true;
64   }
65 }
66 
67 bool VPRecipeBase::mayReadFromMemory() const {
68   switch (getVPDefID()) {
69   case VPWidenMemoryInstructionSC: {
70     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
71   }
72   case VPReplicateSC:
73   case VPWidenCallSC:
74     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
75         ->mayReadFromMemory();
76   case VPBranchOnMaskSC:
77     return false;
78   case VPWidenIntOrFpInductionSC:
79   case VPWidenCanonicalIVSC:
80   case VPWidenPHISC:
81   case VPBlendSC:
82   case VPWidenSC:
83   case VPWidenGEPSC:
84   case VPReductionSC:
85   case VPWidenSelectSC: {
86     const Instruction *I =
87         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
88     (void)I;
89     assert((!I || !I->mayReadFromMemory()) &&
90            "underlying instruction may read from memory");
91     return false;
92   }
93   default:
94     return true;
95   }
96 }
97 
98 bool VPRecipeBase::mayHaveSideEffects() const {
99   switch (getVPDefID()) {
100   case VPWidenIntOrFpInductionSC:
101   case VPWidenPointerInductionSC:
102   case VPWidenCanonicalIVSC:
103   case VPWidenPHISC:
104   case VPBlendSC:
105   case VPWidenSC:
106   case VPWidenGEPSC:
107   case VPReductionSC:
108   case VPWidenSelectSC:
109   case VPScalarIVStepsSC: {
110     const Instruction *I =
111         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
112     (void)I;
113     assert((!I || !I->mayHaveSideEffects()) &&
114            "underlying instruction has side-effects");
115     return false;
116   }
117   case VPReplicateSC: {
118     auto *R = cast<VPReplicateRecipe>(this);
119     return R->getUnderlyingInstr()->mayHaveSideEffects();
120   }
121   default:
122     return true;
123   }
124 }
125 
126 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
127   auto Lane = VPLane::getLastLaneForVF(State.VF);
128   VPValue *ExitValue = getOperand(0);
129   if (Plan.isUniformAfterVectorization(ExitValue))
130     Lane = VPLane::getFirstLane();
131   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
132                    State.Builder.GetInsertBlock());
133 }
134 
135 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
136   assert(!Parent && "Recipe already in some VPBasicBlock");
137   assert(InsertPos->getParent() &&
138          "Insertion position not in any VPBasicBlock");
139   Parent = InsertPos->getParent();
140   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
141 }
142 
143 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
144                                 iplist<VPRecipeBase>::iterator I) {
145   assert(!Parent && "Recipe already in some VPBasicBlock");
146   assert(I == BB.end() || I->getParent() == &BB);
147   Parent = &BB;
148   BB.getRecipeList().insert(I, this);
149 }
150 
151 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
152   assert(!Parent && "Recipe already in some VPBasicBlock");
153   assert(InsertPos->getParent() &&
154          "Insertion position not in any VPBasicBlock");
155   Parent = InsertPos->getParent();
156   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
157 }
158 
159 void VPRecipeBase::removeFromParent() {
160   assert(getParent() && "Recipe not in any VPBasicBlock");
161   getParent()->getRecipeList().remove(getIterator());
162   Parent = nullptr;
163 }
164 
165 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
166   assert(getParent() && "Recipe not in any VPBasicBlock");
167   return getParent()->getRecipeList().erase(getIterator());
168 }
169 
170 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
171   removeFromParent();
172   insertAfter(InsertPos);
173 }
174 
175 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
176                               iplist<VPRecipeBase>::iterator I) {
177   removeFromParent();
178   insertBefore(BB, I);
179 }
180 
181 void VPInstruction::generateInstruction(VPTransformState &State,
182                                         unsigned Part) {
183   IRBuilderBase &Builder = State.Builder;
184   Builder.SetCurrentDebugLocation(DL);
185 
186   if (Instruction::isBinaryOp(getOpcode())) {
187     Value *A = State.get(getOperand(0), Part);
188     Value *B = State.get(getOperand(1), Part);
189     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
190     State.set(this, V, Part);
191     return;
192   }
193 
194   switch (getOpcode()) {
195   case VPInstruction::Not: {
196     Value *A = State.get(getOperand(0), Part);
197     Value *V = Builder.CreateNot(A);
198     State.set(this, V, Part);
199     break;
200   }
201   case VPInstruction::ICmpULE: {
202     Value *IV = State.get(getOperand(0), Part);
203     Value *TC = State.get(getOperand(1), Part);
204     Value *V = Builder.CreateICmpULE(IV, TC);
205     State.set(this, V, Part);
206     break;
207   }
208   case Instruction::Select: {
209     Value *Cond = State.get(getOperand(0), Part);
210     Value *Op1 = State.get(getOperand(1), Part);
211     Value *Op2 = State.get(getOperand(2), Part);
212     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
213     State.set(this, V, Part);
214     break;
215   }
216   case VPInstruction::ActiveLaneMask: {
217     // Get first lane of vector induction variable.
218     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
219     // Get the original loop tripcount.
220     Value *ScalarTC = State.get(getOperand(1), Part);
221 
222     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
223     auto *PredTy = VectorType::get(Int1Ty, State.VF);
224     Instruction *Call = Builder.CreateIntrinsic(
225         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
226         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
227     State.set(this, Call, Part);
228     break;
229   }
230   case VPInstruction::FirstOrderRecurrenceSplice: {
231     // Generate code to combine the previous and current values in vector v3.
232     //
233     //   vector.ph:
234     //     v_init = vector(..., ..., ..., a[-1])
235     //     br vector.body
236     //
237     //   vector.body
238     //     i = phi [0, vector.ph], [i+4, vector.body]
239     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
240     //     v2 = a[i, i+1, i+2, i+3];
241     //     v3 = vector(v1(3), v2(0, 1, 2))
242 
243     // For the first part, use the recurrence phi (v1), otherwise v2.
244     auto *V1 = State.get(getOperand(0), 0);
245     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
246     if (!PartMinus1->getType()->isVectorTy()) {
247       State.set(this, PartMinus1, Part);
248     } else {
249       Value *V2 = State.get(getOperand(1), Part);
250       State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
251     }
252     break;
253   }
254   case VPInstruction::CanonicalIVIncrement:
255   case VPInstruction::CanonicalIVIncrementNUW: {
256     Value *Next = nullptr;
257     if (Part == 0) {
258       bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
259       auto *Phi = State.get(getOperand(0), 0);
260       // The loop step is equal to the vectorization factor (num of SIMD
261       // elements) times the unroll factor (num of SIMD instructions).
262       Value *Step =
263           createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
264       Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
265     } else {
266       Next = State.get(this, 0);
267     }
268 
269     State.set(this, Next, Part);
270     break;
271   }
272   case VPInstruction::BranchOnCond: {
273     if (Part != 0)
274       break;
275 
276     Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
277     VPRegionBlock *ParentRegion = getParent()->getParent();
278     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
279 
280     // Replace the temporary unreachable terminator with a new conditional
281     // branch, hooking it up to backward destination for exiting blocks now and
282     // to forward destination(s) later when they are created.
283     BranchInst *CondBr =
284         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
285 
286     if (getParent()->isExiting())
287       CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
288 
289     CondBr->setSuccessor(0, nullptr);
290     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
291     break;
292   }
293   case VPInstruction::BranchOnCount: {
294     if (Part != 0)
295       break;
296     // First create the compare.
297     Value *IV = State.get(getOperand(0), Part);
298     Value *TC = State.get(getOperand(1), Part);
299     Value *Cond = Builder.CreateICmpEQ(IV, TC);
300 
301     // Now create the branch.
302     auto *Plan = getParent()->getPlan();
303     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
304     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
305 
306     // Replace the temporary unreachable terminator with a new conditional
307     // branch, hooking it up to backward destination (the header) now and to the
308     // forward destination (the exit/middle block) later when it is created.
309     // Note that CreateCondBr expects a valid BB as first argument, so we need
310     // to set it to nullptr later.
311     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
312                                               State.CFG.VPBB2IRBB[Header]);
313     CondBr->setSuccessor(0, nullptr);
314     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
315     break;
316   }
317   default:
318     llvm_unreachable("Unsupported opcode for instruction");
319   }
320 }
321 
322 void VPInstruction::execute(VPTransformState &State) {
323   assert(!State.Instance && "VPInstruction executing an Instance");
324   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
325   State.Builder.setFastMathFlags(FMF);
326   for (unsigned Part = 0; Part < State.UF; ++Part)
327     generateInstruction(State, Part);
328 }
329 
330 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
331 void VPInstruction::dump() const {
332   VPSlotTracker SlotTracker(getParent()->getPlan());
333   print(dbgs(), "", SlotTracker);
334 }
335 
336 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
337                           VPSlotTracker &SlotTracker) const {
338   O << Indent << "EMIT ";
339 
340   if (hasResult()) {
341     printAsOperand(O, SlotTracker);
342     O << " = ";
343   }
344 
345   switch (getOpcode()) {
346   case VPInstruction::Not:
347     O << "not";
348     break;
349   case VPInstruction::ICmpULE:
350     O << "icmp ule";
351     break;
352   case VPInstruction::SLPLoad:
353     O << "combined load";
354     break;
355   case VPInstruction::SLPStore:
356     O << "combined store";
357     break;
358   case VPInstruction::ActiveLaneMask:
359     O << "active lane mask";
360     break;
361   case VPInstruction::FirstOrderRecurrenceSplice:
362     O << "first-order splice";
363     break;
364   case VPInstruction::CanonicalIVIncrement:
365     O << "VF * UF + ";
366     break;
367   case VPInstruction::CanonicalIVIncrementNUW:
368     O << "VF * UF +(nuw) ";
369     break;
370   case VPInstruction::BranchOnCond:
371     O << "branch-on-cond";
372     break;
373   case VPInstruction::BranchOnCount:
374     O << "branch-on-count ";
375     break;
376   default:
377     O << Instruction::getOpcodeName(getOpcode());
378   }
379 
380   O << FMF;
381 
382   for (const VPValue *Operand : operands()) {
383     O << " ";
384     Operand->printAsOperand(O, SlotTracker);
385   }
386 
387   if (DL) {
388     O << ", !dbg ";
389     DL.print(O);
390   }
391 }
392 #endif
393 
394 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
395   // Make sure the VPInstruction is a floating-point operation.
396   assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
397           Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
398           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
399           Opcode == Instruction::FCmp) &&
400          "this op can't take fast-math flags");
401   FMF = FMFNew;
402 }
403 
404 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
405 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
406                               VPSlotTracker &SlotTracker) const {
407   O << Indent << "WIDEN-CALL ";
408 
409   auto *CI = cast<CallInst>(getUnderlyingInstr());
410   if (CI->getType()->isVoidTy())
411     O << "void ";
412   else {
413     printAsOperand(O, SlotTracker);
414     O << " = ";
415   }
416 
417   O << "call @" << CI->getCalledFunction()->getName() << "(";
418   printOperands(O, SlotTracker);
419   O << ")";
420 }
421 
422 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
423                                 VPSlotTracker &SlotTracker) const {
424   O << Indent << "WIDEN-SELECT ";
425   printAsOperand(O, SlotTracker);
426   O << " = select ";
427   getOperand(0)->printAsOperand(O, SlotTracker);
428   O << ", ";
429   getOperand(1)->printAsOperand(O, SlotTracker);
430   O << ", ";
431   getOperand(2)->printAsOperand(O, SlotTracker);
432   O << (InvariantCond ? " (condition is loop invariant)" : "");
433 }
434 
435 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
436                           VPSlotTracker &SlotTracker) const {
437   O << Indent << "WIDEN ";
438   printAsOperand(O, SlotTracker);
439   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
440   printOperands(O, SlotTracker);
441 }
442 
443 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
444                                           VPSlotTracker &SlotTracker) const {
445   O << Indent << "WIDEN-INDUCTION";
446   if (getTruncInst()) {
447     O << "\\l\"";
448     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
449     O << " +\n" << Indent << "\"  ";
450     getVPValue(0)->printAsOperand(O, SlotTracker);
451   } else
452     O << " " << VPlanIngredient(IV);
453 
454   O << ", ";
455   getStepValue()->printAsOperand(O, SlotTracker);
456 }
457 #endif
458 
459 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
460   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
461   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
462   return StartC && StartC->isZero() && StepC && StepC->isOne();
463 }
464 
465 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
466   return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
467 }
468 
469 bool VPScalarIVStepsRecipe::isCanonical() const {
470   auto *CanIV = getCanonicalIV();
471   // The start value of the steps-recipe must match the start value of the
472   // canonical induction and it must step by 1.
473   if (CanIV->getStartValue() != getStartValue())
474     return false;
475   auto *StepVPV = getStepValue();
476   if (StepVPV->getDef())
477     return false;
478   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
479   return StepC && StepC->isOne();
480 }
481 
482 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
483 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
484                                   VPSlotTracker &SlotTracker) const {
485   O << Indent;
486   printAsOperand(O, SlotTracker);
487   O << Indent << "= SCALAR-STEPS ";
488   printOperands(O, SlotTracker);
489 }
490 
491 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
492                              VPSlotTracker &SlotTracker) const {
493   O << Indent << "WIDEN-GEP ";
494   O << (IsPtrLoopInvariant ? "Inv" : "Var");
495   size_t IndicesNumber = IsIndexLoopInvariant.size();
496   for (size_t I = 0; I < IndicesNumber; ++I)
497     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
498 
499   O << " ";
500   printAsOperand(O, SlotTracker);
501   O << " = getelementptr ";
502   printOperands(O, SlotTracker);
503 }
504 
505 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
506                           VPSlotTracker &SlotTracker) const {
507   O << Indent << "BLEND ";
508   Phi->printAsOperand(O, false);
509   O << " =";
510   if (getNumIncomingValues() == 1) {
511     // Not a User of any mask: not really blending, this is a
512     // single-predecessor phi.
513     O << " ";
514     getIncomingValue(0)->printAsOperand(O, SlotTracker);
515   } else {
516     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
517       O << " ";
518       getIncomingValue(I)->printAsOperand(O, SlotTracker);
519       O << "/";
520       getMask(I)->printAsOperand(O, SlotTracker);
521     }
522   }
523 }
524 
525 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
526                               VPSlotTracker &SlotTracker) const {
527   O << Indent << "REDUCE ";
528   printAsOperand(O, SlotTracker);
529   O << " = ";
530   getChainOp()->printAsOperand(O, SlotTracker);
531   O << " +";
532   if (isa<FPMathOperator>(getUnderlyingInstr()))
533     O << getUnderlyingInstr()->getFastMathFlags();
534   O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
535   getVecOp()->printAsOperand(O, SlotTracker);
536   if (getCondOp()) {
537     O << ", ";
538     getCondOp()->printAsOperand(O, SlotTracker);
539   }
540   O << ")";
541   if (RdxDesc->IntermediateStore)
542     O << " (with final reduction value stored in invariant address sank "
543          "outside of loop)";
544 }
545 
546 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
547                               VPSlotTracker &SlotTracker) const {
548   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
549 
550   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
551     printAsOperand(O, SlotTracker);
552     O << " = ";
553   }
554   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
555     O << "call @" << CB->getCalledFunction()->getName() << "(";
556     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
557                     O, [&O, &SlotTracker](VPValue *Op) {
558                       Op->printAsOperand(O, SlotTracker);
559                     });
560     O << ")";
561   } else {
562     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
563     printOperands(O, SlotTracker);
564   }
565 
566   if (AlsoPack)
567     O << " (S->V)";
568 }
569 
570 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
571                                 VPSlotTracker &SlotTracker) const {
572   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
573   printAsOperand(O, SlotTracker);
574   O << " = ";
575   printOperands(O, SlotTracker);
576 }
577 
578 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
579                                            VPSlotTracker &SlotTracker) const {
580   O << Indent << "WIDEN ";
581 
582   if (!isStore()) {
583     getVPSingleValue()->printAsOperand(O, SlotTracker);
584     O << " = ";
585   }
586   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
587 
588   printOperands(O, SlotTracker);
589 }
590 #endif
591 
592 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
593   Value *Start = getStartValue()->getLiveInIRValue();
594   PHINode *EntryPart = PHINode::Create(
595       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
596 
597   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
598   EntryPart->addIncoming(Start, VectorPH);
599   EntryPart->setDebugLoc(DL);
600   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
601     State.set(this, EntryPart, Part);
602 }
603 
604 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
605 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
606                                    VPSlotTracker &SlotTracker) const {
607   O << Indent << "EMIT ";
608   printAsOperand(O, SlotTracker);
609   O << " = CANONICAL-INDUCTION";
610 }
611 #endif
612 
613 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
614   bool IsUniform = vputils::onlyFirstLaneUsed(this);
615   return all_of(users(),
616                 [&](const VPUser *U) { return U->usesScalars(this); }) &&
617          (IsUniform || !VF.isScalable());
618 }
619 
620 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
621 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
622                                           VPSlotTracker &SlotTracker) const {
623   O << Indent << "EMIT ";
624   printAsOperand(O, SlotTracker);
625   O << " = WIDEN-POINTER-INDUCTION ";
626   getStartValue()->printAsOperand(O, SlotTracker);
627   O << ", " << *IndDesc.getStep();
628 }
629 #endif
630 
631 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
632   assert(!State.Instance && "cannot be used in per-lane");
633   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
634   SCEVExpander Exp(SE, DL, "induction");
635 
636   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
637                                  &*State.Builder.GetInsertPoint());
638 
639   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
640     State.set(this, Res, Part);
641 }
642 
643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
644 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
645                                VPSlotTracker &SlotTracker) const {
646   O << Indent << "EMIT ";
647   getVPSingleValue()->printAsOperand(O, SlotTracker);
648   O << " = EXPAND SCEV " << *Expr;
649 }
650 #endif
651 
652 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
653   Value *CanonicalIV = State.get(getOperand(0), 0);
654   Type *STy = CanonicalIV->getType();
655   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
656   ElementCount VF = State.VF;
657   Value *VStart = VF.isScalar()
658                       ? CanonicalIV
659                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
660   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
661     Value *VStep = createStepForVF(Builder, STy, VF, Part);
662     if (VF.isVector()) {
663       VStep = Builder.CreateVectorSplat(VF, VStep);
664       VStep =
665           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
666     }
667     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
668     State.set(this, CanonicalVectorIV, Part);
669   }
670 }
671 
672 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
673 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
674                                      VPSlotTracker &SlotTracker) const {
675   O << Indent << "EMIT ";
676   printAsOperand(O, SlotTracker);
677   O << " = WIDEN-CANONICAL-INDUCTION ";
678   printOperands(O, SlotTracker);
679 }
680 #endif
681 
682 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
683   auto &Builder = State.Builder;
684   // Create a vector from the initial value.
685   auto *VectorInit = getStartValue()->getLiveInIRValue();
686 
687   Type *VecTy = State.VF.isScalar()
688                     ? VectorInit->getType()
689                     : VectorType::get(VectorInit->getType(), State.VF);
690 
691   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
692   if (State.VF.isVector()) {
693     auto *IdxTy = Builder.getInt32Ty();
694     auto *One = ConstantInt::get(IdxTy, 1);
695     IRBuilder<>::InsertPointGuard Guard(Builder);
696     Builder.SetInsertPoint(VectorPH->getTerminator());
697     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
698     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
699     VectorInit = Builder.CreateInsertElement(
700         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
701   }
702 
703   // Create a phi node for the new recurrence.
704   PHINode *EntryPart = PHINode::Create(
705       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
706   EntryPart->addIncoming(VectorInit, VectorPH);
707   State.set(this, EntryPart, 0);
708 }
709 
710 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
711 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
712                                             VPSlotTracker &SlotTracker) const {
713   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
714   printAsOperand(O, SlotTracker);
715   O << " = phi ";
716   printOperands(O, SlotTracker);
717 }
718 #endif
719 
720 void VPReductionPHIRecipe::execute(VPTransformState &State) {
721   PHINode *PN = cast<PHINode>(getUnderlyingValue());
722   auto &Builder = State.Builder;
723 
724   // In order to support recurrences we need to be able to vectorize Phi nodes.
725   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
726   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
727   // this value when we vectorize all of the instructions that use the PHI.
728   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
729   Type *VecTy =
730       ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
731 
732   BasicBlock *HeaderBB = State.CFG.PrevBB;
733   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
734          "recipe must be in the vector loop header");
735   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
736   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
737     Value *EntryPart =
738         PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
739     State.set(this, EntryPart, Part);
740   }
741 
742   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
743 
744   // Reductions do not have to start at zero. They can start with
745   // any loop invariant values.
746   VPValue *StartVPV = getStartValue();
747   Value *StartV = StartVPV->getLiveInIRValue();
748 
749   Value *Iden = nullptr;
750   RecurKind RK = RdxDesc.getRecurrenceKind();
751   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
752       RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
753     // MinMax reduction have the start value as their identify.
754     if (ScalarPHI) {
755       Iden = StartV;
756     } else {
757       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
758       Builder.SetInsertPoint(VectorPH->getTerminator());
759       StartV = Iden =
760           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
761     }
762   } else {
763     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
764                                          RdxDesc.getFastMathFlags());
765 
766     if (!ScalarPHI) {
767       Iden = Builder.CreateVectorSplat(State.VF, Iden);
768       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
769       Builder.SetInsertPoint(VectorPH->getTerminator());
770       Constant *Zero = Builder.getInt32(0);
771       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
772     }
773   }
774 
775   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
776     Value *EntryPart = State.get(this, Part);
777     // Make sure to add the reduction start value only to the
778     // first unroll part.
779     Value *StartVal = (Part == 0) ? StartV : Iden;
780     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
781   }
782 }
783 
784 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
785 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
786                                  VPSlotTracker &SlotTracker) const {
787   O << Indent << "WIDEN-REDUCTION-PHI ";
788 
789   printAsOperand(O, SlotTracker);
790   O << " = phi ";
791   printOperands(O, SlotTracker);
792 }
793 #endif
794 
795 void VPWidenPHIRecipe::execute(VPTransformState &State) {
796   assert(EnableVPlanNativePath &&
797          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
798 
799   // Currently we enter here in the VPlan-native path for non-induction
800   // PHIs where all control flow is uniform. We simply widen these PHIs.
801   // Create a vector phi with no operands - the vector phi operands will be
802   // set at the end of vector code generation.
803   VPBasicBlock *Parent = getParent();
804   VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
805   unsigned StartIdx = 0;
806   // For phis in header blocks of loop regions, use the index of the value
807   // coming from the preheader.
808   if (LoopRegion->getEntryBasicBlock() == Parent) {
809     for (unsigned I = 0; I < getNumOperands(); ++I) {
810       if (getIncomingBlock(I) ==
811           LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
812         StartIdx = I;
813     }
814   }
815   Value *Op0 = State.get(getOperand(StartIdx), 0);
816   Type *VecTy = Op0->getType();
817   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
818   State.set(this, VecPhi, 0);
819 }
820 
821 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
822 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
823                              VPSlotTracker &SlotTracker) const {
824   O << Indent << "WIDEN-PHI ";
825 
826   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
827   // Unless all incoming values are modeled in VPlan  print the original PHI
828   // directly.
829   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
830   // values as VPValues.
831   if (getNumOperands() != OriginalPhi->getNumOperands()) {
832     O << VPlanIngredient(OriginalPhi);
833     return;
834   }
835 
836   printAsOperand(O, SlotTracker);
837   O << " = phi ";
838   printOperands(O, SlotTracker);
839 }
840 #endif
841