1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlan.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Analysis/IVDescriptors.h"
19 #include "llvm/IR/BasicBlock.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Type.h"
24 #include "llvm/IR/Value.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
30 #include <cassert>
31 
32 using namespace llvm;
33 
34 using VectorParts = SmallVector<Value *, 2>;
35 
36 extern cl::opt<bool> EnableVPlanNativePath;
37 
38 #define LV_NAME "loop-vectorize"
39 #define DEBUG_TYPE LV_NAME
40 
41 bool VPRecipeBase::mayWriteToMemory() const {
42   switch (getVPDefID()) {
43   case VPWidenMemoryInstructionSC: {
44     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
45   }
46   case VPReplicateSC:
47   case VPWidenCallSC:
48     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
49         ->mayWriteToMemory();
50   case VPBranchOnMaskSC:
51     return false;
52   case VPWidenIntOrFpInductionSC:
53   case VPWidenCanonicalIVSC:
54   case VPWidenPHISC:
55   case VPBlendSC:
56   case VPWidenSC:
57   case VPWidenGEPSC:
58   case VPReductionSC:
59   case VPWidenSelectSC: {
60     const Instruction *I =
61         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
62     (void)I;
63     assert((!I || !I->mayWriteToMemory()) &&
64            "underlying instruction may write to memory");
65     return false;
66   }
67   default:
68     return true;
69   }
70 }
71 
72 bool VPRecipeBase::mayReadFromMemory() const {
73   switch (getVPDefID()) {
74   case VPWidenMemoryInstructionSC: {
75     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
76   }
77   case VPReplicateSC:
78   case VPWidenCallSC:
79     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
80         ->mayReadFromMemory();
81   case VPBranchOnMaskSC:
82     return false;
83   case VPWidenIntOrFpInductionSC:
84   case VPWidenCanonicalIVSC:
85   case VPWidenPHISC:
86   case VPBlendSC:
87   case VPWidenSC:
88   case VPWidenGEPSC:
89   case VPReductionSC:
90   case VPWidenSelectSC: {
91     const Instruction *I =
92         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
93     (void)I;
94     assert((!I || !I->mayReadFromMemory()) &&
95            "underlying instruction may read from memory");
96     return false;
97   }
98   default:
99     return true;
100   }
101 }
102 
103 bool VPRecipeBase::mayHaveSideEffects() const {
104   switch (getVPDefID()) {
105   case VPWidenIntOrFpInductionSC:
106   case VPWidenPointerInductionSC:
107   case VPWidenCanonicalIVSC:
108   case VPWidenPHISC:
109   case VPBlendSC:
110   case VPWidenSC:
111   case VPWidenGEPSC:
112   case VPReductionSC:
113   case VPWidenSelectSC:
114   case VPScalarIVStepsSC: {
115     const Instruction *I =
116         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
117     (void)I;
118     assert((!I || !I->mayHaveSideEffects()) &&
119            "underlying instruction has side-effects");
120     return false;
121   }
122   case VPReplicateSC: {
123     auto *R = cast<VPReplicateRecipe>(this);
124     return R->getUnderlyingInstr()->mayHaveSideEffects();
125   }
126   default:
127     return true;
128   }
129 }
130 
131 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
132   auto Lane = VPLane::getLastLaneForVF(State.VF);
133   VPValue *ExitValue = getOperand(0);
134   if (Plan.isUniformAfterVectorization(ExitValue))
135     Lane = VPLane::getFirstLane();
136   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
137                    State.Builder.GetInsertBlock());
138 }
139 
140 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
141   assert(!Parent && "Recipe already in some VPBasicBlock");
142   assert(InsertPos->getParent() &&
143          "Insertion position not in any VPBasicBlock");
144   Parent = InsertPos->getParent();
145   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
146 }
147 
148 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
149                                 iplist<VPRecipeBase>::iterator I) {
150   assert(!Parent && "Recipe already in some VPBasicBlock");
151   assert(I == BB.end() || I->getParent() == &BB);
152   Parent = &BB;
153   BB.getRecipeList().insert(I, this);
154 }
155 
156 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
157   assert(!Parent && "Recipe already in some VPBasicBlock");
158   assert(InsertPos->getParent() &&
159          "Insertion position not in any VPBasicBlock");
160   Parent = InsertPos->getParent();
161   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
162 }
163 
164 void VPRecipeBase::removeFromParent() {
165   assert(getParent() && "Recipe not in any VPBasicBlock");
166   getParent()->getRecipeList().remove(getIterator());
167   Parent = nullptr;
168 }
169 
170 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
171   assert(getParent() && "Recipe not in any VPBasicBlock");
172   return getParent()->getRecipeList().erase(getIterator());
173 }
174 
/// Unlink this recipe from its current block and re-insert it immediately
/// after \p InsertPos, adopting InsertPos's parent block.
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}
179 
/// Unlink this recipe from its current block and re-insert it into \p BB at
/// position \p I.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}
185 
/// Generate the IR for unroll part \p Part of this VPInstruction at the
/// current insert point of State.Builder. Results (when the opcode produces
/// one) are recorded via State.set(this, ..., Part).
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  // All LLVM binary operators are handled uniformly: fetch both widened
  // operands for this part and emit the corresponding binop.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    State.set(this, V, Part);
    return;
  }

  // Remaining opcodes are VPlan-specific (plus Select) and handled one by
  // one.
  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC, Name);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);

    // Emit llvm.get.active.lane.mask, producing a <VF x i1> predicate.
    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, Name);
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      // Scalar (VF == 1) case: the spliced value is simply the previous one.
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
                Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    // The increment is computed once for part 0; all other parts reuse that
    // value, since the canonical IV advances by VF * UF per iteration.
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }

  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW: {
    bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    // Part 0 starts at the IV itself; no increment needed.
    if (Part == 0) {
      State.set(this, IV, Part);
      break;
    }

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    // A terminator; only emitted once, for part 0.
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    // Successor 0 (the forward edge) is filled in once the target block
    // exists; CreateCondBr required a placeholder above.
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    // A terminator; only emitted once, for part 0.
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
345 
346 void VPInstruction::execute(VPTransformState &State) {
347   assert(!State.Instance && "VPInstruction executing an Instance");
348   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
349   State.Builder.setFastMathFlags(FMF);
350   for (unsigned Part = 0; Part < State.UF; ++Part)
351     generateInstruction(State, Part);
352 }
353 
354 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPInstruction to dbgs() using a slot tracker built from the
/// enclosing plan (debug builds only).
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}
359 
/// Print a textual form of this VPInstruction: "EMIT [result =] <mnemonic>
/// <fast-math flags> <operands> [, !dbg <loc>]". VPlan-specific opcodes get a
/// dedicated mnemonic; everything else falls back to the LLVM opcode name.
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  // Opcodes without a result (e.g. branches) omit the "<res> = " prefix.
  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part + ";
    break;
  case VPInstruction::CanonicalIVIncrementForPartNUW:
    O << "VF * Part +(nuw) ";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  // Append the debug location, if one is attached.
  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
422 #endif
423 
/// Set the fast-math flags applied when this VPInstruction is code-generated.
/// Only meaningful — and only permitted — for floating-point opcodes.
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}
433 
434 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a widened call recipe as "WIDEN-CALL [res = ] call @callee(...)".
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  // Void calls have no result to print; otherwise emit "<res> = ".
  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}
451 
/// Print a widened select recipe as "WIDEN-SELECT res = select cond, t, f",
/// noting when the condition is loop invariant.
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (InvariantCond ? " (condition is loop invariant)" : "");
}
464 #endif
465 
466 void VPWidenSelectRecipe::execute(VPTransformState &State) {
467   auto &I = *cast<SelectInst>(getUnderlyingInstr());
468   State.setDebugLocFromInst(&I);
469 
470   // The condition can be loop invariant but still defined inside the
471   // loop. This means that we can't just use the original 'cond' value.
472   // We have to take the 'vectorized' value and pick the first lane.
473   // Instcombine will make this a no-op.
474   auto *InvarCond =
475       InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
476 
477   for (unsigned Part = 0; Part < State.UF; ++Part) {
478     Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
479     Value *Op0 = State.get(getOperand(1), Part);
480     Value *Op1 = State.get(getOperand(2), Part);
481     Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
482     State.set(this, Sel, Part);
483     State.addMetadata(Sel, &I);
484   }
485 }
486 
/// Widen the underlying scalar instruction: emit one vector (or, for VF == 1,
/// scalar) copy per unroll part. Handles unary/binary ops, freeze, compares
/// and casts; instructions with dedicated recipes (calls, branches, phis,
/// GEPs, selects) must not reach this recipe.
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // Gather the widened operands for this part.
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        // Carry over IR flags (nuw/nsw, exact, FMF, ...) from the scalar op.
        VecOp->copyIRFlags(&I);

        // If the instruction is vectorized and was in a basic block that needed
        // predication, we can't propagate poison-generating flags (nuw/nsw,
        // exact, etc.). The control flow has been linearized and the
        // instruction is no longer guarded by the predicate, which could make
        // the flag properties to no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }

    break;
  }
  case Instruction::Freeze: {
    // Widen freeze: freeze each part's vector operand.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }

    break;
  }

  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);

    // Vectorize casts: the destination type is the vector-widened form of the
    // scalar destination type (or the scalar type itself when VF == 1).
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}
615 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a widen recipe as "WIDEN res = <opcode> <operands>".
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
  printOperands(O, SlotTracker);
}
623 
/// Print a widened int/FP induction. When a truncate is involved, the output
/// uses DOT-escaped line breaks ("\\l\"" etc.) so it renders as multiple
/// lines in VPlan DOT dumps.
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\"  ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
638 #endif
639 
640 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
641   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
642   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
643   return StartC && StartC->isZero() && StepC && StepC->isOne();
644 }
645 
/// Return the canonical IV phi this steps-recipe is based on; it is always
/// the recipe's first operand.
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
  return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}
649 
650 bool VPScalarIVStepsRecipe::isCanonical() const {
651   auto *CanIV = getCanonicalIV();
652   // The start value of the steps-recipe must match the start value of the
653   // canonical induction and it must step by 1.
654   if (CanIV->getStartValue() != getStartValue())
655     return false;
656   auto *StepVPV = getStepValue();
657   if (StepVPV->getDef())
658     return false;
659   auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
660   return StepC && StepC->isOne();
661 }
662 
663 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
664 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
665                                   VPSlotTracker &SlotTracker) const {
666   O << Indent;
667   printAsOperand(O, SlotTracker);
668   O << Indent << "= SCALAR-STEPS ";
669   printOperands(O, SlotTracker);
670 }
671 #endif
672 
/// Widen a GEP. Produces a vector of pointers when any operand is
/// loop-varying; with all-invariant operands it instead broadcasts a clone of
/// the original scalar GEP. Per-operand invariance is tracked by
/// IsPtrLoopInvariant / IsIndexLoopInvariant.
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
    auto *Clone = State.Builder.Insert(GEP->clone());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = IsPtrLoopInvariant
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (IsIndexLoopInvariant[I - 1])
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // If the GEP instruction is vectorized and was in a basic block that
      // needed predication, we can't propagate the poison-generating 'inbounds'
      // flag. The control flow has been linearized and the GEP is no longer
      // guarded by the predicate, which could make the 'inbounds' properties to
      // no longer hold.
      bool IsInBounds =
          GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", IsInBounds);
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}
745 
746 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a widened GEP, prefixing it with the invariance of the pointer
/// ("Inv"/"Var") and of each index ("[Inv]"/"[Var]").
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (IsPtrLoopInvariant ? "Inv" : "Var");
  size_t IndicesNumber = IsIndexLoopInvariant.size();
  for (size_t I = 0; I < IndicesNumber; ++I)
    O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr ";
  printOperands(O, SlotTracker);
}
760 #endif
761 
762 void VPBlendRecipe::execute(VPTransformState &State) {
763   State.setDebugLocFromInst(Phi);
764   // We know that all PHIs in non-header blocks are converted into
765   // selects, so we don't have to worry about the insertion order and we
766   // can just use the builder.
767   // At this point we generate the predication tree. There may be
768   // duplications since this is a simple recursive scan, but future
769   // optimizations will clean it up.
770 
771   unsigned NumIncoming = getNumIncomingValues();
772 
773   // Generate a sequence of selects of the form:
774   // SELECT(Mask3, In3,
775   //        SELECT(Mask2, In2,
776   //               SELECT(Mask1, In1,
777   //                      In0)))
778   // Note that Mask0 is never used: lanes for which no path reaches this phi and
779   // are essentially undef are taken from In0.
780  VectorParts Entry(State.UF);
781   for (unsigned In = 0; In < NumIncoming; ++In) {
782     for (unsigned Part = 0; Part < State.UF; ++Part) {
783       // We might have single edge PHIs (blocks) - use an identity
784       // 'select' for the first PHI operand.
785       Value *In0 = State.get(getIncomingValue(In), Part);
786       if (In == 0)
787         Entry[Part] = In0; // Initialize with the first incoming value.
788       else {
789         // Select between the current value and the previous incoming edge
790         // based on the incoming mask.
791         Value *Cond = State.get(getMask(In), Part);
792         Entry[Part] =
793             State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
794       }
795     }
796   }
797   for (unsigned Part = 0; Part < State.UF; ++Part)
798     State.set(this, Entry[Part], Part);
799 }
800 
801 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a blend recipe as "BLEND phi = in0/mask0 in1/mask1 ...", or just
/// the single incoming value for degenerate single-predecessor phis.
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    // Print each incoming value together with its guarding mask.
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}
821 
/// Print a reduction recipe as
/// "REDUCE res = chain + reduce.<op> (vec[, cond])", including fast-math
/// flags and a note when the final value is stored outside the loop.
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  // FP reductions carry fast-math flags worth showing.
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  // A conditional (masked) reduction also prints its condition operand.
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
842 
843 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
844                               VPSlotTracker &SlotTracker) const {
845   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
846 
847   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
848     printAsOperand(O, SlotTracker);
849     O << " = ";
850   }
851   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
852     O << "call @" << CB->getCalledFunction()->getName() << "(";
853     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
854                     O, [&O, &SlotTracker](VPValue *Op) {
855                       Op->printAsOperand(O, SlotTracker);
856                     });
857     O << ")";
858   } else {
859     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
860     printOperands(O, SlotTracker);
861   }
862 
863   if (AlsoPack)
864     O << " (S->V)";
865 }
866 
// Print this recipe as "PHI-PREDICATED-INSTRUCTION <res> = <operands>".
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}
874 
875 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
876                                            VPSlotTracker &SlotTracker) const {
877   O << Indent << "WIDEN ";
878 
879   if (!isStore()) {
880     getVPSingleValue()->printAsOperand(O, SlotTracker);
881     O << " = ";
882   }
883   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
884 
885   printOperands(O, SlotTracker);
886 }
887 #endif
888 
889 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
890   Value *Start = getStartValue()->getLiveInIRValue();
891   PHINode *EntryPart = PHINode::Create(
892       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
893 
894   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
895   EntryPart->addIncoming(Start, VectorPH);
896   EntryPart->setDebugLoc(DL);
897   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
898     State.set(this, EntryPart, Part);
899 }
900 
901 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <res> = CANONICAL-INDUCTION".
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
908 #endif
909 
910 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
911   bool IsUniform = vputils::onlyFirstLaneUsed(this);
912   return all_of(users(),
913                 [&](const VPUser *U) { return U->usesScalars(this); }) &&
914          (IsUniform || !VF.isScalable());
915 }
916 
917 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as
// "EMIT <res> = WIDEN-POINTER-INDUCTION <start>, <step SCEV>".
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  // The step is taken from the induction descriptor, not a VPValue operand.
  O << ", " << *IndDesc.getStep();
}
926 #endif
927 
928 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
929   assert(!State.Instance && "cannot be used in per-lane");
930   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
931   SCEVExpander Exp(SE, DL, "induction");
932 
933   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
934                                  &*State.Builder.GetInsertPoint());
935 
936   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
937     State.set(this, Res, Part);
938 }
939 
940 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <res> = EXPAND SCEV <expr>".
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
947 #endif
948 
949 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
950   Value *CanonicalIV = State.get(getOperand(0), 0);
951   Type *STy = CanonicalIV->getType();
952   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
953   ElementCount VF = State.VF;
954   Value *VStart = VF.isScalar()
955                       ? CanonicalIV
956                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
957   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
958     Value *VStep = createStepForVF(Builder, STy, VF, Part);
959     if (VF.isVector()) {
960       VStep = Builder.CreateVectorSplat(VF, VStep);
961       VStep =
962           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
963     }
964     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
965     State.set(this, CanonicalVectorIV, Part);
966   }
967 }
968 
969 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "EMIT <res> = WIDEN-CANONICAL-INDUCTION <operands>".
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
977 #endif
978 
// Create the header phi carrying a first-order recurrence. Its preheader
// incoming value is a vector whose last lane holds the scalar initial value
// (the remaining lanes are poison); the loop-carried incoming value is
// added later, after the recurrence's source has been generated.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    // Insert the scalar start value into the last lane of a poison vector.
    // The insertelement is emitted in the preheader; the runtime VF is
    // queried because the last-lane index of a scalable vector is not a
    // compile-time constant.
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence. Only part 0 is set; the
  // recurrence uses a single phi regardless of the unroll factor.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}
1006 
1007 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "FIRST-ORDER-RECURRENCE-PHI <res> = phi <operands>".
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1015 #endif
1016 
// Create the header phi(s) for a reduction, seeding each with the proper
// start/identity value from the preheader. The loop-carried incoming values
// are added later, once the reduction operations have been generated.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  // In-loop reductions (and scalar VF) keep the phi scalar; otherwise the phi
  // type is widened by VF.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi regardless of the unroll factor.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity: for vector
    // phis, splat it across all lanes in the preheader.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    // Other reduction kinds have a neutral identity element; for vector
    // phis, splat the identity and place the start value into lane 0 of the
    // first part's incoming vector.
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part; the other parts start at the identity.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
1080 
1081 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "WIDEN-REDUCTION-PHI <res> = phi <operands>".
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1090 #endif
1091 
// Widen a non-induction phi in the VPlan-native path. Only an empty vector
// phi is created here; its incoming values are set at the end of vector
// code generation.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader. (If several operands matched, the last one
  // would win; non-header phis keep index 0.)
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  // The selected operand is only used to determine the type of the new phi.
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}
1117 
1118 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "WIDEN-PHI <res> = phi <operands>", or fall back to
// printing the original IR phi when not all incoming values are modeled.
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan  print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1137 #endif
1138 
1139 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1140 // remove VPActiveLaneMaskPHIRecipe.
1141 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
1142   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1143   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1144     Value *StartMask = State.get(getOperand(0), Part);
1145     PHINode *EntryPart =
1146         State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1147     EntryPart->addIncoming(StartMask, VectorPH);
1148     EntryPart->setDebugLoc(DL);
1149     State.set(this, EntryPart, Part);
1150   }
1151 }
1152 
1153 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print this recipe as "ACTIVE-LANE-MASK-PHI <res> = phi <operands>".
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
1162 #endif
1163