//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//
1303975b7fSFlorian Hahn
1403975b7fSFlorian Hahn #include "VPlan.h"
1503975b7fSFlorian Hahn #include "llvm/ADT/STLExtras.h"
1603975b7fSFlorian Hahn #include "llvm/ADT/SmallVector.h"
1703975b7fSFlorian Hahn #include "llvm/ADT/Twine.h"
1803975b7fSFlorian Hahn #include "llvm/Analysis/IVDescriptors.h"
1903975b7fSFlorian Hahn #include "llvm/IR/BasicBlock.h"
2003975b7fSFlorian Hahn #include "llvm/IR/IRBuilder.h"
2103975b7fSFlorian Hahn #include "llvm/IR/Instruction.h"
2203975b7fSFlorian Hahn #include "llvm/IR/Instructions.h"
2303975b7fSFlorian Hahn #include "llvm/IR/Type.h"
2403975b7fSFlorian Hahn #include "llvm/IR/Value.h"
2503975b7fSFlorian Hahn #include "llvm/Support/Casting.h"
2603975b7fSFlorian Hahn #include "llvm/Support/CommandLine.h"
2703975b7fSFlorian Hahn #include "llvm/Support/Debug.h"
2803975b7fSFlorian Hahn #include "llvm/Support/raw_ostream.h"
29225e3ec6SFlorian Hahn #include "llvm/Transforms/Utils/BasicBlockUtils.h"
3003975b7fSFlorian Hahn #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3103975b7fSFlorian Hahn #include <cassert>
3203975b7fSFlorian Hahn
3303975b7fSFlorian Hahn using namespace llvm;
3403975b7fSFlorian Hahn
355d135041SFlorian Hahn using VectorParts = SmallVector<Value *, 2>;
365d135041SFlorian Hahn
3703975b7fSFlorian Hahn extern cl::opt<bool> EnableVPlanNativePath;
3803975b7fSFlorian Hahn
3913ae2134SFlorian Hahn #define LV_NAME "loop-vectorize"
4013ae2134SFlorian Hahn #define DEBUG_TYPE LV_NAME
4113ae2134SFlorian Hahn
mayWriteToMemory() const4203975b7fSFlorian Hahn bool VPRecipeBase::mayWriteToMemory() const {
4303975b7fSFlorian Hahn switch (getVPDefID()) {
4403975b7fSFlorian Hahn case VPWidenMemoryInstructionSC: {
4503975b7fSFlorian Hahn return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
4603975b7fSFlorian Hahn }
4703975b7fSFlorian Hahn case VPReplicateSC:
4803975b7fSFlorian Hahn case VPWidenCallSC:
4903975b7fSFlorian Hahn return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
5003975b7fSFlorian Hahn ->mayWriteToMemory();
5103975b7fSFlorian Hahn case VPBranchOnMaskSC:
5203975b7fSFlorian Hahn return false;
5303975b7fSFlorian Hahn case VPWidenIntOrFpInductionSC:
5403975b7fSFlorian Hahn case VPWidenCanonicalIVSC:
5503975b7fSFlorian Hahn case VPWidenPHISC:
5603975b7fSFlorian Hahn case VPBlendSC:
5703975b7fSFlorian Hahn case VPWidenSC:
5803975b7fSFlorian Hahn case VPWidenGEPSC:
5903975b7fSFlorian Hahn case VPReductionSC:
6003975b7fSFlorian Hahn case VPWidenSelectSC: {
6103975b7fSFlorian Hahn const Instruction *I =
6203975b7fSFlorian Hahn dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
6303975b7fSFlorian Hahn (void)I;
6403975b7fSFlorian Hahn assert((!I || !I->mayWriteToMemory()) &&
6503975b7fSFlorian Hahn "underlying instruction may write to memory");
6603975b7fSFlorian Hahn return false;
6703975b7fSFlorian Hahn }
6803975b7fSFlorian Hahn default:
6903975b7fSFlorian Hahn return true;
7003975b7fSFlorian Hahn }
7103975b7fSFlorian Hahn }
7203975b7fSFlorian Hahn
mayReadFromMemory() const7303975b7fSFlorian Hahn bool VPRecipeBase::mayReadFromMemory() const {
7403975b7fSFlorian Hahn switch (getVPDefID()) {
7503975b7fSFlorian Hahn case VPWidenMemoryInstructionSC: {
7603975b7fSFlorian Hahn return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
7703975b7fSFlorian Hahn }
7803975b7fSFlorian Hahn case VPReplicateSC:
7903975b7fSFlorian Hahn case VPWidenCallSC:
8003975b7fSFlorian Hahn return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
8103975b7fSFlorian Hahn ->mayReadFromMemory();
8203975b7fSFlorian Hahn case VPBranchOnMaskSC:
8303975b7fSFlorian Hahn return false;
8403975b7fSFlorian Hahn case VPWidenIntOrFpInductionSC:
8503975b7fSFlorian Hahn case VPWidenCanonicalIVSC:
8603975b7fSFlorian Hahn case VPWidenPHISC:
8703975b7fSFlorian Hahn case VPBlendSC:
8803975b7fSFlorian Hahn case VPWidenSC:
8903975b7fSFlorian Hahn case VPWidenGEPSC:
9003975b7fSFlorian Hahn case VPReductionSC:
9103975b7fSFlorian Hahn case VPWidenSelectSC: {
9203975b7fSFlorian Hahn const Instruction *I =
9303975b7fSFlorian Hahn dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
9403975b7fSFlorian Hahn (void)I;
9503975b7fSFlorian Hahn assert((!I || !I->mayReadFromMemory()) &&
9603975b7fSFlorian Hahn "underlying instruction may read from memory");
9703975b7fSFlorian Hahn return false;
9803975b7fSFlorian Hahn }
9903975b7fSFlorian Hahn default:
10003975b7fSFlorian Hahn return true;
10103975b7fSFlorian Hahn }
10203975b7fSFlorian Hahn }
10303975b7fSFlorian Hahn
mayHaveSideEffects() const10403975b7fSFlorian Hahn bool VPRecipeBase::mayHaveSideEffects() const {
10503975b7fSFlorian Hahn switch (getVPDefID()) {
10603975b7fSFlorian Hahn case VPWidenIntOrFpInductionSC:
10703975b7fSFlorian Hahn case VPWidenPointerInductionSC:
10803975b7fSFlorian Hahn case VPWidenCanonicalIVSC:
10903975b7fSFlorian Hahn case VPWidenPHISC:
11003975b7fSFlorian Hahn case VPBlendSC:
11103975b7fSFlorian Hahn case VPWidenSC:
11203975b7fSFlorian Hahn case VPWidenGEPSC:
11303975b7fSFlorian Hahn case VPReductionSC:
11403975b7fSFlorian Hahn case VPWidenSelectSC:
11503975b7fSFlorian Hahn case VPScalarIVStepsSC: {
11603975b7fSFlorian Hahn const Instruction *I =
11703975b7fSFlorian Hahn dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
11803975b7fSFlorian Hahn (void)I;
11903975b7fSFlorian Hahn assert((!I || !I->mayHaveSideEffects()) &&
12003975b7fSFlorian Hahn "underlying instruction has side-effects");
12103975b7fSFlorian Hahn return false;
12203975b7fSFlorian Hahn }
12303975b7fSFlorian Hahn case VPReplicateSC: {
12403975b7fSFlorian Hahn auto *R = cast<VPReplicateRecipe>(this);
12503975b7fSFlorian Hahn return R->getUnderlyingInstr()->mayHaveSideEffects();
12603975b7fSFlorian Hahn }
12703975b7fSFlorian Hahn default:
12803975b7fSFlorian Hahn return true;
12903975b7fSFlorian Hahn }
13003975b7fSFlorian Hahn }
13103975b7fSFlorian Hahn
fixPhi(VPlan & Plan,VPTransformState & State)13203975b7fSFlorian Hahn void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
13303975b7fSFlorian Hahn auto Lane = VPLane::getLastLaneForVF(State.VF);
13403975b7fSFlorian Hahn VPValue *ExitValue = getOperand(0);
13503975b7fSFlorian Hahn if (Plan.isUniformAfterVectorization(ExitValue))
13603975b7fSFlorian Hahn Lane = VPLane::getFirstLane();
13703975b7fSFlorian Hahn Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
13803975b7fSFlorian Hahn State.Builder.GetInsertBlock());
13903975b7fSFlorian Hahn }
14003975b7fSFlorian Hahn
insertBefore(VPRecipeBase * InsertPos)14103975b7fSFlorian Hahn void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
14203975b7fSFlorian Hahn assert(!Parent && "Recipe already in some VPBasicBlock");
14303975b7fSFlorian Hahn assert(InsertPos->getParent() &&
14403975b7fSFlorian Hahn "Insertion position not in any VPBasicBlock");
14503975b7fSFlorian Hahn Parent = InsertPos->getParent();
14603975b7fSFlorian Hahn Parent->getRecipeList().insert(InsertPos->getIterator(), this);
14703975b7fSFlorian Hahn }
14803975b7fSFlorian Hahn
insertBefore(VPBasicBlock & BB,iplist<VPRecipeBase>::iterator I)14903975b7fSFlorian Hahn void VPRecipeBase::insertBefore(VPBasicBlock &BB,
15003975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator I) {
15103975b7fSFlorian Hahn assert(!Parent && "Recipe already in some VPBasicBlock");
15203975b7fSFlorian Hahn assert(I == BB.end() || I->getParent() == &BB);
15303975b7fSFlorian Hahn Parent = &BB;
15403975b7fSFlorian Hahn BB.getRecipeList().insert(I, this);
15503975b7fSFlorian Hahn }
15603975b7fSFlorian Hahn
insertAfter(VPRecipeBase * InsertPos)15703975b7fSFlorian Hahn void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
15803975b7fSFlorian Hahn assert(!Parent && "Recipe already in some VPBasicBlock");
15903975b7fSFlorian Hahn assert(InsertPos->getParent() &&
16003975b7fSFlorian Hahn "Insertion position not in any VPBasicBlock");
16103975b7fSFlorian Hahn Parent = InsertPos->getParent();
16203975b7fSFlorian Hahn Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
16303975b7fSFlorian Hahn }
16403975b7fSFlorian Hahn
removeFromParent()16503975b7fSFlorian Hahn void VPRecipeBase::removeFromParent() {
16603975b7fSFlorian Hahn assert(getParent() && "Recipe not in any VPBasicBlock");
16703975b7fSFlorian Hahn getParent()->getRecipeList().remove(getIterator());
16803975b7fSFlorian Hahn Parent = nullptr;
16903975b7fSFlorian Hahn }
17003975b7fSFlorian Hahn
eraseFromParent()17103975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
17203975b7fSFlorian Hahn assert(getParent() && "Recipe not in any VPBasicBlock");
17303975b7fSFlorian Hahn return getParent()->getRecipeList().erase(getIterator());
17403975b7fSFlorian Hahn }
17503975b7fSFlorian Hahn
moveAfter(VPRecipeBase * InsertPos)17603975b7fSFlorian Hahn void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
17703975b7fSFlorian Hahn removeFromParent();
17803975b7fSFlorian Hahn insertAfter(InsertPos);
17903975b7fSFlorian Hahn }
18003975b7fSFlorian Hahn
moveBefore(VPBasicBlock & BB,iplist<VPRecipeBase>::iterator I)18103975b7fSFlorian Hahn void VPRecipeBase::moveBefore(VPBasicBlock &BB,
18203975b7fSFlorian Hahn iplist<VPRecipeBase>::iterator I) {
18303975b7fSFlorian Hahn removeFromParent();
18403975b7fSFlorian Hahn insertBefore(BB, I);
18503975b7fSFlorian Hahn }
18603975b7fSFlorian Hahn
generateInstruction(VPTransformState & State,unsigned Part)18703975b7fSFlorian Hahn void VPInstruction::generateInstruction(VPTransformState &State,
18803975b7fSFlorian Hahn unsigned Part) {
18903975b7fSFlorian Hahn IRBuilderBase &Builder = State.Builder;
19003975b7fSFlorian Hahn Builder.SetCurrentDebugLocation(DL);
19103975b7fSFlorian Hahn
19203975b7fSFlorian Hahn if (Instruction::isBinaryOp(getOpcode())) {
19303975b7fSFlorian Hahn Value *A = State.get(getOperand(0), Part);
19403975b7fSFlorian Hahn Value *B = State.get(getOperand(1), Part);
19502d6950dSDavid Sherwood Value *V =
19602d6950dSDavid Sherwood Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
19703975b7fSFlorian Hahn State.set(this, V, Part);
19803975b7fSFlorian Hahn return;
19903975b7fSFlorian Hahn }
20003975b7fSFlorian Hahn
20103975b7fSFlorian Hahn switch (getOpcode()) {
20203975b7fSFlorian Hahn case VPInstruction::Not: {
20303975b7fSFlorian Hahn Value *A = State.get(getOperand(0), Part);
20402d6950dSDavid Sherwood Value *V = Builder.CreateNot(A, Name);
20503975b7fSFlorian Hahn State.set(this, V, Part);
20603975b7fSFlorian Hahn break;
20703975b7fSFlorian Hahn }
20803975b7fSFlorian Hahn case VPInstruction::ICmpULE: {
20903975b7fSFlorian Hahn Value *IV = State.get(getOperand(0), Part);
21003975b7fSFlorian Hahn Value *TC = State.get(getOperand(1), Part);
21102d6950dSDavid Sherwood Value *V = Builder.CreateICmpULE(IV, TC, Name);
21203975b7fSFlorian Hahn State.set(this, V, Part);
21303975b7fSFlorian Hahn break;
21403975b7fSFlorian Hahn }
21503975b7fSFlorian Hahn case Instruction::Select: {
21603975b7fSFlorian Hahn Value *Cond = State.get(getOperand(0), Part);
21703975b7fSFlorian Hahn Value *Op1 = State.get(getOperand(1), Part);
21803975b7fSFlorian Hahn Value *Op2 = State.get(getOperand(2), Part);
21902d6950dSDavid Sherwood Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
22003975b7fSFlorian Hahn State.set(this, V, Part);
22103975b7fSFlorian Hahn break;
22203975b7fSFlorian Hahn }
22303975b7fSFlorian Hahn case VPInstruction::ActiveLaneMask: {
22403975b7fSFlorian Hahn // Get first lane of vector induction variable.
22503975b7fSFlorian Hahn Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
22603975b7fSFlorian Hahn // Get the original loop tripcount.
22703975b7fSFlorian Hahn Value *ScalarTC = State.get(getOperand(1), Part);
22803975b7fSFlorian Hahn
22903975b7fSFlorian Hahn auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
23003975b7fSFlorian Hahn auto *PredTy = VectorType::get(Int1Ty, State.VF);
23103975b7fSFlorian Hahn Instruction *Call = Builder.CreateIntrinsic(
23203975b7fSFlorian Hahn Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
23302d6950dSDavid Sherwood {VIVElem0, ScalarTC}, nullptr, Name);
23403975b7fSFlorian Hahn State.set(this, Call, Part);
23503975b7fSFlorian Hahn break;
23603975b7fSFlorian Hahn }
23703975b7fSFlorian Hahn case VPInstruction::FirstOrderRecurrenceSplice: {
23803975b7fSFlorian Hahn // Generate code to combine the previous and current values in vector v3.
23903975b7fSFlorian Hahn //
24003975b7fSFlorian Hahn // vector.ph:
24103975b7fSFlorian Hahn // v_init = vector(..., ..., ..., a[-1])
24203975b7fSFlorian Hahn // br vector.body
24303975b7fSFlorian Hahn //
24403975b7fSFlorian Hahn // vector.body
24503975b7fSFlorian Hahn // i = phi [0, vector.ph], [i+4, vector.body]
24603975b7fSFlorian Hahn // v1 = phi [v_init, vector.ph], [v2, vector.body]
24703975b7fSFlorian Hahn // v2 = a[i, i+1, i+2, i+3];
24803975b7fSFlorian Hahn // v3 = vector(v1(3), v2(0, 1, 2))
24903975b7fSFlorian Hahn
25003975b7fSFlorian Hahn // For the first part, use the recurrence phi (v1), otherwise v2.
25103975b7fSFlorian Hahn auto *V1 = State.get(getOperand(0), 0);
25203975b7fSFlorian Hahn Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
25303975b7fSFlorian Hahn if (!PartMinus1->getType()->isVectorTy()) {
25403975b7fSFlorian Hahn State.set(this, PartMinus1, Part);
25503975b7fSFlorian Hahn } else {
25603975b7fSFlorian Hahn Value *V2 = State.get(getOperand(1), Part);
25702d6950dSDavid Sherwood State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
25802d6950dSDavid Sherwood Part);
25903975b7fSFlorian Hahn }
26003975b7fSFlorian Hahn break;
26103975b7fSFlorian Hahn }
26203975b7fSFlorian Hahn case VPInstruction::CanonicalIVIncrement:
26303975b7fSFlorian Hahn case VPInstruction::CanonicalIVIncrementNUW: {
26403975b7fSFlorian Hahn Value *Next = nullptr;
26503975b7fSFlorian Hahn if (Part == 0) {
26603975b7fSFlorian Hahn bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
26703975b7fSFlorian Hahn auto *Phi = State.get(getOperand(0), 0);
26803975b7fSFlorian Hahn // The loop step is equal to the vectorization factor (num of SIMD
26903975b7fSFlorian Hahn // elements) times the unroll factor (num of SIMD instructions).
27003975b7fSFlorian Hahn Value *Step =
27103975b7fSFlorian Hahn createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
27202d6950dSDavid Sherwood Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
27303975b7fSFlorian Hahn } else {
27403975b7fSFlorian Hahn Next = State.get(this, 0);
27503975b7fSFlorian Hahn }
27603975b7fSFlorian Hahn
27703975b7fSFlorian Hahn State.set(this, Next, Part);
27803975b7fSFlorian Hahn break;
27903975b7fSFlorian Hahn }
28003fee671SDavid Sherwood
28103fee671SDavid Sherwood case VPInstruction::CanonicalIVIncrementForPart:
28203fee671SDavid Sherwood case VPInstruction::CanonicalIVIncrementForPartNUW: {
28303fee671SDavid Sherwood bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
28403fee671SDavid Sherwood auto *IV = State.get(getOperand(0), VPIteration(0, 0));
28503fee671SDavid Sherwood if (Part == 0) {
28603fee671SDavid Sherwood State.set(this, IV, Part);
28703fee671SDavid Sherwood break;
28803fee671SDavid Sherwood }
28903fee671SDavid Sherwood
29003fee671SDavid Sherwood // The canonical IV is incremented by the vectorization factor (num of SIMD
29103fee671SDavid Sherwood // elements) times the unroll part.
29203fee671SDavid Sherwood Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
29303fee671SDavid Sherwood Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
29403fee671SDavid Sherwood State.set(this, Next, Part);
29503fee671SDavid Sherwood break;
29603fee671SDavid Sherwood }
29703975b7fSFlorian Hahn case VPInstruction::BranchOnCond: {
29803975b7fSFlorian Hahn if (Part != 0)
29903975b7fSFlorian Hahn break;
30003975b7fSFlorian Hahn
30103975b7fSFlorian Hahn Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
30203975b7fSFlorian Hahn VPRegionBlock *ParentRegion = getParent()->getParent();
30303975b7fSFlorian Hahn VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
30403975b7fSFlorian Hahn
30503975b7fSFlorian Hahn // Replace the temporary unreachable terminator with a new conditional
30603975b7fSFlorian Hahn // branch, hooking it up to backward destination for exiting blocks now and
30703975b7fSFlorian Hahn // to forward destination(s) later when they are created.
30803975b7fSFlorian Hahn BranchInst *CondBr =
30903975b7fSFlorian Hahn Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
31003975b7fSFlorian Hahn
31103975b7fSFlorian Hahn if (getParent()->isExiting())
31203975b7fSFlorian Hahn CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
31303975b7fSFlorian Hahn
31403975b7fSFlorian Hahn CondBr->setSuccessor(0, nullptr);
31503975b7fSFlorian Hahn Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
31603975b7fSFlorian Hahn break;
31703975b7fSFlorian Hahn }
31803975b7fSFlorian Hahn case VPInstruction::BranchOnCount: {
31903975b7fSFlorian Hahn if (Part != 0)
32003975b7fSFlorian Hahn break;
32103975b7fSFlorian Hahn // First create the compare.
32203975b7fSFlorian Hahn Value *IV = State.get(getOperand(0), Part);
32303975b7fSFlorian Hahn Value *TC = State.get(getOperand(1), Part);
32403975b7fSFlorian Hahn Value *Cond = Builder.CreateICmpEQ(IV, TC);
32503975b7fSFlorian Hahn
32603975b7fSFlorian Hahn // Now create the branch.
32703975b7fSFlorian Hahn auto *Plan = getParent()->getPlan();
32803975b7fSFlorian Hahn VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
32903975b7fSFlorian Hahn VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
33003975b7fSFlorian Hahn
33103975b7fSFlorian Hahn // Replace the temporary unreachable terminator with a new conditional
33203975b7fSFlorian Hahn // branch, hooking it up to backward destination (the header) now and to the
33303975b7fSFlorian Hahn // forward destination (the exit/middle block) later when it is created.
33403975b7fSFlorian Hahn // Note that CreateCondBr expects a valid BB as first argument, so we need
33503975b7fSFlorian Hahn // to set it to nullptr later.
33603975b7fSFlorian Hahn BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
33703975b7fSFlorian Hahn State.CFG.VPBB2IRBB[Header]);
33803975b7fSFlorian Hahn CondBr->setSuccessor(0, nullptr);
33903975b7fSFlorian Hahn Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
34003975b7fSFlorian Hahn break;
34103975b7fSFlorian Hahn }
34203975b7fSFlorian Hahn default:
34303975b7fSFlorian Hahn llvm_unreachable("Unsupported opcode for instruction");
34403975b7fSFlorian Hahn }
34503975b7fSFlorian Hahn }
34603975b7fSFlorian Hahn
execute(VPTransformState & State)34703975b7fSFlorian Hahn void VPInstruction::execute(VPTransformState &State) {
34803975b7fSFlorian Hahn assert(!State.Instance && "VPInstruction executing an Instance");
34903975b7fSFlorian Hahn IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
35003975b7fSFlorian Hahn State.Builder.setFastMathFlags(FMF);
35103975b7fSFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part)
35203975b7fSFlorian Hahn generateInstruction(State, Part);
35303975b7fSFlorian Hahn }
35403975b7fSFlorian Hahn
35503975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const35603975b7fSFlorian Hahn void VPInstruction::dump() const {
35703975b7fSFlorian Hahn VPSlotTracker SlotTracker(getParent()->getPlan());
35803975b7fSFlorian Hahn print(dbgs(), "", SlotTracker);
35903975b7fSFlorian Hahn }
36003975b7fSFlorian Hahn
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const36103975b7fSFlorian Hahn void VPInstruction::print(raw_ostream &O, const Twine &Indent,
36203975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
36303975b7fSFlorian Hahn O << Indent << "EMIT ";
36403975b7fSFlorian Hahn
36503975b7fSFlorian Hahn if (hasResult()) {
36603975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
36703975b7fSFlorian Hahn O << " = ";
36803975b7fSFlorian Hahn }
36903975b7fSFlorian Hahn
37003975b7fSFlorian Hahn switch (getOpcode()) {
37103975b7fSFlorian Hahn case VPInstruction::Not:
37203975b7fSFlorian Hahn O << "not";
37303975b7fSFlorian Hahn break;
37403975b7fSFlorian Hahn case VPInstruction::ICmpULE:
37503975b7fSFlorian Hahn O << "icmp ule";
37603975b7fSFlorian Hahn break;
37703975b7fSFlorian Hahn case VPInstruction::SLPLoad:
37803975b7fSFlorian Hahn O << "combined load";
37903975b7fSFlorian Hahn break;
38003975b7fSFlorian Hahn case VPInstruction::SLPStore:
38103975b7fSFlorian Hahn O << "combined store";
38203975b7fSFlorian Hahn break;
38303975b7fSFlorian Hahn case VPInstruction::ActiveLaneMask:
38403975b7fSFlorian Hahn O << "active lane mask";
38503975b7fSFlorian Hahn break;
38603975b7fSFlorian Hahn case VPInstruction::FirstOrderRecurrenceSplice:
38703975b7fSFlorian Hahn O << "first-order splice";
38803975b7fSFlorian Hahn break;
38903975b7fSFlorian Hahn case VPInstruction::CanonicalIVIncrement:
39003975b7fSFlorian Hahn O << "VF * UF + ";
39103975b7fSFlorian Hahn break;
39203975b7fSFlorian Hahn case VPInstruction::CanonicalIVIncrementNUW:
39303975b7fSFlorian Hahn O << "VF * UF +(nuw) ";
39403975b7fSFlorian Hahn break;
39503975b7fSFlorian Hahn case VPInstruction::BranchOnCond:
39603975b7fSFlorian Hahn O << "branch-on-cond";
39703975b7fSFlorian Hahn break;
39803fee671SDavid Sherwood case VPInstruction::CanonicalIVIncrementForPart:
39903fee671SDavid Sherwood O << "VF * Part + ";
40003fee671SDavid Sherwood break;
40103fee671SDavid Sherwood case VPInstruction::CanonicalIVIncrementForPartNUW:
40203fee671SDavid Sherwood O << "VF * Part +(nuw) ";
40303fee671SDavid Sherwood break;
40403975b7fSFlorian Hahn case VPInstruction::BranchOnCount:
40503975b7fSFlorian Hahn O << "branch-on-count ";
40603975b7fSFlorian Hahn break;
40703975b7fSFlorian Hahn default:
40803975b7fSFlorian Hahn O << Instruction::getOpcodeName(getOpcode());
40903975b7fSFlorian Hahn }
41003975b7fSFlorian Hahn
41103975b7fSFlorian Hahn O << FMF;
41203975b7fSFlorian Hahn
41303975b7fSFlorian Hahn for (const VPValue *Operand : operands()) {
41403975b7fSFlorian Hahn O << " ";
41503975b7fSFlorian Hahn Operand->printAsOperand(O, SlotTracker);
41603975b7fSFlorian Hahn }
41703975b7fSFlorian Hahn
41803975b7fSFlorian Hahn if (DL) {
41903975b7fSFlorian Hahn O << ", !dbg ";
42003975b7fSFlorian Hahn DL.print(O);
42103975b7fSFlorian Hahn }
42203975b7fSFlorian Hahn }
42303975b7fSFlorian Hahn #endif
42403975b7fSFlorian Hahn
setFastMathFlags(FastMathFlags FMFNew)42503975b7fSFlorian Hahn void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
42603975b7fSFlorian Hahn // Make sure the VPInstruction is a floating-point operation.
42703975b7fSFlorian Hahn assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
42803975b7fSFlorian Hahn Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
42903975b7fSFlorian Hahn Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
43003975b7fSFlorian Hahn Opcode == Instruction::FCmp) &&
43103975b7fSFlorian Hahn "this op can't take fast-math flags");
43203975b7fSFlorian Hahn FMF = FMFNew;
43303975b7fSFlorian Hahn }
43403975b7fSFlorian Hahn
43503975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const43603975b7fSFlorian Hahn void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
43703975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
43803975b7fSFlorian Hahn O << Indent << "WIDEN-CALL ";
43903975b7fSFlorian Hahn
44003975b7fSFlorian Hahn auto *CI = cast<CallInst>(getUnderlyingInstr());
44103975b7fSFlorian Hahn if (CI->getType()->isVoidTy())
44203975b7fSFlorian Hahn O << "void ";
44303975b7fSFlorian Hahn else {
44403975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
44503975b7fSFlorian Hahn O << " = ";
44603975b7fSFlorian Hahn }
44703975b7fSFlorian Hahn
44803975b7fSFlorian Hahn O << "call @" << CI->getCalledFunction()->getName() << "(";
44903975b7fSFlorian Hahn printOperands(O, SlotTracker);
45003975b7fSFlorian Hahn O << ")";
45103975b7fSFlorian Hahn }
45203975b7fSFlorian Hahn
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const45303975b7fSFlorian Hahn void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
45403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
45503975b7fSFlorian Hahn O << Indent << "WIDEN-SELECT ";
45603975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
45703975b7fSFlorian Hahn O << " = select ";
45803975b7fSFlorian Hahn getOperand(0)->printAsOperand(O, SlotTracker);
45903975b7fSFlorian Hahn O << ", ";
46003975b7fSFlorian Hahn getOperand(1)->printAsOperand(O, SlotTracker);
46103975b7fSFlorian Hahn O << ", ";
46203975b7fSFlorian Hahn getOperand(2)->printAsOperand(O, SlotTracker);
46303975b7fSFlorian Hahn O << (InvariantCond ? " (condition is loop invariant)" : "");
46403975b7fSFlorian Hahn }
4650c27b388SFlorian Hahn #endif
46603975b7fSFlorian Hahn
execute(VPTransformState & State)4670c27b388SFlorian Hahn void VPWidenSelectRecipe::execute(VPTransformState &State) {
4680c27b388SFlorian Hahn auto &I = *cast<SelectInst>(getUnderlyingInstr());
4690c27b388SFlorian Hahn State.setDebugLocFromInst(&I);
4700c27b388SFlorian Hahn
4710c27b388SFlorian Hahn // The condition can be loop invariant but still defined inside the
4720c27b388SFlorian Hahn // loop. This means that we can't just use the original 'cond' value.
4730c27b388SFlorian Hahn // We have to take the 'vectorized' value and pick the first lane.
4740c27b388SFlorian Hahn // Instcombine will make this a no-op.
4750c27b388SFlorian Hahn auto *InvarCond =
4760c27b388SFlorian Hahn InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
4770c27b388SFlorian Hahn
4780c27b388SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
4790c27b388SFlorian Hahn Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
4800c27b388SFlorian Hahn Value *Op0 = State.get(getOperand(1), Part);
4810c27b388SFlorian Hahn Value *Op1 = State.get(getOperand(2), Part);
4820c27b388SFlorian Hahn Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
4830c27b388SFlorian Hahn State.set(this, Sel, Part);
4840c27b388SFlorian Hahn State.addMetadata(Sel, &I);
4850c27b388SFlorian Hahn }
4860c27b388SFlorian Hahn }
4870c27b388SFlorian Hahn
execute(VPTransformState & State)48813ae2134SFlorian Hahn void VPWidenRecipe::execute(VPTransformState &State) {
48913ae2134SFlorian Hahn auto &I = *cast<Instruction>(getUnderlyingValue());
49013ae2134SFlorian Hahn auto &Builder = State.Builder;
49113ae2134SFlorian Hahn switch (I.getOpcode()) {
49213ae2134SFlorian Hahn case Instruction::Call:
49313ae2134SFlorian Hahn case Instruction::Br:
49413ae2134SFlorian Hahn case Instruction::PHI:
49513ae2134SFlorian Hahn case Instruction::GetElementPtr:
49613ae2134SFlorian Hahn case Instruction::Select:
49713ae2134SFlorian Hahn llvm_unreachable("This instruction is handled by a different recipe.");
49813ae2134SFlorian Hahn case Instruction::UDiv:
49913ae2134SFlorian Hahn case Instruction::SDiv:
50013ae2134SFlorian Hahn case Instruction::SRem:
50113ae2134SFlorian Hahn case Instruction::URem:
50213ae2134SFlorian Hahn case Instruction::Add:
50313ae2134SFlorian Hahn case Instruction::FAdd:
50413ae2134SFlorian Hahn case Instruction::Sub:
50513ae2134SFlorian Hahn case Instruction::FSub:
50613ae2134SFlorian Hahn case Instruction::FNeg:
50713ae2134SFlorian Hahn case Instruction::Mul:
50813ae2134SFlorian Hahn case Instruction::FMul:
50913ae2134SFlorian Hahn case Instruction::FDiv:
51013ae2134SFlorian Hahn case Instruction::FRem:
51113ae2134SFlorian Hahn case Instruction::Shl:
51213ae2134SFlorian Hahn case Instruction::LShr:
51313ae2134SFlorian Hahn case Instruction::AShr:
51413ae2134SFlorian Hahn case Instruction::And:
51513ae2134SFlorian Hahn case Instruction::Or:
51613ae2134SFlorian Hahn case Instruction::Xor: {
51713ae2134SFlorian Hahn // Just widen unops and binops.
51813ae2134SFlorian Hahn State.setDebugLocFromInst(&I);
51913ae2134SFlorian Hahn
52013ae2134SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
52113ae2134SFlorian Hahn SmallVector<Value *, 2> Ops;
52213ae2134SFlorian Hahn for (VPValue *VPOp : operands())
52313ae2134SFlorian Hahn Ops.push_back(State.get(VPOp, Part));
52413ae2134SFlorian Hahn
52513ae2134SFlorian Hahn Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
52613ae2134SFlorian Hahn
52713ae2134SFlorian Hahn if (auto *VecOp = dyn_cast<Instruction>(V)) {
52813ae2134SFlorian Hahn VecOp->copyIRFlags(&I);
52913ae2134SFlorian Hahn
53013ae2134SFlorian Hahn // If the instruction is vectorized and was in a basic block that needed
53113ae2134SFlorian Hahn // predication, we can't propagate poison-generating flags (nuw/nsw,
53213ae2134SFlorian Hahn // exact, etc.). The control flow has been linearized and the
53313ae2134SFlorian Hahn // instruction is no longer guarded by the predicate, which could make
53413ae2134SFlorian Hahn // the flag properties to no longer hold.
53513ae2134SFlorian Hahn if (State.MayGeneratePoisonRecipes.contains(this))
53613ae2134SFlorian Hahn VecOp->dropPoisonGeneratingFlags();
53713ae2134SFlorian Hahn }
53813ae2134SFlorian Hahn
53913ae2134SFlorian Hahn // Use this vector value for all users of the original instruction.
54013ae2134SFlorian Hahn State.set(this, V, Part);
54113ae2134SFlorian Hahn State.addMetadata(V, &I);
54213ae2134SFlorian Hahn }
54313ae2134SFlorian Hahn
54413ae2134SFlorian Hahn break;
54513ae2134SFlorian Hahn }
54613ae2134SFlorian Hahn case Instruction::Freeze: {
54713ae2134SFlorian Hahn State.setDebugLocFromInst(&I);
54813ae2134SFlorian Hahn
54913ae2134SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
55013ae2134SFlorian Hahn Value *Op = State.get(getOperand(0), Part);
55113ae2134SFlorian Hahn
55213ae2134SFlorian Hahn Value *Freeze = Builder.CreateFreeze(Op);
55313ae2134SFlorian Hahn State.set(this, Freeze, Part);
55413ae2134SFlorian Hahn }
55513ae2134SFlorian Hahn break;
55613ae2134SFlorian Hahn }
55713ae2134SFlorian Hahn case Instruction::ICmp:
55813ae2134SFlorian Hahn case Instruction::FCmp: {
55913ae2134SFlorian Hahn // Widen compares. Generate vector compares.
56013ae2134SFlorian Hahn bool FCmp = (I.getOpcode() == Instruction::FCmp);
56113ae2134SFlorian Hahn auto *Cmp = cast<CmpInst>(&I);
56213ae2134SFlorian Hahn State.setDebugLocFromInst(Cmp);
56313ae2134SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
56413ae2134SFlorian Hahn Value *A = State.get(getOperand(0), Part);
56513ae2134SFlorian Hahn Value *B = State.get(getOperand(1), Part);
56613ae2134SFlorian Hahn Value *C = nullptr;
56713ae2134SFlorian Hahn if (FCmp) {
56813ae2134SFlorian Hahn // Propagate fast math flags.
56913ae2134SFlorian Hahn IRBuilder<>::FastMathFlagGuard FMFG(Builder);
57013ae2134SFlorian Hahn Builder.setFastMathFlags(Cmp->getFastMathFlags());
57113ae2134SFlorian Hahn C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
57213ae2134SFlorian Hahn } else {
57313ae2134SFlorian Hahn C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
57413ae2134SFlorian Hahn }
57513ae2134SFlorian Hahn State.set(this, C, Part);
57613ae2134SFlorian Hahn State.addMetadata(C, &I);
57713ae2134SFlorian Hahn }
57813ae2134SFlorian Hahn
57913ae2134SFlorian Hahn break;
58013ae2134SFlorian Hahn }
58113ae2134SFlorian Hahn
58213ae2134SFlorian Hahn case Instruction::ZExt:
58313ae2134SFlorian Hahn case Instruction::SExt:
58413ae2134SFlorian Hahn case Instruction::FPToUI:
58513ae2134SFlorian Hahn case Instruction::FPToSI:
58613ae2134SFlorian Hahn case Instruction::FPExt:
58713ae2134SFlorian Hahn case Instruction::PtrToInt:
58813ae2134SFlorian Hahn case Instruction::IntToPtr:
58913ae2134SFlorian Hahn case Instruction::SIToFP:
59013ae2134SFlorian Hahn case Instruction::UIToFP:
59113ae2134SFlorian Hahn case Instruction::Trunc:
59213ae2134SFlorian Hahn case Instruction::FPTrunc:
59313ae2134SFlorian Hahn case Instruction::BitCast: {
59413ae2134SFlorian Hahn auto *CI = cast<CastInst>(&I);
59513ae2134SFlorian Hahn State.setDebugLocFromInst(CI);
59613ae2134SFlorian Hahn
59713ae2134SFlorian Hahn /// Vectorize casts.
59813ae2134SFlorian Hahn Type *DestTy = (State.VF.isScalar())
59913ae2134SFlorian Hahn ? CI->getType()
60013ae2134SFlorian Hahn : VectorType::get(CI->getType(), State.VF);
60113ae2134SFlorian Hahn
60213ae2134SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
60313ae2134SFlorian Hahn Value *A = State.get(getOperand(0), Part);
60413ae2134SFlorian Hahn Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
60513ae2134SFlorian Hahn State.set(this, Cast, Part);
60613ae2134SFlorian Hahn State.addMetadata(Cast, &I);
60713ae2134SFlorian Hahn }
60813ae2134SFlorian Hahn break;
60913ae2134SFlorian Hahn }
61013ae2134SFlorian Hahn default:
61113ae2134SFlorian Hahn // This instruction is not vectorized by simple widening.
61213ae2134SFlorian Hahn LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
61313ae2134SFlorian Hahn llvm_unreachable("Unhandled instruction!");
61413ae2134SFlorian Hahn } // end of switch.
61513ae2134SFlorian Hahn }
6160c27b388SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const61703975b7fSFlorian Hahn void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
61803975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
61903975b7fSFlorian Hahn O << Indent << "WIDEN ";
62003975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
62103975b7fSFlorian Hahn O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
62203975b7fSFlorian Hahn printOperands(O, SlotTracker);
62303975b7fSFlorian Hahn }
62403975b7fSFlorian Hahn
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const62503975b7fSFlorian Hahn void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
62603975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
62703975b7fSFlorian Hahn O << Indent << "WIDEN-INDUCTION";
62803975b7fSFlorian Hahn if (getTruncInst()) {
62903975b7fSFlorian Hahn O << "\\l\"";
63003975b7fSFlorian Hahn O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
63103975b7fSFlorian Hahn O << " +\n" << Indent << "\" ";
63203975b7fSFlorian Hahn getVPValue(0)->printAsOperand(O, SlotTracker);
63303975b7fSFlorian Hahn } else
63403975b7fSFlorian Hahn O << " " << VPlanIngredient(IV);
63503975b7fSFlorian Hahn
63603975b7fSFlorian Hahn O << ", ";
63703975b7fSFlorian Hahn getStepValue()->printAsOperand(O, SlotTracker);
63803975b7fSFlorian Hahn }
63903975b7fSFlorian Hahn #endif
64003975b7fSFlorian Hahn
isCanonical() const64103975b7fSFlorian Hahn bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
64203975b7fSFlorian Hahn auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
64303975b7fSFlorian Hahn auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
64403975b7fSFlorian Hahn return StartC && StartC->isZero() && StepC && StepC->isOne();
64503975b7fSFlorian Hahn }
64603975b7fSFlorian Hahn
getCanonicalIV() const64703975b7fSFlorian Hahn VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
64803975b7fSFlorian Hahn return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
64903975b7fSFlorian Hahn }
65003975b7fSFlorian Hahn
isCanonical() const65103975b7fSFlorian Hahn bool VPScalarIVStepsRecipe::isCanonical() const {
65203975b7fSFlorian Hahn auto *CanIV = getCanonicalIV();
65303975b7fSFlorian Hahn // The start value of the steps-recipe must match the start value of the
65403975b7fSFlorian Hahn // canonical induction and it must step by 1.
65503975b7fSFlorian Hahn if (CanIV->getStartValue() != getStartValue())
65603975b7fSFlorian Hahn return false;
65703975b7fSFlorian Hahn auto *StepVPV = getStepValue();
65803975b7fSFlorian Hahn if (StepVPV->getDef())
65903975b7fSFlorian Hahn return false;
66003975b7fSFlorian Hahn auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
66103975b7fSFlorian Hahn return StepC && StepC->isOne();
66203975b7fSFlorian Hahn }
66303975b7fSFlorian Hahn
66403975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const66503975b7fSFlorian Hahn void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
66603975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
66703975b7fSFlorian Hahn O << Indent;
66803975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
66903975b7fSFlorian Hahn O << Indent << "= SCALAR-STEPS ";
67003975b7fSFlorian Hahn printOperands(O, SlotTracker);
67103975b7fSFlorian Hahn }
6726a4bc452SFlorian Hahn #endif
67303975b7fSFlorian Hahn
execute(VPTransformState & State)6746a4bc452SFlorian Hahn void VPWidenGEPRecipe::execute(VPTransformState &State) {
6756a4bc452SFlorian Hahn auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
6766a4bc452SFlorian Hahn // Construct a vector GEP by widening the operands of the scalar GEP as
6776a4bc452SFlorian Hahn // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
6786a4bc452SFlorian Hahn // results in a vector of pointers when at least one operand of the GEP
6796a4bc452SFlorian Hahn // is vector-typed. Thus, to keep the representation compact, we only use
6806a4bc452SFlorian Hahn // vector-typed operands for loop-varying values.
6816a4bc452SFlorian Hahn
6826a4bc452SFlorian Hahn if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
6836a4bc452SFlorian Hahn // If we are vectorizing, but the GEP has only loop-invariant operands,
6846a4bc452SFlorian Hahn // the GEP we build (by only using vector-typed operands for
6856a4bc452SFlorian Hahn // loop-varying values) would be a scalar pointer. Thus, to ensure we
6866a4bc452SFlorian Hahn // produce a vector of pointers, we need to either arbitrarily pick an
6876a4bc452SFlorian Hahn // operand to broadcast, or broadcast a clone of the original GEP.
6886a4bc452SFlorian Hahn // Here, we broadcast a clone of the original.
6896a4bc452SFlorian Hahn //
6906a4bc452SFlorian Hahn // TODO: If at some point we decide to scalarize instructions having
6916a4bc452SFlorian Hahn // loop-invariant operands, this special case will no longer be
6926a4bc452SFlorian Hahn // required. We would add the scalarization decision to
6936a4bc452SFlorian Hahn // collectLoopScalars() and teach getVectorValue() to broadcast
6946a4bc452SFlorian Hahn // the lane-zero scalar value.
6956a4bc452SFlorian Hahn auto *Clone = State.Builder.Insert(GEP->clone());
6966a4bc452SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
6976a4bc452SFlorian Hahn Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
6986a4bc452SFlorian Hahn State.set(this, EntryPart, Part);
6996a4bc452SFlorian Hahn State.addMetadata(EntryPart, GEP);
7006a4bc452SFlorian Hahn }
7016a4bc452SFlorian Hahn } else {
7026a4bc452SFlorian Hahn // If the GEP has at least one loop-varying operand, we are sure to
7036a4bc452SFlorian Hahn // produce a vector of pointers. But if we are only unrolling, we want
7046a4bc452SFlorian Hahn // to produce a scalar GEP for each unroll part. Thus, the GEP we
7056a4bc452SFlorian Hahn // produce with the code below will be scalar (if VF == 1) or vector
7066a4bc452SFlorian Hahn // (otherwise). Note that for the unroll-only case, we still maintain
7076a4bc452SFlorian Hahn // values in the vector mapping with initVector, as we do for other
7086a4bc452SFlorian Hahn // instructions.
7096a4bc452SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
7106a4bc452SFlorian Hahn // The pointer operand of the new GEP. If it's loop-invariant, we
7116a4bc452SFlorian Hahn // won't broadcast it.
7126a4bc452SFlorian Hahn auto *Ptr = IsPtrLoopInvariant
7136a4bc452SFlorian Hahn ? State.get(getOperand(0), VPIteration(0, 0))
7146a4bc452SFlorian Hahn : State.get(getOperand(0), Part);
7156a4bc452SFlorian Hahn
7166a4bc452SFlorian Hahn // Collect all the indices for the new GEP. If any index is
7176a4bc452SFlorian Hahn // loop-invariant, we won't broadcast it.
7186a4bc452SFlorian Hahn SmallVector<Value *, 4> Indices;
7196a4bc452SFlorian Hahn for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
7206a4bc452SFlorian Hahn VPValue *Operand = getOperand(I);
7216a4bc452SFlorian Hahn if (IsIndexLoopInvariant[I - 1])
7226a4bc452SFlorian Hahn Indices.push_back(State.get(Operand, VPIteration(0, 0)));
7236a4bc452SFlorian Hahn else
7246a4bc452SFlorian Hahn Indices.push_back(State.get(Operand, Part));
7256a4bc452SFlorian Hahn }
7266a4bc452SFlorian Hahn
7276a4bc452SFlorian Hahn // If the GEP instruction is vectorized and was in a basic block that
7286a4bc452SFlorian Hahn // needed predication, we can't propagate the poison-generating 'inbounds'
7296a4bc452SFlorian Hahn // flag. The control flow has been linearized and the GEP is no longer
7306a4bc452SFlorian Hahn // guarded by the predicate, which could make the 'inbounds' properties to
7316a4bc452SFlorian Hahn // no longer hold.
7326a4bc452SFlorian Hahn bool IsInBounds =
7336a4bc452SFlorian Hahn GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
7346a4bc452SFlorian Hahn
7356a4bc452SFlorian Hahn // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
7366a4bc452SFlorian Hahn // but it should be a vector, otherwise.
7376a4bc452SFlorian Hahn auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
7386a4bc452SFlorian Hahn Indices, "", IsInBounds);
7396a4bc452SFlorian Hahn assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
7406a4bc452SFlorian Hahn "NewGEP is not a pointer vector");
7416a4bc452SFlorian Hahn State.set(this, NewGEP, Part);
7426a4bc452SFlorian Hahn State.addMetadata(NewGEP, GEP);
7436a4bc452SFlorian Hahn }
7446a4bc452SFlorian Hahn }
7456a4bc452SFlorian Hahn }
7466a4bc452SFlorian Hahn
7476a4bc452SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const74803975b7fSFlorian Hahn void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
74903975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
75003975b7fSFlorian Hahn O << Indent << "WIDEN-GEP ";
75103975b7fSFlorian Hahn O << (IsPtrLoopInvariant ? "Inv" : "Var");
75203975b7fSFlorian Hahn size_t IndicesNumber = IsIndexLoopInvariant.size();
75303975b7fSFlorian Hahn for (size_t I = 0; I < IndicesNumber; ++I)
75403975b7fSFlorian Hahn O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
75503975b7fSFlorian Hahn
75603975b7fSFlorian Hahn O << " ";
75703975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
75803975b7fSFlorian Hahn O << " = getelementptr ";
75903975b7fSFlorian Hahn printOperands(O, SlotTracker);
76003975b7fSFlorian Hahn }
7615d135041SFlorian Hahn #endif
76203975b7fSFlorian Hahn
execute(VPTransformState & State)7635d135041SFlorian Hahn void VPBlendRecipe::execute(VPTransformState &State) {
7645d135041SFlorian Hahn State.setDebugLocFromInst(Phi);
7655d135041SFlorian Hahn // We know that all PHIs in non-header blocks are converted into
7665d135041SFlorian Hahn // selects, so we don't have to worry about the insertion order and we
7675d135041SFlorian Hahn // can just use the builder.
7685d135041SFlorian Hahn // At this point we generate the predication tree. There may be
7695d135041SFlorian Hahn // duplications since this is a simple recursive scan, but future
7705d135041SFlorian Hahn // optimizations will clean it up.
7715d135041SFlorian Hahn
7725d135041SFlorian Hahn unsigned NumIncoming = getNumIncomingValues();
7735d135041SFlorian Hahn
7745d135041SFlorian Hahn // Generate a sequence of selects of the form:
7755d135041SFlorian Hahn // SELECT(Mask3, In3,
7765d135041SFlorian Hahn // SELECT(Mask2, In2,
7775d135041SFlorian Hahn // SELECT(Mask1, In1,
7785d135041SFlorian Hahn // In0)))
7795d135041SFlorian Hahn // Note that Mask0 is never used: lanes for which no path reaches this phi and
7805d135041SFlorian Hahn // are essentially undef are taken from In0.
7815d135041SFlorian Hahn VectorParts Entry(State.UF);
7825d135041SFlorian Hahn for (unsigned In = 0; In < NumIncoming; ++In) {
7835d135041SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part) {
7845d135041SFlorian Hahn // We might have single edge PHIs (blocks) - use an identity
7855d135041SFlorian Hahn // 'select' for the first PHI operand.
7865d135041SFlorian Hahn Value *In0 = State.get(getIncomingValue(In), Part);
7875d135041SFlorian Hahn if (In == 0)
7885d135041SFlorian Hahn Entry[Part] = In0; // Initialize with the first incoming value.
7895d135041SFlorian Hahn else {
7905d135041SFlorian Hahn // Select between the current value and the previous incoming edge
7915d135041SFlorian Hahn // based on the incoming mask.
7925d135041SFlorian Hahn Value *Cond = State.get(getMask(In), Part);
7935d135041SFlorian Hahn Entry[Part] =
7945d135041SFlorian Hahn State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
7955d135041SFlorian Hahn }
7965d135041SFlorian Hahn }
7975d135041SFlorian Hahn }
7985d135041SFlorian Hahn for (unsigned Part = 0; Part < State.UF; ++Part)
7995d135041SFlorian Hahn State.set(this, Entry[Part], Part);
8005d135041SFlorian Hahn }
8015d135041SFlorian Hahn
8025d135041SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const80303975b7fSFlorian Hahn void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
80403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
80503975b7fSFlorian Hahn O << Indent << "BLEND ";
80603975b7fSFlorian Hahn Phi->printAsOperand(O, false);
80703975b7fSFlorian Hahn O << " =";
80803975b7fSFlorian Hahn if (getNumIncomingValues() == 1) {
80903975b7fSFlorian Hahn // Not a User of any mask: not really blending, this is a
81003975b7fSFlorian Hahn // single-predecessor phi.
81103975b7fSFlorian Hahn O << " ";
81203975b7fSFlorian Hahn getIncomingValue(0)->printAsOperand(O, SlotTracker);
81303975b7fSFlorian Hahn } else {
81403975b7fSFlorian Hahn for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
81503975b7fSFlorian Hahn O << " ";
81603975b7fSFlorian Hahn getIncomingValue(I)->printAsOperand(O, SlotTracker);
81703975b7fSFlorian Hahn O << "/";
81803975b7fSFlorian Hahn getMask(I)->printAsOperand(O, SlotTracker);
81903975b7fSFlorian Hahn }
82003975b7fSFlorian Hahn }
82103975b7fSFlorian Hahn }
82203975b7fSFlorian Hahn
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const82303975b7fSFlorian Hahn void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
82403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
82503975b7fSFlorian Hahn O << Indent << "REDUCE ";
82603975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
82703975b7fSFlorian Hahn O << " = ";
82803975b7fSFlorian Hahn getChainOp()->printAsOperand(O, SlotTracker);
82903975b7fSFlorian Hahn O << " +";
83003975b7fSFlorian Hahn if (isa<FPMathOperator>(getUnderlyingInstr()))
83103975b7fSFlorian Hahn O << getUnderlyingInstr()->getFastMathFlags();
83203975b7fSFlorian Hahn O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
83303975b7fSFlorian Hahn getVecOp()->printAsOperand(O, SlotTracker);
83403975b7fSFlorian Hahn if (getCondOp()) {
83503975b7fSFlorian Hahn O << ", ";
83603975b7fSFlorian Hahn getCondOp()->printAsOperand(O, SlotTracker);
83703975b7fSFlorian Hahn }
83803975b7fSFlorian Hahn O << ")";
83903975b7fSFlorian Hahn if (RdxDesc->IntermediateStore)
84003975b7fSFlorian Hahn O << " (with final reduction value stored in invariant address sank "
84103975b7fSFlorian Hahn "outside of loop)";
84203975b7fSFlorian Hahn }
84303975b7fSFlorian Hahn
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const84403975b7fSFlorian Hahn void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
84503975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
84603975b7fSFlorian Hahn O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
84703975b7fSFlorian Hahn
84803975b7fSFlorian Hahn if (!getUnderlyingInstr()->getType()->isVoidTy()) {
84903975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
85003975b7fSFlorian Hahn O << " = ";
85103975b7fSFlorian Hahn }
85203975b7fSFlorian Hahn if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
85303975b7fSFlorian Hahn O << "call @" << CB->getCalledFunction()->getName() << "(";
85403975b7fSFlorian Hahn interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
85503975b7fSFlorian Hahn O, [&O, &SlotTracker](VPValue *Op) {
85603975b7fSFlorian Hahn Op->printAsOperand(O, SlotTracker);
85703975b7fSFlorian Hahn });
85803975b7fSFlorian Hahn O << ")";
85903975b7fSFlorian Hahn } else {
86003975b7fSFlorian Hahn O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
86103975b7fSFlorian Hahn printOperands(O, SlotTracker);
86203975b7fSFlorian Hahn }
86303975b7fSFlorian Hahn
86403975b7fSFlorian Hahn if (AlsoPack)
86503975b7fSFlorian Hahn O << " (S->V)";
86603975b7fSFlorian Hahn }
867225e3ec6SFlorian Hahn #endif
86803975b7fSFlorian Hahn
execute(VPTransformState & State)869225e3ec6SFlorian Hahn void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
870225e3ec6SFlorian Hahn assert(State.Instance && "Branch on Mask works only on single instance.");
871225e3ec6SFlorian Hahn
872225e3ec6SFlorian Hahn unsigned Part = State.Instance->Part;
873225e3ec6SFlorian Hahn unsigned Lane = State.Instance->Lane.getKnownLane();
874225e3ec6SFlorian Hahn
875225e3ec6SFlorian Hahn Value *ConditionBit = nullptr;
876225e3ec6SFlorian Hahn VPValue *BlockInMask = getMask();
877225e3ec6SFlorian Hahn if (BlockInMask) {
878225e3ec6SFlorian Hahn ConditionBit = State.get(BlockInMask, Part);
879225e3ec6SFlorian Hahn if (ConditionBit->getType()->isVectorTy())
880225e3ec6SFlorian Hahn ConditionBit = State.Builder.CreateExtractElement(
881225e3ec6SFlorian Hahn ConditionBit, State.Builder.getInt32(Lane));
882225e3ec6SFlorian Hahn } else // Block in mask is all-one.
883225e3ec6SFlorian Hahn ConditionBit = State.Builder.getTrue();
884225e3ec6SFlorian Hahn
885225e3ec6SFlorian Hahn // Replace the temporary unreachable terminator with a new conditional branch,
886225e3ec6SFlorian Hahn // whose two destinations will be set later when they are created.
887225e3ec6SFlorian Hahn auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
888225e3ec6SFlorian Hahn assert(isa<UnreachableInst>(CurrentTerminator) &&
889225e3ec6SFlorian Hahn "Expected to replace unreachable terminator with conditional branch.");
890225e3ec6SFlorian Hahn auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
891225e3ec6SFlorian Hahn CondBr->setSuccessor(0, nullptr);
892225e3ec6SFlorian Hahn ReplaceInstWithInst(CurrentTerminator, CondBr);
893225e3ec6SFlorian Hahn }
894225e3ec6SFlorian Hahn
execute(VPTransformState & State)895cc0ee179SFlorian Hahn void VPPredInstPHIRecipe::execute(VPTransformState &State) {
896cc0ee179SFlorian Hahn assert(State.Instance && "Predicated instruction PHI works per instance.");
897cc0ee179SFlorian Hahn Instruction *ScalarPredInst =
898cc0ee179SFlorian Hahn cast<Instruction>(State.get(getOperand(0), *State.Instance));
899cc0ee179SFlorian Hahn BasicBlock *PredicatedBB = ScalarPredInst->getParent();
900cc0ee179SFlorian Hahn BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
901cc0ee179SFlorian Hahn assert(PredicatingBB && "Predicated block has no single predecessor.");
902cc0ee179SFlorian Hahn assert(isa<VPReplicateRecipe>(getOperand(0)) &&
903cc0ee179SFlorian Hahn "operand must be VPReplicateRecipe");
904cc0ee179SFlorian Hahn
905cc0ee179SFlorian Hahn // By current pack/unpack logic we need to generate only a single phi node: if
906cc0ee179SFlorian Hahn // a vector value for the predicated instruction exists at this point it means
907cc0ee179SFlorian Hahn // the instruction has vector users only, and a phi for the vector value is
908cc0ee179SFlorian Hahn // needed. In this case the recipe of the predicated instruction is marked to
909cc0ee179SFlorian Hahn // also do that packing, thereby "hoisting" the insert-element sequence.
910cc0ee179SFlorian Hahn // Otherwise, a phi node for the scalar value is needed.
911cc0ee179SFlorian Hahn unsigned Part = State.Instance->Part;
912cc0ee179SFlorian Hahn if (State.hasVectorValue(getOperand(0), Part)) {
913cc0ee179SFlorian Hahn Value *VectorValue = State.get(getOperand(0), Part);
914cc0ee179SFlorian Hahn InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
915cc0ee179SFlorian Hahn PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
916cc0ee179SFlorian Hahn VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
917cc0ee179SFlorian Hahn VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
918cc0ee179SFlorian Hahn if (State.hasVectorValue(this, Part))
919cc0ee179SFlorian Hahn State.reset(this, VPhi, Part);
920cc0ee179SFlorian Hahn else
921cc0ee179SFlorian Hahn State.set(this, VPhi, Part);
922cc0ee179SFlorian Hahn // NOTE: Currently we need to update the value of the operand, so the next
923cc0ee179SFlorian Hahn // predicated iteration inserts its generated value in the correct vector.
924cc0ee179SFlorian Hahn State.reset(getOperand(0), VPhi, Part);
925cc0ee179SFlorian Hahn } else {
926cc0ee179SFlorian Hahn Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
927cc0ee179SFlorian Hahn PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
928cc0ee179SFlorian Hahn Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
929cc0ee179SFlorian Hahn PredicatingBB);
930cc0ee179SFlorian Hahn Phi->addIncoming(ScalarPredInst, PredicatedBB);
931cc0ee179SFlorian Hahn if (State.hasScalarValue(this, *State.Instance))
932cc0ee179SFlorian Hahn State.reset(this, Phi, *State.Instance);
933cc0ee179SFlorian Hahn else
934cc0ee179SFlorian Hahn State.set(this, Phi, *State.Instance);
935cc0ee179SFlorian Hahn // NOTE: Currently we need to update the value of the operand, so the next
936cc0ee179SFlorian Hahn // predicated iteration inserts its generated value in the correct vector.
937cc0ee179SFlorian Hahn State.reset(getOperand(0), Phi, *State.Instance);
938cc0ee179SFlorian Hahn }
939cc0ee179SFlorian Hahn }
940cc0ee179SFlorian Hahn
941225e3ec6SFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const94203975b7fSFlorian Hahn void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
94303975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
94403975b7fSFlorian Hahn O << Indent << "PHI-PREDICATED-INSTRUCTION ";
94503975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
94603975b7fSFlorian Hahn O << " = ";
94703975b7fSFlorian Hahn printOperands(O, SlotTracker);
94803975b7fSFlorian Hahn }
94903975b7fSFlorian Hahn
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const95003975b7fSFlorian Hahn void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
95103975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
95203975b7fSFlorian Hahn O << Indent << "WIDEN ";
95303975b7fSFlorian Hahn
95403975b7fSFlorian Hahn if (!isStore()) {
95503975b7fSFlorian Hahn getVPSingleValue()->printAsOperand(O, SlotTracker);
95603975b7fSFlorian Hahn O << " = ";
95703975b7fSFlorian Hahn }
95803975b7fSFlorian Hahn O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
95903975b7fSFlorian Hahn
96003975b7fSFlorian Hahn printOperands(O, SlotTracker);
96103975b7fSFlorian Hahn }
96203975b7fSFlorian Hahn #endif
96303975b7fSFlorian Hahn
execute(VPTransformState & State)96403975b7fSFlorian Hahn void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
96503975b7fSFlorian Hahn Value *Start = getStartValue()->getLiveInIRValue();
96603975b7fSFlorian Hahn PHINode *EntryPart = PHINode::Create(
96703975b7fSFlorian Hahn Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
96803975b7fSFlorian Hahn
96903975b7fSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
97003975b7fSFlorian Hahn EntryPart->addIncoming(Start, VectorPH);
97103975b7fSFlorian Hahn EntryPart->setDebugLoc(DL);
97203975b7fSFlorian Hahn for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
97303975b7fSFlorian Hahn State.set(this, EntryPart, Part);
97403975b7fSFlorian Hahn }
97503975b7fSFlorian Hahn
97603975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const97703975b7fSFlorian Hahn void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
97803975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
97903975b7fSFlorian Hahn O << Indent << "EMIT ";
98003975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
98103975b7fSFlorian Hahn O << " = CANONICAL-INDUCTION";
98203975b7fSFlorian Hahn }
98303975b7fSFlorian Hahn #endif
98403975b7fSFlorian Hahn
onlyScalarsGenerated(ElementCount VF)985*5f620d00SFlorian Hahn bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
986*5f620d00SFlorian Hahn return IsScalarAfterVectorization &&
987*5f620d00SFlorian Hahn (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
98803975b7fSFlorian Hahn }
98903975b7fSFlorian Hahn
99003975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const99103975b7fSFlorian Hahn void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
99203975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
99303975b7fSFlorian Hahn O << Indent << "EMIT ";
99403975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
99503975b7fSFlorian Hahn O << " = WIDEN-POINTER-INDUCTION ";
99603975b7fSFlorian Hahn getStartValue()->printAsOperand(O, SlotTracker);
99703975b7fSFlorian Hahn O << ", " << *IndDesc.getStep();
99803975b7fSFlorian Hahn }
99903975b7fSFlorian Hahn #endif
100003975b7fSFlorian Hahn
execute(VPTransformState & State)100103975b7fSFlorian Hahn void VPExpandSCEVRecipe::execute(VPTransformState &State) {
100203975b7fSFlorian Hahn assert(!State.Instance && "cannot be used in per-lane");
100303975b7fSFlorian Hahn const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
100403975b7fSFlorian Hahn SCEVExpander Exp(SE, DL, "induction");
100503975b7fSFlorian Hahn
100603975b7fSFlorian Hahn Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
100703975b7fSFlorian Hahn &*State.Builder.GetInsertPoint());
100803975b7fSFlorian Hahn
100903975b7fSFlorian Hahn for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
101003975b7fSFlorian Hahn State.set(this, Res, Part);
101103975b7fSFlorian Hahn }
101203975b7fSFlorian Hahn
101303975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const101403975b7fSFlorian Hahn void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
101503975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
101603975b7fSFlorian Hahn O << Indent << "EMIT ";
101703975b7fSFlorian Hahn getVPSingleValue()->printAsOperand(O, SlotTracker);
101803975b7fSFlorian Hahn O << " = EXPAND SCEV " << *Expr;
101903975b7fSFlorian Hahn }
102003975b7fSFlorian Hahn #endif
102103975b7fSFlorian Hahn
execute(VPTransformState & State)102203975b7fSFlorian Hahn void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
102303975b7fSFlorian Hahn Value *CanonicalIV = State.get(getOperand(0), 0);
102403975b7fSFlorian Hahn Type *STy = CanonicalIV->getType();
102503975b7fSFlorian Hahn IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
102603975b7fSFlorian Hahn ElementCount VF = State.VF;
102703975b7fSFlorian Hahn Value *VStart = VF.isScalar()
102803975b7fSFlorian Hahn ? CanonicalIV
102903975b7fSFlorian Hahn : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
103003975b7fSFlorian Hahn for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
103103975b7fSFlorian Hahn Value *VStep = createStepForVF(Builder, STy, VF, Part);
103203975b7fSFlorian Hahn if (VF.isVector()) {
103303975b7fSFlorian Hahn VStep = Builder.CreateVectorSplat(VF, VStep);
103403975b7fSFlorian Hahn VStep =
103503975b7fSFlorian Hahn Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
103603975b7fSFlorian Hahn }
103703975b7fSFlorian Hahn Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
103803975b7fSFlorian Hahn State.set(this, CanonicalVectorIV, Part);
103903975b7fSFlorian Hahn }
104003975b7fSFlorian Hahn }
104103975b7fSFlorian Hahn
104203975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const104303975b7fSFlorian Hahn void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
104403975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
104503975b7fSFlorian Hahn O << Indent << "EMIT ";
104603975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
104703975b7fSFlorian Hahn O << " = WIDEN-CANONICAL-INDUCTION ";
104803975b7fSFlorian Hahn printOperands(O, SlotTracker);
104903975b7fSFlorian Hahn }
105003975b7fSFlorian Hahn #endif
105103975b7fSFlorian Hahn
execute(VPTransformState & State)105203975b7fSFlorian Hahn void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
105303975b7fSFlorian Hahn auto &Builder = State.Builder;
105403975b7fSFlorian Hahn // Create a vector from the initial value.
105503975b7fSFlorian Hahn auto *VectorInit = getStartValue()->getLiveInIRValue();
105603975b7fSFlorian Hahn
105703975b7fSFlorian Hahn Type *VecTy = State.VF.isScalar()
105803975b7fSFlorian Hahn ? VectorInit->getType()
105903975b7fSFlorian Hahn : VectorType::get(VectorInit->getType(), State.VF);
106003975b7fSFlorian Hahn
106103975b7fSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
106203975b7fSFlorian Hahn if (State.VF.isVector()) {
106303975b7fSFlorian Hahn auto *IdxTy = Builder.getInt32Ty();
106403975b7fSFlorian Hahn auto *One = ConstantInt::get(IdxTy, 1);
106503975b7fSFlorian Hahn IRBuilder<>::InsertPointGuard Guard(Builder);
106603975b7fSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator());
106703975b7fSFlorian Hahn auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
106803975b7fSFlorian Hahn auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
106903975b7fSFlorian Hahn VectorInit = Builder.CreateInsertElement(
107003975b7fSFlorian Hahn PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
107103975b7fSFlorian Hahn }
107203975b7fSFlorian Hahn
107303975b7fSFlorian Hahn // Create a phi node for the new recurrence.
107403975b7fSFlorian Hahn PHINode *EntryPart = PHINode::Create(
107503975b7fSFlorian Hahn VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
107603975b7fSFlorian Hahn EntryPart->addIncoming(VectorInit, VectorPH);
107703975b7fSFlorian Hahn State.set(this, EntryPart, 0);
107803975b7fSFlorian Hahn }
107903975b7fSFlorian Hahn
108003975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const108103975b7fSFlorian Hahn void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
108203975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
108303975b7fSFlorian Hahn O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
108403975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
108503975b7fSFlorian Hahn O << " = phi ";
108603975b7fSFlorian Hahn printOperands(O, SlotTracker);
108703975b7fSFlorian Hahn }
108803975b7fSFlorian Hahn #endif
108903975b7fSFlorian Hahn
execute(VPTransformState & State)109003975b7fSFlorian Hahn void VPReductionPHIRecipe::execute(VPTransformState &State) {
109103975b7fSFlorian Hahn PHINode *PN = cast<PHINode>(getUnderlyingValue());
109203975b7fSFlorian Hahn auto &Builder = State.Builder;
109303975b7fSFlorian Hahn
109403975b7fSFlorian Hahn // In order to support recurrences we need to be able to vectorize Phi nodes.
109503975b7fSFlorian Hahn // Phi nodes have cycles, so we need to vectorize them in two stages. This is
109603975b7fSFlorian Hahn // stage #1: We create a new vector PHI node with no incoming edges. We'll use
109703975b7fSFlorian Hahn // this value when we vectorize all of the instructions that use the PHI.
109803975b7fSFlorian Hahn bool ScalarPHI = State.VF.isScalar() || IsInLoop;
109903975b7fSFlorian Hahn Type *VecTy =
110003975b7fSFlorian Hahn ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
110103975b7fSFlorian Hahn
110203975b7fSFlorian Hahn BasicBlock *HeaderBB = State.CFG.PrevBB;
110303975b7fSFlorian Hahn assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
110403975b7fSFlorian Hahn "recipe must be in the vector loop header");
110503975b7fSFlorian Hahn unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
110603975b7fSFlorian Hahn for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
110703975b7fSFlorian Hahn Value *EntryPart =
110803975b7fSFlorian Hahn PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
110903975b7fSFlorian Hahn State.set(this, EntryPart, Part);
111003975b7fSFlorian Hahn }
111103975b7fSFlorian Hahn
111203975b7fSFlorian Hahn BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
111303975b7fSFlorian Hahn
111403975b7fSFlorian Hahn // Reductions do not have to start at zero. They can start with
111503975b7fSFlorian Hahn // any loop invariant values.
111603975b7fSFlorian Hahn VPValue *StartVPV = getStartValue();
111703975b7fSFlorian Hahn Value *StartV = StartVPV->getLiveInIRValue();
111803975b7fSFlorian Hahn
111903975b7fSFlorian Hahn Value *Iden = nullptr;
112003975b7fSFlorian Hahn RecurKind RK = RdxDesc.getRecurrenceKind();
112103975b7fSFlorian Hahn if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
112203975b7fSFlorian Hahn RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
112303975b7fSFlorian Hahn // MinMax reduction have the start value as their identify.
112403975b7fSFlorian Hahn if (ScalarPHI) {
112503975b7fSFlorian Hahn Iden = StartV;
112603975b7fSFlorian Hahn } else {
112703975b7fSFlorian Hahn IRBuilderBase::InsertPointGuard IPBuilder(Builder);
112803975b7fSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator());
112903975b7fSFlorian Hahn StartV = Iden =
113003975b7fSFlorian Hahn Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
113103975b7fSFlorian Hahn }
113203975b7fSFlorian Hahn } else {
113303975b7fSFlorian Hahn Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
113403975b7fSFlorian Hahn RdxDesc.getFastMathFlags());
113503975b7fSFlorian Hahn
113603975b7fSFlorian Hahn if (!ScalarPHI) {
113703975b7fSFlorian Hahn Iden = Builder.CreateVectorSplat(State.VF, Iden);
113803975b7fSFlorian Hahn IRBuilderBase::InsertPointGuard IPBuilder(Builder);
113903975b7fSFlorian Hahn Builder.SetInsertPoint(VectorPH->getTerminator());
114003975b7fSFlorian Hahn Constant *Zero = Builder.getInt32(0);
114103975b7fSFlorian Hahn StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
114203975b7fSFlorian Hahn }
114303975b7fSFlorian Hahn }
114403975b7fSFlorian Hahn
114503975b7fSFlorian Hahn for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
114603975b7fSFlorian Hahn Value *EntryPart = State.get(this, Part);
114703975b7fSFlorian Hahn // Make sure to add the reduction start value only to the
114803975b7fSFlorian Hahn // first unroll part.
114903975b7fSFlorian Hahn Value *StartVal = (Part == 0) ? StartV : Iden;
115003975b7fSFlorian Hahn cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
115103975b7fSFlorian Hahn }
115203975b7fSFlorian Hahn }
115303975b7fSFlorian Hahn
115403975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const115503975b7fSFlorian Hahn void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
115603975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
115703975b7fSFlorian Hahn O << Indent << "WIDEN-REDUCTION-PHI ";
115803975b7fSFlorian Hahn
115903975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
116003975b7fSFlorian Hahn O << " = phi ";
116103975b7fSFlorian Hahn printOperands(O, SlotTracker);
116203975b7fSFlorian Hahn }
116303975b7fSFlorian Hahn #endif
116403975b7fSFlorian Hahn
execute(VPTransformState & State)116503975b7fSFlorian Hahn void VPWidenPHIRecipe::execute(VPTransformState &State) {
116603975b7fSFlorian Hahn assert(EnableVPlanNativePath &&
116703975b7fSFlorian Hahn "Non-native vplans are not expected to have VPWidenPHIRecipes.");
116803975b7fSFlorian Hahn
116903975b7fSFlorian Hahn // Currently we enter here in the VPlan-native path for non-induction
117003975b7fSFlorian Hahn // PHIs where all control flow is uniform. We simply widen these PHIs.
117103975b7fSFlorian Hahn // Create a vector phi with no operands - the vector phi operands will be
117203975b7fSFlorian Hahn // set at the end of vector code generation.
117303975b7fSFlorian Hahn VPBasicBlock *Parent = getParent();
117403975b7fSFlorian Hahn VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
117503975b7fSFlorian Hahn unsigned StartIdx = 0;
117603975b7fSFlorian Hahn // For phis in header blocks of loop regions, use the index of the value
117703975b7fSFlorian Hahn // coming from the preheader.
117803975b7fSFlorian Hahn if (LoopRegion->getEntryBasicBlock() == Parent) {
117903975b7fSFlorian Hahn for (unsigned I = 0; I < getNumOperands(); ++I) {
118003975b7fSFlorian Hahn if (getIncomingBlock(I) ==
118103975b7fSFlorian Hahn LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
118203975b7fSFlorian Hahn StartIdx = I;
118303975b7fSFlorian Hahn }
118403975b7fSFlorian Hahn }
118503975b7fSFlorian Hahn Value *Op0 = State.get(getOperand(StartIdx), 0);
118603975b7fSFlorian Hahn Type *VecTy = Op0->getType();
118703975b7fSFlorian Hahn Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
118803975b7fSFlorian Hahn State.set(this, VecPhi, 0);
118903975b7fSFlorian Hahn }
119003975b7fSFlorian Hahn
119103975b7fSFlorian Hahn #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const119203975b7fSFlorian Hahn void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
119303975b7fSFlorian Hahn VPSlotTracker &SlotTracker) const {
119403975b7fSFlorian Hahn O << Indent << "WIDEN-PHI ";
119503975b7fSFlorian Hahn
119603975b7fSFlorian Hahn auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
119703975b7fSFlorian Hahn // Unless all incoming values are modeled in VPlan print the original PHI
119803975b7fSFlorian Hahn // directly.
119903975b7fSFlorian Hahn // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
120003975b7fSFlorian Hahn // values as VPValues.
120103975b7fSFlorian Hahn if (getNumOperands() != OriginalPhi->getNumOperands()) {
120203975b7fSFlorian Hahn O << VPlanIngredient(OriginalPhi);
120303975b7fSFlorian Hahn return;
120403975b7fSFlorian Hahn }
120503975b7fSFlorian Hahn
120603975b7fSFlorian Hahn printAsOperand(O, SlotTracker);
120703975b7fSFlorian Hahn O << " = phi ";
120803975b7fSFlorian Hahn printOperands(O, SlotTracker);
120903975b7fSFlorian Hahn }
121003975b7fSFlorian Hahn #endif
121103fee671SDavid Sherwood
121203fee671SDavid Sherwood // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
121303fee671SDavid Sherwood // remove VPActiveLaneMaskPHIRecipe.
execute(VPTransformState & State)121403fee671SDavid Sherwood void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
121503fee671SDavid Sherwood BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
121603fee671SDavid Sherwood for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
121703fee671SDavid Sherwood Value *StartMask = State.get(getOperand(0), Part);
121803fee671SDavid Sherwood PHINode *EntryPart =
121903fee671SDavid Sherwood State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
122003fee671SDavid Sherwood EntryPart->addIncoming(StartMask, VectorPH);
122103fee671SDavid Sherwood EntryPart->setDebugLoc(DL);
122203fee671SDavid Sherwood State.set(this, EntryPart, Part);
122303fee671SDavid Sherwood }
122403fee671SDavid Sherwood }
122503fee671SDavid Sherwood
122603fee671SDavid Sherwood #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(raw_ostream & O,const Twine & Indent,VPSlotTracker & SlotTracker) const122703fee671SDavid Sherwood void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
122803fee671SDavid Sherwood VPSlotTracker &SlotTracker) const {
122903fee671SDavid Sherwood O << Indent << "ACTIVE-LANE-MASK-PHI ";
123003fee671SDavid Sherwood
123103fee671SDavid Sherwood printAsOperand(O, SlotTracker);
123203fee671SDavid Sherwood O << " = phi ";
123303fee671SDavid Sherwood printOperands(O, SlotTracker);
123403fee671SDavid Sherwood }
123503fee671SDavid Sherwood #endif
1236