1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains implementations for different VPlan recipes. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlan.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/IVDescriptors.h" 19 #include "llvm/IR/BasicBlock.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/IR/Instruction.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/IR/Type.h" 24 #include "llvm/IR/Value.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 30 #include <cassert> 31 32 using namespace llvm; 33 34 extern cl::opt<bool> EnableVPlanNativePath; 35 36 bool VPRecipeBase::mayWriteToMemory() const { 37 switch (getVPDefID()) { 38 case VPWidenMemoryInstructionSC: { 39 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 40 } 41 case VPReplicateSC: 42 case VPWidenCallSC: 43 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 44 ->mayWriteToMemory(); 45 case VPBranchOnMaskSC: 46 return false; 47 case VPWidenIntOrFpInductionSC: 48 case VPWidenCanonicalIVSC: 49 case VPWidenPHISC: 50 case VPBlendSC: 51 case VPWidenSC: 52 case VPWidenGEPSC: 53 case VPReductionSC: 54 case VPWidenSelectSC: { 55 const Instruction *I = 56 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 57 (void)I; 58 assert((!I || !I->mayWriteToMemory()) && 59 "underlying instruction may write to memory"); 60 return false; 61 } 62 default: 63 return true; 64 } 65 } 66 67 bool VPRecipeBase::mayReadFromMemory() const { 68 switch (getVPDefID()) { 69 case VPWidenMemoryInstructionSC: { 70 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 71 } 72 case VPReplicateSC: 73 case VPWidenCallSC: 74 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 75 ->mayReadFromMemory(); 76 case VPBranchOnMaskSC: 77 return false; 78 case VPWidenIntOrFpInductionSC: 79 case VPWidenCanonicalIVSC: 80 case VPWidenPHISC: 81 case VPBlendSC: 82 case VPWidenSC: 83 case VPWidenGEPSC: 84 case VPReductionSC: 85 case VPWidenSelectSC: { 86 const Instruction *I = 87 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 88 (void)I; 89 assert((!I || !I->mayReadFromMemory()) && 90 "underlying instruction may read from memory"); 91 return false; 92 } 93 default: 94 return true; 95 } 96 } 97 98 bool VPRecipeBase::mayHaveSideEffects() const { 99 switch (getVPDefID()) { 100 case VPWidenIntOrFpInductionSC: 101 case VPWidenPointerInductionSC: 102 case VPWidenCanonicalIVSC: 103 case VPWidenPHISC: 104 case VPBlendSC: 105 case VPWidenSC: 106 case VPWidenGEPSC: 107 case VPReductionSC: 108 case VPWidenSelectSC: 109 case VPScalarIVStepsSC: { 110 const Instruction *I = 111 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 112 (void)I; 113 assert((!I || !I->mayHaveSideEffects()) && 114 "underlying instruction has side-effects"); 115 return false; 116 } 117 case VPReplicateSC: { 118 auto *R = cast<VPReplicateRecipe>(this); 119 return R->getUnderlyingInstr()->mayHaveSideEffects(); 120 } 121 default: 122 return true; 123 } 124 } 125 126 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 127 auto Lane = VPLane::getLastLaneForVF(State.VF); 128 VPValue *ExitValue = getOperand(0); 129 if (Plan.isUniformAfterVectorization(ExitValue)) 130 Lane = VPLane::getFirstLane(); 131 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), 132 State.Builder.GetInsertBlock()); 133 } 134 135 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 136 assert(!Parent && "Recipe already in some VPBasicBlock"); 137 assert(InsertPos->getParent() && 138 "Insertion position not in any VPBasicBlock"); 139 Parent = InsertPos->getParent(); 140 Parent->getRecipeList().insert(InsertPos->getIterator(), this); 141 } 142 143 void VPRecipeBase::insertBefore(VPBasicBlock &BB, 144 iplist<VPRecipeBase>::iterator I) { 145 assert(!Parent && "Recipe already in some VPBasicBlock"); 146 assert(I == BB.end() || I->getParent() == &BB); 147 Parent = &BB; 148 BB.getRecipeList().insert(I, this); 149 } 150 151 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 152 assert(!Parent && "Recipe already in some VPBasicBlock"); 153 assert(InsertPos->getParent() && 154 "Insertion position not in any VPBasicBlock"); 155 Parent = InsertPos->getParent(); 156 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); 157 } 158 159 void VPRecipeBase::removeFromParent() { 160 assert(getParent() && "Recipe not in any VPBasicBlock"); 161 getParent()->getRecipeList().remove(getIterator()); 162 Parent = nullptr; 163 } 164 165 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 166 assert(getParent() && "Recipe not in any VPBasicBlock"); 167 return getParent()->getRecipeList().erase(getIterator()); 168 } 169 170 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 171 removeFromParent(); 172 insertAfter(InsertPos); 173 } 174 175 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 176 iplist<VPRecipeBase>::iterator I) { 177 removeFromParent(); 178 insertBefore(BB, I); 179 } 180 181 void VPInstruction::generateInstruction(VPTransformState &State, 182 unsigned Part) { 183 IRBuilderBase &Builder = State.Builder; 184 Builder.SetCurrentDebugLocation(DL); 185 186 if (Instruction::isBinaryOp(getOpcode())) { 187 Value *A = State.get(getOperand(0), Part); 188 Value *B = State.get(getOperand(1), Part); 189 Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); 190 State.set(this, V, Part); 191 return; 192 } 193 194 switch (getOpcode()) { 195 case VPInstruction::Not: { 196 Value *A = State.get(getOperand(0), Part); 197 Value *V = Builder.CreateNot(A); 198 State.set(this, V, Part); 199 break; 200 } 201 case VPInstruction::ICmpULE: { 202 Value *IV = State.get(getOperand(0), Part); 203 Value *TC = State.get(getOperand(1), Part); 204 Value *V = Builder.CreateICmpULE(IV, TC); 205 State.set(this, V, Part); 206 break; 207 } 208 case Instruction::Select: { 209 Value *Cond = State.get(getOperand(0), Part); 210 Value *Op1 = State.get(getOperand(1), Part); 211 Value *Op2 = State.get(getOperand(2), Part); 212 Value *V = Builder.CreateSelect(Cond, Op1, Op2); 213 State.set(this, V, Part); 214 break; 215 } 216 case VPInstruction::ActiveLaneMask: { 217 // Get first lane of vector induction variable. 218 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 219 // Get the original loop tripcount. 220 Value *ScalarTC = State.get(getOperand(1), Part); 221 222 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 223 auto *PredTy = VectorType::get(Int1Ty, State.VF); 224 Instruction *Call = Builder.CreateIntrinsic( 225 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, 226 {VIVElem0, ScalarTC}, nullptr, "active.lane.mask"); 227 State.set(this, Call, Part); 228 break; 229 } 230 case VPInstruction::FirstOrderRecurrenceSplice: { 231 // Generate code to combine the previous and current values in vector v3. 232 // 233 // vector.ph: 234 // v_init = vector(..., ..., ..., a[-1]) 235 // br vector.body 236 // 237 // vector.body 238 // i = phi [0, vector.ph], [i+4, vector.body] 239 // v1 = phi [v_init, vector.ph], [v2, vector.body] 240 // v2 = a[i, i+1, i+2, i+3]; 241 // v3 = vector(v1(3), v2(0, 1, 2)) 242 243 // For the first part, use the recurrence phi (v1), otherwise v2. 244 auto *V1 = State.get(getOperand(0), 0); 245 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); 246 if (!PartMinus1->getType()->isVectorTy()) { 247 State.set(this, PartMinus1, Part); 248 } else { 249 Value *V2 = State.get(getOperand(1), Part); 250 State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part); 251 } 252 break; 253 } 254 case VPInstruction::CanonicalIVIncrement: 255 case VPInstruction::CanonicalIVIncrementNUW: { 256 Value *Next = nullptr; 257 if (Part == 0) { 258 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; 259 auto *Phi = State.get(getOperand(0), 0); 260 // The loop step is equal to the vectorization factor (num of SIMD 261 // elements) times the unroll factor (num of SIMD instructions). 262 Value *Step = 263 createStepForVF(Builder, Phi->getType(), State.VF, State.UF); 264 Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false); 265 } else { 266 Next = State.get(this, 0); 267 } 268 269 State.set(this, Next, Part); 270 break; 271 } 272 case VPInstruction::BranchOnCond: { 273 if (Part != 0) 274 break; 275 276 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 277 VPRegionBlock *ParentRegion = getParent()->getParent(); 278 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 279 280 // Replace the temporary unreachable terminator with a new conditional 281 // branch, hooking it up to backward destination for exiting blocks now and 282 // to forward destination(s) later when they are created. 283 BranchInst *CondBr = 284 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 285 286 if (getParent()->isExiting()) 287 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 288 289 CondBr->setSuccessor(0, nullptr); 290 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 291 break; 292 } 293 case VPInstruction::BranchOnCount: { 294 if (Part != 0) 295 break; 296 // First create the compare. 297 Value *IV = State.get(getOperand(0), Part); 298 Value *TC = State.get(getOperand(1), Part); 299 Value *Cond = Builder.CreateICmpEQ(IV, TC); 300 301 // Now create the branch. 302 auto *Plan = getParent()->getPlan(); 303 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 304 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 305 306 // Replace the temporary unreachable terminator with a new conditional 307 // branch, hooking it up to backward destination (the header) now and to the 308 // forward destination (the exit/middle block) later when it is created. 309 // Note that CreateCondBr expects a valid BB as first argument, so we need 310 // to set it to nullptr later. 311 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 312 State.CFG.VPBB2IRBB[Header]); 313 CondBr->setSuccessor(0, nullptr); 314 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 315 break; 316 } 317 default: 318 llvm_unreachable("Unsupported opcode for instruction"); 319 } 320 } 321 322 void VPInstruction::execute(VPTransformState &State) { 323 assert(!State.Instance && "VPInstruction executing an Instance"); 324 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 325 State.Builder.setFastMathFlags(FMF); 326 for (unsigned Part = 0; Part < State.UF; ++Part) 327 generateInstruction(State, Part); 328 } 329 330 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 331 void VPInstruction::dump() const { 332 VPSlotTracker SlotTracker(getParent()->getPlan()); 333 print(dbgs(), "", SlotTracker); 334 } 335 336 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 337 VPSlotTracker &SlotTracker) const { 338 O << Indent << "EMIT "; 339 340 if (hasResult()) { 341 printAsOperand(O, SlotTracker); 342 O << " = "; 343 } 344 345 switch (getOpcode()) { 346 case VPInstruction::Not: 347 O << "not"; 348 break; 349 case VPInstruction::ICmpULE: 350 O << "icmp ule"; 351 break; 352 case VPInstruction::SLPLoad: 353 O << "combined load"; 354 break; 355 case VPInstruction::SLPStore: 356 O << "combined store"; 357 break; 358 case VPInstruction::ActiveLaneMask: 359 O << "active lane mask"; 360 break; 361 case VPInstruction::FirstOrderRecurrenceSplice: 362 O << "first-order splice"; 363 break; 364 case VPInstruction::CanonicalIVIncrement: 365 O << "VF * UF + "; 366 break; 367 case VPInstruction::CanonicalIVIncrementNUW: 368 O << "VF * UF +(nuw) "; 369 break; 370 case VPInstruction::BranchOnCond: 371 O << "branch-on-cond"; 372 break; 373 case VPInstruction::BranchOnCount: 374 O << "branch-on-count "; 375 break; 376 default: 377 O << Instruction::getOpcodeName(getOpcode()); 378 } 379 380 O << FMF; 381 382 for (const VPValue *Operand : operands()) { 383 O << " "; 384 Operand->printAsOperand(O, SlotTracker); 385 } 386 387 if (DL) { 388 O << ", !dbg "; 389 DL.print(O); 390 } 391 } 392 #endif 393 394 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { 395 // Make sure the VPInstruction is a floating-point operation. 396 assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 397 Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 398 Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 399 Opcode == Instruction::FCmp) && 400 "this op can't take fast-math flags"); 401 FMF = FMFNew; 402 } 403 404 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 405 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 406 VPSlotTracker &SlotTracker) const { 407 O << Indent << "WIDEN-CALL "; 408 409 auto *CI = cast<CallInst>(getUnderlyingInstr()); 410 if (CI->getType()->isVoidTy()) 411 O << "void "; 412 else { 413 printAsOperand(O, SlotTracker); 414 O << " = "; 415 } 416 417 O << "call @" << CI->getCalledFunction()->getName() << "("; 418 printOperands(O, SlotTracker); 419 O << ")"; 420 } 421 422 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 423 VPSlotTracker &SlotTracker) const { 424 O << Indent << "WIDEN-SELECT "; 425 printAsOperand(O, SlotTracker); 426 O << " = select "; 427 getOperand(0)->printAsOperand(O, SlotTracker); 428 O << ", "; 429 getOperand(1)->printAsOperand(O, SlotTracker); 430 O << ", "; 431 getOperand(2)->printAsOperand(O, SlotTracker); 432 O << (InvariantCond ? " (condition is loop invariant)" : ""); 433 } 434 #endif 435 436 void VPWidenSelectRecipe::execute(VPTransformState &State) { 437 auto &I = *cast<SelectInst>(getUnderlyingInstr()); 438 State.setDebugLocFromInst(&I); 439 440 // The condition can be loop invariant but still defined inside the 441 // loop. This means that we can't just use the original 'cond' value. 442 // We have to take the 'vectorized' value and pick the first lane. 443 // Instcombine will make this a no-op. 444 auto *InvarCond = 445 InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; 446 447 for (unsigned Part = 0; Part < State.UF; ++Part) { 448 Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); 449 Value *Op0 = State.get(getOperand(1), Part); 450 Value *Op1 = State.get(getOperand(2), Part); 451 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 452 State.set(this, Sel, Part); 453 State.addMetadata(Sel, &I); 454 } 455 } 456 457 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 458 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 459 VPSlotTracker &SlotTracker) const { 460 O << Indent << "WIDEN "; 461 printAsOperand(O, SlotTracker); 462 O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; 463 printOperands(O, SlotTracker); 464 } 465 466 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 467 VPSlotTracker &SlotTracker) const { 468 O << Indent << "WIDEN-INDUCTION"; 469 if (getTruncInst()) { 470 O << "\\l\""; 471 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 472 O << " +\n" << Indent << "\" "; 473 getVPValue(0)->printAsOperand(O, SlotTracker); 474 } else 475 O << " " << VPlanIngredient(IV); 476 477 O << ", "; 478 getStepValue()->printAsOperand(O, SlotTracker); 479 } 480 #endif 481 482 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 483 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 484 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); 485 return StartC && StartC->isZero() && StepC && StepC->isOne(); 486 } 487 488 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { 489 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)); 490 } 491 492 bool VPScalarIVStepsRecipe::isCanonical() const { 493 auto *CanIV = getCanonicalIV(); 494 // The start value of the steps-recipe must match the start value of the 495 // canonical induction and it must step by 1. 496 if (CanIV->getStartValue() != getStartValue()) 497 return false; 498 auto *StepVPV = getStepValue(); 499 if (StepVPV->getDef()) 500 return false; 501 auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue()); 502 return StepC && StepC->isOne(); 503 } 504 505 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 506 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 507 VPSlotTracker &SlotTracker) const { 508 O << Indent; 509 printAsOperand(O, SlotTracker); 510 O << Indent << "= SCALAR-STEPS "; 511 printOperands(O, SlotTracker); 512 } 513 514 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 515 VPSlotTracker &SlotTracker) const { 516 O << Indent << "WIDEN-GEP "; 517 O << (IsPtrLoopInvariant ? "Inv" : "Var"); 518 size_t IndicesNumber = IsIndexLoopInvariant.size(); 519 for (size_t I = 0; I < IndicesNumber; ++I) 520 O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; 521 522 O << " "; 523 printAsOperand(O, SlotTracker); 524 O << " = getelementptr "; 525 printOperands(O, SlotTracker); 526 } 527 528 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 529 VPSlotTracker &SlotTracker) const { 530 O << Indent << "BLEND "; 531 Phi->printAsOperand(O, false); 532 O << " ="; 533 if (getNumIncomingValues() == 1) { 534 // Not a User of any mask: not really blending, this is a 535 // single-predecessor phi. 536 O << " "; 537 getIncomingValue(0)->printAsOperand(O, SlotTracker); 538 } else { 539 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 540 O << " "; 541 getIncomingValue(I)->printAsOperand(O, SlotTracker); 542 O << "/"; 543 getMask(I)->printAsOperand(O, SlotTracker); 544 } 545 } 546 } 547 548 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 549 VPSlotTracker &SlotTracker) const { 550 O << Indent << "REDUCE "; 551 printAsOperand(O, SlotTracker); 552 O << " = "; 553 getChainOp()->printAsOperand(O, SlotTracker); 554 O << " +"; 555 if (isa<FPMathOperator>(getUnderlyingInstr())) 556 O << getUnderlyingInstr()->getFastMathFlags(); 557 O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; 558 getVecOp()->printAsOperand(O, SlotTracker); 559 if (getCondOp()) { 560 O << ", "; 561 getCondOp()->printAsOperand(O, SlotTracker); 562 } 563 O << ")"; 564 if (RdxDesc->IntermediateStore) 565 O << " (with final reduction value stored in invariant address sank " 566 "outside of loop)"; 567 } 568 569 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 570 VPSlotTracker &SlotTracker) const { 571 O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 572 573 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 574 printAsOperand(O, SlotTracker); 575 O << " = "; 576 } 577 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 578 O << "call @" << CB->getCalledFunction()->getName() << "("; 579 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 580 O, [&O, &SlotTracker](VPValue *Op) { 581 Op->printAsOperand(O, SlotTracker); 582 }); 583 O << ")"; 584 } else { 585 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; 586 printOperands(O, SlotTracker); 587 } 588 589 if (AlsoPack) 590 O << " (S->V)"; 591 } 592 593 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 594 VPSlotTracker &SlotTracker) const { 595 O << Indent << "PHI-PREDICATED-INSTRUCTION "; 596 printAsOperand(O, SlotTracker); 597 O << " = "; 598 printOperands(O, SlotTracker); 599 } 600 601 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, 602 VPSlotTracker &SlotTracker) const { 603 O << Indent << "WIDEN "; 604 605 if (!isStore()) { 606 getVPSingleValue()->printAsOperand(O, SlotTracker); 607 O << " = "; 608 } 609 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; 610 611 printOperands(O, SlotTracker); 612 } 613 #endif 614 615 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { 616 Value *Start = getStartValue()->getLiveInIRValue(); 617 PHINode *EntryPart = PHINode::Create( 618 Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); 619 620 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 621 EntryPart->addIncoming(Start, VectorPH); 622 EntryPart->setDebugLoc(DL); 623 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 624 State.set(this, EntryPart, Part); 625 } 626 627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 628 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 629 VPSlotTracker &SlotTracker) const { 630 O << Indent << "EMIT "; 631 printAsOperand(O, SlotTracker); 632 O << " = CANONICAL-INDUCTION"; 633 } 634 #endif 635 636 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) { 637 bool IsUniform = vputils::onlyFirstLaneUsed(this); 638 return all_of(users(), 639 [&](const VPUser *U) { return U->usesScalars(this); }) && 640 (IsUniform || !VF.isScalable()); 641 } 642 643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 644 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, 645 VPSlotTracker &SlotTracker) const { 646 O << Indent << "EMIT "; 647 printAsOperand(O, SlotTracker); 648 O << " = WIDEN-POINTER-INDUCTION "; 649 getStartValue()->printAsOperand(O, SlotTracker); 650 O << ", " << *IndDesc.getStep(); 651 } 652 #endif 653 654 void VPExpandSCEVRecipe::execute(VPTransformState &State) { 655 assert(!State.Instance && "cannot be used in per-lane"); 656 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); 657 SCEVExpander Exp(SE, DL, "induction"); 658 659 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), 660 &*State.Builder.GetInsertPoint()); 661 662 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 663 State.set(this, Res, Part); 664 } 665 666 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 667 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, 668 VPSlotTracker &SlotTracker) const { 669 O << Indent << "EMIT "; 670 getVPSingleValue()->printAsOperand(O, SlotTracker); 671 O << " = EXPAND SCEV " << *Expr; 672 } 673 #endif 674 675 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { 676 Value *CanonicalIV = State.get(getOperand(0), 0); 677 Type *STy = CanonicalIV->getType(); 678 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 679 ElementCount VF = State.VF; 680 Value *VStart = VF.isScalar() 681 ? CanonicalIV 682 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); 683 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 684 Value *VStep = createStepForVF(Builder, STy, VF, Part); 685 if (VF.isVector()) { 686 VStep = Builder.CreateVectorSplat(VF, VStep); 687 VStep = 688 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); 689 } 690 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); 691 State.set(this, CanonicalVectorIV, Part); 692 } 693 } 694 695 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 696 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, 697 VPSlotTracker &SlotTracker) const { 698 O << Indent << "EMIT "; 699 printAsOperand(O, SlotTracker); 700 O << " = WIDEN-CANONICAL-INDUCTION "; 701 printOperands(O, SlotTracker); 702 } 703 #endif 704 705 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { 706 auto &Builder = State.Builder; 707 // Create a vector from the initial value. 708 auto *VectorInit = getStartValue()->getLiveInIRValue(); 709 710 Type *VecTy = State.VF.isScalar() 711 ? VectorInit->getType() 712 : VectorType::get(VectorInit->getType(), State.VF); 713 714 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 715 if (State.VF.isVector()) { 716 auto *IdxTy = Builder.getInt32Ty(); 717 auto *One = ConstantInt::get(IdxTy, 1); 718 IRBuilder<>::InsertPointGuard Guard(Builder); 719 Builder.SetInsertPoint(VectorPH->getTerminator()); 720 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); 721 auto *LastIdx = Builder.CreateSub(RuntimeVF, One); 722 VectorInit = Builder.CreateInsertElement( 723 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); 724 } 725 726 // Create a phi node for the new recurrence. 727 PHINode *EntryPart = PHINode::Create( 728 VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); 729 EntryPart->addIncoming(VectorInit, VectorPH); 730 State.set(this, EntryPart, 0); 731 } 732 733 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 734 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, 735 VPSlotTracker &SlotTracker) const { 736 O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; 737 printAsOperand(O, SlotTracker); 738 O << " = phi "; 739 printOperands(O, SlotTracker); 740 } 741 #endif 742 743 void VPReductionPHIRecipe::execute(VPTransformState &State) { 744 PHINode *PN = cast<PHINode>(getUnderlyingValue()); 745 auto &Builder = State.Builder; 746 747 // In order to support recurrences we need to be able to vectorize Phi nodes. 748 // Phi nodes have cycles, so we need to vectorize them in two stages. This is 749 // stage #1: We create a new vector PHI node with no incoming edges. We'll use 750 // this value when we vectorize all of the instructions that use the PHI. 751 bool ScalarPHI = State.VF.isScalar() || IsInLoop; 752 Type *VecTy = 753 ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); 754 755 BasicBlock *HeaderBB = State.CFG.PrevBB; 756 assert(State.CurrentVectorLoop->getHeader() == HeaderBB && 757 "recipe must be in the vector loop header"); 758 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; 759 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 760 Value *EntryPart = 761 PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt()); 762 State.set(this, EntryPart, Part); 763 } 764 765 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 766 767 // Reductions do not have to start at zero. They can start with 768 // any loop invariant values. 769 VPValue *StartVPV = getStartValue(); 770 Value *StartV = StartVPV->getLiveInIRValue(); 771 772 Value *Iden = nullptr; 773 RecurKind RK = RdxDesc.getRecurrenceKind(); 774 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || 775 RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { 776 // MinMax reduction have the start value as their identify. 777 if (ScalarPHI) { 778 Iden = StartV; 779 } else { 780 IRBuilderBase::InsertPointGuard IPBuilder(Builder); 781 Builder.SetInsertPoint(VectorPH->getTerminator()); 782 StartV = Iden = 783 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); 784 } 785 } else { 786 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), 787 RdxDesc.getFastMathFlags()); 788 789 if (!ScalarPHI) { 790 Iden = Builder.CreateVectorSplat(State.VF, Iden); 791 IRBuilderBase::InsertPointGuard IPBuilder(Builder); 792 Builder.SetInsertPoint(VectorPH->getTerminator()); 793 Constant *Zero = Builder.getInt32(0); 794 StartV = Builder.CreateInsertElement(Iden, StartV, Zero); 795 } 796 } 797 798 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 799 Value *EntryPart = State.get(this, Part); 800 // Make sure to add the reduction start value only to the 801 // first unroll part. 802 Value *StartVal = (Part == 0) ? StartV : Iden; 803 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH); 804 } 805 } 806 807 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 808 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, 809 VPSlotTracker &SlotTracker) const { 810 O << Indent << "WIDEN-REDUCTION-PHI "; 811 812 printAsOperand(O, SlotTracker); 813 O << " = phi "; 814 printOperands(O, SlotTracker); 815 } 816 #endif 817 818 void VPWidenPHIRecipe::execute(VPTransformState &State) { 819 assert(EnableVPlanNativePath && 820 "Non-native vplans are not expected to have VPWidenPHIRecipes."); 821 822 // Currently we enter here in the VPlan-native path for non-induction 823 // PHIs where all control flow is uniform. We simply widen these PHIs. 824 // Create a vector phi with no operands - the vector phi operands will be 825 // set at the end of vector code generation. 826 VPBasicBlock *Parent = getParent(); 827 VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion(); 828 unsigned StartIdx = 0; 829 // For phis in header blocks of loop regions, use the index of the value 830 // coming from the preheader. 831 if (LoopRegion->getEntryBasicBlock() == Parent) { 832 for (unsigned I = 0; I < getNumOperands(); ++I) { 833 if (getIncomingBlock(I) == 834 LoopRegion->getSinglePredecessor()->getExitingBasicBlock()) 835 StartIdx = I; 836 } 837 } 838 Value *Op0 = State.get(getOperand(StartIdx), 0); 839 Type *VecTy = Op0->getType(); 840 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); 841 State.set(this, VecPhi, 0); 842 } 843 844 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 845 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 846 VPSlotTracker &SlotTracker) const { 847 O << Indent << "WIDEN-PHI "; 848 849 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue()); 850 // Unless all incoming values are modeled in VPlan print the original PHI 851 // directly. 852 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming 853 // values as VPValues. 854 if (getNumOperands() != OriginalPhi->getNumOperands()) { 855 O << VPlanIngredient(OriginalPhi); 856 return; 857 } 858 859 printAsOperand(O, SlotTracker); 860 O << " = phi "; 861 printOperands(O, SlotTracker); 862 } 863 #endif 864