1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains implementations for different VPlan recipes. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlan.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/IVDescriptors.h" 19 #include "llvm/IR/BasicBlock.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/IR/Instruction.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/IR/Type.h" 24 #include "llvm/IR/Value.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 30 #include <cassert> 31 32 using namespace llvm; 33 34 extern cl::opt<bool> EnableVPlanNativePath; 35 36 bool VPRecipeBase::mayWriteToMemory() const { 37 switch (getVPDefID()) { 38 case VPWidenMemoryInstructionSC: { 39 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 40 } 41 case VPReplicateSC: 42 case VPWidenCallSC: 43 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 44 ->mayWriteToMemory(); 45 case VPBranchOnMaskSC: 46 return false; 47 case VPWidenIntOrFpInductionSC: 48 case VPWidenCanonicalIVSC: 49 case VPWidenPHISC: 50 case VPBlendSC: 51 case VPWidenSC: 52 case VPWidenGEPSC: 53 case VPReductionSC: 54 case VPWidenSelectSC: { 55 const Instruction *I = 56 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 57 (void)I; 58 assert((!I || !I->mayWriteToMemory()) && 59 "underlying instruction may write to memory"); 60 return false; 61 } 62 default: 63 return true; 64 } 65 } 66 67 bool VPRecipeBase::mayReadFromMemory() const { 68 switch (getVPDefID()) { 69 case VPWidenMemoryInstructionSC: { 70 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 71 } 72 case VPReplicateSC: 73 case VPWidenCallSC: 74 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 75 ->mayReadFromMemory(); 76 case VPBranchOnMaskSC: 77 return false; 78 case VPWidenIntOrFpInductionSC: 79 case VPWidenCanonicalIVSC: 80 case VPWidenPHISC: 81 case VPBlendSC: 82 case VPWidenSC: 83 case VPWidenGEPSC: 84 case VPReductionSC: 85 case VPWidenSelectSC: { 86 const Instruction *I = 87 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 88 (void)I; 89 assert((!I || !I->mayReadFromMemory()) && 90 "underlying instruction may read from memory"); 91 return false; 92 } 93 default: 94 return true; 95 } 96 } 97 98 bool VPRecipeBase::mayHaveSideEffects() const { 99 switch (getVPDefID()) { 100 case VPWidenIntOrFpInductionSC: 101 case VPWidenPointerInductionSC: 102 case VPWidenCanonicalIVSC: 103 case VPWidenPHISC: 104 case VPBlendSC: 105 case VPWidenSC: 106 case VPWidenGEPSC: 107 case VPReductionSC: 108 case VPWidenSelectSC: 109 case VPScalarIVStepsSC: { 110 const Instruction *I = 111 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 112 (void)I; 113 assert((!I || !I->mayHaveSideEffects()) && 114 "underlying instruction has side-effects"); 115 return false; 116 } 117 case VPReplicateSC: { 118 auto *R = cast<VPReplicateRecipe>(this); 119 return R->getUnderlyingInstr()->mayHaveSideEffects(); 120 } 121 default: 122 return true; 123 } 124 } 125 126 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 127 auto Lane = VPLane::getLastLaneForVF(State.VF); 128 VPValue *ExitValue = getOperand(0); 129 if (Plan.isUniformAfterVectorization(ExitValue)) 130 Lane = VPLane::getFirstLane(); 131 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), 132 State.Builder.GetInsertBlock()); 133 } 134 135 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 136 assert(!Parent && "Recipe already in some VPBasicBlock"); 137 assert(InsertPos->getParent() && 138 "Insertion position not in any VPBasicBlock"); 139 Parent = InsertPos->getParent(); 140 Parent->getRecipeList().insert(InsertPos->getIterator(), this); 141 } 142 143 void VPRecipeBase::insertBefore(VPBasicBlock &BB, 144 iplist<VPRecipeBase>::iterator I) { 145 assert(!Parent && "Recipe already in some VPBasicBlock"); 146 assert(I == BB.end() || I->getParent() == &BB); 147 Parent = &BB; 148 BB.getRecipeList().insert(I, this); 149 } 150 151 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 152 assert(!Parent && "Recipe already in some VPBasicBlock"); 153 assert(InsertPos->getParent() && 154 "Insertion position not in any VPBasicBlock"); 155 Parent = InsertPos->getParent(); 156 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); 157 } 158 159 void VPRecipeBase::removeFromParent() { 160 assert(getParent() && "Recipe not in any VPBasicBlock"); 161 getParent()->getRecipeList().remove(getIterator()); 162 Parent = nullptr; 163 } 164 165 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 166 assert(getParent() && "Recipe not in any VPBasicBlock"); 167 return getParent()->getRecipeList().erase(getIterator()); 168 } 169 170 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 171 removeFromParent(); 172 insertAfter(InsertPos); 173 } 174 175 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 176 iplist<VPRecipeBase>::iterator I) { 177 removeFromParent(); 178 insertBefore(BB, I); 179 } 180 181 void VPInstruction::generateInstruction(VPTransformState &State, 182 unsigned Part) { 183 IRBuilderBase &Builder = State.Builder; 184 Builder.SetCurrentDebugLocation(DL); 185 186 if (Instruction::isBinaryOp(getOpcode())) { 187 Value *A = State.get(getOperand(0), Part); 188 Value *B = State.get(getOperand(1), Part); 189 Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); 190 State.set(this, V, Part); 191 return; 192 } 193 194 switch (getOpcode()) { 195 case VPInstruction::Not: { 196 Value *A = State.get(getOperand(0), Part); 197 Value *V = Builder.CreateNot(A); 198 State.set(this, V, Part); 199 break; 200 } 201 case VPInstruction::ICmpULE: { 202 Value *IV = State.get(getOperand(0), Part); 203 Value *TC = State.get(getOperand(1), Part); 204 Value *V = Builder.CreateICmpULE(IV, TC); 205 State.set(this, V, Part); 206 break; 207 } 208 case Instruction::Select: { 209 Value *Cond = State.get(getOperand(0), Part); 210 Value *Op1 = State.get(getOperand(1), Part); 211 Value *Op2 = State.get(getOperand(2), Part); 212 Value *V = Builder.CreateSelect(Cond, Op1, Op2); 213 State.set(this, V, Part); 214 break; 215 } 216 case VPInstruction::ActiveLaneMask: { 217 // Get first lane of vector induction variable. 218 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 219 // Get the original loop tripcount. 220 Value *ScalarTC = State.get(getOperand(1), Part); 221 222 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 223 auto *PredTy = VectorType::get(Int1Ty, State.VF); 224 Instruction *Call = Builder.CreateIntrinsic( 225 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, 226 {VIVElem0, ScalarTC}, nullptr, "active.lane.mask"); 227 State.set(this, Call, Part); 228 break; 229 } 230 case VPInstruction::FirstOrderRecurrenceSplice: { 231 // Generate code to combine the previous and current values in vector v3. 232 // 233 // vector.ph: 234 // v_init = vector(..., ..., ..., a[-1]) 235 // br vector.body 236 // 237 // vector.body 238 // i = phi [0, vector.ph], [i+4, vector.body] 239 // v1 = phi [v_init, vector.ph], [v2, vector.body] 240 // v2 = a[i, i+1, i+2, i+3]; 241 // v3 = vector(v1(3), v2(0, 1, 2)) 242 243 // For the first part, use the recurrence phi (v1), otherwise v2. 244 auto *V1 = State.get(getOperand(0), 0); 245 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); 246 if (!PartMinus1->getType()->isVectorTy()) { 247 State.set(this, PartMinus1, Part); 248 } else { 249 Value *V2 = State.get(getOperand(1), Part); 250 State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part); 251 } 252 break; 253 } 254 case VPInstruction::CanonicalIVIncrement: 255 case VPInstruction::CanonicalIVIncrementNUW: { 256 Value *Next = nullptr; 257 if (Part == 0) { 258 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; 259 auto *Phi = State.get(getOperand(0), 0); 260 // The loop step is equal to the vectorization factor (num of SIMD 261 // elements) times the unroll factor (num of SIMD instructions). 262 Value *Step = 263 createStepForVF(Builder, Phi->getType(), State.VF, State.UF); 264 Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false); 265 } else { 266 Next = State.get(this, 0); 267 } 268 269 State.set(this, Next, Part); 270 break; 271 } 272 case VPInstruction::BranchOnCond: { 273 if (Part != 0) 274 break; 275 276 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 277 VPRegionBlock *ParentRegion = getParent()->getParent(); 278 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 279 280 // Replace the temporary unreachable terminator with a new conditional 281 // branch, hooking it up to backward destination for exiting blocks now and 282 // to forward destination(s) later when they are created. 283 BranchInst *CondBr = 284 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 285 286 if (getParent()->isExiting()) 287 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 288 289 CondBr->setSuccessor(0, nullptr); 290 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 291 break; 292 } 293 case VPInstruction::BranchOnCount: { 294 if (Part != 0) 295 break; 296 // First create the compare. 297 Value *IV = State.get(getOperand(0), Part); 298 Value *TC = State.get(getOperand(1), Part); 299 Value *Cond = Builder.CreateICmpEQ(IV, TC); 300 301 // Now create the branch. 302 auto *Plan = getParent()->getPlan(); 303 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 304 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 305 306 // Replace the temporary unreachable terminator with a new conditional 307 // branch, hooking it up to backward destination (the header) now and to the 308 // forward destination (the exit/middle block) later when it is created. 309 // Note that CreateCondBr expects a valid BB as first argument, so we need 310 // to set it to nullptr later. 311 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 312 State.CFG.VPBB2IRBB[Header]); 313 CondBr->setSuccessor(0, nullptr); 314 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 315 break; 316 } 317 default: 318 llvm_unreachable("Unsupported opcode for instruction"); 319 } 320 } 321 322 void VPInstruction::execute(VPTransformState &State) { 323 assert(!State.Instance && "VPInstruction executing an Instance"); 324 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 325 State.Builder.setFastMathFlags(FMF); 326 for (unsigned Part = 0; Part < State.UF; ++Part) 327 generateInstruction(State, Part); 328 } 329 330 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 331 void VPInstruction::dump() const { 332 VPSlotTracker SlotTracker(getParent()->getPlan()); 333 print(dbgs(), "", SlotTracker); 334 } 335 336 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 337 VPSlotTracker &SlotTracker) const { 338 O << Indent << "EMIT "; 339 340 if (hasResult()) { 341 printAsOperand(O, SlotTracker); 342 O << " = "; 343 } 344 345 switch (getOpcode()) { 346 case VPInstruction::Not: 347 O << "not"; 348 break; 349 case VPInstruction::ICmpULE: 350 O << "icmp ule"; 351 break; 352 case VPInstruction::SLPLoad: 353 O << "combined load"; 354 break; 355 case VPInstruction::SLPStore: 356 O << "combined store"; 357 break; 358 case VPInstruction::ActiveLaneMask: 359 O << "active lane mask"; 360 break; 361 case VPInstruction::FirstOrderRecurrenceSplice: 362 O << "first-order splice"; 363 break; 364 case VPInstruction::CanonicalIVIncrement: 365 O << "VF * UF + "; 366 break; 367 case VPInstruction::CanonicalIVIncrementNUW: 368 O << "VF * UF +(nuw) "; 369 break; 370 case VPInstruction::BranchOnCond: 371 O << "branch-on-cond"; 372 break; 373 case VPInstruction::BranchOnCount: 374 O << "branch-on-count "; 375 break; 376 default: 377 O << Instruction::getOpcodeName(getOpcode()); 378 } 379 380 O << FMF; 381 382 for (const VPValue *Operand : operands()) { 383 O << " "; 384 Operand->printAsOperand(O, SlotTracker); 385 } 386 387 if (DL) { 388 O << ", !dbg "; 389 DL.print(O); 390 } 391 } 392 #endif 393 394 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { 395 // Make sure the VPInstruction is a floating-point operation. 396 assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 397 Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 398 Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 399 Opcode == Instruction::FCmp) && 400 "this op can't take fast-math flags"); 401 FMF = FMFNew; 402 } 403 404 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 405 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 406 VPSlotTracker &SlotTracker) const { 407 O << Indent << "WIDEN-CALL "; 408 409 auto *CI = cast<CallInst>(getUnderlyingInstr()); 410 if (CI->getType()->isVoidTy()) 411 O << "void "; 412 else { 413 printAsOperand(O, SlotTracker); 414 O << " = "; 415 } 416 417 O << "call @" << CI->getCalledFunction()->getName() << "("; 418 printOperands(O, SlotTracker); 419 O << ")"; 420 } 421 422 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 423 VPSlotTracker &SlotTracker) const { 424 O << Indent << "WIDEN-SELECT "; 425 printAsOperand(O, SlotTracker); 426 O << " = select "; 427 getOperand(0)->printAsOperand(O, SlotTracker); 428 O << ", "; 429 getOperand(1)->printAsOperand(O, SlotTracker); 430 O << ", "; 431 getOperand(2)->printAsOperand(O, SlotTracker); 432 O << (InvariantCond ? " (condition is loop invariant)" : ""); 433 } 434 435 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 436 VPSlotTracker &SlotTracker) const { 437 O << Indent << "WIDEN "; 438 printAsOperand(O, SlotTracker); 439 O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; 440 printOperands(O, SlotTracker); 441 } 442 443 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 444 VPSlotTracker &SlotTracker) const { 445 O << Indent << "WIDEN-INDUCTION"; 446 if (getTruncInst()) { 447 O << "\\l\""; 448 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 449 O << " +\n" << Indent << "\" "; 450 getVPValue(0)->printAsOperand(O, SlotTracker); 451 } else 452 O << " " << VPlanIngredient(IV); 453 454 O << ", "; 455 getStepValue()->printAsOperand(O, SlotTracker); 456 } 457 #endif 458 459 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 460 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 461 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); 462 return StartC && StartC->isZero() && StepC && StepC->isOne(); 463 } 464 465 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { 466 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)); 467 } 468 469 bool VPScalarIVStepsRecipe::isCanonical() const { 470 auto *CanIV = getCanonicalIV(); 471 // The start value of the steps-recipe must match the start value of the 472 // canonical induction and it must step by 1. 473 if (CanIV->getStartValue() != getStartValue()) 474 return false; 475 auto *StepVPV = getStepValue(); 476 if (StepVPV->getDef()) 477 return false; 478 auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue()); 479 return StepC && StepC->isOne(); 480 } 481 482 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 483 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 484 VPSlotTracker &SlotTracker) const { 485 O << Indent; 486 printAsOperand(O, SlotTracker); 487 O << Indent << "= SCALAR-STEPS "; 488 printOperands(O, SlotTracker); 489 } 490 491 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 492 VPSlotTracker &SlotTracker) const { 493 O << Indent << "WIDEN-GEP "; 494 O << (IsPtrLoopInvariant ? "Inv" : "Var"); 495 size_t IndicesNumber = IsIndexLoopInvariant.size(); 496 for (size_t I = 0; I < IndicesNumber; ++I) 497 O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; 498 499 O << " "; 500 printAsOperand(O, SlotTracker); 501 O << " = getelementptr "; 502 printOperands(O, SlotTracker); 503 } 504 505 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 506 VPSlotTracker &SlotTracker) const { 507 O << Indent << "BLEND "; 508 Phi->printAsOperand(O, false); 509 O << " ="; 510 if (getNumIncomingValues() == 1) { 511 // Not a User of any mask: not really blending, this is a 512 // single-predecessor phi. 513 O << " "; 514 getIncomingValue(0)->printAsOperand(O, SlotTracker); 515 } else { 516 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 517 O << " "; 518 getIncomingValue(I)->printAsOperand(O, SlotTracker); 519 O << "/"; 520 getMask(I)->printAsOperand(O, SlotTracker); 521 } 522 } 523 } 524 525 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 526 VPSlotTracker &SlotTracker) const { 527 O << Indent << "REDUCE "; 528 printAsOperand(O, SlotTracker); 529 O << " = "; 530 getChainOp()->printAsOperand(O, SlotTracker); 531 O << " +"; 532 if (isa<FPMathOperator>(getUnderlyingInstr())) 533 O << getUnderlyingInstr()->getFastMathFlags(); 534 O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; 535 getVecOp()->printAsOperand(O, SlotTracker); 536 if (getCondOp()) { 537 O << ", "; 538 getCondOp()->printAsOperand(O, SlotTracker); 539 } 540 O << ")"; 541 if (RdxDesc->IntermediateStore) 542 O << " (with final reduction value stored in invariant address sank " 543 "outside of loop)"; 544 } 545 546 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 547 VPSlotTracker &SlotTracker) const { 548 O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 549 550 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 551 printAsOperand(O, SlotTracker); 552 O << " = "; 553 } 554 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 555 O << "call @" << CB->getCalledFunction()->getName() << "("; 556 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 557 O, [&O, &SlotTracker](VPValue *Op) { 558 Op->printAsOperand(O, SlotTracker); 559 }); 560 O << ")"; 561 } else { 562 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; 563 printOperands(O, SlotTracker); 564 } 565 566 if (AlsoPack) 567 O << " (S->V)"; 568 } 569 570 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 571 VPSlotTracker &SlotTracker) const { 572 O << Indent << "PHI-PREDICATED-INSTRUCTION "; 573 printAsOperand(O, SlotTracker); 574 O << " = "; 575 printOperands(O, SlotTracker); 576 } 577 578 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, 579 VPSlotTracker &SlotTracker) const { 580 O << Indent << "WIDEN "; 581 582 if (!isStore()) { 583 getVPSingleValue()->printAsOperand(O, SlotTracker); 584 O << " = "; 585 } 586 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; 587 588 printOperands(O, SlotTracker); 589 } 590 #endif 591 592 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { 593 Value *Start = getStartValue()->getLiveInIRValue(); 594 PHINode *EntryPart = PHINode::Create( 595 Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); 596 597 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 598 EntryPart->addIncoming(Start, VectorPH); 599 EntryPart->setDebugLoc(DL); 600 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 601 State.set(this, EntryPart, Part); 602 } 603 604 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 605 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 606 VPSlotTracker &SlotTracker) const { 607 O << Indent << "EMIT "; 608 printAsOperand(O, SlotTracker); 609 O << " = CANONICAL-INDUCTION"; 610 } 611 #endif 612 613 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) { 614 bool IsUniform = vputils::onlyFirstLaneUsed(this); 615 return all_of(users(), 616 [&](const VPUser *U) { return U->usesScalars(this); }) && 617 (IsUniform || !VF.isScalable()); 618 } 619 620 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 621 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, 622 VPSlotTracker &SlotTracker) const { 623 O << Indent << "EMIT "; 624 printAsOperand(O, SlotTracker); 625 O << " = WIDEN-POINTER-INDUCTION "; 626 getStartValue()->printAsOperand(O, SlotTracker); 627 O << ", " << *IndDesc.getStep(); 628 } 629 #endif 630 631 void VPExpandSCEVRecipe::execute(VPTransformState &State) { 632 assert(!State.Instance && "cannot be used in per-lane"); 633 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); 634 SCEVExpander Exp(SE, DL, "induction"); 635 636 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), 637 &*State.Builder.GetInsertPoint()); 638 639 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 640 State.set(this, Res, Part); 641 } 642 643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 644 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, 645 VPSlotTracker &SlotTracker) const { 646 O << Indent << "EMIT "; 647 getVPSingleValue()->printAsOperand(O, SlotTracker); 648 O << " = EXPAND SCEV " << *Expr; 649 } 650 #endif 651 652 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { 653 Value *CanonicalIV = State.get(getOperand(0), 0); 654 Type *STy = CanonicalIV->getType(); 655 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 656 ElementCount VF = State.VF; 657 Value *VStart = VF.isScalar() 658 ? CanonicalIV 659 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); 660 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 661 Value *VStep = createStepForVF(Builder, STy, VF, Part); 662 if (VF.isVector()) { 663 VStep = Builder.CreateVectorSplat(VF, VStep); 664 VStep = 665 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); 666 } 667 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); 668 State.set(this, CanonicalVectorIV, Part); 669 } 670 } 671 672 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 673 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, 674 VPSlotTracker &SlotTracker) const { 675 O << Indent << "EMIT "; 676 printAsOperand(O, SlotTracker); 677 O << " = WIDEN-CANONICAL-INDUCTION "; 678 printOperands(O, SlotTracker); 679 } 680 #endif 681 682 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { 683 auto &Builder = State.Builder; 684 // Create a vector from the initial value. 685 auto *VectorInit = getStartValue()->getLiveInIRValue(); 686 687 Type *VecTy = State.VF.isScalar() 688 ? VectorInit->getType() 689 : VectorType::get(VectorInit->getType(), State.VF); 690 691 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 692 if (State.VF.isVector()) { 693 auto *IdxTy = Builder.getInt32Ty(); 694 auto *One = ConstantInt::get(IdxTy, 1); 695 IRBuilder<>::InsertPointGuard Guard(Builder); 696 Builder.SetInsertPoint(VectorPH->getTerminator()); 697 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); 698 auto *LastIdx = Builder.CreateSub(RuntimeVF, One); 699 VectorInit = Builder.CreateInsertElement( 700 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); 701 } 702 703 // Create a phi node for the new recurrence. 704 PHINode *EntryPart = PHINode::Create( 705 VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); 706 EntryPart->addIncoming(VectorInit, VectorPH); 707 State.set(this, EntryPart, 0); 708 } 709 710 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 711 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, 712 VPSlotTracker &SlotTracker) const { 713 O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; 714 printAsOperand(O, SlotTracker); 715 O << " = phi "; 716 printOperands(O, SlotTracker); 717 } 718 #endif 719 720 void VPReductionPHIRecipe::execute(VPTransformState &State) { 721 PHINode *PN = cast<PHINode>(getUnderlyingValue()); 722 auto &Builder = State.Builder; 723 724 // In order to support recurrences we need to be able to vectorize Phi nodes. 725 // Phi nodes have cycles, so we need to vectorize them in two stages. This is 726 // stage #1: We create a new vector PHI node with no incoming edges. We'll use 727 // this value when we vectorize all of the instructions that use the PHI. 728 bool ScalarPHI = State.VF.isScalar() || IsInLoop; 729 Type *VecTy = 730 ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); 731 732 BasicBlock *HeaderBB = State.CFG.PrevBB; 733 assert(State.CurrentVectorLoop->getHeader() == HeaderBB && 734 "recipe must be in the vector loop header"); 735 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; 736 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 737 Value *EntryPart = 738 PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt()); 739 State.set(this, EntryPart, Part); 740 } 741 742 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 743 744 // Reductions do not have to start at zero. They can start with 745 // any loop invariant values. 746 VPValue *StartVPV = getStartValue(); 747 Value *StartV = StartVPV->getLiveInIRValue(); 748 749 Value *Iden = nullptr; 750 RecurKind RK = RdxDesc.getRecurrenceKind(); 751 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || 752 RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { 753 // MinMax reduction have the start value as their identify. 754 if (ScalarPHI) { 755 Iden = StartV; 756 } else { 757 IRBuilderBase::InsertPointGuard IPBuilder(Builder); 758 Builder.SetInsertPoint(VectorPH->getTerminator()); 759 StartV = Iden = 760 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); 761 } 762 } else { 763 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), 764 RdxDesc.getFastMathFlags()); 765 766 if (!ScalarPHI) { 767 Iden = Builder.CreateVectorSplat(State.VF, Iden); 768 IRBuilderBase::InsertPointGuard IPBuilder(Builder); 769 Builder.SetInsertPoint(VectorPH->getTerminator()); 770 Constant *Zero = Builder.getInt32(0); 771 StartV = Builder.CreateInsertElement(Iden, StartV, Zero); 772 } 773 } 774 775 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 776 Value *EntryPart = State.get(this, Part); 777 // Make sure to add the reduction start value only to the 778 // first unroll part. 779 Value *StartVal = (Part == 0) ? StartV : Iden; 780 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH); 781 } 782 } 783 784 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 785 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, 786 VPSlotTracker &SlotTracker) const { 787 O << Indent << "WIDEN-REDUCTION-PHI "; 788 789 printAsOperand(O, SlotTracker); 790 O << " = phi "; 791 printOperands(O, SlotTracker); 792 } 793 #endif 794 795 void VPWidenPHIRecipe::execute(VPTransformState &State) { 796 assert(EnableVPlanNativePath && 797 "Non-native vplans are not expected to have VPWidenPHIRecipes."); 798 799 // Currently we enter here in the VPlan-native path for non-induction 800 // PHIs where all control flow is uniform. We simply widen these PHIs. 801 // Create a vector phi with no operands - the vector phi operands will be 802 // set at the end of vector code generation. 803 VPBasicBlock *Parent = getParent(); 804 VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion(); 805 unsigned StartIdx = 0; 806 // For phis in header blocks of loop regions, use the index of the value 807 // coming from the preheader. 808 if (LoopRegion->getEntryBasicBlock() == Parent) { 809 for (unsigned I = 0; I < getNumOperands(); ++I) { 810 if (getIncomingBlock(I) == 811 LoopRegion->getSinglePredecessor()->getExitingBasicBlock()) 812 StartIdx = I; 813 } 814 } 815 Value *Op0 = State.get(getOperand(StartIdx), 0); 816 Type *VecTy = Op0->getType(); 817 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); 818 State.set(this, VecPhi, 0); 819 } 820 821 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 822 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 823 VPSlotTracker &SlotTracker) const { 824 O << Indent << "WIDEN-PHI "; 825 826 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue()); 827 // Unless all incoming values are modeled in VPlan print the original PHI 828 // directly. 829 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming 830 // values as VPValues. 831 if (getNumOperands() != OriginalPhi->getNumOperands()) { 832 O << VPlanIngredient(OriginalPhi); 833 return; 834 } 835 836 printAsOperand(O, SlotTracker); 837 O << " = phi "; 838 printOperands(O, SlotTracker); 839 } 840 #endif 841