//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

extern cl::opt<bool> EnableVPlanNativePath;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

// Conservatively answer whether the code this recipe will generate may write
// to memory. Recipes not listed explicitly fall into the default case and are
// assumed to write.
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory recipe writes iff it is a store.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the ingredient IR instruction these recipes are built around.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipes never write to memory. The underlying instruction (if
    // any) is fetched only so the assertion below can cross-check that claim;
    // the (void) cast silences unused-variable warnings in release builds.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

// Conservatively answer whether the code this recipe will generate may read
// from memory. Mirrors mayWriteToMemory() above.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory recipe reads iff it is a load (i.e. not a store).
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    // These recipes never read from memory; assert the underlying
    // instruction (if present) agrees.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

// Conservatively answer whether the code this recipe will generate may have
// side effects. Unknown recipes default to "yes".
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    // Side-effect free recipes; assert the underlying instruction (if any)
    // agrees with this classification.
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    // Replicated recipes inherit side effects from the cloned instruction.
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

// Add the incoming value for the vector loop's exit to the live-out phi.
// Uses the last lane of the last unrolled part, unless the exit value is
// uniform after vectorization, in which case lane 0 suffices.
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (Plan.isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   State.Builder.GetInsertBlock());
}

// Insert this (un-parented) recipe into InsertPos' block, before InsertPos.
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

// Insert this (un-parented) recipe into BB at iterator position I.
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}

// Insert this (un-parented) recipe into InsertPos' block, after InsertPos.
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}

// Unlink this recipe from its block without deleting it; clears Parent so it
// can be re-inserted elsewhere.
void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

// Unlink and delete this recipe; returns an iterator to the next recipe.
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

// Generate the IR for one unroll part of this VPInstruction.
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  // Generic binary operators need no per-opcode handling.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    // vector.ph:
    //   v_init = vector(..., ..., ..., a[-1])
    //   br vector.body
    //
    // vector.body
    //   i = phi [0, vector.ph], [i+4, vector.body]
    //   v1 = phi [v_init, vector.ph], [v2, vector.body]
    //   v2 = a[i, i+1, i+2, i+3];
    //   v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      // Scalar case (VF == 1): the "splice" degenerates to forwarding the
      // previous part's value.
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    // The canonical IV increment is uniform across unroll parts: compute it
    // once for part 0 and reuse the same value for all other parts.
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    // Only one branch is emitted per VPBasicBlock, regardless of UF.
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    // CreateCondBr requires a non-null successor; use the current block as a
    // placeholder and patch the successors up below / later.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    // For an exiting block, the false successor is the loop header, whose IR
    // block has already been created and recorded in VPBB2IRBB.
    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    // The true (forward) successor does not exist yet; it is filled in when
    // the destination block is created.
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    // Only one latch branch is emitted per block, regardless of UF.
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    // The exit/middle block is created later; clear the placeholder.
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

// Generate IR for all unroll parts of this VPInstruction, with this
// instruction's fast-math flags installed on the builder for the duration.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug helper: print this VPInstruction to dbgs().
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  // VPInstruction-specific opcodes get symbolic names; everything else falls
  // through to the LLVM IR opcode name.
  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

// Install fast-math flags; only valid for floating-point opcodes.
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (InvariantCond ? " (condition is loop invariant)" : "");
}
#endif

// Widen a select: emit one vector select per unroll part.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  auto &I = *cast<SelectInst>(getUnderlyingInstr());
  State.setDebugLocFromInst(&I);

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
447 auto *InvarCond = 448 InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; 449 450 for (unsigned Part = 0; Part < State.UF; ++Part) { 451 Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); 452 Value *Op0 = State.get(getOperand(1), Part); 453 Value *Op1 = State.get(getOperand(2), Part); 454 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 455 State.set(this, Sel, Part); 456 State.addMetadata(Sel, &I); 457 } 458 } 459 460 void VPWidenRecipe::execute(VPTransformState &State) { 461 auto &I = *cast<Instruction>(getUnderlyingValue()); 462 auto &Builder = State.Builder; 463 switch (I.getOpcode()) { 464 case Instruction::Call: 465 case Instruction::Br: 466 case Instruction::PHI: 467 case Instruction::GetElementPtr: 468 case Instruction::Select: 469 llvm_unreachable("This instruction is handled by a different recipe."); 470 case Instruction::UDiv: 471 case Instruction::SDiv: 472 case Instruction::SRem: 473 case Instruction::URem: 474 case Instruction::Add: 475 case Instruction::FAdd: 476 case Instruction::Sub: 477 case Instruction::FSub: 478 case Instruction::FNeg: 479 case Instruction::Mul: 480 case Instruction::FMul: 481 case Instruction::FDiv: 482 case Instruction::FRem: 483 case Instruction::Shl: 484 case Instruction::LShr: 485 case Instruction::AShr: 486 case Instruction::And: 487 case Instruction::Or: 488 case Instruction::Xor: { 489 // Just widen unops and binops. 490 State.setDebugLocFromInst(&I); 491 492 for (unsigned Part = 0; Part < State.UF; ++Part) { 493 SmallVector<Value *, 2> Ops; 494 for (VPValue *VPOp : operands()) 495 Ops.push_back(State.get(VPOp, Part)); 496 497 Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); 498 499 if (auto *VecOp = dyn_cast<Instruction>(V)) { 500 VecOp->copyIRFlags(&I); 501 502 // If the instruction is vectorized and was in a basic block that needed 503 // predication, we can't propagate poison-generating flags (nuw/nsw, 504 // exact, etc.). 
The control flow has been linearized and the 505 // instruction is no longer guarded by the predicate, which could make 506 // the flag properties to no longer hold. 507 if (State.MayGeneratePoisonRecipes.contains(this)) 508 VecOp->dropPoisonGeneratingFlags(); 509 } 510 511 // Use this vector value for all users of the original instruction. 512 State.set(this, V, Part); 513 State.addMetadata(V, &I); 514 } 515 516 break; 517 } 518 case Instruction::Freeze: { 519 State.setDebugLocFromInst(&I); 520 521 for (unsigned Part = 0; Part < State.UF; ++Part) { 522 Value *Op = State.get(getOperand(0), Part); 523 524 Value *Freeze = Builder.CreateFreeze(Op); 525 State.set(this, Freeze, Part); 526 } 527 break; 528 } 529 case Instruction::ICmp: 530 case Instruction::FCmp: { 531 // Widen compares. Generate vector compares. 532 bool FCmp = (I.getOpcode() == Instruction::FCmp); 533 auto *Cmp = cast<CmpInst>(&I); 534 State.setDebugLocFromInst(Cmp); 535 for (unsigned Part = 0; Part < State.UF; ++Part) { 536 Value *A = State.get(getOperand(0), Part); 537 Value *B = State.get(getOperand(1), Part); 538 Value *C = nullptr; 539 if (FCmp) { 540 // Propagate fast math flags. 541 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 542 Builder.setFastMathFlags(Cmp->getFastMathFlags()); 543 C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); 544 } else { 545 C = Builder.CreateICmp(Cmp->getPredicate(), A, B); 546 } 547 State.set(this, C, Part); 548 State.addMetadata(C, &I); 549 } 550 551 break; 552 } 553 554 case Instruction::ZExt: 555 case Instruction::SExt: 556 case Instruction::FPToUI: 557 case Instruction::FPToSI: 558 case Instruction::FPExt: 559 case Instruction::PtrToInt: 560 case Instruction::IntToPtr: 561 case Instruction::SIToFP: 562 case Instruction::UIToFP: 563 case Instruction::Trunc: 564 case Instruction::FPTrunc: 565 case Instruction::BitCast: { 566 auto *CI = cast<CastInst>(&I); 567 State.setDebugLocFromInst(CI); 568 569 /// Vectorize casts. 570 Type *DestTy = (State.VF.isScalar()) 571 ? 
CI->getType() 572 : VectorType::get(CI->getType(), State.VF); 573 574 for (unsigned Part = 0; Part < State.UF; ++Part) { 575 Value *A = State.get(getOperand(0), Part); 576 Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); 577 State.set(this, Cast, Part); 578 State.addMetadata(Cast, &I); 579 } 580 break; 581 } 582 default: 583 // This instruction is not vectorized by simple widening. 584 LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); 585 llvm_unreachable("Unhandled instruction!"); 586 } // end of switch. 587 } 588 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 589 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 590 VPSlotTracker &SlotTracker) const { 591 O << Indent << "WIDEN "; 592 printAsOperand(O, SlotTracker); 593 O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; 594 printOperands(O, SlotTracker); 595 } 596 597 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 598 VPSlotTracker &SlotTracker) const { 599 O << Indent << "WIDEN-INDUCTION"; 600 if (getTruncInst()) { 601 O << "\\l\""; 602 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 603 O << " +\n" << Indent << "\" "; 604 getVPValue(0)->printAsOperand(O, SlotTracker); 605 } else 606 O << " " << VPlanIngredient(IV); 607 608 O << ", "; 609 getStepValue()->printAsOperand(O, SlotTracker); 610 } 611 #endif 612 613 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 614 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 615 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); 616 return StartC && StartC->isZero() && StepC && StepC->isOne(); 617 } 618 619 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { 620 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)); 621 } 622 623 bool VPScalarIVStepsRecipe::isCanonical() const { 624 auto *CanIV = getCanonicalIV(); 625 // The start value of the steps-recipe must match the start value of the 626 // 
canonical induction and it must step by 1. 627 if (CanIV->getStartValue() != getStartValue()) 628 return false; 629 auto *StepVPV = getStepValue(); 630 if (StepVPV->getDef()) 631 return false; 632 auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue()); 633 return StepC && StepC->isOne(); 634 } 635 636 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 637 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 638 VPSlotTracker &SlotTracker) const { 639 O << Indent; 640 printAsOperand(O, SlotTracker); 641 O << Indent << "= SCALAR-STEPS "; 642 printOperands(O, SlotTracker); 643 } 644 #endif 645 646 void VPWidenGEPRecipe::execute(VPTransformState &State) { 647 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 648 // Construct a vector GEP by widening the operands of the scalar GEP as 649 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 650 // results in a vector of pointers when at least one operand of the GEP 651 // is vector-typed. Thus, to keep the representation compact, we only use 652 // vector-typed operands for loop-varying values. 653 654 if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { 655 // If we are vectorizing, but the GEP has only loop-invariant operands, 656 // the GEP we build (by only using vector-typed operands for 657 // loop-varying values) would be a scalar pointer. Thus, to ensure we 658 // produce a vector of pointers, we need to either arbitrarily pick an 659 // operand to broadcast, or broadcast a clone of the original GEP. 660 // Here, we broadcast a clone of the original. 661 // 662 // TODO: If at some point we decide to scalarize instructions having 663 // loop-invariant operands, this special case will no longer be 664 // required. We would add the scalarization decision to 665 // collectLoopScalars() and teach getVectorValue() to broadcast 666 // the lane-zero scalar value. 
    // Broadcast a scalar clone of the original GEP into each part's vector.
    auto *Clone = State.Builder.Insert(GEP->clone());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = IsPtrLoopInvariant
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (IsIndexLoopInvariant[I - 1])
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // If the GEP instruction is vectorized and was in a basic block that
      // needed predication, we can't propagate the poison-generating 'inbounds'
      // flag. The control flow has been linearized and the GEP is no longer
      // guarded by the predicate, which could make the 'inbounds' properties to
      // no longer hold.
      bool IsInBounds =
          GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", IsInBounds);
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  // Annotate the pointer and each index as loop-invariant or loop-varying.
  O << (IsPtrLoopInvariant ? "Inv" : "Var");
  size_t IndicesNumber = IsIndexLoopInvariant.size();
  for (size_t I = 0; I < IndicesNumber; ++I)
    O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr ";
  printOperands(O, SlotTracker);
}

void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}

void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}

void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    // The last operand is the callee; print only the argument operands.
    O << "call @" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
    printOperands(O, SlotTracker);
  }

  if (AlsoPack)
    O << " (S->V)";
}

void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  // Only loads produce a result value.
  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
#endif

// Materialize the canonical induction as a scalar phi in the loop header.
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index",
      &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  // The canonical IV phi is uniform across unroll parts; map every part to
  // the same scalar phi.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

// A pointer induction generates only scalar values when all users consume
// scalars; for scalable VFs this additionally requires uniformity.
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  bool IsUniform = vputils::onlyFirstLaneUsed(this);
  return all_of(users(),
                [&](const VPUser *U) { return U->usesScalars(this); }) &&
         (IsUniform || !VF.isScalable());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

// Expand a SCEV expression to IR at the current insert point; the result is
// loop-invariant, so all unroll parts share the same value.
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

// Materialize the widened canonical IV. For each unroll part this computes
//   broadcast(canonical-iv) + (Part * VF + <0, 1, ..., VF-1>)
// yielding a vector of consecutive lane indices per part.
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  // Scalar VF: the canonical IV itself is the start; otherwise splat it.
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // Part offset (Part * VF), then add a step vector for per-lane offsets.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a textual representation of the widened canonical induction.
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

// Create the header phi for a first-order recurrence. The initial value is
// inserted into the last lane of a poison vector (the recurrence reads the
// value from the "previous" iteration, i.e. the last lane of the prior
// vector iteration).
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    // Build the insert in the preheader, not the loop body.
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    // Last lane index is runtime-VF - 1 (runtime because VF may be scalable).
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  // Only part 0 is set here; the loop-carried incoming value is added later.
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a textual representation of the first-order recurrence phi.
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// Create the header phi(s) for a reduction. Non-first parts start from the
// recurrence identity so that combining all parts afterwards yields the
// correct result; ordered reductions use a single (scalar-chained) phi.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions carry a single chain, so only one phi is created.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reduction have the start value as their identify.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      // Splat the start value in the preheader; it serves as both start and
      // identity for min/max and select-cmp reductions.
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Vector start value: identity in all lanes except lane 0, which holds
      // the actual start value.
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a textual representation of the widened reduction phi.
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// Widen a non-induction phi (VPlan-native path only): create an empty vector
// phi now; its incoming values are filled in at the end of code generation.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  // Use the preheader operand only to determine the widened phi's type.
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print a textual representation of the widened phi. Falls back to printing
/// the original IR phi when not all incoming values are modeled as VPValues.
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif