//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

extern cl::opt<bool> EnableVPlanNativePath;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

/// Return true if this recipe may write to memory. Conservatively returns
/// true for recipe kinds not explicitly handled below.
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory instruction writes iff it is a store.
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar instruction's memory behavior.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    // I is only used by the assertion; the cast silences unused-variable
    // warnings in NDEBUG builds.
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

/// Return true if this recipe may read from memory. Conservatively returns
/// true for recipe kinds not explicitly handled below.
bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    // A widened memory instruction reads iff it is a load (i.e. not a store).
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    // Defer to the underlying scalar instruction's memory behavior.
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    // I is only used by the assertion below.
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

/// Return true if this recipe may have side-effects. Conservatively returns
/// true for recipe kinds not explicitly handled below.
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    // I is only used by the assertion below.
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

/// Add the incoming value for the vector loop's exit to the live-out phi
/// \p Phi. Uses the last lane of the last unrolled part, unless the exit
/// value is uniform after vectorization, in which case lane 0 suffices.
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (Plan.isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   State.Builder.GetInsertBlock());
}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

/// Generate the IR for unroll part \p Part of this VPInstruction.
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      // Scalar case: there is nothing to splice; forward the value as-is.
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
    } else {
      // The increment is computed once for part 0; all other parts reuse it.
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    // The branch is generated once, for part 0 only.
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    // The branch is generated once, for part 0 only.
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

/// Generate IR for all unroll parts of this VPInstruction, applying its
/// fast-math flags to the builder for the duration of code generation.
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    // Opcodes without a dedicated spelling use the LLVM IR opcode name.
    O << Instruction::getOpcodeName(getOpcode());
  }

  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (InvariantCond ? " (condition is loop invariant)" : "");
}
#endif

/// Widen the underlying select: generate one vector select per unroll part.
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  auto &I = *cast<SelectInst>(getUnderlyingInstr());
  State.setDebugLocFromInst(&I);

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, &I);
  }
}

/// Widen the underlying instruction: unary/binary ops, freeze, compares and
/// casts. Other opcodes are handled by dedicated recipes.
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        VecOp->copyIRFlags(&I);

        // If the instruction is vectorized and was in a basic block that needed
        // predication, we can't propagate poison-generating flags (nuw/nsw,
        // exact, etc.). The control flow has been linearized and the
        // instruction is no longer guarded by the predicate, which could make
        // the flag properties to no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }

    break;
  }
  case Instruction::Freeze: {
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }

    break;
  }

  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);

    /// Vectorize casts.
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
  printOperands(O, SlotTracker);
}

void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

/// Return true if this induction is canonical, i.e. starts at 0 and steps by
/// 1.
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
  return StartC && StartC->isZero() && StepC && StepC->isOne();
}

VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
  return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}

bool VPScalarIVStepsRecipe::isCanonical() const {
  auto *CanIV = getCanonicalIV();
  // The start value of the steps-recipe must match the start value of the
  // canonical induction and it must step by 1.
  if (CanIV->getStartValue() != getStartValue())
    return false;
  auto *StepVPV = getStepValue();
  // The step must be a live-in (no defining recipe) constant 1.
  if (StepVPV->getDef())
    return false;
  auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
  return StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << Indent << "= SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}

void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (IsPtrLoopInvariant ? "Inv" : "Var");
  size_t IndicesNumber = IsIndexLoopInvariant.size();
  for (size_t I = 0; I < IndicesNumber; ++I)
    O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr ";
  printOperands(O, SlotTracker);
}

void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}

void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}

void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    // The last operand is the callee; print only the argument operands.
    O << "call @" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
    printOperands(O, SlotTracker);
  }

  if (AlsoPack)
    O << " (S->V)";
}

void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
#endif

/// Create the scalar phi for the canonical induction at the start of the
/// vector loop header. The same phi is registered for all unroll parts.
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  // The canonical IV is scalar; every unroll part maps to the same phi.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  bool IsUniform = vputils::onlyFirstLaneUsed(this);
  return all_of(users(),
                [&](const VPUser *U) { return U->usesScalars(this); }) &&
         (IsUniform || !VF.isScalable());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

/// Expand the wrapped SCEV expression to IR at the current insert point. The
/// expanded value is uniform, so all unroll parts map to the same result.
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

/// Materialize a vector IV for each unroll part by splatting the scalar
/// canonical IV and adding a per-part step vector.
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      // Per-lane offsets: splat the part offset and add <0, 1, 2, ...>.
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

/// Create the phi carrying the recurrence across iterations. The initial
/// value is placed in the last lane of the init vector in the preheader.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    // Insert the start value into the last lane; the other lanes are poison
    // and are filled in by the splice with the previous iteration's values.
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

/// Create the (vector) phi(s) for a reduction and hook up the start value in
/// the preheader, using the reduction's identity for non-first parts.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi; unordered ones get one per part.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reduction have the start value as their identify.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Start vector: identity in all lanes except lane 0, which holds the
      // actual start value.
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

/// Widen a non-induction PHI (VPlan-native path only). The phi's incoming
/// values are filled in later, at the end of vector code generation.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif