1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains implementations for different VPlan recipes. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlan.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/IVDescriptors.h" 19 #include "llvm/IR/BasicBlock.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/IR/Instruction.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/IR/Type.h" 24 #include "llvm/IR/Value.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 30 #include <cassert> 31 32 using namespace llvm; 33 34 extern cl::opt<bool> EnableVPlanNativePath; 35 36 #define LV_NAME "loop-vectorize" 37 #define DEBUG_TYPE LV_NAME 38 39 bool VPRecipeBase::mayWriteToMemory() const { 40 switch (getVPDefID()) { 41 case VPWidenMemoryInstructionSC: { 42 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 43 } 44 case VPReplicateSC: 45 case VPWidenCallSC: 46 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 47 ->mayWriteToMemory(); 48 case VPBranchOnMaskSC: 49 return false; 50 case VPWidenIntOrFpInductionSC: 51 case VPWidenCanonicalIVSC: 52 case VPWidenPHISC: 53 case VPBlendSC: 54 case VPWidenSC: 55 case VPWidenGEPSC: 56 case VPReductionSC: 57 case VPWidenSelectSC: { 58 const Instruction *I = 59 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 60 
(void)I; 61 assert((!I || !I->mayWriteToMemory()) && 62 "underlying instruction may write to memory"); 63 return false; 64 } 65 default: 66 return true; 67 } 68 } 69 70 bool VPRecipeBase::mayReadFromMemory() const { 71 switch (getVPDefID()) { 72 case VPWidenMemoryInstructionSC: { 73 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 74 } 75 case VPReplicateSC: 76 case VPWidenCallSC: 77 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 78 ->mayReadFromMemory(); 79 case VPBranchOnMaskSC: 80 return false; 81 case VPWidenIntOrFpInductionSC: 82 case VPWidenCanonicalIVSC: 83 case VPWidenPHISC: 84 case VPBlendSC: 85 case VPWidenSC: 86 case VPWidenGEPSC: 87 case VPReductionSC: 88 case VPWidenSelectSC: { 89 const Instruction *I = 90 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 91 (void)I; 92 assert((!I || !I->mayReadFromMemory()) && 93 "underlying instruction may read from memory"); 94 return false; 95 } 96 default: 97 return true; 98 } 99 } 100 101 bool VPRecipeBase::mayHaveSideEffects() const { 102 switch (getVPDefID()) { 103 case VPWidenIntOrFpInductionSC: 104 case VPWidenPointerInductionSC: 105 case VPWidenCanonicalIVSC: 106 case VPWidenPHISC: 107 case VPBlendSC: 108 case VPWidenSC: 109 case VPWidenGEPSC: 110 case VPReductionSC: 111 case VPWidenSelectSC: 112 case VPScalarIVStepsSC: { 113 const Instruction *I = 114 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 115 (void)I; 116 assert((!I || !I->mayHaveSideEffects()) && 117 "underlying instruction has side-effects"); 118 return false; 119 } 120 case VPReplicateSC: { 121 auto *R = cast<VPReplicateRecipe>(this); 122 return R->getUnderlyingInstr()->mayHaveSideEffects(); 123 } 124 default: 125 return true; 126 } 127 } 128 129 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 130 auto Lane = VPLane::getLastLaneForVF(State.VF); 131 VPValue *ExitValue = getOperand(0); 132 if (Plan.isUniformAfterVectorization(ExitValue)) 133 Lane = 
VPLane::getFirstLane(); 134 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), 135 State.Builder.GetInsertBlock()); 136 } 137 138 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 139 assert(!Parent && "Recipe already in some VPBasicBlock"); 140 assert(InsertPos->getParent() && 141 "Insertion position not in any VPBasicBlock"); 142 Parent = InsertPos->getParent(); 143 Parent->getRecipeList().insert(InsertPos->getIterator(), this); 144 } 145 146 void VPRecipeBase::insertBefore(VPBasicBlock &BB, 147 iplist<VPRecipeBase>::iterator I) { 148 assert(!Parent && "Recipe already in some VPBasicBlock"); 149 assert(I == BB.end() || I->getParent() == &BB); 150 Parent = &BB; 151 BB.getRecipeList().insert(I, this); 152 } 153 154 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 155 assert(!Parent && "Recipe already in some VPBasicBlock"); 156 assert(InsertPos->getParent() && 157 "Insertion position not in any VPBasicBlock"); 158 Parent = InsertPos->getParent(); 159 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); 160 } 161 162 void VPRecipeBase::removeFromParent() { 163 assert(getParent() && "Recipe not in any VPBasicBlock"); 164 getParent()->getRecipeList().remove(getIterator()); 165 Parent = nullptr; 166 } 167 168 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 169 assert(getParent() && "Recipe not in any VPBasicBlock"); 170 return getParent()->getRecipeList().erase(getIterator()); 171 } 172 173 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 174 removeFromParent(); 175 insertAfter(InsertPos); 176 } 177 178 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 179 iplist<VPRecipeBase>::iterator I) { 180 removeFromParent(); 181 insertBefore(BB, I); 182 } 183 184 void VPInstruction::generateInstruction(VPTransformState &State, 185 unsigned Part) { 186 IRBuilderBase &Builder = State.Builder; 187 Builder.SetCurrentDebugLocation(DL); 188 189 if (Instruction::isBinaryOp(getOpcode())) { 190 Value *A = 
State.get(getOperand(0), Part); 191 Value *B = State.get(getOperand(1), Part); 192 Value *V = 193 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); 194 State.set(this, V, Part); 195 return; 196 } 197 198 switch (getOpcode()) { 199 case VPInstruction::Not: { 200 Value *A = State.get(getOperand(0), Part); 201 Value *V = Builder.CreateNot(A, Name); 202 State.set(this, V, Part); 203 break; 204 } 205 case VPInstruction::ICmpULE: { 206 Value *IV = State.get(getOperand(0), Part); 207 Value *TC = State.get(getOperand(1), Part); 208 Value *V = Builder.CreateICmpULE(IV, TC, Name); 209 State.set(this, V, Part); 210 break; 211 } 212 case Instruction::Select: { 213 Value *Cond = State.get(getOperand(0), Part); 214 Value *Op1 = State.get(getOperand(1), Part); 215 Value *Op2 = State.get(getOperand(2), Part); 216 Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name); 217 State.set(this, V, Part); 218 break; 219 } 220 case VPInstruction::ActiveLaneMask: { 221 // Get first lane of vector induction variable. 222 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 223 // Get the original loop tripcount. 224 Value *ScalarTC = State.get(getOperand(1), Part); 225 226 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 227 auto *PredTy = VectorType::get(Int1Ty, State.VF); 228 Instruction *Call = Builder.CreateIntrinsic( 229 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, 230 {VIVElem0, ScalarTC}, nullptr, Name); 231 State.set(this, Call, Part); 232 break; 233 } 234 case VPInstruction::FirstOrderRecurrenceSplice: { 235 // Generate code to combine the previous and current values in vector v3. 
236 // 237 // vector.ph: 238 // v_init = vector(..., ..., ..., a[-1]) 239 // br vector.body 240 // 241 // vector.body 242 // i = phi [0, vector.ph], [i+4, vector.body] 243 // v1 = phi [v_init, vector.ph], [v2, vector.body] 244 // v2 = a[i, i+1, i+2, i+3]; 245 // v3 = vector(v1(3), v2(0, 1, 2)) 246 247 // For the first part, use the recurrence phi (v1), otherwise v2. 248 auto *V1 = State.get(getOperand(0), 0); 249 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); 250 if (!PartMinus1->getType()->isVectorTy()) { 251 State.set(this, PartMinus1, Part); 252 } else { 253 Value *V2 = State.get(getOperand(1), Part); 254 State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name), 255 Part); 256 } 257 break; 258 } 259 case VPInstruction::CanonicalIVIncrement: 260 case VPInstruction::CanonicalIVIncrementNUW: { 261 Value *Next = nullptr; 262 if (Part == 0) { 263 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; 264 auto *Phi = State.get(getOperand(0), 0); 265 // The loop step is equal to the vectorization factor (num of SIMD 266 // elements) times the unroll factor (num of SIMD instructions). 267 Value *Step = 268 createStepForVF(Builder, Phi->getType(), State.VF, State.UF); 269 Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false); 270 } else { 271 Next = State.get(this, 0); 272 } 273 274 State.set(this, Next, Part); 275 break; 276 } 277 278 case VPInstruction::CanonicalIVIncrementForPart: 279 case VPInstruction::CanonicalIVIncrementForPartNUW: { 280 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW; 281 auto *IV = State.get(getOperand(0), VPIteration(0, 0)); 282 if (Part == 0) { 283 State.set(this, IV, Part); 284 break; 285 } 286 287 // The canonical IV is incremented by the vectorization factor (num of SIMD 288 // elements) times the unroll part. 
289 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); 290 Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false); 291 State.set(this, Next, Part); 292 break; 293 } 294 case VPInstruction::BranchOnCond: { 295 if (Part != 0) 296 break; 297 298 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 299 VPRegionBlock *ParentRegion = getParent()->getParent(); 300 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 301 302 // Replace the temporary unreachable terminator with a new conditional 303 // branch, hooking it up to backward destination for exiting blocks now and 304 // to forward destination(s) later when they are created. 305 BranchInst *CondBr = 306 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 307 308 if (getParent()->isExiting()) 309 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 310 311 CondBr->setSuccessor(0, nullptr); 312 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 313 break; 314 } 315 case VPInstruction::BranchOnCount: { 316 if (Part != 0) 317 break; 318 // First create the compare. 319 Value *IV = State.get(getOperand(0), Part); 320 Value *TC = State.get(getOperand(1), Part); 321 Value *Cond = Builder.CreateICmpEQ(IV, TC); 322 323 // Now create the branch. 324 auto *Plan = getParent()->getPlan(); 325 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 326 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 327 328 // Replace the temporary unreachable terminator with a new conditional 329 // branch, hooking it up to backward destination (the header) now and to the 330 // forward destination (the exit/middle block) later when it is created. 331 // Note that CreateCondBr expects a valid BB as first argument, so we need 332 // to set it to nullptr later. 
333 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 334 State.CFG.VPBB2IRBB[Header]); 335 CondBr->setSuccessor(0, nullptr); 336 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 337 break; 338 } 339 default: 340 llvm_unreachable("Unsupported opcode for instruction"); 341 } 342 } 343 344 void VPInstruction::execute(VPTransformState &State) { 345 assert(!State.Instance && "VPInstruction executing an Instance"); 346 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 347 State.Builder.setFastMathFlags(FMF); 348 for (unsigned Part = 0; Part < State.UF; ++Part) 349 generateInstruction(State, Part); 350 } 351 352 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 353 void VPInstruction::dump() const { 354 VPSlotTracker SlotTracker(getParent()->getPlan()); 355 print(dbgs(), "", SlotTracker); 356 } 357 358 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 359 VPSlotTracker &SlotTracker) const { 360 O << Indent << "EMIT "; 361 362 if (hasResult()) { 363 printAsOperand(O, SlotTracker); 364 O << " = "; 365 } 366 367 switch (getOpcode()) { 368 case VPInstruction::Not: 369 O << "not"; 370 break; 371 case VPInstruction::ICmpULE: 372 O << "icmp ule"; 373 break; 374 case VPInstruction::SLPLoad: 375 O << "combined load"; 376 break; 377 case VPInstruction::SLPStore: 378 O << "combined store"; 379 break; 380 case VPInstruction::ActiveLaneMask: 381 O << "active lane mask"; 382 break; 383 case VPInstruction::FirstOrderRecurrenceSplice: 384 O << "first-order splice"; 385 break; 386 case VPInstruction::CanonicalIVIncrement: 387 O << "VF * UF + "; 388 break; 389 case VPInstruction::CanonicalIVIncrementNUW: 390 O << "VF * UF +(nuw) "; 391 break; 392 case VPInstruction::BranchOnCond: 393 O << "branch-on-cond"; 394 break; 395 case VPInstruction::CanonicalIVIncrementForPart: 396 O << "VF * Part + "; 397 break; 398 case VPInstruction::CanonicalIVIncrementForPartNUW: 399 O << "VF * Part +(nuw) "; 400 break; 401 case 
VPInstruction::BranchOnCount: 402 O << "branch-on-count "; 403 break; 404 default: 405 O << Instruction::getOpcodeName(getOpcode()); 406 } 407 408 O << FMF; 409 410 for (const VPValue *Operand : operands()) { 411 O << " "; 412 Operand->printAsOperand(O, SlotTracker); 413 } 414 415 if (DL) { 416 O << ", !dbg "; 417 DL.print(O); 418 } 419 } 420 #endif 421 422 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { 423 // Make sure the VPInstruction is a floating-point operation. 424 assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 425 Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 426 Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 427 Opcode == Instruction::FCmp) && 428 "this op can't take fast-math flags"); 429 FMF = FMFNew; 430 } 431 432 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 433 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 434 VPSlotTracker &SlotTracker) const { 435 O << Indent << "WIDEN-CALL "; 436 437 auto *CI = cast<CallInst>(getUnderlyingInstr()); 438 if (CI->getType()->isVoidTy()) 439 O << "void "; 440 else { 441 printAsOperand(O, SlotTracker); 442 O << " = "; 443 } 444 445 O << "call @" << CI->getCalledFunction()->getName() << "("; 446 printOperands(O, SlotTracker); 447 O << ")"; 448 } 449 450 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 451 VPSlotTracker &SlotTracker) const { 452 O << Indent << "WIDEN-SELECT "; 453 printAsOperand(O, SlotTracker); 454 O << " = select "; 455 getOperand(0)->printAsOperand(O, SlotTracker); 456 O << ", "; 457 getOperand(1)->printAsOperand(O, SlotTracker); 458 O << ", "; 459 getOperand(2)->printAsOperand(O, SlotTracker); 460 O << (InvariantCond ? 
" (condition is loop invariant)" : ""); 461 } 462 #endif 463 464 void VPWidenSelectRecipe::execute(VPTransformState &State) { 465 auto &I = *cast<SelectInst>(getUnderlyingInstr()); 466 State.setDebugLocFromInst(&I); 467 468 // The condition can be loop invariant but still defined inside the 469 // loop. This means that we can't just use the original 'cond' value. 470 // We have to take the 'vectorized' value and pick the first lane. 471 // Instcombine will make this a no-op. 472 auto *InvarCond = 473 InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; 474 475 for (unsigned Part = 0; Part < State.UF; ++Part) { 476 Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); 477 Value *Op0 = State.get(getOperand(1), Part); 478 Value *Op1 = State.get(getOperand(2), Part); 479 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 480 State.set(this, Sel, Part); 481 State.addMetadata(Sel, &I); 482 } 483 } 484 485 void VPWidenRecipe::execute(VPTransformState &State) { 486 auto &I = *cast<Instruction>(getUnderlyingValue()); 487 auto &Builder = State.Builder; 488 switch (I.getOpcode()) { 489 case Instruction::Call: 490 case Instruction::Br: 491 case Instruction::PHI: 492 case Instruction::GetElementPtr: 493 case Instruction::Select: 494 llvm_unreachable("This instruction is handled by a different recipe."); 495 case Instruction::UDiv: 496 case Instruction::SDiv: 497 case Instruction::SRem: 498 case Instruction::URem: 499 case Instruction::Add: 500 case Instruction::FAdd: 501 case Instruction::Sub: 502 case Instruction::FSub: 503 case Instruction::FNeg: 504 case Instruction::Mul: 505 case Instruction::FMul: 506 case Instruction::FDiv: 507 case Instruction::FRem: 508 case Instruction::Shl: 509 case Instruction::LShr: 510 case Instruction::AShr: 511 case Instruction::And: 512 case Instruction::Or: 513 case Instruction::Xor: { 514 // Just widen unops and binops. 
515 State.setDebugLocFromInst(&I); 516 517 for (unsigned Part = 0; Part < State.UF; ++Part) { 518 SmallVector<Value *, 2> Ops; 519 for (VPValue *VPOp : operands()) 520 Ops.push_back(State.get(VPOp, Part)); 521 522 Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); 523 524 if (auto *VecOp = dyn_cast<Instruction>(V)) { 525 VecOp->copyIRFlags(&I); 526 527 // If the instruction is vectorized and was in a basic block that needed 528 // predication, we can't propagate poison-generating flags (nuw/nsw, 529 // exact, etc.). The control flow has been linearized and the 530 // instruction is no longer guarded by the predicate, which could make 531 // the flag properties to no longer hold. 532 if (State.MayGeneratePoisonRecipes.contains(this)) 533 VecOp->dropPoisonGeneratingFlags(); 534 } 535 536 // Use this vector value for all users of the original instruction. 537 State.set(this, V, Part); 538 State.addMetadata(V, &I); 539 } 540 541 break; 542 } 543 case Instruction::Freeze: { 544 State.setDebugLocFromInst(&I); 545 546 for (unsigned Part = 0; Part < State.UF; ++Part) { 547 Value *Op = State.get(getOperand(0), Part); 548 549 Value *Freeze = Builder.CreateFreeze(Op); 550 State.set(this, Freeze, Part); 551 } 552 break; 553 } 554 case Instruction::ICmp: 555 case Instruction::FCmp: { 556 // Widen compares. Generate vector compares. 557 bool FCmp = (I.getOpcode() == Instruction::FCmp); 558 auto *Cmp = cast<CmpInst>(&I); 559 State.setDebugLocFromInst(Cmp); 560 for (unsigned Part = 0; Part < State.UF; ++Part) { 561 Value *A = State.get(getOperand(0), Part); 562 Value *B = State.get(getOperand(1), Part); 563 Value *C = nullptr; 564 if (FCmp) { 565 // Propagate fast math flags. 
566 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 567 Builder.setFastMathFlags(Cmp->getFastMathFlags()); 568 C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); 569 } else { 570 C = Builder.CreateICmp(Cmp->getPredicate(), A, B); 571 } 572 State.set(this, C, Part); 573 State.addMetadata(C, &I); 574 } 575 576 break; 577 } 578 579 case Instruction::ZExt: 580 case Instruction::SExt: 581 case Instruction::FPToUI: 582 case Instruction::FPToSI: 583 case Instruction::FPExt: 584 case Instruction::PtrToInt: 585 case Instruction::IntToPtr: 586 case Instruction::SIToFP: 587 case Instruction::UIToFP: 588 case Instruction::Trunc: 589 case Instruction::FPTrunc: 590 case Instruction::BitCast: { 591 auto *CI = cast<CastInst>(&I); 592 State.setDebugLocFromInst(CI); 593 594 /// Vectorize casts. 595 Type *DestTy = (State.VF.isScalar()) 596 ? CI->getType() 597 : VectorType::get(CI->getType(), State.VF); 598 599 for (unsigned Part = 0; Part < State.UF; ++Part) { 600 Value *A = State.get(getOperand(0), Part); 601 Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); 602 State.set(this, Cast, Part); 603 State.addMetadata(Cast, &I); 604 } 605 break; 606 } 607 default: 608 // This instruction is not vectorized by simple widening. 609 LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); 610 llvm_unreachable("Unhandled instruction!"); 611 } // end of switch. 
612 } 613 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 614 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 615 VPSlotTracker &SlotTracker) const { 616 O << Indent << "WIDEN "; 617 printAsOperand(O, SlotTracker); 618 O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; 619 printOperands(O, SlotTracker); 620 } 621 622 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 623 VPSlotTracker &SlotTracker) const { 624 O << Indent << "WIDEN-INDUCTION"; 625 if (getTruncInst()) { 626 O << "\\l\""; 627 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 628 O << " +\n" << Indent << "\" "; 629 getVPValue(0)->printAsOperand(O, SlotTracker); 630 } else 631 O << " " << VPlanIngredient(IV); 632 633 O << ", "; 634 getStepValue()->printAsOperand(O, SlotTracker); 635 } 636 #endif 637 638 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 639 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 640 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); 641 return StartC && StartC->isZero() && StepC && StepC->isOne(); 642 } 643 644 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { 645 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)); 646 } 647 648 bool VPScalarIVStepsRecipe::isCanonical() const { 649 auto *CanIV = getCanonicalIV(); 650 // The start value of the steps-recipe must match the start value of the 651 // canonical induction and it must step by 1. 
652 if (CanIV->getStartValue() != getStartValue()) 653 return false; 654 auto *StepVPV = getStepValue(); 655 if (StepVPV->getDef()) 656 return false; 657 auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue()); 658 return StepC && StepC->isOne(); 659 } 660 661 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 662 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 663 VPSlotTracker &SlotTracker) const { 664 O << Indent; 665 printAsOperand(O, SlotTracker); 666 O << Indent << "= SCALAR-STEPS "; 667 printOperands(O, SlotTracker); 668 } 669 #endif 670 671 void VPWidenGEPRecipe::execute(VPTransformState &State) { 672 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 673 // Construct a vector GEP by widening the operands of the scalar GEP as 674 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 675 // results in a vector of pointers when at least one operand of the GEP 676 // is vector-typed. Thus, to keep the representation compact, we only use 677 // vector-typed operands for loop-varying values. 678 679 if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { 680 // If we are vectorizing, but the GEP has only loop-invariant operands, 681 // the GEP we build (by only using vector-typed operands for 682 // loop-varying values) would be a scalar pointer. Thus, to ensure we 683 // produce a vector of pointers, we need to either arbitrarily pick an 684 // operand to broadcast, or broadcast a clone of the original GEP. 685 // Here, we broadcast a clone of the original. 686 // 687 // TODO: If at some point we decide to scalarize instructions having 688 // loop-invariant operands, this special case will no longer be 689 // required. We would add the scalarization decision to 690 // collectLoopScalars() and teach getVectorValue() to broadcast 691 // the lane-zero scalar value. 
692 auto *Clone = State.Builder.Insert(GEP->clone()); 693 for (unsigned Part = 0; Part < State.UF; ++Part) { 694 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); 695 State.set(this, EntryPart, Part); 696 State.addMetadata(EntryPart, GEP); 697 } 698 } else { 699 // If the GEP has at least one loop-varying operand, we are sure to 700 // produce a vector of pointers. But if we are only unrolling, we want 701 // to produce a scalar GEP for each unroll part. Thus, the GEP we 702 // produce with the code below will be scalar (if VF == 1) or vector 703 // (otherwise). Note that for the unroll-only case, we still maintain 704 // values in the vector mapping with initVector, as we do for other 705 // instructions. 706 for (unsigned Part = 0; Part < State.UF; ++Part) { 707 // The pointer operand of the new GEP. If it's loop-invariant, we 708 // won't broadcast it. 709 auto *Ptr = IsPtrLoopInvariant 710 ? State.get(getOperand(0), VPIteration(0, 0)) 711 : State.get(getOperand(0), Part); 712 713 // Collect all the indices for the new GEP. If any index is 714 // loop-invariant, we won't broadcast it. 715 SmallVector<Value *, 4> Indices; 716 for (unsigned I = 1, E = getNumOperands(); I < E; I++) { 717 VPValue *Operand = getOperand(I); 718 if (IsIndexLoopInvariant[I - 1]) 719 Indices.push_back(State.get(Operand, VPIteration(0, 0))); 720 else 721 Indices.push_back(State.get(Operand, Part)); 722 } 723 724 // If the GEP instruction is vectorized and was in a basic block that 725 // needed predication, we can't propagate the poison-generating 'inbounds' 726 // flag. The control flow has been linearized and the GEP is no longer 727 // guarded by the predicate, which could make the 'inbounds' properties to 728 // no longer hold. 729 bool IsInBounds = 730 GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0; 731 732 // Create the new GEP. Note that this GEP may be a scalar if VF == 1, 733 // but it should be a vector, otherwise. 
734 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, 735 Indices, "", IsInBounds); 736 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && 737 "NewGEP is not a pointer vector"); 738 State.set(this, NewGEP, Part); 739 State.addMetadata(NewGEP, GEP); 740 } 741 } 742 } 743 744 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 745 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 746 VPSlotTracker &SlotTracker) const { 747 O << Indent << "WIDEN-GEP "; 748 O << (IsPtrLoopInvariant ? "Inv" : "Var"); 749 size_t IndicesNumber = IsIndexLoopInvariant.size(); 750 for (size_t I = 0; I < IndicesNumber; ++I) 751 O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; 752 753 O << " "; 754 printAsOperand(O, SlotTracker); 755 O << " = getelementptr "; 756 printOperands(O, SlotTracker); 757 } 758 759 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 760 VPSlotTracker &SlotTracker) const { 761 O << Indent << "BLEND "; 762 Phi->printAsOperand(O, false); 763 O << " ="; 764 if (getNumIncomingValues() == 1) { 765 // Not a User of any mask: not really blending, this is a 766 // single-predecessor phi. 767 O << " "; 768 getIncomingValue(0)->printAsOperand(O, SlotTracker); 769 } else { 770 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 771 O << " "; 772 getIncomingValue(I)->printAsOperand(O, SlotTracker); 773 O << "/"; 774 getMask(I)->printAsOperand(O, SlotTracker); 775 } 776 } 777 } 778 779 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 780 VPSlotTracker &SlotTracker) const { 781 O << Indent << "REDUCE "; 782 printAsOperand(O, SlotTracker); 783 O << " = "; 784 getChainOp()->printAsOperand(O, SlotTracker); 785 O << " +"; 786 if (isa<FPMathOperator>(getUnderlyingInstr())) 787 O << getUnderlyingInstr()->getFastMathFlags(); 788 O << " reduce." 
<< Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; 789 getVecOp()->printAsOperand(O, SlotTracker); 790 if (getCondOp()) { 791 O << ", "; 792 getCondOp()->printAsOperand(O, SlotTracker); 793 } 794 O << ")"; 795 if (RdxDesc->IntermediateStore) 796 O << " (with final reduction value stored in invariant address sank " 797 "outside of loop)"; 798 } 799 800 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 801 VPSlotTracker &SlotTracker) const { 802 O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 803 804 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 805 printAsOperand(O, SlotTracker); 806 O << " = "; 807 } 808 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 809 O << "call @" << CB->getCalledFunction()->getName() << "("; 810 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 811 O, [&O, &SlotTracker](VPValue *Op) { 812 Op->printAsOperand(O, SlotTracker); 813 }); 814 O << ")"; 815 } else { 816 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; 817 printOperands(O, SlotTracker); 818 } 819 820 if (AlsoPack) 821 O << " (S->V)"; 822 } 823 824 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 825 VPSlotTracker &SlotTracker) const { 826 O << Indent << "PHI-PREDICATED-INSTRUCTION "; 827 printAsOperand(O, SlotTracker); 828 O << " = "; 829 printOperands(O, SlotTracker); 830 } 831 832 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, 833 VPSlotTracker &SlotTracker) const { 834 O << Indent << "WIDEN "; 835 836 if (!isStore()) { 837 getVPSingleValue()->printAsOperand(O, SlotTracker); 838 O << " = "; 839 } 840 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; 841 842 printOperands(O, SlotTracker); 843 } 844 #endif 845 846 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { 847 Value *Start = getStartValue()->getLiveInIRValue(); 848 PHINode *EntryPart = PHINode::Create( 849 Start->getType(), 2, "index", 
      &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  // The same scalar phi serves every unroll part.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

// Returns true if this pointer induction will only generate scalar values:
// all users must use scalars of it, and for scalable VFs the recipe must
// additionally be uniform (only its first lane used).
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  bool IsUniform = vputils::onlyFirstLaneUsed(this);
  return all_of(users(),
                [&](const VPUser *U) { return U->usesScalars(this); }) &&
         (IsUniform || !VF.isScalable());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

// Materialize the wrapped SCEV expression as IR at the current builder
// insert point.
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());

  // A single expansion is shared by all unroll parts.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

// Widen the scalar canonical IV into one vector IV per unroll part:
// splat the canonical IV and add the per-part step offset.
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // Part * VF, splatted and offset by a step vector <0, 1, ..., VF-1> for
    // vector VFs.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    // Insert the scalar start value into the last lane (RuntimeVF - 1) of a
    // poison vector, emitted in the preheader.
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi (part 0 only); otherwise one phi is
  // created per unroll part.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    // Other reductions have a known identity element; for the vector case,
    // splat it and insert the start value into lane 0 in the preheader.
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  // The phi's type is taken from the selected incoming operand's value.
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  // One phi per unroll part, each seeded with its part's start mask coming
  // from the preheader.
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(DL);
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif