1 //===- LoopVectorizationLegality.cpp --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides loop vectorization legality analysis. Original code 10 // resided in LoopVectorize.cpp for a long time. 11 // 12 // At this point, it is implemented as a utility class, not as an analysis 13 // pass. It should be easy to create an analysis pass around it if there 14 // is a need (but D45420 needs to happen first). 15 // 16 #include "llvm/Transforms/Vectorize/LoopVectorize.h" 17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 18 #include "llvm/Analysis/VectorUtils.h" 19 #include "llvm/IR/IntrinsicInst.h" 20 21 using namespace llvm; 22 23 #define LV_NAME "loop-vectorize" 24 #define DEBUG_TYPE LV_NAME 25 26 extern cl::opt<bool> EnableVPlanPredication; 27 28 static cl::opt<bool> 29 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 30 cl::desc("Enable if-conversion during vectorization.")); 31 32 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 33 "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 34 cl::desc("The maximum allowed number of runtime memory checks with a " 35 "vectorize(enable) pragma.")); 36 37 static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 38 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 39 cl::desc("The maximum number of SCEV checks allowed.")); 40 41 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 42 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 43 cl::desc("The maximum number of SCEV checks allowed with a " 44 "vectorize(enable) pragma")); 45 46 /// Maximum vectorization interleave count. 47 static const unsigned MaxInterleaveFactor = 16; 48 49 namespace llvm { 50 51 bool LoopVectorizeHints::Hint::validate(unsigned Val) { 52 switch (Kind) { 53 case HK_WIDTH: 54 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 55 case HK_UNROLL: 56 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 57 case HK_FORCE: 58 return (Val <= 1); 59 case HK_ISVECTORIZED: 60 case HK_PREDICATE: 61 return (Val == 0 || Val == 1); 62 } 63 return false; 64 } 65 66 LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 67 bool InterleaveOnlyWhenForced, 68 OptimizationRemarkEmitter &ORE) 69 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 70 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 71 Force("vectorize.enable", FK_Undefined, HK_FORCE), 72 IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 73 Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), 74 ORE(ORE) { 75 // Populate values with existing loop metadata. 76 getHintsFromMetadata(); 77 78 // force-vector-interleave overrides DisableInterleaving. 79 if (VectorizerParams::isInterleaveForced()) 80 Interleave.Value = VectorizerParams::VectorizationInterleave; 81 82 if (IsVectorized.Value != 1) 83 // If the vectorization width and interleaving count are both 1 then 84 // consider the loop to have been already vectorized because there's 85 // nothing more that we can do. 86 IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 87 LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 88 << "LV: Interleaving disabled by the pass manager\n"); 89 } 90 91 void LoopVectorizeHints::setAlreadyVectorized() { 92 LLVMContext &Context = TheLoop->getHeader()->getContext(); 93 94 MDNode *IsVectorizedMD = MDNode::get( 95 Context, 96 {MDString::get(Context, "llvm.loop.isvectorized"), 97 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 98 MDNode *LoopID = TheLoop->getLoopID(); 99 MDNode *NewLoopID = 100 makePostTransformationMetadata(Context, LoopID, 101 {Twine(Prefix(), "vectorize.").str(), 102 Twine(Prefix(), "interleave.").str()}, 103 {IsVectorizedMD}); 104 TheLoop->setLoopID(NewLoopID); 105 106 // Update internal cache. 107 IsVectorized.Value = 1; 108 } 109 110 bool LoopVectorizeHints::allowVectorization( 111 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 112 if (getForce() == LoopVectorizeHints::FK_Disabled) { 113 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 114 emitRemarkWithHints(); 115 return false; 116 } 117 118 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 119 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 120 emitRemarkWithHints(); 121 return false; 122 } 123 124 if (getIsVectorized() == 1) { 125 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 126 // FIXME: Add interleave.disable metadata. This will allow 127 // vectorize.disable to be used without disabling the pass and errors 128 // to differentiate between disabled vectorization and a width of 1. 129 ORE.emit([&]() { 130 return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 131 "AllDisabled", L->getStartLoc(), 132 L->getHeader()) 133 << "loop not vectorized: vectorization and interleaving are " 134 "explicitly disabled, or the loop has already been " 135 "vectorized"; 136 }); 137 return false; 138 } 139 140 return true; 141 } 142 143 void LoopVectorizeHints::emitRemarkWithHints() const { 144 using namespace ore; 145 146 ORE.emit([&]() { 147 if (Force.Value == LoopVectorizeHints::FK_Disabled) 148 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 149 TheLoop->getStartLoc(), 150 TheLoop->getHeader()) 151 << "loop not vectorized: vectorization is explicitly disabled"; 152 else { 153 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 154 TheLoop->getStartLoc(), TheLoop->getHeader()); 155 R << "loop not vectorized"; 156 if (Force.Value == LoopVectorizeHints::FK_Enabled) { 157 R << " (Force=" << NV("Force", true); 158 if (Width.Value != 0) 159 R << ", Vector Width=" << NV("VectorWidth", Width.Value); 160 if (Interleave.Value != 0) 161 R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 162 R << ")"; 163 } 164 return R; 165 } 166 }); 167 } 168 169 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 170 if (getWidth() == 1) 171 return LV_NAME; 172 if (getForce() == LoopVectorizeHints::FK_Disabled) 173 return LV_NAME; 174 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 175 return LV_NAME; 176 return OptimizationRemarkAnalysis::AlwaysPrint; 177 } 178 179 void LoopVectorizeHints::getHintsFromMetadata() { 180 MDNode *LoopID = TheLoop->getLoopID(); 181 if (!LoopID) 182 return; 183 184 // First operand should refer to the loop id itself. 185 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 186 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 187 188 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 189 const MDString *S = nullptr; 190 SmallVector<Metadata *, 4> Args; 191 192 // The expected hint is either a MDString or a MDNode with the first 193 // operand a MDString. 194 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 195 if (!MD || MD->getNumOperands() == 0) 196 continue; 197 S = dyn_cast<MDString>(MD->getOperand(0)); 198 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 199 Args.push_back(MD->getOperand(i)); 200 } else { 201 S = dyn_cast<MDString>(LoopID->getOperand(i)); 202 assert(Args.size() == 0 && "too many arguments for MDString"); 203 } 204 205 if (!S) 206 continue; 207 208 // Check if the hint starts with the loop metadata prefix. 209 StringRef Name = S->getString(); 210 if (Args.size() == 1) 211 setHint(Name, Args[0]); 212 } 213 } 214 215 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 216 if (!Name.startswith(Prefix())) 217 return; 218 Name = Name.substr(Prefix().size(), StringRef::npos); 219 220 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 221 if (!C) 222 return; 223 unsigned Val = C->getZExtValue(); 224 225 Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 226 for (auto H : Hints) { 227 if (Name == H->Name) { 228 if (H->validate(Val)) 229 H->Value = Val; 230 else 231 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 232 break; 233 } 234 } 235 } 236 237 bool LoopVectorizationRequirements::doesNotMeet( 238 Function *F, Loop *L, const LoopVectorizeHints &Hints) { 239 const char *PassName = Hints.vectorizeAnalysisPassName(); 240 bool Failed = false; 241 if (UnsafeAlgebraInst && !Hints.allowReordering()) { 242 ORE.emit([&]() { 243 return OptimizationRemarkAnalysisFPCommute( 244 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 245 UnsafeAlgebraInst->getParent()) 246 << "loop not vectorized: cannot prove it is safe to reorder " 247 "floating-point operations"; 248 }); 249 Failed = true; 250 } 251 252 // Test if runtime memcheck thresholds are exceeded. 253 bool PragmaThresholdReached = 254 NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 255 bool ThresholdReached = 256 NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 257 if ((ThresholdReached && !Hints.allowReordering()) || 258 PragmaThresholdReached) { 259 ORE.emit([&]() { 260 return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 261 L->getStartLoc(), 262 L->getHeader()) 263 << "loop not vectorized: cannot prove it is safe to reorder " 264 "memory operations"; 265 }); 266 LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 267 Failed = true; 268 } 269 270 return Failed; 271 } 272 273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop 274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 275 // executing the inner loop will execute the same iterations). This check is 276 // very constrained for now but it will be relaxed in the future. \p Lp is 277 // considered uniform if it meets all the following conditions: 278 // 1) it has a canonical IV (starting from 0 and with stride 1), 279 // 2) its latch terminator is a conditional branch and, 280 // 3) its latch condition is a compare instruction whose operands are the 281 // canonical IV and an OuterLp invariant. 282 // This check doesn't take into account the uniformity of other conditions not 283 // related to the loop latch because they don't affect the loop uniformity. 284 // 285 // NOTE: We decided to keep all these checks and its associated documentation 286 // together so that we can easily have a picture of the current supported loop 287 // nests. However, some of the current checks don't depend on \p OuterLp and 288 // would be redundantly executed for each \p Lp if we invoked this function for 289 // different candidate outer loops. This is not the case for now because we 290 // don't currently have the infrastructure to evaluate multiple candidate outer 291 // loops and \p OuterLp will be a fixed parameter while we only support explicit 292 // outer loop vectorization. It's also very likely that these checks go away 293 // before introducing the aforementioned infrastructure. However, if this is not 294 // the case, we should move the \p OuterLp independent checks to a separate 295 // function that is only executed once for each \p Lp. 296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 297 assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 298 299 // If Lp is the outer loop, it's uniform by definition. 300 if (Lp == OuterLp) 301 return true; 302 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 303 304 // 1. 305 PHINode *IV = Lp->getCanonicalInductionVariable(); 306 if (!IV) { 307 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 308 return false; 309 } 310 311 // 2. 312 BasicBlock *Latch = Lp->getLoopLatch(); 313 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 314 if (!LatchBr || LatchBr->isUnconditional()) { 315 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 316 return false; 317 } 318 319 // 3. 320 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 321 if (!LatchCmp) { 322 LLVM_DEBUG( 323 dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 324 return false; 325 } 326 327 Value *CondOp0 = LatchCmp->getOperand(0); 328 Value *CondOp1 = LatchCmp->getOperand(1); 329 Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 330 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 331 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 332 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 333 return false; 334 } 335 336 return true; 337 } 338 339 // Return true if \p Lp and all its nested loops are uniform with regard to \p 340 // OuterLp. 341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 342 if (!isUniformLoop(Lp, OuterLp)) 343 return false; 344 345 // Check if nested loops are uniform. 346 for (Loop *SubLp : *Lp) 347 if (!isUniformLoopNest(SubLp, OuterLp)) 348 return false; 349 350 return true; 351 } 352 353 /// Check whether it is safe to if-convert this phi node. 354 /// 355 /// Phi nodes with constant expressions that can trap are not safe to if 356 /// convert. 357 static bool canIfConvertPHINodes(BasicBlock *BB) { 358 for (PHINode &Phi : BB->phis()) { 359 for (Value *V : Phi.incoming_values()) 360 if (auto *C = dyn_cast<Constant>(V)) 361 if (C->canTrap()) 362 return false; 363 } 364 return true; 365 } 366 367 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 368 if (Ty->isPointerTy()) 369 return DL.getIntPtrType(Ty); 370 371 // It is possible that char's or short's overflow when we ask for the loop's 372 // trip count, work around this by changing the type size. 373 if (Ty->getScalarSizeInBits() < 32) 374 return Type::getInt32Ty(Ty->getContext()); 375 376 return Ty; 377 } 378 379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 380 Ty0 = convertPointerToIntegerType(DL, Ty0); 381 Ty1 = convertPointerToIntegerType(DL, Ty1); 382 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 383 return Ty0; 384 return Ty1; 385 } 386 387 /// Check that the instruction has outside loop users and is not an 388 /// identified reduction variable. 389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 390 SmallPtrSetImpl<Value *> &AllowedExit) { 391 // Reductions, Inductions and non-header phis are allowed to have exit users. All 392 // other instructions must not have external users. 393 if (!AllowedExit.count(Inst)) 394 // Check that all of the users of the loop are inside the BB. 395 for (User *U : Inst->users()) { 396 Instruction *UI = cast<Instruction>(U); 397 // This user may be a reduction exit value. 398 if (!TheLoop->contains(UI)) { 399 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 400 return true; 401 } 402 } 403 return false; 404 } 405 406 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 407 const ValueToValueMap &Strides = 408 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 409 410 int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); 411 if (Stride == 1 || Stride == -1) 412 return Stride; 413 return 0; 414 } 415 416 bool LoopVectorizationLegality::isUniform(Value *V) { 417 return LAI->isUniform(V); 418 } 419 420 bool LoopVectorizationLegality::canVectorizeOuterLoop() { 421 assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 422 // Store the result and return it at the end instead of exiting early, in case 423 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 424 bool Result = true; 425 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 426 427 for (BasicBlock *BB : TheLoop->blocks()) { 428 // Check whether the BB terminator is a BranchInst. Any other terminator is 429 // not supported yet. 430 auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 431 if (!Br) { 432 reportVectorizationFailure("Unsupported basic block terminator", 433 "loop control flow is not understood by vectorizer", 434 "CFGNotUnderstood", ORE, TheLoop); 435 if (DoExtraAnalysis) 436 Result = false; 437 else 438 return false; 439 } 440 441 // Check whether the BranchInst is a supported one. Only unconditional 442 // branches, conditional branches with an outer loop invariant condition or 443 // backedges are supported. 444 // FIXME: We skip these checks when VPlan predication is enabled as we 445 // want to allow divergent branches. This whole check will be removed 446 // once VPlan predication is on by default. 447 if (!EnableVPlanPredication && Br && Br->isConditional() && 448 !TheLoop->isLoopInvariant(Br->getCondition()) && 449 !LI->isLoopHeader(Br->getSuccessor(0)) && 450 !LI->isLoopHeader(Br->getSuccessor(1))) { 451 reportVectorizationFailure("Unsupported conditional branch", 452 "loop control flow is not understood by vectorizer", 453 "CFGNotUnderstood", ORE, TheLoop); 454 if (DoExtraAnalysis) 455 Result = false; 456 else 457 return false; 458 } 459 } 460 461 // Check whether inner loops are uniform. At this point, we only support 462 // simple outer loops scenarios with uniform nested loops. 463 if (!isUniformLoopNest(TheLoop /*loop nest*/, 464 TheLoop /*context outer loop*/)) { 465 reportVectorizationFailure("Outer loop contains divergent loops", 466 "loop control flow is not understood by vectorizer", 467 "CFGNotUnderstood", ORE, TheLoop); 468 if (DoExtraAnalysis) 469 Result = false; 470 else 471 return false; 472 } 473 474 // Check whether we are able to set up outer loop induction. 475 if (!setupOuterLoopInductions()) { 476 reportVectorizationFailure("Unsupported outer loop Phi(s)", 477 "Unsupported outer loop Phi(s)", 478 "UnsupportedPhi", ORE, TheLoop); 479 if (DoExtraAnalysis) 480 Result = false; 481 else 482 return false; 483 } 484 485 return Result; 486 } 487 488 void LoopVectorizationLegality::addInductionPhi( 489 PHINode *Phi, const InductionDescriptor &ID, 490 SmallPtrSetImpl<Value *> &AllowedExit) { 491 Inductions[Phi] = ID; 492 493 // In case this induction also comes with casts that we know we can ignore 494 // in the vectorized loop body, record them here. All casts could be recorded 495 // here for ignoring, but suffices to record only the first (as it is the 496 // only one that may bw used outside the cast sequence). 497 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 498 if (!Casts.empty()) 499 InductionCastsToIgnore.insert(*Casts.begin()); 500 501 Type *PhiTy = Phi->getType(); 502 const DataLayout &DL = Phi->getModule()->getDataLayout(); 503 504 // Get the widest type. 505 if (!PhiTy->isFloatingPointTy()) { 506 if (!WidestIndTy) 507 WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 508 else 509 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 510 } 511 512 // Int inductions are special because we only allow one IV. 513 if (ID.getKind() == InductionDescriptor::IK_IntInduction && 514 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 515 isa<Constant>(ID.getStartValue()) && 516 cast<Constant>(ID.getStartValue())->isNullValue()) { 517 518 // Use the phi node with the widest type as induction. Use the last 519 // one if there are multiple (no good reason for doing this other 520 // than it is expedient). We've checked that it begins at zero and 521 // steps by one, so this is a canonical induction variable. 522 if (!PrimaryInduction || PhiTy == WidestIndTy) 523 PrimaryInduction = Phi; 524 } 525 526 // Both the PHI node itself, and the "post-increment" value feeding 527 // back into the PHI node may have external users. 528 // We can allow those uses, except if the SCEVs we have for them rely 529 // on predicates that only hold within the loop, since allowing the exit 530 // currently means re-using this SCEV outside the loop (see PR33706 for more 531 // details). 532 if (PSE.getUnionPredicate().isAlwaysTrue()) { 533 AllowedExit.insert(Phi); 534 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 535 } 536 537 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 538 } 539 540 bool LoopVectorizationLegality::setupOuterLoopInductions() { 541 BasicBlock *Header = TheLoop->getHeader(); 542 543 // Returns true if a given Phi is a supported induction. 544 auto isSupportedPhi = [&](PHINode &Phi) -> bool { 545 InductionDescriptor ID; 546 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 547 ID.getKind() == InductionDescriptor::IK_IntInduction) { 548 addInductionPhi(&Phi, ID, AllowedExit); 549 return true; 550 } else { 551 // Bail out for any Phi in the outer loop header that is not a supported 552 // induction. 553 LLVM_DEBUG( 554 dbgs() 555 << "LV: Found unsupported PHI for outer loop vectorization.\n"); 556 return false; 557 } 558 }; 559 560 if (llvm::all_of(Header->phis(), isSupportedPhi)) 561 return true; 562 else 563 return false; 564 } 565 566 bool LoopVectorizationLegality::canVectorizeInstrs() { 567 BasicBlock *Header = TheLoop->getHeader(); 568 569 // Look for the attribute signaling the absence of NaNs. 570 Function &F = *Header->getParent(); 571 HasFunNoNaNAttr = 572 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 573 574 // For each block in the loop. 575 for (BasicBlock *BB : TheLoop->blocks()) { 576 // Scan the instructions in the block and look for hazards. 577 for (Instruction &I : *BB) { 578 if (auto *Phi = dyn_cast<PHINode>(&I)) { 579 Type *PhiTy = Phi->getType(); 580 // Check that this PHI type is allowed. 581 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 582 !PhiTy->isPointerTy()) { 583 reportVectorizationFailure("Found a non-int non-pointer PHI", 584 "loop control flow is not understood by vectorizer", 585 "CFGNotUnderstood", ORE, TheLoop); 586 return false; 587 } 588 589 // If this PHINode is not in the header block, then we know that we 590 // can convert it to select during if-conversion. No need to check if 591 // the PHIs in this block are induction or reduction variables. 592 if (BB != Header) { 593 // Non-header phi nodes that have outside uses can be vectorized. Add 594 // them to the list of allowed exits. 595 // Unsafe cyclic dependencies with header phis are identified during 596 // legalization for reduction, induction and first order 597 // recurrences. 598 AllowedExit.insert(&I); 599 continue; 600 } 601 602 // We only allow if-converted PHIs with exactly two incoming values. 603 if (Phi->getNumIncomingValues() != 2) { 604 reportVectorizationFailure("Found an invalid PHI", 605 "loop control flow is not understood by vectorizer", 606 "CFGNotUnderstood", ORE, TheLoop, Phi); 607 return false; 608 } 609 610 RecurrenceDescriptor RedDes; 611 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 612 DT)) { 613 if (RedDes.hasUnsafeAlgebra()) 614 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 615 AllowedExit.insert(RedDes.getLoopExitInstr()); 616 Reductions[Phi] = RedDes; 617 continue; 618 } 619 620 // TODO: Instead of recording the AllowedExit, it would be good to record the 621 // complementary set: NotAllowedExit. These include (but may not be 622 // limited to): 623 // 1. Reduction phis as they represent the one-before-last value, which 624 // is not available when vectorized 625 // 2. Induction phis and increment when SCEV predicates cannot be used 626 // outside the loop - see addInductionPhi 627 // 3. Non-Phis with outside uses when SCEV predicates cannot be used 628 // outside the loop - see call to hasOutsideLoopUser in the non-phi 629 // handling below 630 // 4. FirstOrderRecurrence phis that can possibly be handled by 631 // extraction. 632 // By recording these, we can then reason about ways to vectorize each 633 // of these NotAllowedExit. 634 InductionDescriptor ID; 635 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 636 addInductionPhi(Phi, ID, AllowedExit); 637 if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 638 Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 639 continue; 640 } 641 642 if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 643 SinkAfter, DT)) { 644 FirstOrderRecurrences.insert(Phi); 645 continue; 646 } 647 648 // As a last resort, coerce the PHI to a AddRec expression 649 // and re-try classifying it a an induction PHI. 650 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 651 addInductionPhi(Phi, ID, AllowedExit); 652 continue; 653 } 654 655 reportVectorizationFailure("Found an unidentified PHI", 656 "value that could not be identified as " 657 "reduction is used outside the loop", 658 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 659 return false; 660 } // end of PHI handling 661 662 // We handle calls that: 663 // * Are debug info intrinsics. 664 // * Have a mapping to an IR intrinsic. 665 // * Have a vector version available. 666 auto *CI = dyn_cast<CallInst>(&I); 667 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 668 !isa<DbgInfoIntrinsic>(CI) && 669 !(CI->getCalledFunction() && TLI && 670 TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { 671 // If the call is a recognized math libary call, it is likely that 672 // we can vectorize it given loosened floating-point constraints. 673 LibFunc Func; 674 bool IsMathLibCall = 675 TLI && CI->getCalledFunction() && 676 CI->getType()->isFloatingPointTy() && 677 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 678 TLI->hasOptimizedCodeGen(Func); 679 680 if (IsMathLibCall) { 681 // TODO: Ideally, we should not use clang-specific language here, 682 // but it's hard to provide meaningful yet generic advice. 683 // Also, should this be guarded by allowExtraAnalysis() and/or be part 684 // of the returned info from isFunctionVectorizable()? 685 reportVectorizationFailure("Found a non-intrinsic callsite", 686 "library call cannot be vectorized. " 687 "Try compiling with -fno-math-errno, -ffast-math, " 688 "or similar flags", 689 "CantVectorizeLibcall", ORE, TheLoop, CI); 690 } else { 691 reportVectorizationFailure("Found a non-intrinsic callsite", 692 "call instruction cannot be vectorized", 693 "CantVectorizeLibcall", ORE, TheLoop, CI); 694 } 695 return false; 696 } 697 698 // Some intrinsics have scalar arguments and should be same in order for 699 // them to be vectorized (i.e. loop invariant). 700 if (CI) { 701 auto *SE = PSE.getSE(); 702 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 703 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 704 if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 705 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 706 reportVectorizationFailure("Found unvectorizable intrinsic", 707 "intrinsic instruction cannot be vectorized", 708 "CantVectorizeIntrinsic", ORE, TheLoop, CI); 709 return false; 710 } 711 } 712 } 713 714 // Check that the instruction return type is vectorizable. 715 // Also, we can't vectorize extractelement instructions. 716 if ((!VectorType::isValidElementType(I.getType()) && 717 !I.getType()->isVoidTy()) || 718 isa<ExtractElementInst>(I)) { 719 reportVectorizationFailure("Found unvectorizable type", 720 "instruction return type cannot be vectorized", 721 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 722 return false; 723 } 724 725 // Check that the stored type is vectorizable. 726 if (auto *ST = dyn_cast<StoreInst>(&I)) { 727 Type *T = ST->getValueOperand()->getType(); 728 if (!VectorType::isValidElementType(T)) { 729 reportVectorizationFailure("Store instruction cannot be vectorized", 730 "store instruction cannot be vectorized", 731 "CantVectorizeStore", ORE, TheLoop, ST); 732 return false; 733 } 734 735 // For nontemporal stores, check that a nontemporal vector version is 736 // supported on the target. 737 if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 738 // Arbitrarily try a vector of 2 elements. 739 Type *VecTy = VectorType::get(T, /*NumElements=*/2); 740 assert(VecTy && "did not find vectorized version of stored type"); 741 unsigned Alignment = getLoadStoreAlignment(ST); 742 assert(Alignment && "Alignment should be set"); 743 if (!TTI->isLegalNTStore(VecTy, llvm::Align(Alignment))) { 744 reportVectorizationFailure( 745 "nontemporal store instruction cannot be vectorized", 746 "nontemporal store instruction cannot be vectorized", 747 "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 748 return false; 749 } 750 } 751 752 } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 753 if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 754 // For nontemporal loads, check that a nontemporal vector version is 755 // supported on the target (arbitrarily try a vector of 2 elements). 756 Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 757 assert(VecTy && "did not find vectorized version of load type"); 758 unsigned Alignment = getLoadStoreAlignment(LD); 759 assert(Alignment && "Alignment should be set"); 760 if (!TTI->isLegalNTLoad(VecTy, llvm::Align(Alignment))) { 761 reportVectorizationFailure( 762 "nontemporal load instruction cannot be vectorized", 763 "nontemporal load instruction cannot be vectorized", 764 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 765 return false; 766 } 767 } 768 769 // FP instructions can allow unsafe algebra, thus vectorizable by 770 // non-IEEE-754 compliant SIMD units. 771 // This applies to floating-point math operations and calls, not memory 772 // operations, shuffles, or casts, as they don't change precision or 773 // semantics. 774 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 775 !I.isFast()) { 776 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 777 Hints->setPotentiallyUnsafe(); 778 } 779 780 // Reduction instructions are allowed to have exit users. 781 // All other instructions must not have external users. 782 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 783 // We can safely vectorize loops where instructions within the loop are 784 // used outside the loop only if the SCEV predicates within the loop is 785 // same as outside the loop. Allowing the exit means reusing the SCEV 786 // outside the loop. 787 if (PSE.getUnionPredicate().isAlwaysTrue()) { 788 AllowedExit.insert(&I); 789 continue; 790 } 791 reportVectorizationFailure("Value cannot be used outside the loop", 792 "value cannot be used outside the loop", 793 "ValueUsedOutsideLoop", ORE, TheLoop, &I); 794 return false; 795 } 796 } // next instr. 797 } 798 799 if (!PrimaryInduction) { 800 if (Inductions.empty()) { 801 reportVectorizationFailure("Did not find one integer induction var", 802 "loop induction variable could not be identified", 803 "NoInductionVariable", ORE, TheLoop); 804 return false; 805 } else if (!WidestIndTy) { 806 reportVectorizationFailure("Did not find one integer induction var", 807 "integer loop induction variable could not be identified", 808 "NoIntegerInductionVariable", ORE, TheLoop); 809 return false; 810 } else { 811 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 812 } 813 } 814 815 // Now we know the widest induction type, check if our found induction 816 // is the same size. If it's not, unset it here and InnerLoopVectorizer 817 // will create another. 818 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 819 PrimaryInduction = nullptr; 820 821 return true; 822 } 823 824 bool LoopVectorizationLegality::canVectorizeMemory() { 825 LAI = &(*GetLAA)(*TheLoop); 826 const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 827 if (LAR) { 828 ORE->emit([&]() { 829 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 830 "loop not vectorized: ", *LAR); 831 }); 832 } 833 if (!LAI->canVectorizeMemory()) 834 return false; 835 836 if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 837 reportVectorizationFailure("Stores to a uniform address", 838 "write to a loop invariant address could not be vectorized", 839 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 840 return false; 841 } 842 Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 843 PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 844 845 return true; 846 } 847 848 bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 849 Value *In0 = const_cast<Value *>(V); 850 PHINode *PN = dyn_cast_or_null<PHINode>(In0); 851 if (!PN) 852 return false; 853 854 return Inductions.count(PN); 855 } 856 857 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 858 auto *Inst = dyn_cast<Instruction>(V); 859 return (Inst && InductionCastsToIgnore.count(Inst)); 860 } 861 862 bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 863 return isInductionPhi(V) || isCastedInductionVariable(V); 864 } 865 866 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 867 return FirstOrderRecurrences.count(Phi); 868 } 869 870 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 871 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 872 } 873 874 bool LoopVectorizationLegality::blockCanBePredicated( 875 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 876 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 877 878 for (Instruction &I : *BB) { 879 // Check that we don't have a constant expression that can trap as operand. 880 for (Value *Operand : I.operands()) { 881 if (auto *C = dyn_cast<Constant>(Operand)) 882 if (C->canTrap()) 883 return false; 884 } 885 // We might be able to hoist the load. 886 if (I.mayReadFromMemory()) { 887 auto *LI = dyn_cast<LoadInst>(&I); 888 if (!LI) 889 return false; 890 if (!SafePtrs.count(LI->getPointerOperand())) { 891 // !llvm.mem.parallel_loop_access implies if-conversion safety. 892 // Otherwise, record that the load needs (real or emulated) masking 893 // and let the cost model decide. 894 if (!IsAnnotatedParallel || PreserveGuards) 895 MaskedOp.insert(LI); 896 continue; 897 } 898 } 899 900 if (I.mayWriteToMemory()) { 901 auto *SI = dyn_cast<StoreInst>(&I); 902 if (!SI) 903 return false; 904 // Predicated store requires some form of masking: 905 // 1) masked store HW instruction, 906 // 2) emulation via load-blend-store (only if safe and legal to do so, 907 // be aware on the race conditions), or 908 // 3) element-by-element predicate check and scalar store. 909 MaskedOp.insert(SI); 910 continue; 911 } 912 if (I.mayThrow()) 913 return false; 914 } 915 916 return true; 917 } 918 919 bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 920 if (!EnableIfConversion) { 921 reportVectorizationFailure("If-conversion is disabled", 922 "if-conversion is disabled", 923 "IfConversionDisabled", 924 ORE, TheLoop); 925 return false; 926 } 927 928 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 929 930 // A list of pointers which are known to be dereferenceable within scope of 931 // the loop body for each iteration of the loop which executes. That is, 932 // the memory pointed to can be dereferenced (with the access size implied by 933 // the value's type) unconditionally within the loop header without 934 // introducing a new fault. 935 SmallPtrSet<Value *, 8> SafePointes; 936 937 // Collect safe addresses. 938 for (BasicBlock *BB : TheLoop->blocks()) { 939 if (blockNeedsPredication(BB)) 940 continue; 941 942 for (Instruction &I : *BB) 943 if (auto *Ptr = getLoadStorePointerOperand(&I)) 944 SafePointes.insert(Ptr); 945 } 946 947 // Collect the blocks that need predication. 948 BasicBlock *Header = TheLoop->getHeader(); 949 for (BasicBlock *BB : TheLoop->blocks()) { 950 // We don't support switch statements inside loops. 951 if (!isa<BranchInst>(BB->getTerminator())) { 952 reportVectorizationFailure("Loop contains a switch statement", 953 "loop contains a switch statement", 954 "LoopContainsSwitch", ORE, TheLoop, 955 BB->getTerminator()); 956 return false; 957 } 958 959 // We must be able to predicate all blocks that need to be predicated. 960 if (blockNeedsPredication(BB)) { 961 if (!blockCanBePredicated(BB, SafePointes)) { 962 reportVectorizationFailure( 963 "Control flow cannot be substituted for a select", 964 "control flow cannot be substituted for a select", 965 "NoCFGForSelect", ORE, TheLoop, 966 BB->getTerminator()); 967 return false; 968 } 969 } else if (BB != Header && !canIfConvertPHINodes(BB)) { 970 reportVectorizationFailure( 971 "Control flow cannot be substituted for a select", 972 "control flow cannot be substituted for a select", 973 "NoCFGForSelect", ORE, TheLoop, 974 BB->getTerminator()); 975 return false; 976 } 977 } 978 979 // We can if-convert this loop. 980 return true; 981 } 982 983 // Helper function to canVectorizeLoopNestCFG. 984 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 985 bool UseVPlanNativePath) { 986 assert((UseVPlanNativePath || Lp->empty()) && 987 "VPlan-native path is not enabled."); 988 989 // TODO: ORE should be improved to show more accurate information when an 990 // outer loop can't be vectorized because a nested loop is not understood or 991 // legal. Something like: "outer_loop_location: loop not vectorized: 992 // (inner_loop_location) loop control flow is not understood by vectorizer". 993 994 // Store the result and return it at the end instead of exiting early, in case 995 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 996 bool Result = true; 997 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 998 999 // We must have a loop in canonical form. Loops with indirectbr in them cannot 1000 // be canonicalized. 1001 if (!Lp->getLoopPreheader()) { 1002 reportVectorizationFailure("Loop doesn't have a legal pre-header", 1003 "loop control flow is not understood by vectorizer", 1004 "CFGNotUnderstood", ORE, TheLoop); 1005 if (DoExtraAnalysis) 1006 Result = false; 1007 else 1008 return false; 1009 } 1010 1011 // We must have a single backedge. 1012 if (Lp->getNumBackEdges() != 1) { 1013 reportVectorizationFailure("The loop must have a single backedge", 1014 "loop control flow is not understood by vectorizer", 1015 "CFGNotUnderstood", ORE, TheLoop); 1016 if (DoExtraAnalysis) 1017 Result = false; 1018 else 1019 return false; 1020 } 1021 1022 // We must have a single exiting block. 1023 if (!Lp->getExitingBlock()) { 1024 reportVectorizationFailure("The loop must have an exiting block", 1025 "loop control flow is not understood by vectorizer", 1026 "CFGNotUnderstood", ORE, TheLoop); 1027 if (DoExtraAnalysis) 1028 Result = false; 1029 else 1030 return false; 1031 } 1032 1033 // We only handle bottom-tested loops, i.e. loop in which the condition is 1034 // checked at the end of each iteration. With that we can assume that all 1035 // instructions in the loop are executed the same number of times. 1036 if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 1037 reportVectorizationFailure("The exiting block is not the loop latch", 1038 "loop control flow is not understood by vectorizer", 1039 "CFGNotUnderstood", ORE, TheLoop); 1040 if (DoExtraAnalysis) 1041 Result = false; 1042 else 1043 return false; 1044 } 1045 1046 return Result; 1047 } 1048 1049 bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1050 Loop *Lp, bool UseVPlanNativePath) { 1051 // Store the result and return it at the end instead of exiting early, in case 1052 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1053 bool Result = true; 1054 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1055 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1056 if (DoExtraAnalysis) 1057 Result = false; 1058 else 1059 return false; 1060 } 1061 1062 // Recursively check whether the loop control flow of nested loops is 1063 // understood. 1064 for (Loop *SubLp : *Lp) 1065 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1066 if (DoExtraAnalysis) 1067 Result = false; 1068 else 1069 return false; 1070 } 1071 1072 return Result; 1073 } 1074 1075 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1076 // Store the result and return it at the end instead of exiting early, in case 1077 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1078 bool Result = true; 1079 1080 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1081 // Check whether the loop-related control flow in the loop nest is expected by 1082 // vectorizer. 1083 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1084 if (DoExtraAnalysis) 1085 Result = false; 1086 else 1087 return false; 1088 } 1089 1090 // We need to have a loop header. 1091 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1092 << '\n'); 1093 1094 // Specific checks for outer loops. We skip the remaining legal checks at this 1095 // point because they don't support outer loops. 1096 if (!TheLoop->empty()) { 1097 assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1098 1099 if (!canVectorizeOuterLoop()) { 1100 reportVectorizationFailure("Unsupported outer loop", 1101 "unsupported outer loop", 1102 "UnsupportedOuterLoop", 1103 ORE, TheLoop); 1104 // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1105 // outer loops. 1106 return false; 1107 } 1108 1109 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1110 return Result; 1111 } 1112 1113 assert(TheLoop->empty() && "Inner loop expected."); 1114 // Check if we can if-convert non-single-bb loops. 1115 unsigned NumBlocks = TheLoop->getNumBlocks(); 1116 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1117 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1118 if (DoExtraAnalysis) 1119 Result = false; 1120 else 1121 return false; 1122 } 1123 1124 // Check if we can vectorize the instructions and CFG in this loop. 1125 if (!canVectorizeInstrs()) { 1126 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1127 if (DoExtraAnalysis) 1128 Result = false; 1129 else 1130 return false; 1131 } 1132 1133 // Go over each instruction and look at memory deps. 1134 if (!canVectorizeMemory()) { 1135 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1136 if (DoExtraAnalysis) 1137 Result = false; 1138 else 1139 return false; 1140 } 1141 1142 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1143 << (LAI->getRuntimePointerChecking()->Need 1144 ? " (with a runtime bound check)" 1145 : "") 1146 << "!\n"); 1147 1148 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1149 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1150 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1151 1152 if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 1153 reportVectorizationFailure("Too many SCEV checks needed", 1154 "Too many SCEV assumptions need to be made and checked at runtime", 1155 "TooManySCEVRunTimeChecks", ORE, TheLoop); 1156 if (DoExtraAnalysis) 1157 Result = false; 1158 else 1159 return false; 1160 } 1161 1162 // Okay! We've done all the tests. If any have failed, return false. Otherwise 1163 // we can vectorize, and at this point we don't have any other mem analysis 1164 // which may limit our maximum vectorization factor, so just return true with 1165 // no restrictions. 1166 return Result; 1167 } 1168 1169 bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1170 1171 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1172 1173 if (!PrimaryInduction) { 1174 reportVectorizationFailure( 1175 "No primary induction, cannot fold tail by masking", 1176 "Missing a primary induction variable in the loop, which is " 1177 "needed in order to fold tail by masking as required.", 1178 "NoPrimaryInduction", ORE, TheLoop); 1179 return false; 1180 } 1181 1182 SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1183 1184 for (auto &Reduction : *getReductionVars()) 1185 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1186 1187 // TODO: handle non-reduction outside users when tail is folded by masking. 1188 for (auto *AE : AllowedExit) { 1189 // Check that all users of allowed exit values are inside the loop or 1190 // are the live-out of a reduction. 1191 if (ReductionLiveOuts.count(AE)) 1192 continue; 1193 for (User *U : AE->users()) { 1194 Instruction *UI = cast<Instruction>(U); 1195 if (TheLoop->contains(UI)) 1196 continue; 1197 reportVectorizationFailure( 1198 "Cannot fold tail by masking, loop has an outside user for", 1199 "Cannot fold tail by masking in the presence of live outs.", 1200 "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1201 return false; 1202 } 1203 } 1204 1205 // The list of pointers that we can safely read and write to remains empty. 1206 SmallPtrSet<Value *, 8> SafePointers; 1207 1208 // Check and mark all blocks for predication, including those that ordinarily 1209 // do not need predication such as the header block. 1210 for (BasicBlock *BB : TheLoop->blocks()) { 1211 if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 1212 reportVectorizationFailure( 1213 "Cannot fold tail by masking as required", 1214 "control flow cannot be substituted for a select", 1215 "NoCFGForSelect", ORE, TheLoop, 1216 BB->getTerminator()); 1217 return false; 1218 } 1219 } 1220 1221 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1222 return true; 1223 } 1224 1225 } // namespace llvm 1226