1 //===- LoopVectorizationLegality.cpp --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides loop vectorization legality analysis. Original code 10 // resided in LoopVectorize.cpp for a long time. 11 // 12 // At this point, it is implemented as a utility class, not as an analysis 13 // pass. It should be easy to create an analysis pass around it if there 14 // is a need (but D45420 needs to happen first). 15 // 16 #include "llvm/Transforms/Vectorize/LoopVectorize.h" 17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 18 #include "llvm/Analysis/VectorUtils.h" 19 #include "llvm/IR/IntrinsicInst.h" 20 21 using namespace llvm; 22 23 #define LV_NAME "loop-vectorize" 24 #define DEBUG_TYPE LV_NAME 25 26 extern cl::opt<bool> EnableVPlanPredication; 27 28 static cl::opt<bool> 29 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 30 cl::desc("Enable if-conversion during vectorization.")); 31 32 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 33 "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 34 cl::desc("The maximum allowed number of runtime memory checks with a " 35 "vectorize(enable) pragma.")); 36 37 static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 38 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 39 cl::desc("The maximum number of SCEV checks allowed.")); 40 41 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 42 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 43 cl::desc("The maximum number of SCEV checks allowed with a " 44 "vectorize(enable) pragma")); 45 46 /// Maximum vectorization interleave count. 47 static const unsigned MaxInterleaveFactor = 16; 48 49 namespace llvm { 50 51 bool LoopVectorizeHints::Hint::validate(unsigned Val) { 52 switch (Kind) { 53 case HK_WIDTH: 54 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 55 case HK_UNROLL: 56 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 57 case HK_FORCE: 58 return (Val <= 1); 59 case HK_ISVECTORIZED: 60 case HK_PREDICATE: 61 return (Val == 0 || Val == 1); 62 } 63 return false; 64 } 65 66 LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 67 bool InterleaveOnlyWhenForced, 68 OptimizationRemarkEmitter &ORE) 69 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 70 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 71 Force("vectorize.enable", FK_Undefined, HK_FORCE), 72 IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 73 Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), 74 ORE(ORE) { 75 // Populate values with existing loop metadata. 76 getHintsFromMetadata(); 77 78 // force-vector-interleave overrides DisableInterleaving. 79 if (VectorizerParams::isInterleaveForced()) 80 Interleave.Value = VectorizerParams::VectorizationInterleave; 81 82 if (IsVectorized.Value != 1) 83 // If the vectorization width and interleaving count are both 1 then 84 // consider the loop to have been already vectorized because there's 85 // nothing more that we can do. 86 IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 87 LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 88 << "LV: Interleaving disabled by the pass manager\n"); 89 } 90 91 void LoopVectorizeHints::setAlreadyVectorized() { 92 LLVMContext &Context = TheLoop->getHeader()->getContext(); 93 94 MDNode *IsVectorizedMD = MDNode::get( 95 Context, 96 {MDString::get(Context, "llvm.loop.isvectorized"), 97 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 98 MDNode *LoopID = TheLoop->getLoopID(); 99 MDNode *NewLoopID = 100 makePostTransformationMetadata(Context, LoopID, 101 {Twine(Prefix(), "vectorize.").str(), 102 Twine(Prefix(), "interleave.").str()}, 103 {IsVectorizedMD}); 104 TheLoop->setLoopID(NewLoopID); 105 106 // Update internal cache. 107 IsVectorized.Value = 1; 108 } 109 110 bool LoopVectorizeHints::allowVectorization( 111 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 112 if (getForce() == LoopVectorizeHints::FK_Disabled) { 113 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 114 emitRemarkWithHints(); 115 return false; 116 } 117 118 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 119 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 120 emitRemarkWithHints(); 121 return false; 122 } 123 124 if (getIsVectorized() == 1) { 125 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 126 // FIXME: Add interleave.disable metadata. This will allow 127 // vectorize.disable to be used without disabling the pass and errors 128 // to differentiate between disabled vectorization and a width of 1. 129 ORE.emit([&]() { 130 return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 131 "AllDisabled", L->getStartLoc(), 132 L->getHeader()) 133 << "loop not vectorized: vectorization and interleaving are " 134 "explicitly disabled, or the loop has already been " 135 "vectorized"; 136 }); 137 return false; 138 } 139 140 return true; 141 } 142 143 void LoopVectorizeHints::emitRemarkWithHints() const { 144 using namespace ore; 145 146 ORE.emit([&]() { 147 if (Force.Value == LoopVectorizeHints::FK_Disabled) 148 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 149 TheLoop->getStartLoc(), 150 TheLoop->getHeader()) 151 << "loop not vectorized: vectorization is explicitly disabled"; 152 else { 153 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 154 TheLoop->getStartLoc(), TheLoop->getHeader()); 155 R << "loop not vectorized"; 156 if (Force.Value == LoopVectorizeHints::FK_Enabled) { 157 R << " (Force=" << NV("Force", true); 158 if (Width.Value != 0) 159 R << ", Vector Width=" << NV("VectorWidth", Width.Value); 160 if (Interleave.Value != 0) 161 R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 162 R << ")"; 163 } 164 return R; 165 } 166 }); 167 } 168 169 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 170 if (getWidth() == 1) 171 return LV_NAME; 172 if (getForce() == LoopVectorizeHints::FK_Disabled) 173 return LV_NAME; 174 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 175 return LV_NAME; 176 return OptimizationRemarkAnalysis::AlwaysPrint; 177 } 178 179 void LoopVectorizeHints::getHintsFromMetadata() { 180 MDNode *LoopID = TheLoop->getLoopID(); 181 if (!LoopID) 182 return; 183 184 // First operand should refer to the loop id itself. 185 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 186 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 187 188 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 189 const MDString *S = nullptr; 190 SmallVector<Metadata *, 4> Args; 191 192 // The expected hint is either a MDString or a MDNode with the first 193 // operand a MDString. 194 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 195 if (!MD || MD->getNumOperands() == 0) 196 continue; 197 S = dyn_cast<MDString>(MD->getOperand(0)); 198 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 199 Args.push_back(MD->getOperand(i)); 200 } else { 201 S = dyn_cast<MDString>(LoopID->getOperand(i)); 202 assert(Args.size() == 0 && "too many arguments for MDString"); 203 } 204 205 if (!S) 206 continue; 207 208 // Check if the hint starts with the loop metadata prefix. 209 StringRef Name = S->getString(); 210 if (Args.size() == 1) 211 setHint(Name, Args[0]); 212 } 213 } 214 215 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 216 if (!Name.startswith(Prefix())) 217 return; 218 Name = Name.substr(Prefix().size(), StringRef::npos); 219 220 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 221 if (!C) 222 return; 223 unsigned Val = C->getZExtValue(); 224 225 Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 226 for (auto H : Hints) { 227 if (Name == H->Name) { 228 if (H->validate(Val)) 229 H->Value = Val; 230 else 231 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 232 break; 233 } 234 } 235 } 236 237 bool LoopVectorizationRequirements::doesNotMeet( 238 Function *F, Loop *L, const LoopVectorizeHints &Hints) { 239 const char *PassName = Hints.vectorizeAnalysisPassName(); 240 bool Failed = false; 241 if (UnsafeAlgebraInst && !Hints.allowReordering()) { 242 ORE.emit([&]() { 243 return OptimizationRemarkAnalysisFPCommute( 244 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 245 UnsafeAlgebraInst->getParent()) 246 << "loop not vectorized: cannot prove it is safe to reorder " 247 "floating-point operations"; 248 }); 249 Failed = true; 250 } 251 252 // Test if runtime memcheck thresholds are exceeded. 253 bool PragmaThresholdReached = 254 NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 255 bool ThresholdReached = 256 NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 257 if ((ThresholdReached && !Hints.allowReordering()) || 258 PragmaThresholdReached) { 259 ORE.emit([&]() { 260 return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 261 L->getStartLoc(), 262 L->getHeader()) 263 << "loop not vectorized: cannot prove it is safe to reorder " 264 "memory operations"; 265 }); 266 LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 267 Failed = true; 268 } 269 270 return Failed; 271 } 272 273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop 274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 275 // executing the inner loop will execute the same iterations). This check is 276 // very constrained for now but it will be relaxed in the future. \p Lp is 277 // considered uniform if it meets all the following conditions: 278 // 1) it has a canonical IV (starting from 0 and with stride 1), 279 // 2) its latch terminator is a conditional branch and, 280 // 3) its latch condition is a compare instruction whose operands are the 281 // canonical IV and an OuterLp invariant. 282 // This check doesn't take into account the uniformity of other conditions not 283 // related to the loop latch because they don't affect the loop uniformity. 284 // 285 // NOTE: We decided to keep all these checks and its associated documentation 286 // together so that we can easily have a picture of the current supported loop 287 // nests. However, some of the current checks don't depend on \p OuterLp and 288 // would be redundantly executed for each \p Lp if we invoked this function for 289 // different candidate outer loops. This is not the case for now because we 290 // don't currently have the infrastructure to evaluate multiple candidate outer 291 // loops and \p OuterLp will be a fixed parameter while we only support explicit 292 // outer loop vectorization. It's also very likely that these checks go away 293 // before introducing the aforementioned infrastructure. However, if this is not 294 // the case, we should move the \p OuterLp independent checks to a separate 295 // function that is only executed once for each \p Lp. 296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 297 assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 298 299 // If Lp is the outer loop, it's uniform by definition. 300 if (Lp == OuterLp) 301 return true; 302 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 303 304 // 1. 305 PHINode *IV = Lp->getCanonicalInductionVariable(); 306 if (!IV) { 307 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 308 return false; 309 } 310 311 // 2. 312 BasicBlock *Latch = Lp->getLoopLatch(); 313 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 314 if (!LatchBr || LatchBr->isUnconditional()) { 315 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 316 return false; 317 } 318 319 // 3. 320 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 321 if (!LatchCmp) { 322 LLVM_DEBUG( 323 dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 324 return false; 325 } 326 327 Value *CondOp0 = LatchCmp->getOperand(0); 328 Value *CondOp1 = LatchCmp->getOperand(1); 329 Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 330 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 331 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 332 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 333 return false; 334 } 335 336 return true; 337 } 338 339 // Return true if \p Lp and all its nested loops are uniform with regard to \p 340 // OuterLp. 341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 342 if (!isUniformLoop(Lp, OuterLp)) 343 return false; 344 345 // Check if nested loops are uniform. 346 for (Loop *SubLp : *Lp) 347 if (!isUniformLoopNest(SubLp, OuterLp)) 348 return false; 349 350 return true; 351 } 352 353 /// Check whether it is safe to if-convert this phi node. 354 /// 355 /// Phi nodes with constant expressions that can trap are not safe to if 356 /// convert. 357 static bool canIfConvertPHINodes(BasicBlock *BB) { 358 for (PHINode &Phi : BB->phis()) { 359 for (Value *V : Phi.incoming_values()) 360 if (auto *C = dyn_cast<Constant>(V)) 361 if (C->canTrap()) 362 return false; 363 } 364 return true; 365 } 366 367 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 368 if (Ty->isPointerTy()) 369 return DL.getIntPtrType(Ty); 370 371 // It is possible that char's or short's overflow when we ask for the loop's 372 // trip count, work around this by changing the type size. 373 if (Ty->getScalarSizeInBits() < 32) 374 return Type::getInt32Ty(Ty->getContext()); 375 376 return Ty; 377 } 378 379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 380 Ty0 = convertPointerToIntegerType(DL, Ty0); 381 Ty1 = convertPointerToIntegerType(DL, Ty1); 382 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 383 return Ty0; 384 return Ty1; 385 } 386 387 /// Check that the instruction has outside loop users and is not an 388 /// identified reduction variable. 389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 390 SmallPtrSetImpl<Value *> &AllowedExit) { 391 // Reductions, Inductions and non-header phis are allowed to have exit users. All 392 // other instructions must not have external users. 393 if (!AllowedExit.count(Inst)) 394 // Check that all of the users of the loop are inside the BB. 395 for (User *U : Inst->users()) { 396 Instruction *UI = cast<Instruction>(U); 397 // This user may be a reduction exit value. 398 if (!TheLoop->contains(UI)) { 399 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 400 return true; 401 } 402 } 403 return false; 404 } 405 406 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 407 const ValueToValueMap &Strides = 408 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 409 410 int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); 411 if (Stride == 1 || Stride == -1) 412 return Stride; 413 return 0; 414 } 415 416 bool LoopVectorizationLegality::isUniform(Value *V) { 417 return LAI->isUniform(V); 418 } 419 420 bool LoopVectorizationLegality::canVectorizeOuterLoop() { 421 assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 422 // Store the result and return it at the end instead of exiting early, in case 423 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 424 bool Result = true; 425 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 426 427 for (BasicBlock *BB : TheLoop->blocks()) { 428 // Check whether the BB terminator is a BranchInst. Any other terminator is 429 // not supported yet. 430 auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 431 if (!Br) { 432 reportVectorizationFailure("Unsupported basic block terminator", 433 "loop control flow is not understood by vectorizer", 434 "CFGNotUnderstood", ORE, TheLoop); 435 if (DoExtraAnalysis) 436 Result = false; 437 else 438 return false; 439 } 440 441 // Check whether the BranchInst is a supported one. Only unconditional 442 // branches, conditional branches with an outer loop invariant condition or 443 // backedges are supported. 444 // FIXME: We skip these checks when VPlan predication is enabled as we 445 // want to allow divergent branches. This whole check will be removed 446 // once VPlan predication is on by default. 447 if (!EnableVPlanPredication && Br && Br->isConditional() && 448 !TheLoop->isLoopInvariant(Br->getCondition()) && 449 !LI->isLoopHeader(Br->getSuccessor(0)) && 450 !LI->isLoopHeader(Br->getSuccessor(1))) { 451 reportVectorizationFailure("Unsupported conditional branch", 452 "loop control flow is not understood by vectorizer", 453 "CFGNotUnderstood", ORE, TheLoop); 454 if (DoExtraAnalysis) 455 Result = false; 456 else 457 return false; 458 } 459 } 460 461 // Check whether inner loops are uniform. At this point, we only support 462 // simple outer loops scenarios with uniform nested loops. 463 if (!isUniformLoopNest(TheLoop /*loop nest*/, 464 TheLoop /*context outer loop*/)) { 465 reportVectorizationFailure("Outer loop contains divergent loops", 466 "loop control flow is not understood by vectorizer", 467 "CFGNotUnderstood", ORE, TheLoop); 468 if (DoExtraAnalysis) 469 Result = false; 470 else 471 return false; 472 } 473 474 // Check whether we are able to set up outer loop induction. 475 if (!setupOuterLoopInductions()) { 476 reportVectorizationFailure("Unsupported outer loop Phi(s)", 477 "Unsupported outer loop Phi(s)", 478 "UnsupportedPhi", ORE, TheLoop); 479 if (DoExtraAnalysis) 480 Result = false; 481 else 482 return false; 483 } 484 485 return Result; 486 } 487 488 void LoopVectorizationLegality::addInductionPhi( 489 PHINode *Phi, const InductionDescriptor &ID, 490 SmallPtrSetImpl<Value *> &AllowedExit) { 491 Inductions[Phi] = ID; 492 493 // In case this induction also comes with casts that we know we can ignore 494 // in the vectorized loop body, record them here. All casts could be recorded 495 // here for ignoring, but suffices to record only the first (as it is the 496 // only one that may bw used outside the cast sequence). 497 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 498 if (!Casts.empty()) 499 InductionCastsToIgnore.insert(*Casts.begin()); 500 501 Type *PhiTy = Phi->getType(); 502 const DataLayout &DL = Phi->getModule()->getDataLayout(); 503 504 // Get the widest type. 505 if (!PhiTy->isFloatingPointTy()) { 506 if (!WidestIndTy) 507 WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 508 else 509 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 510 } 511 512 // Int inductions are special because we only allow one IV. 513 if (ID.getKind() == InductionDescriptor::IK_IntInduction && 514 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 515 isa<Constant>(ID.getStartValue()) && 516 cast<Constant>(ID.getStartValue())->isNullValue()) { 517 518 // Use the phi node with the widest type as induction. Use the last 519 // one if there are multiple (no good reason for doing this other 520 // than it is expedient). We've checked that it begins at zero and 521 // steps by one, so this is a canonical induction variable. 522 if (!PrimaryInduction || PhiTy == WidestIndTy) 523 PrimaryInduction = Phi; 524 } 525 526 // Both the PHI node itself, and the "post-increment" value feeding 527 // back into the PHI node may have external users. 528 // We can allow those uses, except if the SCEVs we have for them rely 529 // on predicates that only hold within the loop, since allowing the exit 530 // currently means re-using this SCEV outside the loop (see PR33706 for more 531 // details). 532 if (PSE.getUnionPredicate().isAlwaysTrue()) { 533 AllowedExit.insert(Phi); 534 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 535 } 536 537 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 538 } 539 540 bool LoopVectorizationLegality::setupOuterLoopInductions() { 541 BasicBlock *Header = TheLoop->getHeader(); 542 543 // Returns true if a given Phi is a supported induction. 544 auto isSupportedPhi = [&](PHINode &Phi) -> bool { 545 InductionDescriptor ID; 546 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 547 ID.getKind() == InductionDescriptor::IK_IntInduction) { 548 addInductionPhi(&Phi, ID, AllowedExit); 549 return true; 550 } else { 551 // Bail out for any Phi in the outer loop header that is not a supported 552 // induction. 553 LLVM_DEBUG( 554 dbgs() 555 << "LV: Found unsupported PHI for outer loop vectorization.\n"); 556 return false; 557 } 558 }; 559 560 if (llvm::all_of(Header->phis(), isSupportedPhi)) 561 return true; 562 else 563 return false; 564 } 565 566 bool LoopVectorizationLegality::canVectorizeInstrs() { 567 BasicBlock *Header = TheLoop->getHeader(); 568 569 // Look for the attribute signaling the absence of NaNs. 570 Function &F = *Header->getParent(); 571 HasFunNoNaNAttr = 572 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 573 574 // For each block in the loop. 575 for (BasicBlock *BB : TheLoop->blocks()) { 576 // Scan the instructions in the block and look for hazards. 577 for (Instruction &I : *BB) { 578 if (auto *Phi = dyn_cast<PHINode>(&I)) { 579 Type *PhiTy = Phi->getType(); 580 // Check that this PHI type is allowed. 581 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 582 !PhiTy->isPointerTy()) { 583 reportVectorizationFailure("Found a non-int non-pointer PHI", 584 "loop control flow is not understood by vectorizer", 585 "CFGNotUnderstood", ORE, TheLoop); 586 return false; 587 } 588 589 // If this PHINode is not in the header block, then we know that we 590 // can convert it to select during if-conversion. No need to check if 591 // the PHIs in this block are induction or reduction variables. 592 if (BB != Header) { 593 // Non-header phi nodes that have outside uses can be vectorized. Add 594 // them to the list of allowed exits. 595 // Unsafe cyclic dependencies with header phis are identified during 596 // legalization for reduction, induction and first order 597 // recurrences. 598 continue; 599 } 600 601 // We only allow if-converted PHIs with exactly two incoming values. 602 if (Phi->getNumIncomingValues() != 2) { 603 reportVectorizationFailure("Found an invalid PHI", 604 "loop control flow is not understood by vectorizer", 605 "CFGNotUnderstood", ORE, TheLoop, Phi); 606 return false; 607 } 608 609 RecurrenceDescriptor RedDes; 610 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 611 DT)) { 612 if (RedDes.hasUnsafeAlgebra()) 613 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 614 AllowedExit.insert(RedDes.getLoopExitInstr()); 615 Reductions[Phi] = RedDes; 616 continue; 617 } 618 619 // TODO: Instead of recording the AllowedExit, it would be good to record the 620 // complementary set: NotAllowedExit. These include (but may not be 621 // limited to): 622 // 1. Reduction phis as they represent the one-before-last value, which 623 // is not available when vectorized 624 // 2. Induction phis and increment when SCEV predicates cannot be used 625 // outside the loop - see addInductionPhi 626 // 3. Non-Phis with outside uses when SCEV predicates cannot be used 627 // outside the loop - see call to hasOutsideLoopUser in the non-phi 628 // handling below 629 // 4. FirstOrderRecurrence phis that can possibly be handled by 630 // extraction. 631 // By recording these, we can then reason about ways to vectorize each 632 // of these NotAllowedExit. 633 InductionDescriptor ID; 634 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 635 addInductionPhi(Phi, ID, AllowedExit); 636 if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 637 Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 638 continue; 639 } 640 641 if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 642 SinkAfter, DT)) { 643 FirstOrderRecurrences.insert(Phi); 644 continue; 645 } 646 647 // As a last resort, coerce the PHI to a AddRec expression 648 // and re-try classifying it a an induction PHI. 649 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 650 addInductionPhi(Phi, ID, AllowedExit); 651 continue; 652 } 653 654 reportVectorizationFailure("Found an unidentified PHI", 655 "value that could not be identified as " 656 "reduction is used outside the loop", 657 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 658 return false; 659 } // end of PHI handling 660 661 // We handle calls that: 662 // * Are debug info intrinsics. 663 // * Have a mapping to an IR intrinsic. 664 // * Have a vector version available. 665 auto *CI = dyn_cast<CallInst>(&I); 666 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 667 !isa<DbgInfoIntrinsic>(CI) && 668 !(CI->getCalledFunction() && TLI && 669 TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { 670 // If the call is a recognized math libary call, it is likely that 671 // we can vectorize it given loosened floating-point constraints. 672 LibFunc Func; 673 bool IsMathLibCall = 674 TLI && CI->getCalledFunction() && 675 CI->getType()->isFloatingPointTy() && 676 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 677 TLI->hasOptimizedCodeGen(Func); 678 679 if (IsMathLibCall) { 680 // TODO: Ideally, we should not use clang-specific language here, 681 // but it's hard to provide meaningful yet generic advice. 682 // Also, should this be guarded by allowExtraAnalysis() and/or be part 683 // of the returned info from isFunctionVectorizable()? 684 reportVectorizationFailure("Found a non-intrinsic callsite", 685 "library call cannot be vectorized. " 686 "Try compiling with -fno-math-errno, -ffast-math, " 687 "or similar flags", 688 "CantVectorizeLibcall", ORE, TheLoop, CI); 689 } else { 690 reportVectorizationFailure("Found a non-intrinsic callsite", 691 "call instruction cannot be vectorized", 692 "CantVectorizeLibcall", ORE, TheLoop, CI); 693 } 694 return false; 695 } 696 697 // Some intrinsics have scalar arguments and should be same in order for 698 // them to be vectorized (i.e. loop invariant). 699 if (CI) { 700 auto *SE = PSE.getSE(); 701 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 702 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 703 if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 704 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 705 reportVectorizationFailure("Found unvectorizable intrinsic", 706 "intrinsic instruction cannot be vectorized", 707 "CantVectorizeIntrinsic", ORE, TheLoop, CI); 708 return false; 709 } 710 } 711 } 712 713 // Check that the instruction return type is vectorizable. 714 // Also, we can't vectorize extractelement instructions. 715 if ((!VectorType::isValidElementType(I.getType()) && 716 !I.getType()->isVoidTy()) || 717 isa<ExtractElementInst>(I)) { 718 reportVectorizationFailure("Found unvectorizable type", 719 "instruction return type cannot be vectorized", 720 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 721 return false; 722 } 723 724 // Check that the stored type is vectorizable. 725 if (auto *ST = dyn_cast<StoreInst>(&I)) { 726 Type *T = ST->getValueOperand()->getType(); 727 if (!VectorType::isValidElementType(T)) { 728 reportVectorizationFailure("Store instruction cannot be vectorized", 729 "store instruction cannot be vectorized", 730 "CantVectorizeStore", ORE, TheLoop, ST); 731 return false; 732 } 733 734 // For nontemporal stores, check that a nontemporal vector version is 735 // supported on the target. 736 if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 737 // Arbitrarily try a vector of 2 elements. 738 Type *VecTy = VectorType::get(T, /*NumElements=*/2); 739 assert(VecTy && "did not find vectorized version of stored type"); 740 unsigned Alignment = getLoadStoreAlignment(ST); 741 if (!TTI->isLegalNTStore(VecTy, Alignment)) { 742 reportVectorizationFailure( 743 "nontemporal store instruction cannot be vectorized", 744 "nontemporal store instruction cannot be vectorized", 745 "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 746 return false; 747 } 748 } 749 750 } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 751 if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 752 // For nontemporal loads, check that a nontemporal vector version is 753 // supported on the target (arbitrarily try a vector of 2 elements). 754 Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 755 assert(VecTy && "did not find vectorized version of load type"); 756 unsigned Alignment = getLoadStoreAlignment(LD); 757 if (!TTI->isLegalNTLoad(VecTy, Alignment)) { 758 reportVectorizationFailure( 759 "nontemporal load instruction cannot be vectorized", 760 "nontemporal load instruction cannot be vectorized", 761 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 762 return false; 763 } 764 } 765 766 // FP instructions can allow unsafe algebra, thus vectorizable by 767 // non-IEEE-754 compliant SIMD units. 768 // This applies to floating-point math operations and calls, not memory 769 // operations, shuffles, or casts, as they don't change precision or 770 // semantics. 771 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 772 !I.isFast()) { 773 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 774 Hints->setPotentiallyUnsafe(); 775 } 776 777 // Reduction instructions are allowed to have exit users. 778 // All other instructions must not have external users. 779 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 780 // We can safely vectorize loops where instructions within the loop are 781 // used outside the loop only if the SCEV predicates within the loop is 782 // same as outside the loop. Allowing the exit means reusing the SCEV 783 // outside the loop. 784 if (PSE.getUnionPredicate().isAlwaysTrue()) { 785 AllowedExit.insert(&I); 786 continue; 787 } 788 reportVectorizationFailure("Value cannot be used outside the loop", 789 "value cannot be used outside the loop", 790 "ValueUsedOutsideLoop", ORE, TheLoop, &I); 791 return false; 792 } 793 } // next instr. 794 } 795 796 if (!PrimaryInduction) { 797 if (Inductions.empty()) { 798 reportVectorizationFailure("Did not find one integer induction var", 799 "loop induction variable could not be identified", 800 "NoInductionVariable", ORE, TheLoop); 801 return false; 802 } else if (!WidestIndTy) { 803 reportVectorizationFailure("Did not find one integer induction var", 804 "integer loop induction variable could not be identified", 805 "NoIntegerInductionVariable", ORE, TheLoop); 806 return false; 807 } else { 808 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 809 } 810 } 811 812 // Now we know the widest induction type, check if our found induction 813 // is the same size. If it's not, unset it here and InnerLoopVectorizer 814 // will create another. 815 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 816 PrimaryInduction = nullptr; 817 818 return true; 819 } 820 821 bool LoopVectorizationLegality::canVectorizeMemory() { 822 LAI = &(*GetLAA)(*TheLoop); 823 const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 824 if (LAR) { 825 ORE->emit([&]() { 826 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 827 "loop not vectorized: ", *LAR); 828 }); 829 } 830 if (!LAI->canVectorizeMemory()) 831 return false; 832 833 if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 834 reportVectorizationFailure("Stores to a uniform address", 835 "write to a loop invariant address could not be vectorized", 836 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 837 return false; 838 } 839 Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 840 PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 841 842 return true; 843 } 844 845 bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 846 Value *In0 = const_cast<Value *>(V); 847 PHINode *PN = dyn_cast_or_null<PHINode>(In0); 848 if (!PN) 849 return false; 850 851 return Inductions.count(PN); 852 } 853 854 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 855 auto *Inst = dyn_cast<Instruction>(V); 856 return (Inst && InductionCastsToIgnore.count(Inst)); 857 } 858 859 bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 860 return isInductionPhi(V) || isCastedInductionVariable(V); 861 } 862 863 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 864 return FirstOrderRecurrences.count(Phi); 865 } 866 867 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 868 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 869 } 870 871 bool LoopVectorizationLegality::blockCanBePredicated( 872 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 873 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 874 875 for (Instruction &I : *BB) { 876 // Check that we don't have a constant expression that can trap as operand. 877 for (Value *Operand : I.operands()) { 878 if (auto *C = dyn_cast<Constant>(Operand)) 879 if (C->canTrap()) 880 return false; 881 } 882 // We might be able to hoist the load. 883 if (I.mayReadFromMemory()) { 884 auto *LI = dyn_cast<LoadInst>(&I); 885 if (!LI) 886 return false; 887 if (!SafePtrs.count(LI->getPointerOperand())) { 888 // !llvm.mem.parallel_loop_access implies if-conversion safety. 889 // Otherwise, record that the load needs (real or emulated) masking 890 // and let the cost model decide. 891 if (!IsAnnotatedParallel || PreserveGuards) 892 MaskedOp.insert(LI); 893 continue; 894 } 895 } 896 897 if (I.mayWriteToMemory()) { 898 auto *SI = dyn_cast<StoreInst>(&I); 899 if (!SI) 900 return false; 901 // Predicated store requires some form of masking: 902 // 1) masked store HW instruction, 903 // 2) emulation via load-blend-store (only if safe and legal to do so, 904 // be aware on the race conditions), or 905 // 3) element-by-element predicate check and scalar store. 906 MaskedOp.insert(SI); 907 continue; 908 } 909 if (I.mayThrow()) 910 return false; 911 } 912 913 return true; 914 } 915 916 bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 917 if (!EnableIfConversion) { 918 reportVectorizationFailure("If-conversion is disabled", 919 "if-conversion is disabled", 920 "IfConversionDisabled", 921 ORE, TheLoop); 922 return false; 923 } 924 925 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 926 927 // A list of pointers that we can safely read and write to. 928 SmallPtrSet<Value *, 8> SafePointes; 929 930 // Collect safe addresses. 931 for (BasicBlock *BB : TheLoop->blocks()) { 932 if (blockNeedsPredication(BB)) 933 continue; 934 935 for (Instruction &I : *BB) 936 if (auto *Ptr = getLoadStorePointerOperand(&I)) 937 SafePointes.insert(Ptr); 938 } 939 940 // Collect the blocks that need predication. 941 BasicBlock *Header = TheLoop->getHeader(); 942 for (BasicBlock *BB : TheLoop->blocks()) { 943 // We don't support switch statements inside loops. 944 if (!isa<BranchInst>(BB->getTerminator())) { 945 reportVectorizationFailure("Loop contains a switch statement", 946 "loop contains a switch statement", 947 "LoopContainsSwitch", ORE, TheLoop, 948 BB->getTerminator()); 949 return false; 950 } 951 952 // We must be able to predicate all blocks that need to be predicated. 953 if (blockNeedsPredication(BB)) { 954 if (!blockCanBePredicated(BB, SafePointes)) { 955 reportVectorizationFailure( 956 "Control flow cannot be substituted for a select", 957 "control flow cannot be substituted for a select", 958 "NoCFGForSelect", ORE, TheLoop, 959 BB->getTerminator()); 960 return false; 961 } 962 } else if (BB != Header && !canIfConvertPHINodes(BB)) { 963 reportVectorizationFailure( 964 "Control flow cannot be substituted for a select", 965 "control flow cannot be substituted for a select", 966 "NoCFGForSelect", ORE, TheLoop, 967 BB->getTerminator()); 968 return false; 969 } 970 } 971 972 // We can if-convert this loop. 973 return true; 974 } 975 976 // Helper function to canVectorizeLoopNestCFG. 977 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 978 bool UseVPlanNativePath) { 979 assert((UseVPlanNativePath || Lp->empty()) && 980 "VPlan-native path is not enabled."); 981 982 // TODO: ORE should be improved to show more accurate information when an 983 // outer loop can't be vectorized because a nested loop is not understood or 984 // legal. Something like: "outer_loop_location: loop not vectorized: 985 // (inner_loop_location) loop control flow is not understood by vectorizer". 986 987 // Store the result and return it at the end instead of exiting early, in case 988 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 989 bool Result = true; 990 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 991 992 // We must have a loop in canonical form. Loops with indirectbr in them cannot 993 // be canonicalized. 994 if (!Lp->getLoopPreheader()) { 995 reportVectorizationFailure("Loop doesn't have a legal pre-header", 996 "loop control flow is not understood by vectorizer", 997 "CFGNotUnderstood", ORE, TheLoop); 998 if (DoExtraAnalysis) 999 Result = false; 1000 else 1001 return false; 1002 } 1003 1004 // We must have a single backedge. 1005 if (Lp->getNumBackEdges() != 1) { 1006 reportVectorizationFailure("The loop must have a single backedge", 1007 "loop control flow is not understood by vectorizer", 1008 "CFGNotUnderstood", ORE, TheLoop); 1009 if (DoExtraAnalysis) 1010 Result = false; 1011 else 1012 return false; 1013 } 1014 1015 // We must have a single exiting block. 1016 if (!Lp->getExitingBlock()) { 1017 reportVectorizationFailure("The loop must have an exiting block", 1018 "loop control flow is not understood by vectorizer", 1019 "CFGNotUnderstood", ORE, TheLoop); 1020 if (DoExtraAnalysis) 1021 Result = false; 1022 else 1023 return false; 1024 } 1025 1026 // We only handle bottom-tested loops, i.e. loop in which the condition is 1027 // checked at the end of each iteration. With that we can assume that all 1028 // instructions in the loop are executed the same number of times. 1029 if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 1030 reportVectorizationFailure("The exiting block is not the loop latch", 1031 "loop control flow is not understood by vectorizer", 1032 "CFGNotUnderstood", ORE, TheLoop); 1033 if (DoExtraAnalysis) 1034 Result = false; 1035 else 1036 return false; 1037 } 1038 1039 return Result; 1040 } 1041 1042 bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1043 Loop *Lp, bool UseVPlanNativePath) { 1044 // Store the result and return it at the end instead of exiting early, in case 1045 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1046 bool Result = true; 1047 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1048 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1049 if (DoExtraAnalysis) 1050 Result = false; 1051 else 1052 return false; 1053 } 1054 1055 // Recursively check whether the loop control flow of nested loops is 1056 // understood. 1057 for (Loop *SubLp : *Lp) 1058 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1059 if (DoExtraAnalysis) 1060 Result = false; 1061 else 1062 return false; 1063 } 1064 1065 return Result; 1066 } 1067 1068 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1069 // Store the result and return it at the end instead of exiting early, in case 1070 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1071 bool Result = true; 1072 1073 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1074 // Check whether the loop-related control flow in the loop nest is expected by 1075 // vectorizer. 1076 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1077 if (DoExtraAnalysis) 1078 Result = false; 1079 else 1080 return false; 1081 } 1082 1083 // We need to have a loop header. 1084 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1085 << '\n'); 1086 1087 // Specific checks for outer loops. We skip the remaining legal checks at this 1088 // point because they don't support outer loops. 1089 if (!TheLoop->empty()) { 1090 assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1091 1092 if (!canVectorizeOuterLoop()) { 1093 reportVectorizationFailure("Unsupported outer loop", 1094 "unsupported outer loop", 1095 "UnsupportedOuterLoop", 1096 ORE, TheLoop); 1097 // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1098 // outer loops. 1099 return false; 1100 } 1101 1102 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1103 return Result; 1104 } 1105 1106 assert(TheLoop->empty() && "Inner loop expected."); 1107 // Check if we can if-convert non-single-bb loops. 1108 unsigned NumBlocks = TheLoop->getNumBlocks(); 1109 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1110 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1111 if (DoExtraAnalysis) 1112 Result = false; 1113 else 1114 return false; 1115 } 1116 1117 // Check if we can vectorize the instructions and CFG in this loop. 1118 if (!canVectorizeInstrs()) { 1119 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1120 if (DoExtraAnalysis) 1121 Result = false; 1122 else 1123 return false; 1124 } 1125 1126 // Go over each instruction and look at memory deps. 1127 if (!canVectorizeMemory()) { 1128 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1129 if (DoExtraAnalysis) 1130 Result = false; 1131 else 1132 return false; 1133 } 1134 1135 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1136 << (LAI->getRuntimePointerChecking()->Need 1137 ? " (with a runtime bound check)" 1138 : "") 1139 << "!\n"); 1140 1141 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1142 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1143 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1144 1145 if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 1146 reportVectorizationFailure("Too many SCEV checks needed", 1147 "Too many SCEV assumptions need to be made and checked at runtime", 1148 "TooManySCEVRunTimeChecks", ORE, TheLoop); 1149 if (DoExtraAnalysis) 1150 Result = false; 1151 else 1152 return false; 1153 } 1154 1155 // Okay! We've done all the tests. If any have failed, return false. Otherwise 1156 // we can vectorize, and at this point we don't have any other mem analysis 1157 // which may limit our maximum vectorization factor, so just return true with 1158 // no restrictions. 1159 return Result; 1160 } 1161 1162 bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1163 1164 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1165 1166 if (!PrimaryInduction) { 1167 reportVectorizationFailure( 1168 "No primary induction, cannot fold tail by masking", 1169 "Missing a primary induction variable in the loop, which is " 1170 "needed in order to fold tail by masking as required.", 1171 "NoPrimaryInduction", ORE, TheLoop); 1172 return false; 1173 } 1174 1175 // TODO: handle reductions when tail is folded by masking. 1176 if (!Reductions.empty()) { 1177 reportVectorizationFailure( 1178 "Loop has reductions, cannot fold tail by masking", 1179 "Cannot fold tail by masking in the presence of reductions.", 1180 "ReductionFoldingTailByMasking", ORE, TheLoop); 1181 return false; 1182 } 1183 1184 // TODO: handle outside users when tail is folded by masking. 1185 for (auto *AE : AllowedExit) { 1186 // Check that all users of allowed exit values are inside the loop. 1187 for (User *U : AE->users()) { 1188 Instruction *UI = cast<Instruction>(U); 1189 if (TheLoop->contains(UI)) 1190 continue; 1191 reportVectorizationFailure( 1192 "Cannot fold tail by masking, loop has an outside user for", 1193 "Cannot fold tail by masking in the presence of live outs.", 1194 "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1195 return false; 1196 } 1197 } 1198 1199 // The list of pointers that we can safely read and write to remains empty. 1200 SmallPtrSet<Value *, 8> SafePointers; 1201 1202 // Check and mark all blocks for predication, including those that ordinarily 1203 // do not need predication such as the header block. 1204 for (BasicBlock *BB : TheLoop->blocks()) { 1205 if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 1206 reportVectorizationFailure( 1207 "Cannot fold tail by masking as required", 1208 "control flow cannot be substituted for a select", 1209 "NoCFGForSelect", ORE, TheLoop, 1210 BB->getTerminator()); 1211 return false; 1212 } 1213 } 1214 1215 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1216 return true; 1217 } 1218 1219 } // namespace llvm 1220