1 //===- LoopVectorizationLegality.cpp --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides loop vectorization legality analysis. Original code 10 // resided in LoopVectorize.cpp for a long time. 11 // 12 // At this point, it is implemented as a utility class, not as an analysis 13 // pass. It should be easy to create an analysis pass around it if there 14 // is a need (but D45420 needs to happen first). 15 // 16 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 17 #include "llvm/Analysis/Loads.h" 18 #include "llvm/Analysis/ValueTracking.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/IntrinsicInst.h" 21 #include "llvm/IR/PatternMatch.h" 22 #include "llvm/Transforms/Vectorize/LoopVectorize.h" 23 24 using namespace llvm; 25 using namespace PatternMatch; 26 27 #define LV_NAME "loop-vectorize" 28 #define DEBUG_TYPE LV_NAME 29 30 extern cl::opt<bool> EnableVPlanPredication; 31 32 static cl::opt<bool> 33 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 34 cl::desc("Enable if-conversion during vectorization.")); 35 36 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 37 "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 38 cl::desc("The maximum allowed number of runtime memory checks with a " 39 "vectorize(enable) pragma.")); 40 41 static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 42 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 43 cl::desc("The maximum number of SCEV checks allowed.")); 44 45 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 46 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 47 cl::desc("The maximum number of SCEV checks allowed with a " 48 "vectorize(enable) pragma")); 49 50 /// Maximum vectorization interleave count. 51 static const unsigned MaxInterleaveFactor = 16; 52 53 namespace llvm { 54 55 bool LoopVectorizeHints::Hint::validate(unsigned Val) { 56 switch (Kind) { 57 case HK_WIDTH: 58 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 59 case HK_UNROLL: 60 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 61 case HK_FORCE: 62 return (Val <= 1); 63 case HK_ISVECTORIZED: 64 case HK_PREDICATE: 65 return (Val == 0 || Val == 1); 66 } 67 return false; 68 } 69 70 LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 71 bool InterleaveOnlyWhenForced, 72 OptimizationRemarkEmitter &ORE) 73 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 74 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 75 Force("vectorize.enable", FK_Undefined, HK_FORCE), 76 IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 77 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), 78 ORE(ORE) { 79 // Populate values with existing loop metadata. 80 getHintsFromMetadata(); 81 82 // force-vector-interleave overrides DisableInterleaving. 83 if (VectorizerParams::isInterleaveForced()) 84 Interleave.Value = VectorizerParams::VectorizationInterleave; 85 86 if (IsVectorized.Value != 1) 87 // If the vectorization width and interleaving count are both 1 then 88 // consider the loop to have been already vectorized because there's 89 // nothing more that we can do. 90 IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 91 LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 92 << "LV: Interleaving disabled by the pass manager\n"); 93 } 94 95 void LoopVectorizeHints::setAlreadyVectorized() { 96 LLVMContext &Context = TheLoop->getHeader()->getContext(); 97 98 MDNode *IsVectorizedMD = MDNode::get( 99 Context, 100 {MDString::get(Context, "llvm.loop.isvectorized"), 101 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 102 MDNode *LoopID = TheLoop->getLoopID(); 103 MDNode *NewLoopID = 104 makePostTransformationMetadata(Context, LoopID, 105 {Twine(Prefix(), "vectorize.").str(), 106 Twine(Prefix(), "interleave.").str()}, 107 {IsVectorizedMD}); 108 TheLoop->setLoopID(NewLoopID); 109 110 // Update internal cache. 111 IsVectorized.Value = 1; 112 } 113 114 bool LoopVectorizeHints::allowVectorization( 115 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 116 if (getForce() == LoopVectorizeHints::FK_Disabled) { 117 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 118 emitRemarkWithHints(); 119 return false; 120 } 121 122 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 123 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 124 emitRemarkWithHints(); 125 return false; 126 } 127 128 if (getIsVectorized() == 1) { 129 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 130 // FIXME: Add interleave.disable metadata. This will allow 131 // vectorize.disable to be used without disabling the pass and errors 132 // to differentiate between disabled vectorization and a width of 1. 133 ORE.emit([&]() { 134 return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 135 "AllDisabled", L->getStartLoc(), 136 L->getHeader()) 137 << "loop not vectorized: vectorization and interleaving are " 138 "explicitly disabled, or the loop has already been " 139 "vectorized"; 140 }); 141 return false; 142 } 143 144 return true; 145 } 146 147 void LoopVectorizeHints::emitRemarkWithHints() const { 148 using namespace ore; 149 150 ORE.emit([&]() { 151 if (Force.Value == LoopVectorizeHints::FK_Disabled) 152 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 153 TheLoop->getStartLoc(), 154 TheLoop->getHeader()) 155 << "loop not vectorized: vectorization is explicitly disabled"; 156 else { 157 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 158 TheLoop->getStartLoc(), TheLoop->getHeader()); 159 R << "loop not vectorized"; 160 if (Force.Value == LoopVectorizeHints::FK_Enabled) { 161 R << " (Force=" << NV("Force", true); 162 if (Width.Value != 0) 163 R << ", Vector Width=" << NV("VectorWidth", Width.Value); 164 if (Interleave.Value != 0) 165 R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 166 R << ")"; 167 } 168 return R; 169 } 170 }); 171 } 172 173 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 174 if (getWidth() == 1) 175 return LV_NAME; 176 if (getForce() == LoopVectorizeHints::FK_Disabled) 177 return LV_NAME; 178 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 179 return LV_NAME; 180 return OptimizationRemarkAnalysis::AlwaysPrint; 181 } 182 183 void LoopVectorizeHints::getHintsFromMetadata() { 184 MDNode *LoopID = TheLoop->getLoopID(); 185 if (!LoopID) 186 return; 187 188 // First operand should refer to the loop id itself. 189 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 190 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 191 192 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 193 const MDString *S = nullptr; 194 SmallVector<Metadata *, 4> Args; 195 196 // The expected hint is either a MDString or a MDNode with the first 197 // operand a MDString. 198 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 199 if (!MD || MD->getNumOperands() == 0) 200 continue; 201 S = dyn_cast<MDString>(MD->getOperand(0)); 202 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 203 Args.push_back(MD->getOperand(i)); 204 } else { 205 S = dyn_cast<MDString>(LoopID->getOperand(i)); 206 assert(Args.size() == 0 && "too many arguments for MDString"); 207 } 208 209 if (!S) 210 continue; 211 212 // Check if the hint starts with the loop metadata prefix. 213 StringRef Name = S->getString(); 214 if (Args.size() == 1) 215 setHint(Name, Args[0]); 216 } 217 } 218 219 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 220 if (!Name.startswith(Prefix())) 221 return; 222 Name = Name.substr(Prefix().size(), StringRef::npos); 223 224 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 225 if (!C) 226 return; 227 unsigned Val = C->getZExtValue(); 228 229 Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 230 for (auto H : Hints) { 231 if (Name == H->Name) { 232 if (H->validate(Val)) 233 H->Value = Val; 234 else 235 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 236 break; 237 } 238 } 239 } 240 241 bool LoopVectorizationRequirements::doesNotMeet( 242 Function *F, Loop *L, const LoopVectorizeHints &Hints) { 243 const char *PassName = Hints.vectorizeAnalysisPassName(); 244 bool Failed = false; 245 if (UnsafeAlgebraInst && !Hints.allowReordering()) { 246 ORE.emit([&]() { 247 return OptimizationRemarkAnalysisFPCommute( 248 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 249 UnsafeAlgebraInst->getParent()) 250 << "loop not vectorized: cannot prove it is safe to reorder " 251 "floating-point operations"; 252 }); 253 Failed = true; 254 } 255 256 // Test if runtime memcheck thresholds are exceeded. 257 bool PragmaThresholdReached = 258 NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 259 bool ThresholdReached = 260 NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 261 if ((ThresholdReached && !Hints.allowReordering()) || 262 PragmaThresholdReached) { 263 ORE.emit([&]() { 264 return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 265 L->getStartLoc(), 266 L->getHeader()) 267 << "loop not vectorized: cannot prove it is safe to reorder " 268 "memory operations"; 269 }); 270 LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 271 Failed = true; 272 } 273 274 return Failed; 275 } 276 277 // Return true if the inner loop \p Lp is uniform with regard to the outer loop 278 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 279 // executing the inner loop will execute the same iterations). This check is 280 // very constrained for now but it will be relaxed in the future. \p Lp is 281 // considered uniform if it meets all the following conditions: 282 // 1) it has a canonical IV (starting from 0 and with stride 1), 283 // 2) its latch terminator is a conditional branch and, 284 // 3) its latch condition is a compare instruction whose operands are the 285 // canonical IV and an OuterLp invariant. 286 // This check doesn't take into account the uniformity of other conditions not 287 // related to the loop latch because they don't affect the loop uniformity. 288 // 289 // NOTE: We decided to keep all these checks and its associated documentation 290 // together so that we can easily have a picture of the current supported loop 291 // nests. However, some of the current checks don't depend on \p OuterLp and 292 // would be redundantly executed for each \p Lp if we invoked this function for 293 // different candidate outer loops. This is not the case for now because we 294 // don't currently have the infrastructure to evaluate multiple candidate outer 295 // loops and \p OuterLp will be a fixed parameter while we only support explicit 296 // outer loop vectorization. It's also very likely that these checks go away 297 // before introducing the aforementioned infrastructure. However, if this is not 298 // the case, we should move the \p OuterLp independent checks to a separate 299 // function that is only executed once for each \p Lp. 300 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 301 assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 302 303 // If Lp is the outer loop, it's uniform by definition. 304 if (Lp == OuterLp) 305 return true; 306 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 307 308 // 1. 309 PHINode *IV = Lp->getCanonicalInductionVariable(); 310 if (!IV) { 311 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 312 return false; 313 } 314 315 // 2. 316 BasicBlock *Latch = Lp->getLoopLatch(); 317 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 318 if (!LatchBr || LatchBr->isUnconditional()) { 319 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 320 return false; 321 } 322 323 // 3. 324 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 325 if (!LatchCmp) { 326 LLVM_DEBUG( 327 dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 328 return false; 329 } 330 331 Value *CondOp0 = LatchCmp->getOperand(0); 332 Value *CondOp1 = LatchCmp->getOperand(1); 333 Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 334 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 335 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 336 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 337 return false; 338 } 339 340 return true; 341 } 342 343 // Return true if \p Lp and all its nested loops are uniform with regard to \p 344 // OuterLp. 345 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 346 if (!isUniformLoop(Lp, OuterLp)) 347 return false; 348 349 // Check if nested loops are uniform. 350 for (Loop *SubLp : *Lp) 351 if (!isUniformLoopNest(SubLp, OuterLp)) 352 return false; 353 354 return true; 355 } 356 357 /// Check whether it is safe to if-convert this phi node. 358 /// 359 /// Phi nodes with constant expressions that can trap are not safe to if 360 /// convert. 361 static bool canIfConvertPHINodes(BasicBlock *BB) { 362 for (PHINode &Phi : BB->phis()) { 363 for (Value *V : Phi.incoming_values()) 364 if (auto *C = dyn_cast<Constant>(V)) 365 if (C->canTrap()) 366 return false; 367 } 368 return true; 369 } 370 371 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 372 if (Ty->isPointerTy()) 373 return DL.getIntPtrType(Ty); 374 375 // It is possible that char's or short's overflow when we ask for the loop's 376 // trip count, work around this by changing the type size. 377 if (Ty->getScalarSizeInBits() < 32) 378 return Type::getInt32Ty(Ty->getContext()); 379 380 return Ty; 381 } 382 383 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 384 Ty0 = convertPointerToIntegerType(DL, Ty0); 385 Ty1 = convertPointerToIntegerType(DL, Ty1); 386 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 387 return Ty0; 388 return Ty1; 389 } 390 391 /// Check that the instruction has outside loop users and is not an 392 /// identified reduction variable. 393 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 394 SmallPtrSetImpl<Value *> &AllowedExit) { 395 // Reductions, Inductions and non-header phis are allowed to have exit users. All 396 // other instructions must not have external users. 397 if (!AllowedExit.count(Inst)) 398 // Check that all of the users of the loop are inside the BB. 399 for (User *U : Inst->users()) { 400 Instruction *UI = cast<Instruction>(U); 401 // This user may be a reduction exit value. 402 if (!TheLoop->contains(UI)) { 403 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 404 return true; 405 } 406 } 407 return false; 408 } 409 410 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 411 const ValueToValueMap &Strides = 412 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 413 414 bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); 415 int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); 416 if (Stride == 1 || Stride == -1) 417 return Stride; 418 return 0; 419 } 420 421 bool LoopVectorizationLegality::isUniform(Value *V) { 422 return LAI->isUniform(V); 423 } 424 425 bool LoopVectorizationLegality::canVectorizeOuterLoop() { 426 assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 427 // Store the result and return it at the end instead of exiting early, in case 428 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 429 bool Result = true; 430 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 431 432 for (BasicBlock *BB : TheLoop->blocks()) { 433 // Check whether the BB terminator is a BranchInst. Any other terminator is 434 // not supported yet. 435 auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 436 if (!Br) { 437 reportVectorizationFailure("Unsupported basic block terminator", 438 "loop control flow is not understood by vectorizer", 439 "CFGNotUnderstood", ORE, TheLoop); 440 if (DoExtraAnalysis) 441 Result = false; 442 else 443 return false; 444 } 445 446 // Check whether the BranchInst is a supported one. Only unconditional 447 // branches, conditional branches with an outer loop invariant condition or 448 // backedges are supported. 449 // FIXME: We skip these checks when VPlan predication is enabled as we 450 // want to allow divergent branches. This whole check will be removed 451 // once VPlan predication is on by default. 452 if (!EnableVPlanPredication && Br && Br->isConditional() && 453 !TheLoop->isLoopInvariant(Br->getCondition()) && 454 !LI->isLoopHeader(Br->getSuccessor(0)) && 455 !LI->isLoopHeader(Br->getSuccessor(1))) { 456 reportVectorizationFailure("Unsupported conditional branch", 457 "loop control flow is not understood by vectorizer", 458 "CFGNotUnderstood", ORE, TheLoop); 459 if (DoExtraAnalysis) 460 Result = false; 461 else 462 return false; 463 } 464 } 465 466 // Check whether inner loops are uniform. At this point, we only support 467 // simple outer loops scenarios with uniform nested loops. 468 if (!isUniformLoopNest(TheLoop /*loop nest*/, 469 TheLoop /*context outer loop*/)) { 470 reportVectorizationFailure("Outer loop contains divergent loops", 471 "loop control flow is not understood by vectorizer", 472 "CFGNotUnderstood", ORE, TheLoop); 473 if (DoExtraAnalysis) 474 Result = false; 475 else 476 return false; 477 } 478 479 // Check whether we are able to set up outer loop induction. 480 if (!setupOuterLoopInductions()) { 481 reportVectorizationFailure("Unsupported outer loop Phi(s)", 482 "Unsupported outer loop Phi(s)", 483 "UnsupportedPhi", ORE, TheLoop); 484 if (DoExtraAnalysis) 485 Result = false; 486 else 487 return false; 488 } 489 490 return Result; 491 } 492 493 void LoopVectorizationLegality::addInductionPhi( 494 PHINode *Phi, const InductionDescriptor &ID, 495 SmallPtrSetImpl<Value *> &AllowedExit) { 496 Inductions[Phi] = ID; 497 498 // In case this induction also comes with casts that we know we can ignore 499 // in the vectorized loop body, record them here. All casts could be recorded 500 // here for ignoring, but suffices to record only the first (as it is the 501 // only one that may bw used outside the cast sequence). 502 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 503 if (!Casts.empty()) 504 InductionCastsToIgnore.insert(*Casts.begin()); 505 506 Type *PhiTy = Phi->getType(); 507 const DataLayout &DL = Phi->getModule()->getDataLayout(); 508 509 // Get the widest type. 510 if (!PhiTy->isFloatingPointTy()) { 511 if (!WidestIndTy) 512 WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 513 else 514 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 515 } 516 517 // Int inductions are special because we only allow one IV. 518 if (ID.getKind() == InductionDescriptor::IK_IntInduction && 519 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 520 isa<Constant>(ID.getStartValue()) && 521 cast<Constant>(ID.getStartValue())->isNullValue()) { 522 523 // Use the phi node with the widest type as induction. Use the last 524 // one if there are multiple (no good reason for doing this other 525 // than it is expedient). We've checked that it begins at zero and 526 // steps by one, so this is a canonical induction variable. 527 if (!PrimaryInduction || PhiTy == WidestIndTy) 528 PrimaryInduction = Phi; 529 } 530 531 // Both the PHI node itself, and the "post-increment" value feeding 532 // back into the PHI node may have external users. 533 // We can allow those uses, except if the SCEVs we have for them rely 534 // on predicates that only hold within the loop, since allowing the exit 535 // currently means re-using this SCEV outside the loop (see PR33706 for more 536 // details). 537 if (PSE.getUnionPredicate().isAlwaysTrue()) { 538 AllowedExit.insert(Phi); 539 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 540 } 541 542 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 543 } 544 545 bool LoopVectorizationLegality::setupOuterLoopInductions() { 546 BasicBlock *Header = TheLoop->getHeader(); 547 548 // Returns true if a given Phi is a supported induction. 549 auto isSupportedPhi = [&](PHINode &Phi) -> bool { 550 InductionDescriptor ID; 551 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 552 ID.getKind() == InductionDescriptor::IK_IntInduction) { 553 addInductionPhi(&Phi, ID, AllowedExit); 554 return true; 555 } else { 556 // Bail out for any Phi in the outer loop header that is not a supported 557 // induction. 558 LLVM_DEBUG( 559 dbgs() 560 << "LV: Found unsupported PHI for outer loop vectorization.\n"); 561 return false; 562 } 563 }; 564 565 if (llvm::all_of(Header->phis(), isSupportedPhi)) 566 return true; 567 else 568 return false; 569 } 570 571 /// Checks if a function is scalarizable according to the TLI, in 572 /// the sense that it should be vectorized and then expanded in 573 /// multiple scalarcalls. This is represented in the 574 /// TLI via mappings that do not specify a vector name, as in the 575 /// following example: 576 /// 577 /// const VecDesc VecIntrinsics[] = { 578 /// {"llvm.phx.abs.i32", "", 4} 579 /// }; 580 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { 581 const StringRef ScalarName = CI.getCalledFunction()->getName(); 582 bool Scalarize = TLI.isFunctionVectorizable(ScalarName); 583 // Check that all known VFs are not associated to a vector 584 // function, i.e. the vector name is emty. 585 if (Scalarize) 586 for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); 587 VF <= WidestVF; VF *= 2) { 588 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); 589 } 590 return Scalarize; 591 } 592 593 bool LoopVectorizationLegality::canVectorizeInstrs() { 594 BasicBlock *Header = TheLoop->getHeader(); 595 596 // Look for the attribute signaling the absence of NaNs. 597 Function &F = *Header->getParent(); 598 HasFunNoNaNAttr = 599 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 600 601 // For each block in the loop. 602 for (BasicBlock *BB : TheLoop->blocks()) { 603 // Scan the instructions in the block and look for hazards. 604 for (Instruction &I : *BB) { 605 if (auto *Phi = dyn_cast<PHINode>(&I)) { 606 Type *PhiTy = Phi->getType(); 607 // Check that this PHI type is allowed. 608 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 609 !PhiTy->isPointerTy()) { 610 reportVectorizationFailure("Found a non-int non-pointer PHI", 611 "loop control flow is not understood by vectorizer", 612 "CFGNotUnderstood", ORE, TheLoop); 613 return false; 614 } 615 616 // If this PHINode is not in the header block, then we know that we 617 // can convert it to select during if-conversion. No need to check if 618 // the PHIs in this block are induction or reduction variables. 619 if (BB != Header) { 620 // Non-header phi nodes that have outside uses can be vectorized. Add 621 // them to the list of allowed exits. 622 // Unsafe cyclic dependencies with header phis are identified during 623 // legalization for reduction, induction and first order 624 // recurrences. 625 AllowedExit.insert(&I); 626 continue; 627 } 628 629 // We only allow if-converted PHIs with exactly two incoming values. 630 if (Phi->getNumIncomingValues() != 2) { 631 reportVectorizationFailure("Found an invalid PHI", 632 "loop control flow is not understood by vectorizer", 633 "CFGNotUnderstood", ORE, TheLoop, Phi); 634 return false; 635 } 636 637 RecurrenceDescriptor RedDes; 638 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 639 DT)) { 640 if (RedDes.hasUnsafeAlgebra()) 641 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 642 AllowedExit.insert(RedDes.getLoopExitInstr()); 643 Reductions[Phi] = RedDes; 644 continue; 645 } 646 647 // TODO: Instead of recording the AllowedExit, it would be good to record the 648 // complementary set: NotAllowedExit. These include (but may not be 649 // limited to): 650 // 1. Reduction phis as they represent the one-before-last value, which 651 // is not available when vectorized 652 // 2. Induction phis and increment when SCEV predicates cannot be used 653 // outside the loop - see addInductionPhi 654 // 3. Non-Phis with outside uses when SCEV predicates cannot be used 655 // outside the loop - see call to hasOutsideLoopUser in the non-phi 656 // handling below 657 // 4. FirstOrderRecurrence phis that can possibly be handled by 658 // extraction. 659 // By recording these, we can then reason about ways to vectorize each 660 // of these NotAllowedExit. 661 InductionDescriptor ID; 662 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 663 addInductionPhi(Phi, ID, AllowedExit); 664 if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 665 Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 666 continue; 667 } 668 669 if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 670 SinkAfter, DT)) { 671 AllowedExit.insert(Phi); 672 FirstOrderRecurrences.insert(Phi); 673 continue; 674 } 675 676 // As a last resort, coerce the PHI to a AddRec expression 677 // and re-try classifying it a an induction PHI. 678 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 679 addInductionPhi(Phi, ID, AllowedExit); 680 continue; 681 } 682 683 reportVectorizationFailure("Found an unidentified PHI", 684 "value that could not be identified as " 685 "reduction is used outside the loop", 686 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 687 return false; 688 } // end of PHI handling 689 690 // We handle calls that: 691 // * Are debug info intrinsics. 692 // * Have a mapping to an IR intrinsic. 693 // * Have a vector version available. 694 auto *CI = dyn_cast<CallInst>(&I); 695 696 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 697 !isa<DbgInfoIntrinsic>(CI) && 698 !(CI->getCalledFunction() && TLI && 699 (!VFDatabase::getMappings(*CI).empty() || 700 isTLIScalarize(*TLI, *CI)))) { 701 // If the call is a recognized math libary call, it is likely that 702 // we can vectorize it given loosened floating-point constraints. 703 LibFunc Func; 704 bool IsMathLibCall = 705 TLI && CI->getCalledFunction() && 706 CI->getType()->isFloatingPointTy() && 707 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 708 TLI->hasOptimizedCodeGen(Func); 709 710 if (IsMathLibCall) { 711 // TODO: Ideally, we should not use clang-specific language here, 712 // but it's hard to provide meaningful yet generic advice. 713 // Also, should this be guarded by allowExtraAnalysis() and/or be part 714 // of the returned info from isFunctionVectorizable()? 715 reportVectorizationFailure( 716 "Found a non-intrinsic callsite", 717 "library call cannot be vectorized. " 718 "Try compiling with -fno-math-errno, -ffast-math, " 719 "or similar flags", 720 "CantVectorizeLibcall", ORE, TheLoop, CI); 721 } else { 722 reportVectorizationFailure("Found a non-intrinsic callsite", 723 "call instruction cannot be vectorized", 724 "CantVectorizeLibcall", ORE, TheLoop, CI); 725 } 726 return false; 727 } 728 729 // Some intrinsics have scalar arguments and should be same in order for 730 // them to be vectorized (i.e. loop invariant). 731 if (CI) { 732 auto *SE = PSE.getSE(); 733 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 734 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 735 if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 736 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 737 reportVectorizationFailure("Found unvectorizable intrinsic", 738 "intrinsic instruction cannot be vectorized", 739 "CantVectorizeIntrinsic", ORE, TheLoop, CI); 740 return false; 741 } 742 } 743 } 744 745 // Check that the instruction return type is vectorizable. 746 // Also, we can't vectorize extractelement instructions. 747 if ((!VectorType::isValidElementType(I.getType()) && 748 !I.getType()->isVoidTy()) || 749 isa<ExtractElementInst>(I)) { 750 reportVectorizationFailure("Found unvectorizable type", 751 "instruction return type cannot be vectorized", 752 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 753 return false; 754 } 755 756 // Check that the stored type is vectorizable. 757 if (auto *ST = dyn_cast<StoreInst>(&I)) { 758 Type *T = ST->getValueOperand()->getType(); 759 if (!VectorType::isValidElementType(T)) { 760 reportVectorizationFailure("Store instruction cannot be vectorized", 761 "store instruction cannot be vectorized", 762 "CantVectorizeStore", ORE, TheLoop, ST); 763 return false; 764 } 765 766 // For nontemporal stores, check that a nontemporal vector version is 767 // supported on the target. 768 if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 769 // Arbitrarily try a vector of 2 elements. 770 Type *VecTy = VectorType::get(T, /*NumElements=*/2); 771 assert(VecTy && "did not find vectorized version of stored type"); 772 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) { 773 reportVectorizationFailure( 774 "nontemporal store instruction cannot be vectorized", 775 "nontemporal store instruction cannot be vectorized", 776 "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 777 return false; 778 } 779 } 780 781 } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 782 if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 783 // For nontemporal loads, check that a nontemporal vector version is 784 // supported on the target (arbitrarily try a vector of 2 elements). 785 Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 786 assert(VecTy && "did not find vectorized version of load type"); 787 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) { 788 reportVectorizationFailure( 789 "nontemporal load instruction cannot be vectorized", 790 "nontemporal load instruction cannot be vectorized", 791 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 792 return false; 793 } 794 } 795 796 // FP instructions can allow unsafe algebra, thus vectorizable by 797 // non-IEEE-754 compliant SIMD units. 798 // This applies to floating-point math operations and calls, not memory 799 // operations, shuffles, or casts, as they don't change precision or 800 // semantics. 801 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 802 !I.isFast()) { 803 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 804 Hints->setPotentiallyUnsafe(); 805 } 806 807 // Reduction instructions are allowed to have exit users. 808 // All other instructions must not have external users. 809 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 810 // We can safely vectorize loops where instructions within the loop are 811 // used outside the loop only if the SCEV predicates within the loop is 812 // same as outside the loop. Allowing the exit means reusing the SCEV 813 // outside the loop. 814 if (PSE.getUnionPredicate().isAlwaysTrue()) { 815 AllowedExit.insert(&I); 816 continue; 817 } 818 reportVectorizationFailure("Value cannot be used outside the loop", 819 "value cannot be used outside the loop", 820 "ValueUsedOutsideLoop", ORE, TheLoop, &I); 821 return false; 822 } 823 } // next instr. 824 } 825 826 if (!PrimaryInduction) { 827 if (Inductions.empty()) { 828 reportVectorizationFailure("Did not find one integer induction var", 829 "loop induction variable could not be identified", 830 "NoInductionVariable", ORE, TheLoop); 831 return false; 832 } else if (!WidestIndTy) { 833 reportVectorizationFailure("Did not find one integer induction var", 834 "integer loop induction variable could not be identified", 835 "NoIntegerInductionVariable", ORE, TheLoop); 836 return false; 837 } else { 838 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 839 } 840 } 841 842 // For first order recurrences, we use the previous value (incoming value from 843 // the latch) to check if it dominates all users of the recurrence. Bail out 844 // if we have to sink such an instruction for another recurrence, as the 845 // dominance requirement may not hold after sinking. 846 BasicBlock *LoopLatch = TheLoop->getLoopLatch(); 847 if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { 848 Instruction *V = 849 cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch)); 850 return SinkAfter.find(V) != SinkAfter.end(); 851 })) 852 return false; 853 854 // Now we know the widest induction type, check if our found induction 855 // is the same size. If it's not, unset it here and InnerLoopVectorizer 856 // will create another. 857 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 858 PrimaryInduction = nullptr; 859 860 return true; 861 } 862 863 bool LoopVectorizationLegality::canVectorizeMemory() { 864 LAI = &(*GetLAA)(*TheLoop); 865 const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 866 if (LAR) { 867 ORE->emit([&]() { 868 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 869 "loop not vectorized: ", *LAR); 870 }); 871 } 872 if (!LAI->canVectorizeMemory()) 873 return false; 874 875 if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 876 reportVectorizationFailure("Stores to a uniform address", 877 "write to a loop invariant address could not be vectorized", 878 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 879 return false; 880 } 881 Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 882 PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 883 884 return true; 885 } 886 887 bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 888 Value *In0 = const_cast<Value *>(V); 889 PHINode *PN = dyn_cast_or_null<PHINode>(In0); 890 if (!PN) 891 return false; 892 893 return Inductions.count(PN); 894 } 895 896 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 897 auto *Inst = dyn_cast<Instruction>(V); 898 return (Inst && InductionCastsToIgnore.count(Inst)); 899 } 900 901 bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 902 return isInductionPhi(V) || isCastedInductionVariable(V); 903 } 904 905 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 906 return FirstOrderRecurrences.count(Phi); 907 } 908 909 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 910 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 911 } 912 913 bool LoopVectorizationLegality::blockCanBePredicated( 914 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 915 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 916 917 for (Instruction &I : *BB) { 918 // Check that we don't have a constant expression that can trap as operand. 919 for (Value *Operand : I.operands()) { 920 if (auto *C = dyn_cast<Constant>(Operand)) 921 if (C->canTrap()) 922 return false; 923 } 924 925 // We can predicate blocks with calls to assume, as long as we drop them in 926 // case we flatten the CFG via predication. 927 if (match(&I, m_Intrinsic<Intrinsic::assume>())) { 928 ConditionalAssumes.insert(&I); 929 continue; 930 } 931 932 // We might be able to hoist the load. 933 if (I.mayReadFromMemory()) { 934 auto *LI = dyn_cast<LoadInst>(&I); 935 if (!LI) 936 return false; 937 if (!SafePtrs.count(LI->getPointerOperand())) { 938 // !llvm.mem.parallel_loop_access implies if-conversion safety. 939 // Otherwise, record that the load needs (real or emulated) masking 940 // and let the cost model decide. 941 if (!IsAnnotatedParallel || PreserveGuards) 942 MaskedOp.insert(LI); 943 continue; 944 } 945 } 946 947 if (I.mayWriteToMemory()) { 948 auto *SI = dyn_cast<StoreInst>(&I); 949 if (!SI) 950 return false; 951 // Predicated store requires some form of masking: 952 // 1) masked store HW instruction, 953 // 2) emulation via load-blend-store (only if safe and legal to do so, 954 // be aware on the race conditions), or 955 // 3) element-by-element predicate check and scalar store. 956 MaskedOp.insert(SI); 957 continue; 958 } 959 if (I.mayThrow()) 960 return false; 961 } 962 963 return true; 964 } 965 966 bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 967 if (!EnableIfConversion) { 968 reportVectorizationFailure("If-conversion is disabled", 969 "if-conversion is disabled", 970 "IfConversionDisabled", 971 ORE, TheLoop); 972 return false; 973 } 974 975 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 976 977 // A list of pointers which are known to be dereferenceable within scope of 978 // the loop body for each iteration of the loop which executes. That is, 979 // the memory pointed to can be dereferenced (with the access size implied by 980 // the value's type) unconditionally within the loop header without 981 // introducing a new fault. 982 SmallPtrSet<Value *, 8> SafePointers; 983 984 // Collect safe addresses. 985 for (BasicBlock *BB : TheLoop->blocks()) { 986 if (!blockNeedsPredication(BB)) { 987 for (Instruction &I : *BB) 988 if (auto *Ptr = getLoadStorePointerOperand(&I)) 989 SafePointers.insert(Ptr); 990 continue; 991 } 992 993 // For a block which requires predication, a address may be safe to access 994 // in the loop w/o predication if we can prove dereferenceability facts 995 // sufficient to ensure it'll never fault within the loop. For the moment, 996 // we restrict this to loads; stores are more complicated due to 997 // concurrency restrictions. 998 ScalarEvolution &SE = *PSE.getSE(); 999 for (Instruction &I : *BB) { 1000 LoadInst *LI = dyn_cast<LoadInst>(&I); 1001 if (LI && !mustSuppressSpeculation(*LI) && 1002 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) 1003 SafePointers.insert(LI->getPointerOperand()); 1004 } 1005 } 1006 1007 // Collect the blocks that need predication. 1008 BasicBlock *Header = TheLoop->getHeader(); 1009 for (BasicBlock *BB : TheLoop->blocks()) { 1010 // We don't support switch statements inside loops. 1011 if (!isa<BranchInst>(BB->getTerminator())) { 1012 reportVectorizationFailure("Loop contains a switch statement", 1013 "loop contains a switch statement", 1014 "LoopContainsSwitch", ORE, TheLoop, 1015 BB->getTerminator()); 1016 return false; 1017 } 1018 1019 // We must be able to predicate all blocks that need to be predicated. 1020 if (blockNeedsPredication(BB)) { 1021 if (!blockCanBePredicated(BB, SafePointers)) { 1022 reportVectorizationFailure( 1023 "Control flow cannot be substituted for a select", 1024 "control flow cannot be substituted for a select", 1025 "NoCFGForSelect", ORE, TheLoop, 1026 BB->getTerminator()); 1027 return false; 1028 } 1029 } else if (BB != Header && !canIfConvertPHINodes(BB)) { 1030 reportVectorizationFailure( 1031 "Control flow cannot be substituted for a select", 1032 "control flow cannot be substituted for a select", 1033 "NoCFGForSelect", ORE, TheLoop, 1034 BB->getTerminator()); 1035 return false; 1036 } 1037 } 1038 1039 // We can if-convert this loop. 1040 return true; 1041 } 1042 1043 // Helper function to canVectorizeLoopNestCFG. 1044 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 1045 bool UseVPlanNativePath) { 1046 assert((UseVPlanNativePath || Lp->empty()) && 1047 "VPlan-native path is not enabled."); 1048 1049 // TODO: ORE should be improved to show more accurate information when an 1050 // outer loop can't be vectorized because a nested loop is not understood or 1051 // legal. Something like: "outer_loop_location: loop not vectorized: 1052 // (inner_loop_location) loop control flow is not understood by vectorizer". 1053 1054 // Store the result and return it at the end instead of exiting early, in case 1055 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1056 bool Result = true; 1057 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1058 1059 // We must have a loop in canonical form. Loops with indirectbr in them cannot 1060 // be canonicalized. 1061 if (!Lp->getLoopPreheader()) { 1062 reportVectorizationFailure("Loop doesn't have a legal pre-header", 1063 "loop control flow is not understood by vectorizer", 1064 "CFGNotUnderstood", ORE, TheLoop); 1065 if (DoExtraAnalysis) 1066 Result = false; 1067 else 1068 return false; 1069 } 1070 1071 // We must have a single backedge. 1072 if (Lp->getNumBackEdges() != 1) { 1073 reportVectorizationFailure("The loop must have a single backedge", 1074 "loop control flow is not understood by vectorizer", 1075 "CFGNotUnderstood", ORE, TheLoop); 1076 if (DoExtraAnalysis) 1077 Result = false; 1078 else 1079 return false; 1080 } 1081 1082 // We must have a single exiting block. 1083 if (!Lp->getExitingBlock()) { 1084 reportVectorizationFailure("The loop must have an exiting block", 1085 "loop control flow is not understood by vectorizer", 1086 "CFGNotUnderstood", ORE, TheLoop); 1087 if (DoExtraAnalysis) 1088 Result = false; 1089 else 1090 return false; 1091 } 1092 1093 // We only handle bottom-tested loops, i.e. loop in which the condition is 1094 // checked at the end of each iteration. With that we can assume that all 1095 // instructions in the loop are executed the same number of times. 1096 if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 1097 reportVectorizationFailure("The exiting block is not the loop latch", 1098 "loop control flow is not understood by vectorizer", 1099 "CFGNotUnderstood", ORE, TheLoop); 1100 if (DoExtraAnalysis) 1101 Result = false; 1102 else 1103 return false; 1104 } 1105 1106 return Result; 1107 } 1108 1109 bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1110 Loop *Lp, bool UseVPlanNativePath) { 1111 // Store the result and return it at the end instead of exiting early, in case 1112 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1113 bool Result = true; 1114 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1115 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1116 if (DoExtraAnalysis) 1117 Result = false; 1118 else 1119 return false; 1120 } 1121 1122 // Recursively check whether the loop control flow of nested loops is 1123 // understood. 1124 for (Loop *SubLp : *Lp) 1125 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1126 if (DoExtraAnalysis) 1127 Result = false; 1128 else 1129 return false; 1130 } 1131 1132 return Result; 1133 } 1134 1135 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1136 // Store the result and return it at the end instead of exiting early, in case 1137 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1138 bool Result = true; 1139 1140 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1141 // Check whether the loop-related control flow in the loop nest is expected by 1142 // vectorizer. 1143 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1144 if (DoExtraAnalysis) 1145 Result = false; 1146 else 1147 return false; 1148 } 1149 1150 // We need to have a loop header. 1151 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1152 << '\n'); 1153 1154 // Specific checks for outer loops. We skip the remaining legal checks at this 1155 // point because they don't support outer loops. 1156 if (!TheLoop->empty()) { 1157 assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1158 1159 if (!canVectorizeOuterLoop()) { 1160 reportVectorizationFailure("Unsupported outer loop", 1161 "unsupported outer loop", 1162 "UnsupportedOuterLoop", 1163 ORE, TheLoop); 1164 // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1165 // outer loops. 1166 return false; 1167 } 1168 1169 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1170 return Result; 1171 } 1172 1173 assert(TheLoop->empty() && "Inner loop expected."); 1174 // Check if we can if-convert non-single-bb loops. 1175 unsigned NumBlocks = TheLoop->getNumBlocks(); 1176 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1177 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1178 if (DoExtraAnalysis) 1179 Result = false; 1180 else 1181 return false; 1182 } 1183 1184 // Check if we can vectorize the instructions and CFG in this loop. 1185 if (!canVectorizeInstrs()) { 1186 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1187 if (DoExtraAnalysis) 1188 Result = false; 1189 else 1190 return false; 1191 } 1192 1193 // Go over each instruction and look at memory deps. 1194 if (!canVectorizeMemory()) { 1195 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1196 if (DoExtraAnalysis) 1197 Result = false; 1198 else 1199 return false; 1200 } 1201 1202 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1203 << (LAI->getRuntimePointerChecking()->Need 1204 ? " (with a runtime bound check)" 1205 : "") 1206 << "!\n"); 1207 1208 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1209 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1210 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1211 1212 if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 1213 reportVectorizationFailure("Too many SCEV checks needed", 1214 "Too many SCEV assumptions need to be made and checked at runtime", 1215 "TooManySCEVRunTimeChecks", ORE, TheLoop); 1216 if (DoExtraAnalysis) 1217 Result = false; 1218 else 1219 return false; 1220 } 1221 1222 // Okay! We've done all the tests. If any have failed, return false. Otherwise 1223 // we can vectorize, and at this point we don't have any other mem analysis 1224 // which may limit our maximum vectorization factor, so just return true with 1225 // no restrictions. 1226 return Result; 1227 } 1228 1229 bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1230 1231 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1232 1233 SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1234 1235 for (auto &Reduction : getReductionVars()) 1236 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1237 1238 // TODO: handle non-reduction outside users when tail is folded by masking. 1239 for (auto *AE : AllowedExit) { 1240 // Check that all users of allowed exit values are inside the loop or 1241 // are the live-out of a reduction. 1242 if (ReductionLiveOuts.count(AE)) 1243 continue; 1244 for (User *U : AE->users()) { 1245 Instruction *UI = cast<Instruction>(U); 1246 if (TheLoop->contains(UI)) 1247 continue; 1248 reportVectorizationFailure( 1249 "Cannot fold tail by masking, loop has an outside user for", 1250 "Cannot fold tail by masking in the presence of live outs.", 1251 "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1252 return false; 1253 } 1254 } 1255 1256 // The list of pointers that we can safely read and write to remains empty. 1257 SmallPtrSet<Value *, 8> SafePointers; 1258 1259 // Check and mark all blocks for predication, including those that ordinarily 1260 // do not need predication such as the header block. 1261 for (BasicBlock *BB : TheLoop->blocks()) { 1262 if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 1263 reportVectorizationFailure( 1264 "Cannot fold tail by masking as required", 1265 "control flow cannot be substituted for a select", 1266 "NoCFGForSelect", ORE, TheLoop, 1267 BB->getTerminator()); 1268 return false; 1269 } 1270 } 1271 1272 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1273 return true; 1274 } 1275 1276 } // namespace llvm 1277