//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

extern cl::opt<bool> EnableVPlanPredication;

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
    "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum allowed number of runtime memory checks with a "
             "vectorize(enable) pragma."));

static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_UNROLL:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
                                        "AllDisabled", L->getStartLoc(),
                                        L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";
    else {
      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
                                 TheLoop->getStartLoc(), TheLoop->getHeader());
      R << "loop not vectorized";
      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
        R << " (Force=" << NV("Force", true);
        if (Width.Value != 0)
          R << ", Vector Width=" << NV("VectorWidth", Width.Value);
        if (Interleave.Value != 0)
          R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
        R << ")";
      }
      return R;
    }
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == 1)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either an MDString or an MDNode whose first
    // operand is an MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
        Args.push_back(MD->getOperand(i));
    } else {
      S = dyn_cast<MDString>(LoopID->getOperand(i));
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
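    // For example, a width hint is encoded in the loop metadata as
    //   !{!"llvm.loop.vectorize.width", i32 8}
    // setHint() strips the "llvm.loop." prefix before matching the hint name.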
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.startswith(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
  for (auto H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

bool LoopVectorizationRequirements::doesNotMeet(
    Function *F, Loop *L, const LoopVectorizeHints &Hints) {
  const char *PassName = Hints.vectorizeAnalysisPassName();
  bool Failed = false;
  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysisFPCommute(
                 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
                 UnsafeAlgebraInst->getParent())
             << "loop not vectorized: cannot prove it is safe to reorder "
                "floating-point operations";
    });
    Failed = true;
  }

  // Test if runtime memcheck thresholds are exceeded.
  bool PragmaThresholdReached =
      NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
  bool ThresholdReached =
      NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
  if ((ThresholdReached && !Hints.allowReordering()) ||
      PragmaThresholdReached) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
                                                L->getStartLoc(),
                                                L->getHeader())
             << "loop not vectorized: cannot prove it is safe to reorder "
                "memory operations";
    });
    LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
    Failed = true;
  }

  return Failed;
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch, and
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the currently supported
// loop nests. However, some of the current checks don't depend on \p OuterLp
// and would be redundantly executed for each \p Lp if we invoked this function
// for different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support
// explicit outer loop vectorization. It's also very likely that these checks
// go away before introducing the aforementioned infrastructure.
// However, if this is not the case, we should move the \p OuterLp independent
// checks to a separate function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

/// Check whether it is safe to if-convert this phi node.
///
/// Phi nodes with constant expressions that can trap are not safe to
/// if-convert.
static bool canIfConvertPHINodes(BasicBlock *BB) {
  for (PHINode &Phi : BB->phis()) {
    for (Value *V : Phi.incoming_values())
      if (auto *C = dyn_cast<Constant>(V))
        if (C->canTrap())
          return false;
  }
  return true;
}

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  // It is possible that chars or shorts overflow when we ask for the loop's
  // trip count; work around this by widening the type.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return Ty;
}

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Ty0 = convertPointerToIntegerType(DL, Ty0);
  Ty1 = convertPointerToIntegerType(DL, Ty1);
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    return Ty0;
  return Ty1;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, inductions and non-header phis are allowed to have exit users.
  // All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the instruction are inside the loop.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
  const ValueToValueMap &Strides =
      getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();

  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isUniform(Value *V) {
  return LAI->isUniform(V);
}

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
    if (!EnableVPlanPredication && Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure("Unsupported conditional branch",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loop scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
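  // Note that only header PHIs recognized by InductionDescriptor as integer
  // inductions are supported here for now; any other PHI in the outer loop
  // header causes setupOuterLoopInductions() to bail out.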
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first one (as it is
  // the only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getModule()->getDataLayout();

  // Get the widest type.
  if (!PhiTy->isFloatingPointTy()) {
    if (!WidestIndTy)
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    else
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getUnionPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    } else {
      // Bail out for any Phi in the outer loop header that is not a supported
      // induction.
      LLVM_DEBUG(
          dbgs()
          << "LV: Found unsupported PHI for outer loop vectorization.\n");
      return false;
    }
  };

  if (llvm::all_of(Header->phis(), isSupportedPhi))
    return true;
  else
    return false;
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls.
/// This is represented in the TLI via mappings that do not specify a vector
/// name, as in the following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated with a vector
  // function, i.e. the vector name is empty.
  if (Scalarize)
    for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
         VF <= WidestVF; VF *= 2) {
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    }
  return Scalarize;
}

bool LoopVectorizationLegality::canVectorizeInstrs() {
  BasicBlock *Header = TheLoop->getHeader();

  // Look for the attribute signaling the absence of NaNs.
  Function &F = *Header->getParent();
  HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
        Type *PhiTy = Phi->getType();
        // Check that this PHI type is allowed.
        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
            !PhiTy->isPointerTy()) {
          reportVectorizationFailure("Found a non-int non-pointer PHI",
              "loop control flow is not understood by vectorizer",
              "CFGNotUnderstood", ORE, TheLoop);
          return false;
        }

        // If this PHINode is not in the header block, then we know that we
        // can convert it to a select during if-conversion. No need to check
        // if the PHIs in this block are induction or reduction variables.
        if (BB != Header) {
          // Non-header phi nodes that have outside uses can be vectorized. Add
          // them to the list of allowed exits.
          // Unsafe cyclic dependencies with header phis are identified during
          // legalization for reduction, induction and first order
          // recurrences.
          AllowedExit.insert(&I);
          continue;
        }

        // We only allow if-converted PHIs with exactly two incoming values.
        if (Phi->getNumIncomingValues() != 2) {
          reportVectorizationFailure("Found an invalid PHI",
              "loop control flow is not understood by vectorizer",
              "CFGNotUnderstood", ORE, TheLoop, Phi);
          return false;
        }

        RecurrenceDescriptor RedDes;
        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
                                                 DT)) {
          if (RedDes.hasUnsafeAlgebra())
            Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
          AllowedExit.insert(RedDes.getLoopExitInstr());
          Reductions[Phi] = RedDes;
          continue;
        }

        // TODO: Instead of recording the AllowedExit, it would be good to
        // record the complementary set: NotAllowedExit. These include (but may
        // not be limited to):
        // 1. Reduction phis as they represent the one-before-last value, which
        //    is not available when vectorized
        // 2. Induction phis and increment when SCEV predicates cannot be used
        //    outside the loop - see addInductionPhi
        // 3. Non-Phis with outside uses when SCEV predicates cannot be used
        //    outside the loop - see call to hasOutsideLoopUser in the non-phi
        //    handling below
        // 4. FirstOrderRecurrence phis that can possibly be handled by
        //    extraction.
        // By recording these, we can then reason about ways to vectorize each
        // of these NotAllowedExit.
        InductionDescriptor ID;
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
            Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
          continue;
        }

        if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
                                                         SinkAfter, DT)) {
          AllowedExit.insert(Phi);
          FirstOrderRecurrences.insert(Phi);
          continue;
        }

        // As a last resort, coerce the PHI to an AddRec expression
        // and re-try classifying it as an induction PHI.
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
          addInductionPhi(Phi, ID, AllowedExit);
          continue;
        }

        reportVectorizationFailure("Found an unidentified PHI",
            "value that could not be identified as "
            "reduction is used outside the loop",
            "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
        return false;
      } // end of PHI handling

      // We handle calls that:
      //   * Are debug info intrinsics.
      //   * Have a mapping to an IR intrinsic.
      //   * Have a vector version available.
      auto *CI = dyn_cast<CallInst>(&I);

      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
          !isa<DbgInfoIntrinsic>(CI) &&
          !(CI->getCalledFunction() && TLI &&
            (!VFDatabase::getMappings(*CI).empty() ||
             isTLIScalarize(*TLI, *CI)))) {
        // If the call is a recognized math library call, it is likely that
        // we can vectorize it given loosened floating-point constraints.
        LibFunc Func;
        bool IsMathLibCall =
            TLI && CI->getCalledFunction() &&
            CI->getType()->isFloatingPointTy() &&
            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
            TLI->hasOptimizedCodeGen(Func);

        if (IsMathLibCall) {
          // TODO: Ideally, we should not use clang-specific language here,
          // but it's hard to provide meaningful yet generic advice.
          // Also, should this be guarded by allowExtraAnalysis() and/or be
          // part of the returned info from isFunctionVectorizable()?
          reportVectorizationFailure(
              "Found a non-intrinsic callsite",
              "library call cannot be vectorized. "
              "Try compiling with -fno-math-errno, -ffast-math, "
              "or similar flags",
              "CantVectorizeLibcall", ORE, TheLoop, CI);
        } else {
          reportVectorizationFailure("Found a non-intrinsic callsite",
                                     "call instruction cannot be vectorized",
                                     "CantVectorizeLibcall", ORE, TheLoop, CI);
        }
        return false;
      }

      // Some intrinsics have scalar arguments, which must be uniform (i.e.
      // loop invariant) for the call to be vectorizable.
      if (CI) {
        auto *SE = PSE.getSE();
        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
          if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
              reportVectorizationFailure("Found unvectorizable intrinsic",
                  "intrinsic instruction cannot be vectorized",
                  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
              return false;
            }
          }
      }

      // Check that the instruction return type is vectorizable.
      // Also, we can't vectorize extractelement instructions.
      if ((!VectorType::isValidElementType(I.getType()) &&
           !I.getType()->isVoidTy()) ||
          isa<ExtractElementInst>(I)) {
        reportVectorizationFailure("Found unvectorizable type",
            "instruction return type cannot be vectorized",
            "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
        return false;
      }

      // Check that the stored type is vectorizable.
      if (auto *ST = dyn_cast<StoreInst>(&I)) {
        Type *T = ST->getValueOperand()->getType();
        if (!VectorType::isValidElementType(T)) {
          reportVectorizationFailure("Store instruction cannot be vectorized",
                                     "store instruction cannot be vectorized",
                                     "CantVectorizeStore", ORE, TheLoop, ST);
          return false;
        }

        // For nontemporal stores, check that a nontemporal vector version is
        // supported on the target.
        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
          // Arbitrarily try a vector of 2 elements.
          auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
          assert(VecTy && "did not find vectorized version of stored type");
          if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
            reportVectorizationFailure(
                "nontemporal store instruction cannot be vectorized",
                "nontemporal store instruction cannot be vectorized",
                "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
            return false;
          }
        }

      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
          // For nontemporal loads, check that a nontemporal vector version is
          // supported on the target (arbitrarily try a vector of 2 elements).
          auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
          assert(VecTy && "did not find vectorized version of load type");
          if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
            reportVectorizationFailure(
                "nontemporal load instruction cannot be vectorized",
                "nontemporal load instruction cannot be vectorized",
                "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
            return false;
          }
        }

        // FP instructions can allow unsafe algebra, and are thus vectorizable
        // by non-IEEE-754 compliant SIMD units.
        // This applies to floating-point math operations and calls, not memory
        // operations, shuffles, or casts, as they don't change precision or
        // semantics.
      } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
                 !I.isFast()) {
        LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
        Hints->setPotentiallyUnsafe();
      }

      // Reduction instructions are allowed to have exit users.
      // All other instructions must not have external users.
      if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
        // We can safely vectorize loops where instructions within the loop are
        // used outside the loop only if the SCEV predicates within the loop
        // are the same as outside the loop. Allowing the exit means reusing
        // the SCEV outside the loop.
        if (PSE.getUnionPredicate().isAlwaysTrue()) {
          AllowedExit.insert(&I);
          continue;
        }
        reportVectorizationFailure("Value cannot be used outside the loop",
                                   "value cannot be used outside the loop",
                                   "ValueUsedOutsideLoop", ORE, TheLoop, &I);
        return false;
      }
    } // next instr.
  }

  if (!PrimaryInduction) {
    if (Inductions.empty()) {
      reportVectorizationFailure("Did not find one integer induction var",
          "loop induction variable could not be identified",
          "NoInductionVariable", ORE, TheLoop);
      return false;
    } else if (!WidestIndTy) {
      reportVectorizationFailure("Did not find one integer induction var",
          "integer loop induction variable could not be identified",
          "NoIntegerInductionVariable", ORE, TheLoop);
      return false;
    } else {
      LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
    }
  }

  // For first order recurrences, we use the previous value (incoming value
  // from the latch) to check if it dominates all users of the recurrence.
  // Bail out if we have to sink such an instruction for another recurrence,
  // as the dominance requirement may not hold after sinking.
  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
  if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
        Instruction *V =
            cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
        return SinkAfter.find(V) != SinkAfter.end();
      }))
    return false;

  // Now we know the widest induction type, check if our found induction
  // is the same size. If it's not, unset it here and InnerLoopVectorizer
  // will create another.
  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    PrimaryInduction = nullptr;

  return true;
}

bool LoopVectorizationLegality::canVectorizeMemory() {
  LAI = &(*GetLAA)(*TheLoop);
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }
  if (!LAI->canVectorizeMemory())
    return false;

  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("Stores to a uniform address",
        "write to a loop invariant address could not be vectorized",
        "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
    return false;
  }
  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
  PSE.addPredicate(LAI->getPSE().getUnionPredicate());

  return true;
}

bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
  Value *In0 = const_cast<Value *>(V);
  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
  if (!PN)
    return false;

  return Inductions.count(PN);
}

bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
  auto *Inst = dyn_cast<Instruction>(V);
  return (Inst && InductionCastsToIgnore.count(Inst));
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
  return isInductionPhi(V) || isCastedInductionVariable(V);
}

bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
  return FirstOrderRecurrences.count(Phi);
}

bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}

bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

  for (Instruction &I : *BB) {
    // Check that we don't have a constant expression that can trap as an
    // operand.
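    // (For example, a constant 'udiv' or 'sdiv' expression with a divisor of
    // zero would trap if the block were executed unconditionally after
    // if-conversion.)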
    for (Value *Operand : I.operands()) {
      if (auto *C = dyn_cast<Constant>(Operand))
        if (C->canTrap())
          return false;
    }

    // We can predicate blocks with calls to assume, as long as we drop them in
    // case we flatten the CFG via predication.
    if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
      ConditionalAssumes.insert(&I);
      continue;
    }

    // We might be able to hoist the load.
    if (I.mayReadFromMemory()) {
      auto *LI = dyn_cast<LoadInst>(&I);
      if (!LI)
        return false;
      if (!SafePtrs.count(LI->getPointerOperand())) {
        // !llvm.mem.parallel_loop_access implies if-conversion safety.
        // Otherwise, record that the load needs (real or emulated) masking
        // and let the cost model decide.
        if (!IsAnnotatedParallel || PreserveGuards)
          MaskedOp.insert(LI);
        continue;
      }
    }

    if (I.mayWriteToMemory()) {
      auto *SI = dyn_cast<StoreInst>(&I);
      if (!SI)
        return false;
      // A predicated store requires some form of masking:
      //   1) a masked store HW instruction,
      //   2) emulation via load-blend-store (only if safe and legal to do so;
      //      beware of race conditions), or
      //   3) an element-by-element predicate check and scalar store.
      MaskedOp.insert(SI);
      continue;
    }
    if (I.mayThrow())
      return false;
  }

  return true;
}

bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "if-conversion is disabled",
                               "IfConversionDisabled",
                               ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within the scope
  // of the loop body on each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      if (LI && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
        SafePointers.insert(LI->getPointerOperand());
    }
  }

  // Collect the blocks that need predication.
  BasicBlock *Header = TheLoop->getHeader();
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We don't support switch statements inside loops.
    if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains a switch statement",
                                 "loop contains a switch statement",
                                 "LoopContainsSwitch", ORE, TheLoop,
                                 BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
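    // A block may be impossible to predicate, e.g. if it contains an
    // instruction that may throw or a memory access that cannot be masked.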
    if (blockNeedsPredication(BB)) {
      if (!blockCanBePredicated(BB, SafePointers)) {
        reportVectorizationFailure(
            "Control flow cannot be substituted for a select",
            "control flow cannot be substituted for a select",
            "NoCFGForSelect", ORE, TheLoop,
            BB->getTerminator());
        return false;
      }
    } else if (BB != Header && !canIfConvertPHINodes(BB)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select",
          "control flow cannot be substituted for a select",
          "NoCFGForSelect", ORE, TheLoop,
          BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}

// Helper function for canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) {
  assert((UseVPlanNativePath || Lp->empty()) &&
         "VPlan-native path is not enabled.");

  // TODO: ORE should be improved to show more accurate information when an
  // outer loop can't be vectorized because a nested loop is not understood or
  // legal. Something like: "outer_loop_location: loop not vectorized:
  // (inner_loop_location) loop control flow is not understood by vectorizer".

  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  // We must have a loop in canonical form. Loops with indirectbr in them
  // cannot be canonicalized.
  if (!Lp->getLoopPreheader()) {
    reportVectorizationFailure("Loop doesn't have a legal pre-header",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We must have a single backedge.
  if (Lp->getNumBackEdges() != 1) {
    reportVectorizationFailure("The loop must have a single backedge",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We must have a single exiting block.
  if (!Lp->getExitingBlock()) {
    reportVectorizationFailure("The loop must have an exiting block",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We only handle bottom-tested loops, i.e. loops in which the condition is
  // checked at the end of each iteration. With that we can assume that all
  // instructions in the loop are executed the same number of times.
  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
    reportVectorizationFailure("The exiting block is not the loop latch",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
    Loop *Lp, bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Recursively check whether the loop control flow of nested loops is
  // understood.
  for (Loop *SubLp : *Lp)
    if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

  return Result;
}

bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in
  // case allowExtraAnalysis is used to report multiple reasons for not
  // vectorizing.
  bool Result = true;

  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Check whether the loop-related control flow in the loop nest is expected
  // by the vectorizer.
  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
                    << '\n');

  // Specific checks for outer loops. We skip the remaining legal checks at
  // this point because they don't support outer loops.
  if (!TheLoop->empty()) {
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

    if (!canVectorizeOuterLoop()) {
      reportVectorizationFailure("Unsupported outer loop",
                                 "unsupported outer loop",
                                 "UnsupportedOuterLoop",
                                 ORE, TheLoop);
      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
      // outer loops.
      return false;
    }

    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
    return Result;
  }

  assert(TheLoop->empty() && "Inner loop expected.");
  // Check if we can if-convert non-single-bb loops.
  unsigned NumBlocks = TheLoop->getNumBlocks();
  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check if we can vectorize the instructions and CFG in this loop.
  if (!canVectorizeInstrs()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Go over each instruction and look at memory deps.
  if (!canVectorizeMemory()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                    << (LAI->getRuntimePointerChecking()->Need
                            ? " (with a runtime bound check)"
                            : "")
                    << "!\n");

  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
    reportVectorizationFailure("Too many SCEV checks needed",
        "Too many SCEV assumptions need to be made and checked at runtime",
        "TooManySCEVRunTimeChecks", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Okay! We've done all the tests. If any have failed, return false. Otherwise
  // we can vectorize, and at this point we don't have any other memory
  // analysis which may limit our maximum vectorization factor, so just return
  // true with no restrictions.
  return Result;
}

bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");

  SmallPtrSet<const Value *, 8> ReductionLiveOuts;

  for (auto &Reduction : getReductionVars())
    ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());

  // TODO: handle non-reduction outside users when tail is folded by masking.
  for (auto *AE : AllowedExit) {
    // Check that all users of allowed exit values are inside the loop or
    // are the live-out of a reduction.
    if (ReductionLiveOuts.count(AE))
      continue;
    for (User *U : AE->users()) {
      Instruction *UI = cast<Instruction>(U);
      if (TheLoop->contains(UI))
        continue;
      reportVectorizationFailure(
          "Cannot fold tail by masking, loop has an outside user for",
          "Cannot fold tail by masking in the presence of live outs.",
          "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
      return false;
    }
  }

  // The list of pointers that we can safely read and write to remains empty.
  SmallPtrSet<Value *, 8> SafePointers;

  // Check and mark all blocks for predication, including those that ordinarily
  // do not need predication such as the header block.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!blockCanBePredicated(BB, SafePointers, /* PreserveGuards= */ true)) {
      reportVectorizationFailure(
          "Cannot fold tail by masking as required",
          "control flow cannot be substituted for a select",
          "NoCFGForSelect", ORE, TheLoop,
          BB->getTerminator());
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
  return true;
}

} // namespace llvm