1 //===- LoopVectorizationLegality.cpp --------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides loop vectorization legality analysis. Original code 10 // resided in LoopVectorize.cpp for a long time. 11 // 12 // At this point, it is implemented as a utility class, not as an analysis 13 // pass. It should be easy to create an analysis pass around it if there 14 // is a need (but D45420 needs to happen first). 15 // 16 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 17 #include "llvm/Analysis/Loads.h" 18 #include "llvm/Analysis/ValueTracking.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/IntrinsicInst.h" 21 #include "llvm/IR/PatternMatch.h" 22 #include "llvm/Transforms/Vectorize/LoopVectorize.h" 23 24 using namespace llvm; 25 using namespace PatternMatch; 26 27 #define LV_NAME "loop-vectorize" 28 #define DEBUG_TYPE LV_NAME 29 30 extern cl::opt<bool> EnableVPlanPredication; 31 32 static cl::opt<bool> 33 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 34 cl::desc("Enable if-conversion during vectorization.")); 35 36 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 37 "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 38 cl::desc("The maximum allowed number of runtime memory checks with a " 39 "vectorize(enable) pragma.")); 40 41 static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 42 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 43 cl::desc("The maximum number of SCEV checks allowed.")); 44 45 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 46 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 47 cl::desc("The maximum number of SCEV checks allowed with a " 48 "vectorize(enable) pragma")); 49 50 /// Maximum vectorization interleave count. 51 static const unsigned MaxInterleaveFactor = 16; 52 53 namespace llvm { 54 55 bool LoopVectorizeHints::Hint::validate(unsigned Val) { 56 switch (Kind) { 57 case HK_WIDTH: 58 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 59 case HK_UNROLL: 60 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 61 case HK_FORCE: 62 return (Val <= 1); 63 case HK_ISVECTORIZED: 64 case HK_PREDICATE: 65 return (Val == 0 || Val == 1); 66 } 67 return false; 68 } 69 70 LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 71 bool InterleaveOnlyWhenForced, 72 OptimizationRemarkEmitter &ORE) 73 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 74 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 75 Force("vectorize.enable", FK_Undefined, HK_FORCE), 76 IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 77 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), 78 ORE(ORE) { 79 // Populate values with existing loop metadata. 80 getHintsFromMetadata(); 81 82 // force-vector-interleave overrides DisableInterleaving. 83 if (VectorizerParams::isInterleaveForced()) 84 Interleave.Value = VectorizerParams::VectorizationInterleave; 85 86 if (IsVectorized.Value != 1) 87 // If the vectorization width and interleaving count are both 1 then 88 // consider the loop to have been already vectorized because there's 89 // nothing more that we can do. 90 IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 91 LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 92 << "LV: Interleaving disabled by the pass manager\n"); 93 } 94 95 void LoopVectorizeHints::setAlreadyVectorized() { 96 LLVMContext &Context = TheLoop->getHeader()->getContext(); 97 98 MDNode *IsVectorizedMD = MDNode::get( 99 Context, 100 {MDString::get(Context, "llvm.loop.isvectorized"), 101 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 102 MDNode *LoopID = TheLoop->getLoopID(); 103 MDNode *NewLoopID = 104 makePostTransformationMetadata(Context, LoopID, 105 {Twine(Prefix(), "vectorize.").str(), 106 Twine(Prefix(), "interleave.").str()}, 107 {IsVectorizedMD}); 108 TheLoop->setLoopID(NewLoopID); 109 110 // Update internal cache. 111 IsVectorized.Value = 1; 112 } 113 114 bool LoopVectorizeHints::allowVectorization( 115 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 116 if (getForce() == LoopVectorizeHints::FK_Disabled) { 117 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 118 emitRemarkWithHints(); 119 return false; 120 } 121 122 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 123 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 124 emitRemarkWithHints(); 125 return false; 126 } 127 128 if (getIsVectorized() == 1) { 129 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 130 // FIXME: Add interleave.disable metadata. This will allow 131 // vectorize.disable to be used without disabling the pass and errors 132 // to differentiate between disabled vectorization and a width of 1. 133 ORE.emit([&]() { 134 return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 135 "AllDisabled", L->getStartLoc(), 136 L->getHeader()) 137 << "loop not vectorized: vectorization and interleaving are " 138 "explicitly disabled, or the loop has already been " 139 "vectorized"; 140 }); 141 return false; 142 } 143 144 return true; 145 } 146 147 void LoopVectorizeHints::emitRemarkWithHints() const { 148 using namespace ore; 149 150 ORE.emit([&]() { 151 if (Force.Value == LoopVectorizeHints::FK_Disabled) 152 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 153 TheLoop->getStartLoc(), 154 TheLoop->getHeader()) 155 << "loop not vectorized: vectorization is explicitly disabled"; 156 else { 157 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 158 TheLoop->getStartLoc(), TheLoop->getHeader()); 159 R << "loop not vectorized"; 160 if (Force.Value == LoopVectorizeHints::FK_Enabled) { 161 R << " (Force=" << NV("Force", true); 162 if (Width.Value != 0) 163 R << ", Vector Width=" << NV("VectorWidth", Width.Value); 164 if (Interleave.Value != 0) 165 R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 166 R << ")"; 167 } 168 return R; 169 } 170 }); 171 } 172 173 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 174 if (getWidth() == 1) 175 return LV_NAME; 176 if (getForce() == LoopVectorizeHints::FK_Disabled) 177 return LV_NAME; 178 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 179 return LV_NAME; 180 return OptimizationRemarkAnalysis::AlwaysPrint; 181 } 182 183 void LoopVectorizeHints::getHintsFromMetadata() { 184 MDNode *LoopID = TheLoop->getLoopID(); 185 if (!LoopID) 186 return; 187 188 // First operand should refer to the loop id itself. 189 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 190 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 191 192 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 193 const MDString *S = nullptr; 194 SmallVector<Metadata *, 4> Args; 195 196 // The expected hint is either a MDString or a MDNode with the first 197 // operand a MDString. 198 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 199 if (!MD || MD->getNumOperands() == 0) 200 continue; 201 S = dyn_cast<MDString>(MD->getOperand(0)); 202 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 203 Args.push_back(MD->getOperand(i)); 204 } else { 205 S = dyn_cast<MDString>(LoopID->getOperand(i)); 206 assert(Args.size() == 0 && "too many arguments for MDString"); 207 } 208 209 if (!S) 210 continue; 211 212 // Check if the hint starts with the loop metadata prefix. 213 StringRef Name = S->getString(); 214 if (Args.size() == 1) 215 setHint(Name, Args[0]); 216 } 217 } 218 219 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 220 if (!Name.startswith(Prefix())) 221 return; 222 Name = Name.substr(Prefix().size(), StringRef::npos); 223 224 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 225 if (!C) 226 return; 227 unsigned Val = C->getZExtValue(); 228 229 Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 230 for (auto H : Hints) { 231 if (Name == H->Name) { 232 if (H->validate(Val)) 233 H->Value = Val; 234 else 235 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 236 break; 237 } 238 } 239 } 240 241 bool LoopVectorizationRequirements::doesNotMeet( 242 Function *F, Loop *L, const LoopVectorizeHints &Hints) { 243 const char *PassName = Hints.vectorizeAnalysisPassName(); 244 bool Failed = false; 245 if (UnsafeAlgebraInst && !Hints.allowReordering()) { 246 ORE.emit([&]() { 247 return OptimizationRemarkAnalysisFPCommute( 248 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 249 UnsafeAlgebraInst->getParent()) 250 << "loop not vectorized: cannot prove it is safe to reorder " 251 "floating-point operations"; 252 }); 253 Failed = true; 254 } 255 256 // Test if runtime memcheck thresholds are exceeded. 257 bool PragmaThresholdReached = 258 NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 259 bool ThresholdReached = 260 NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 261 if ((ThresholdReached && !Hints.allowReordering()) || 262 PragmaThresholdReached) { 263 ORE.emit([&]() { 264 return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 265 L->getStartLoc(), 266 L->getHeader()) 267 << "loop not vectorized: cannot prove it is safe to reorder " 268 "memory operations"; 269 }); 270 LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 271 Failed = true; 272 } 273 274 return Failed; 275 } 276 277 // Return true if the inner loop \p Lp is uniform with regard to the outer loop 278 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 279 // executing the inner loop will execute the same iterations). This check is 280 // very constrained for now but it will be relaxed in the future. \p Lp is 281 // considered uniform if it meets all the following conditions: 282 // 1) it has a canonical IV (starting from 0 and with stride 1), 283 // 2) its latch terminator is a conditional branch and, 284 // 3) its latch condition is a compare instruction whose operands are the 285 // canonical IV and an OuterLp invariant. 286 // This check doesn't take into account the uniformity of other conditions not 287 // related to the loop latch because they don't affect the loop uniformity. 288 // 289 // NOTE: We decided to keep all these checks and its associated documentation 290 // together so that we can easily have a picture of the current supported loop 291 // nests. However, some of the current checks don't depend on \p OuterLp and 292 // would be redundantly executed for each \p Lp if we invoked this function for 293 // different candidate outer loops. This is not the case for now because we 294 // don't currently have the infrastructure to evaluate multiple candidate outer 295 // loops and \p OuterLp will be a fixed parameter while we only support explicit 296 // outer loop vectorization. It's also very likely that these checks go away 297 // before introducing the aforementioned infrastructure. However, if this is not 298 // the case, we should move the \p OuterLp independent checks to a separate 299 // function that is only executed once for each \p Lp. 300 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 301 assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 302 303 // If Lp is the outer loop, it's uniform by definition. 304 if (Lp == OuterLp) 305 return true; 306 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 307 308 // 1. 309 PHINode *IV = Lp->getCanonicalInductionVariable(); 310 if (!IV) { 311 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 312 return false; 313 } 314 315 // 2. 316 BasicBlock *Latch = Lp->getLoopLatch(); 317 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 318 if (!LatchBr || LatchBr->isUnconditional()) { 319 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 320 return false; 321 } 322 323 // 3. 324 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 325 if (!LatchCmp) { 326 LLVM_DEBUG( 327 dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 328 return false; 329 } 330 331 Value *CondOp0 = LatchCmp->getOperand(0); 332 Value *CondOp1 = LatchCmp->getOperand(1); 333 Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 334 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 335 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 336 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 337 return false; 338 } 339 340 return true; 341 } 342 343 // Return true if \p Lp and all its nested loops are uniform with regard to \p 344 // OuterLp. 345 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 346 if (!isUniformLoop(Lp, OuterLp)) 347 return false; 348 349 // Check if nested loops are uniform. 350 for (Loop *SubLp : *Lp) 351 if (!isUniformLoopNest(SubLp, OuterLp)) 352 return false; 353 354 return true; 355 } 356 357 /// Check whether it is safe to if-convert this phi node. 358 /// 359 /// Phi nodes with constant expressions that can trap are not safe to if 360 /// convert. 361 static bool canIfConvertPHINodes(BasicBlock *BB) { 362 for (PHINode &Phi : BB->phis()) { 363 for (Value *V : Phi.incoming_values()) 364 if (auto *C = dyn_cast<Constant>(V)) 365 if (C->canTrap()) 366 return false; 367 } 368 return true; 369 } 370 371 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 372 if (Ty->isPointerTy()) 373 return DL.getIntPtrType(Ty); 374 375 // It is possible that char's or short's overflow when we ask for the loop's 376 // trip count, work around this by changing the type size. 377 if (Ty->getScalarSizeInBits() < 32) 378 return Type::getInt32Ty(Ty->getContext()); 379 380 return Ty; 381 } 382 383 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 384 Ty0 = convertPointerToIntegerType(DL, Ty0); 385 Ty1 = convertPointerToIntegerType(DL, Ty1); 386 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 387 return Ty0; 388 return Ty1; 389 } 390 391 /// Check that the instruction has outside loop users and is not an 392 /// identified reduction variable. 393 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 394 SmallPtrSetImpl<Value *> &AllowedExit) { 395 // Reductions, Inductions and non-header phis are allowed to have exit users. All 396 // other instructions must not have external users. 397 if (!AllowedExit.count(Inst)) 398 // Check that all of the users of the loop are inside the BB. 399 for (User *U : Inst->users()) { 400 Instruction *UI = cast<Instruction>(U); 401 // This user may be a reduction exit value. 402 if (!TheLoop->contains(UI)) { 403 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 404 return true; 405 } 406 } 407 return false; 408 } 409 410 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 411 const ValueToValueMap &Strides = 412 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 413 414 bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); 415 int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); 416 if (Stride == 1 || Stride == -1) 417 return Stride; 418 return 0; 419 } 420 421 bool LoopVectorizationLegality::isUniform(Value *V) { 422 return LAI->isUniform(V); 423 } 424 425 bool LoopVectorizationLegality::canVectorizeOuterLoop() { 426 assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 427 // Store the result and return it at the end instead of exiting early, in case 428 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 429 bool Result = true; 430 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 431 432 for (BasicBlock *BB : TheLoop->blocks()) { 433 // Check whether the BB terminator is a BranchInst. Any other terminator is 434 // not supported yet. 435 auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 436 if (!Br) { 437 reportVectorizationFailure("Unsupported basic block terminator", 438 "loop control flow is not understood by vectorizer", 439 "CFGNotUnderstood", ORE, TheLoop); 440 if (DoExtraAnalysis) 441 Result = false; 442 else 443 return false; 444 } 445 446 // Check whether the BranchInst is a supported one. Only unconditional 447 // branches, conditional branches with an outer loop invariant condition or 448 // backedges are supported. 449 // FIXME: We skip these checks when VPlan predication is enabled as we 450 // want to allow divergent branches. This whole check will be removed 451 // once VPlan predication is on by default. 452 if (!EnableVPlanPredication && Br && Br->isConditional() && 453 !TheLoop->isLoopInvariant(Br->getCondition()) && 454 !LI->isLoopHeader(Br->getSuccessor(0)) && 455 !LI->isLoopHeader(Br->getSuccessor(1))) { 456 reportVectorizationFailure("Unsupported conditional branch", 457 "loop control flow is not understood by vectorizer", 458 "CFGNotUnderstood", ORE, TheLoop); 459 if (DoExtraAnalysis) 460 Result = false; 461 else 462 return false; 463 } 464 } 465 466 // Check whether inner loops are uniform. At this point, we only support 467 // simple outer loops scenarios with uniform nested loops. 468 if (!isUniformLoopNest(TheLoop /*loop nest*/, 469 TheLoop /*context outer loop*/)) { 470 reportVectorizationFailure("Outer loop contains divergent loops", 471 "loop control flow is not understood by vectorizer", 472 "CFGNotUnderstood", ORE, TheLoop); 473 if (DoExtraAnalysis) 474 Result = false; 475 else 476 return false; 477 } 478 479 // Check whether we are able to set up outer loop induction. 480 if (!setupOuterLoopInductions()) { 481 reportVectorizationFailure("Unsupported outer loop Phi(s)", 482 "Unsupported outer loop Phi(s)", 483 "UnsupportedPhi", ORE, TheLoop); 484 if (DoExtraAnalysis) 485 Result = false; 486 else 487 return false; 488 } 489 490 return Result; 491 } 492 493 void LoopVectorizationLegality::addInductionPhi( 494 PHINode *Phi, const InductionDescriptor &ID, 495 SmallPtrSetImpl<Value *> &AllowedExit) { 496 Inductions[Phi] = ID; 497 498 // In case this induction also comes with casts that we know we can ignore 499 // in the vectorized loop body, record them here. All casts could be recorded 500 // here for ignoring, but suffices to record only the first (as it is the 501 // only one that may bw used outside the cast sequence). 502 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 503 if (!Casts.empty()) 504 InductionCastsToIgnore.insert(*Casts.begin()); 505 506 Type *PhiTy = Phi->getType(); 507 const DataLayout &DL = Phi->getModule()->getDataLayout(); 508 509 // Get the widest type. 510 if (!PhiTy->isFloatingPointTy()) { 511 if (!WidestIndTy) 512 WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 513 else 514 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 515 } 516 517 // Int inductions are special because we only allow one IV. 518 if (ID.getKind() == InductionDescriptor::IK_IntInduction && 519 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 520 isa<Constant>(ID.getStartValue()) && 521 cast<Constant>(ID.getStartValue())->isNullValue()) { 522 523 // Use the phi node with the widest type as induction. Use the last 524 // one if there are multiple (no good reason for doing this other 525 // than it is expedient). We've checked that it begins at zero and 526 // steps by one, so this is a canonical induction variable. 527 if (!PrimaryInduction || PhiTy == WidestIndTy) 528 PrimaryInduction = Phi; 529 } 530 531 // Both the PHI node itself, and the "post-increment" value feeding 532 // back into the PHI node may have external users. 533 // We can allow those uses, except if the SCEVs we have for them rely 534 // on predicates that only hold within the loop, since allowing the exit 535 // currently means re-using this SCEV outside the loop (see PR33706 for more 536 // details). 537 if (PSE.getUnionPredicate().isAlwaysTrue()) { 538 AllowedExit.insert(Phi); 539 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 540 } 541 542 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 543 } 544 545 bool LoopVectorizationLegality::setupOuterLoopInductions() { 546 BasicBlock *Header = TheLoop->getHeader(); 547 548 // Returns true if a given Phi is a supported induction. 549 auto isSupportedPhi = [&](PHINode &Phi) -> bool { 550 InductionDescriptor ID; 551 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 552 ID.getKind() == InductionDescriptor::IK_IntInduction) { 553 addInductionPhi(&Phi, ID, AllowedExit); 554 return true; 555 } else { 556 // Bail out for any Phi in the outer loop header that is not a supported 557 // induction. 558 LLVM_DEBUG( 559 dbgs() 560 << "LV: Found unsupported PHI for outer loop vectorization.\n"); 561 return false; 562 } 563 }; 564 565 if (llvm::all_of(Header->phis(), isSupportedPhi)) 566 return true; 567 else 568 return false; 569 } 570 571 /// Checks if a function is scalarizable according to the TLI, in 572 /// the sense that it should be vectorized and then expanded in 573 /// multiple scalarcalls. This is represented in the 574 /// TLI via mappings that do not specify a vector name, as in the 575 /// following example: 576 /// 577 /// const VecDesc VecIntrinsics[] = { 578 /// {"llvm.phx.abs.i32", "", 4} 579 /// }; 580 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { 581 const StringRef ScalarName = CI.getCalledFunction()->getName(); 582 bool Scalarize = TLI.isFunctionVectorizable(ScalarName); 583 // Check that all known VFs are not associated to a vector 584 // function, i.e. the vector name is emty. 585 if (Scalarize) 586 for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); 587 VF <= WidestVF; VF *= 2) { 588 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); 589 } 590 return Scalarize; 591 } 592 593 bool LoopVectorizationLegality::canVectorizeInstrs() { 594 BasicBlock *Header = TheLoop->getHeader(); 595 596 // Look for the attribute signaling the absence of NaNs. 597 Function &F = *Header->getParent(); 598 HasFunNoNaNAttr = 599 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 600 601 // For each block in the loop. 602 for (BasicBlock *BB : TheLoop->blocks()) { 603 // Scan the instructions in the block and look for hazards. 604 for (Instruction &I : *BB) { 605 if (auto *Phi = dyn_cast<PHINode>(&I)) { 606 Type *PhiTy = Phi->getType(); 607 // Check that this PHI type is allowed. 608 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 609 !PhiTy->isPointerTy()) { 610 reportVectorizationFailure("Found a non-int non-pointer PHI", 611 "loop control flow is not understood by vectorizer", 612 "CFGNotUnderstood", ORE, TheLoop); 613 return false; 614 } 615 616 // If this PHINode is not in the header block, then we know that we 617 // can convert it to select during if-conversion. No need to check if 618 // the PHIs in this block are induction or reduction variables. 619 if (BB != Header) { 620 // Non-header phi nodes that have outside uses can be vectorized. Add 621 // them to the list of allowed exits. 622 // Unsafe cyclic dependencies with header phis are identified during 623 // legalization for reduction, induction and first order 624 // recurrences. 625 AllowedExit.insert(&I); 626 continue; 627 } 628 629 // We only allow if-converted PHIs with exactly two incoming values. 630 if (Phi->getNumIncomingValues() != 2) { 631 reportVectorizationFailure("Found an invalid PHI", 632 "loop control flow is not understood by vectorizer", 633 "CFGNotUnderstood", ORE, TheLoop, Phi); 634 return false; 635 } 636 637 RecurrenceDescriptor RedDes; 638 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 639 DT)) { 640 if (RedDes.hasUnsafeAlgebra()) 641 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 642 AllowedExit.insert(RedDes.getLoopExitInstr()); 643 Reductions[Phi] = RedDes; 644 continue; 645 } 646 647 // TODO: Instead of recording the AllowedExit, it would be good to record the 648 // complementary set: NotAllowedExit. These include (but may not be 649 // limited to): 650 // 1. Reduction phis as they represent the one-before-last value, which 651 // is not available when vectorized 652 // 2. Induction phis and increment when SCEV predicates cannot be used 653 // outside the loop - see addInductionPhi 654 // 3. Non-Phis with outside uses when SCEV predicates cannot be used 655 // outside the loop - see call to hasOutsideLoopUser in the non-phi 656 // handling below 657 // 4. FirstOrderRecurrence phis that can possibly be handled by 658 // extraction. 659 // By recording these, we can then reason about ways to vectorize each 660 // of these NotAllowedExit. 661 InductionDescriptor ID; 662 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 663 addInductionPhi(Phi, ID, AllowedExit); 664 if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 665 Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 666 continue; 667 } 668 669 if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 670 SinkAfter, DT)) { 671 FirstOrderRecurrences.insert(Phi); 672 continue; 673 } 674 675 // As a last resort, coerce the PHI to a AddRec expression 676 // and re-try classifying it a an induction PHI. 677 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 678 addInductionPhi(Phi, ID, AllowedExit); 679 continue; 680 } 681 682 reportVectorizationFailure("Found an unidentified PHI", 683 "value that could not be identified as " 684 "reduction is used outside the loop", 685 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 686 return false; 687 } // end of PHI handling 688 689 // We handle calls that: 690 // * Are debug info intrinsics. 691 // * Have a mapping to an IR intrinsic. 692 // * Have a vector version available. 693 auto *CI = dyn_cast<CallInst>(&I); 694 695 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 696 !isa<DbgInfoIntrinsic>(CI) && 697 !(CI->getCalledFunction() && TLI && 698 (!VFDatabase::getMappings(*CI).empty() || 699 isTLIScalarize(*TLI, *CI)))) { 700 // If the call is a recognized math libary call, it is likely that 701 // we can vectorize it given loosened floating-point constraints. 702 LibFunc Func; 703 bool IsMathLibCall = 704 TLI && CI->getCalledFunction() && 705 CI->getType()->isFloatingPointTy() && 706 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 707 TLI->hasOptimizedCodeGen(Func); 708 709 if (IsMathLibCall) { 710 // TODO: Ideally, we should not use clang-specific language here, 711 // but it's hard to provide meaningful yet generic advice. 712 // Also, should this be guarded by allowExtraAnalysis() and/or be part 713 // of the returned info from isFunctionVectorizable()? 714 reportVectorizationFailure( 715 "Found a non-intrinsic callsite", 716 "library call cannot be vectorized. " 717 "Try compiling with -fno-math-errno, -ffast-math, " 718 "or similar flags", 719 "CantVectorizeLibcall", ORE, TheLoop, CI); 720 } else { 721 reportVectorizationFailure("Found a non-intrinsic callsite", 722 "call instruction cannot be vectorized", 723 "CantVectorizeLibcall", ORE, TheLoop, CI); 724 } 725 return false; 726 } 727 728 // Some intrinsics have scalar arguments and should be same in order for 729 // them to be vectorized (i.e. loop invariant). 730 if (CI) { 731 auto *SE = PSE.getSE(); 732 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 733 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 734 if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 735 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 736 reportVectorizationFailure("Found unvectorizable intrinsic", 737 "intrinsic instruction cannot be vectorized", 738 "CantVectorizeIntrinsic", ORE, TheLoop, CI); 739 return false; 740 } 741 } 742 } 743 744 // Check that the instruction return type is vectorizable. 745 // Also, we can't vectorize extractelement instructions. 746 if ((!VectorType::isValidElementType(I.getType()) && 747 !I.getType()->isVoidTy()) || 748 isa<ExtractElementInst>(I)) { 749 reportVectorizationFailure("Found unvectorizable type", 750 "instruction return type cannot be vectorized", 751 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 752 return false; 753 } 754 755 // Check that the stored type is vectorizable. 756 if (auto *ST = dyn_cast<StoreInst>(&I)) { 757 Type *T = ST->getValueOperand()->getType(); 758 if (!VectorType::isValidElementType(T)) { 759 reportVectorizationFailure("Store instruction cannot be vectorized", 760 "store instruction cannot be vectorized", 761 "CantVectorizeStore", ORE, TheLoop, ST); 762 return false; 763 } 764 765 // For nontemporal stores, check that a nontemporal vector version is 766 // supported on the target. 767 if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 768 // Arbitrarily try a vector of 2 elements. 769 Type *VecTy = VectorType::get(T, /*NumElements=*/2); 770 assert(VecTy && "did not find vectorized version of stored type"); 771 const MaybeAlign Alignment = getLoadStoreAlignment(ST); 772 assert(Alignment && "Alignment should be set"); 773 if (!TTI->isLegalNTStore(VecTy, *Alignment)) { 774 reportVectorizationFailure( 775 "nontemporal store instruction cannot be vectorized", 776 "nontemporal store instruction cannot be vectorized", 777 "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 778 return false; 779 } 780 } 781 782 } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 783 if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 784 // For nontemporal loads, check that a nontemporal vector version is 785 // supported on the target (arbitrarily try a vector of 2 elements). 786 Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 787 assert(VecTy && "did not find vectorized version of load type"); 788 const MaybeAlign Alignment = getLoadStoreAlignment(LD); 789 assert(Alignment && "Alignment should be set"); 790 if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { 791 reportVectorizationFailure( 792 "nontemporal load instruction cannot be vectorized", 793 "nontemporal load instruction cannot be vectorized", 794 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 795 return false; 796 } 797 } 798 799 // FP instructions can allow unsafe algebra, thus vectorizable by 800 // non-IEEE-754 compliant SIMD units. 801 // This applies to floating-point math operations and calls, not memory 802 // operations, shuffles, or casts, as they don't change precision or 803 // semantics. 804 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 805 !I.isFast()) { 806 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 807 Hints->setPotentiallyUnsafe(); 808 } 809 810 // Reduction instructions are allowed to have exit users. 811 // All other instructions must not have external users. 812 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 813 // We can safely vectorize loops where instructions within the loop are 814 // used outside the loop only if the SCEV predicates within the loop is 815 // same as outside the loop. Allowing the exit means reusing the SCEV 816 // outside the loop. 817 if (PSE.getUnionPredicate().isAlwaysTrue()) { 818 AllowedExit.insert(&I); 819 continue; 820 } 821 reportVectorizationFailure("Value cannot be used outside the loop", 822 "value cannot be used outside the loop", 823 "ValueUsedOutsideLoop", ORE, TheLoop, &I); 824 return false; 825 } 826 } // next instr. 827 } 828 829 if (!PrimaryInduction) { 830 if (Inductions.empty()) { 831 reportVectorizationFailure("Did not find one integer induction var", 832 "loop induction variable could not be identified", 833 "NoInductionVariable", ORE, TheLoop); 834 return false; 835 } else if (!WidestIndTy) { 836 reportVectorizationFailure("Did not find one integer induction var", 837 "integer loop induction variable could not be identified", 838 "NoIntegerInductionVariable", ORE, TheLoop); 839 return false; 840 } else { 841 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 842 } 843 } 844 845 // For first order recurrences, we use the previous value (incoming value from 846 // the latch) to check if it dominates all users of the recurrence. Bail out 847 // if we have to sink such an instruction for another recurrence, as the 848 // dominance requirement may not hold after sinking. 849 BasicBlock *LoopLatch = TheLoop->getLoopLatch(); 850 if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { 851 Instruction *V = 852 cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch)); 853 return SinkAfter.find(V) != SinkAfter.end(); 854 })) 855 return false; 856 857 // Now we know the widest induction type, check if our found induction 858 // is the same size. If it's not, unset it here and InnerLoopVectorizer 859 // will create another. 860 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 861 PrimaryInduction = nullptr; 862 863 return true; 864 } 865 866 bool LoopVectorizationLegality::canVectorizeMemory() { 867 LAI = &(*GetLAA)(*TheLoop); 868 const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 869 if (LAR) { 870 ORE->emit([&]() { 871 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 872 "loop not vectorized: ", *LAR); 873 }); 874 } 875 if (!LAI->canVectorizeMemory()) 876 return false; 877 878 if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 879 reportVectorizationFailure("Stores to a uniform address", 880 "write to a loop invariant address could not be vectorized", 881 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 882 return false; 883 } 884 Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 885 PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 886 887 return true; 888 } 889 890 bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 891 Value *In0 = const_cast<Value *>(V); 892 PHINode *PN = dyn_cast_or_null<PHINode>(In0); 893 if (!PN) 894 return false; 895 896 return Inductions.count(PN); 897 } 898 899 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 900 auto *Inst = dyn_cast<Instruction>(V); 901 return (Inst && InductionCastsToIgnore.count(Inst)); 902 } 903 904 bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 905 return isInductionPhi(V) || isCastedInductionVariable(V); 906 } 907 908 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 909 return FirstOrderRecurrences.count(Phi); 910 } 911 912 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 913 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 914 } 915 916 bool LoopVectorizationLegality::blockCanBePredicated( 917 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 918 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 919 920 for (Instruction &I : *BB) { 921 // Check that we don't have a constant expression that can trap as operand. 922 for (Value *Operand : I.operands()) { 923 if (auto *C = dyn_cast<Constant>(Operand)) 924 if (C->canTrap()) 925 return false; 926 } 927 928 // We can predicate blocks with calls to assume, as long as we drop them in 929 // case we flatten the CFG via predication. 930 if (match(&I, m_Intrinsic<Intrinsic::assume>())) { 931 ConditionalAssumes.insert(&I); 932 continue; 933 } 934 935 // We might be able to hoist the load. 936 if (I.mayReadFromMemory()) { 937 auto *LI = dyn_cast<LoadInst>(&I); 938 if (!LI) 939 return false; 940 if (!SafePtrs.count(LI->getPointerOperand())) { 941 // !llvm.mem.parallel_loop_access implies if-conversion safety. 942 // Otherwise, record that the load needs (real or emulated) masking 943 // and let the cost model decide. 944 if (!IsAnnotatedParallel || PreserveGuards) 945 MaskedOp.insert(LI); 946 continue; 947 } 948 } 949 950 if (I.mayWriteToMemory()) { 951 auto *SI = dyn_cast<StoreInst>(&I); 952 if (!SI) 953 return false; 954 // Predicated store requires some form of masking: 955 // 1) masked store HW instruction, 956 // 2) emulation via load-blend-store (only if safe and legal to do so, 957 // be aware on the race conditions), or 958 // 3) element-by-element predicate check and scalar store. 959 MaskedOp.insert(SI); 960 continue; 961 } 962 if (I.mayThrow()) 963 return false; 964 } 965 966 return true; 967 } 968 969 bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 970 if (!EnableIfConversion) { 971 reportVectorizationFailure("If-conversion is disabled", 972 "if-conversion is disabled", 973 "IfConversionDisabled", 974 ORE, TheLoop); 975 return false; 976 } 977 978 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 979 980 // A list of pointers which are known to be dereferenceable within scope of 981 // the loop body for each iteration of the loop which executes. That is, 982 // the memory pointed to can be dereferenced (with the access size implied by 983 // the value's type) unconditionally within the loop header without 984 // introducing a new fault. 985 SmallPtrSet<Value *, 8> SafePointes; 986 987 // Collect safe addresses. 988 for (BasicBlock *BB : TheLoop->blocks()) { 989 if (!blockNeedsPredication(BB)) { 990 for (Instruction &I : *BB) 991 if (auto *Ptr = getLoadStorePointerOperand(&I)) 992 SafePointes.insert(Ptr); 993 continue; 994 } 995 996 // For a block which requires predication, a address may be safe to access 997 // in the loop w/o predication if we can prove dereferenceability facts 998 // sufficient to ensure it'll never fault within the loop. For the moment, 999 // we restrict this to loads; stores are more complicated due to 1000 // concurrency restrictions. 1001 ScalarEvolution &SE = *PSE.getSE(); 1002 for (Instruction &I : *BB) { 1003 LoadInst *LI = dyn_cast<LoadInst>(&I); 1004 if (LI && !mustSuppressSpeculation(*LI) && 1005 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) 1006 SafePointes.insert(LI->getPointerOperand()); 1007 } 1008 } 1009 1010 // Collect the blocks that need predication. 1011 BasicBlock *Header = TheLoop->getHeader(); 1012 for (BasicBlock *BB : TheLoop->blocks()) { 1013 // We don't support switch statements inside loops. 1014 if (!isa<BranchInst>(BB->getTerminator())) { 1015 reportVectorizationFailure("Loop contains a switch statement", 1016 "loop contains a switch statement", 1017 "LoopContainsSwitch", ORE, TheLoop, 1018 BB->getTerminator()); 1019 return false; 1020 } 1021 1022 // We must be able to predicate all blocks that need to be predicated. 1023 if (blockNeedsPredication(BB)) { 1024 if (!blockCanBePredicated(BB, SafePointes)) { 1025 reportVectorizationFailure( 1026 "Control flow cannot be substituted for a select", 1027 "control flow cannot be substituted for a select", 1028 "NoCFGForSelect", ORE, TheLoop, 1029 BB->getTerminator()); 1030 return false; 1031 } 1032 } else if (BB != Header && !canIfConvertPHINodes(BB)) { 1033 reportVectorizationFailure( 1034 "Control flow cannot be substituted for a select", 1035 "control flow cannot be substituted for a select", 1036 "NoCFGForSelect", ORE, TheLoop, 1037 BB->getTerminator()); 1038 return false; 1039 } 1040 } 1041 1042 // We can if-convert this loop. 1043 return true; 1044 } 1045 1046 // Helper function to canVectorizeLoopNestCFG. 1047 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 1048 bool UseVPlanNativePath) { 1049 assert((UseVPlanNativePath || Lp->empty()) && 1050 "VPlan-native path is not enabled."); 1051 1052 // TODO: ORE should be improved to show more accurate information when an 1053 // outer loop can't be vectorized because a nested loop is not understood or 1054 // legal. Something like: "outer_loop_location: loop not vectorized: 1055 // (inner_loop_location) loop control flow is not understood by vectorizer". 1056 1057 // Store the result and return it at the end instead of exiting early, in case 1058 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1059 bool Result = true; 1060 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1061 1062 // We must have a loop in canonical form. Loops with indirectbr in them cannot 1063 // be canonicalized. 1064 if (!Lp->getLoopPreheader()) { 1065 reportVectorizationFailure("Loop doesn't have a legal pre-header", 1066 "loop control flow is not understood by vectorizer", 1067 "CFGNotUnderstood", ORE, TheLoop); 1068 if (DoExtraAnalysis) 1069 Result = false; 1070 else 1071 return false; 1072 } 1073 1074 // We must have a single backedge. 1075 if (Lp->getNumBackEdges() != 1) { 1076 reportVectorizationFailure("The loop must have a single backedge", 1077 "loop control flow is not understood by vectorizer", 1078 "CFGNotUnderstood", ORE, TheLoop); 1079 if (DoExtraAnalysis) 1080 Result = false; 1081 else 1082 return false; 1083 } 1084 1085 // We must have a single exiting block. 1086 if (!Lp->getExitingBlock()) { 1087 reportVectorizationFailure("The loop must have an exiting block", 1088 "loop control flow is not understood by vectorizer", 1089 "CFGNotUnderstood", ORE, TheLoop); 1090 if (DoExtraAnalysis) 1091 Result = false; 1092 else 1093 return false; 1094 } 1095 1096 // We only handle bottom-tested loops, i.e. loop in which the condition is 1097 // checked at the end of each iteration. With that we can assume that all 1098 // instructions in the loop are executed the same number of times. 1099 if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 1100 reportVectorizationFailure("The exiting block is not the loop latch", 1101 "loop control flow is not understood by vectorizer", 1102 "CFGNotUnderstood", ORE, TheLoop); 1103 if (DoExtraAnalysis) 1104 Result = false; 1105 else 1106 return false; 1107 } 1108 1109 return Result; 1110 } 1111 1112 bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1113 Loop *Lp, bool UseVPlanNativePath) { 1114 // Store the result and return it at the end instead of exiting early, in case 1115 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1116 bool Result = true; 1117 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1118 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1119 if (DoExtraAnalysis) 1120 Result = false; 1121 else 1122 return false; 1123 } 1124 1125 // Recursively check whether the loop control flow of nested loops is 1126 // understood. 1127 for (Loop *SubLp : *Lp) 1128 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1129 if (DoExtraAnalysis) 1130 Result = false; 1131 else 1132 return false; 1133 } 1134 1135 return Result; 1136 } 1137 1138 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1139 // Store the result and return it at the end instead of exiting early, in case 1140 // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1141 bool Result = true; 1142 1143 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1144 // Check whether the loop-related control flow in the loop nest is expected by 1145 // vectorizer. 1146 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1147 if (DoExtraAnalysis) 1148 Result = false; 1149 else 1150 return false; 1151 } 1152 1153 // We need to have a loop header. 1154 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1155 << '\n'); 1156 1157 // Specific checks for outer loops. We skip the remaining legal checks at this 1158 // point because they don't support outer loops. 1159 if (!TheLoop->empty()) { 1160 assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1161 1162 if (!canVectorizeOuterLoop()) { 1163 reportVectorizationFailure("Unsupported outer loop", 1164 "unsupported outer loop", 1165 "UnsupportedOuterLoop", 1166 ORE, TheLoop); 1167 // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1168 // outer loops. 1169 return false; 1170 } 1171 1172 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1173 return Result; 1174 } 1175 1176 assert(TheLoop->empty() && "Inner loop expected."); 1177 // Check if we can if-convert non-single-bb loops. 1178 unsigned NumBlocks = TheLoop->getNumBlocks(); 1179 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1180 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1181 if (DoExtraAnalysis) 1182 Result = false; 1183 else 1184 return false; 1185 } 1186 1187 // Check if we can vectorize the instructions and CFG in this loop. 1188 if (!canVectorizeInstrs()) { 1189 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1190 if (DoExtraAnalysis) 1191 Result = false; 1192 else 1193 return false; 1194 } 1195 1196 // Go over each instruction and look at memory deps. 1197 if (!canVectorizeMemory()) { 1198 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1199 if (DoExtraAnalysis) 1200 Result = false; 1201 else 1202 return false; 1203 } 1204 1205 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1206 << (LAI->getRuntimePointerChecking()->Need 1207 ? " (with a runtime bound check)" 1208 : "") 1209 << "!\n"); 1210 1211 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1212 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1213 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1214 1215 if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 1216 reportVectorizationFailure("Too many SCEV checks needed", 1217 "Too many SCEV assumptions need to be made and checked at runtime", 1218 "TooManySCEVRunTimeChecks", ORE, TheLoop); 1219 if (DoExtraAnalysis) 1220 Result = false; 1221 else 1222 return false; 1223 } 1224 1225 // Okay! We've done all the tests. If any have failed, return false. Otherwise 1226 // we can vectorize, and at this point we don't have any other mem analysis 1227 // which may limit our maximum vectorization factor, so just return true with 1228 // no restrictions. 1229 return Result; 1230 } 1231 1232 bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1233 1234 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1235 1236 SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1237 1238 for (auto &Reduction : getReductionVars()) 1239 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1240 1241 // TODO: handle non-reduction outside users when tail is folded by masking. 1242 for (auto *AE : AllowedExit) { 1243 // Check that all users of allowed exit values are inside the loop or 1244 // are the live-out of a reduction. 1245 if (ReductionLiveOuts.count(AE)) 1246 continue; 1247 for (User *U : AE->users()) { 1248 Instruction *UI = cast<Instruction>(U); 1249 if (TheLoop->contains(UI)) 1250 continue; 1251 reportVectorizationFailure( 1252 "Cannot fold tail by masking, loop has an outside user for", 1253 "Cannot fold tail by masking in the presence of live outs.", 1254 "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1255 return false; 1256 } 1257 } 1258 1259 // The list of pointers that we can safely read and write to remains empty. 1260 SmallPtrSet<Value *, 8> SafePointers; 1261 1262 // Check and mark all blocks for predication, including those that ordinarily 1263 // do not need predication such as the header block. 1264 for (BasicBlock *BB : TheLoop->blocks()) { 1265 if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 1266 reportVectorizationFailure( 1267 "Cannot fold tail by masking as required", 1268 "control flow cannot be substituted for a select", 1269 "NoCFGForSelect", ORE, TheLoop, 1270 BB->getTerminator()); 1271 return false; 1272 } 1273 } 1274 1275 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1276 return true; 1277 } 1278 1279 } // namespace llvm 1280